translate.c source code [qemu/target/arm/translate.c]

1	/*
2	* ARM translation
3	*
4	* Copyright (c) 2003 Fabrice Bellard
5	* Copyright (c) 2005-2007 CodeSourcery
6	* Copyright (c) 2007 OpenedHand, Ltd.
7	*
8	* This library is free software; you can redistribute it and/or
9	* modify it under the terms of the GNU Lesser General Public
10	* License as published by the Free Software Foundation; either
11	* version 2 of the License, or (at your option) any later version.
12	*
13	* This library is distributed in the hope that it will be useful,
14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16	* Lesser General Public License for more details.
17	*
18	* You should have received a copy of the GNU Lesser General Public
19	* License along with this library; if not, see <http://www.gnu.org/licenses/>.
20	*/
21	#include "qemu/osdep.h"
22
23	#include "cpu.h"
24	#include "internals.h"
25	#include "disas/disas.h"
26	#include "exec/exec-all.h"
27	#include "tcg-op.h"
28	#include "tcg-op-gvec.h"
29	#include "qemu/log.h"
30	#include "qemu/bitops.h"
31	#include "arm_ldst.h"
32	#include "hw/semihosting/semihost.h"
33
34	#include "exec/helper-proto.h"
35	#include "exec/helper-gen.h"
36
37	#include "trace-tcg.h"
38	#include "exec/log.h"
39
40
/*
 * Architecture-version feature tests.  Each macro refers to a variable
 * named 's' (the translation context) that must be in scope where the
 * macro is used.
 */
#define ENABLE_ARCH_4T    arm_dc_feature(s, ARM_FEATURE_V4T)
#define ENABLE_ARCH_5     arm_dc_feature(s, ARM_FEATURE_V5)
/* currently all emulated v5 cores are also v5TE, so don't bother */
#define ENABLE_ARCH_5TE   arm_dc_feature(s, ARM_FEATURE_V5)
#define ENABLE_ARCH_5J    dc_isar_feature(jazelle, s)
#define ENABLE_ARCH_6     arm_dc_feature(s, ARM_FEATURE_V6)
#define ENABLE_ARCH_6K    arm_dc_feature(s, ARM_FEATURE_V6K)
#define ENABLE_ARCH_6T2   arm_dc_feature(s, ARM_FEATURE_THUMB2)
#define ENABLE_ARCH_7     arm_dc_feature(s, ARM_FEATURE_V7)
#define ENABLE_ARCH_8     arm_dc_feature(s, ARM_FEATURE_V8)

/*
 * Jump to the 'illegal_op' label of the enclosing function unless the
 * ENABLE_ARCH_<x> test passes.
 */
#define ARCH(x) do { if (!ENABLE_ARCH_##x) goto illegal_op; } while(0)
53
54	#include "translate.h"
55
/*
 * IS_USER(s): non-zero when translating code that runs in user mode.
 * In a user-only build this is a constant; otherwise it reads the 'user'
 * field of the context.  The argument is parenthesised so that any
 * pointer expression (e.g. '&ctx') can be passed safely.
 */
#if defined(CONFIG_USER_ONLY)
#define IS_USER(s) 1
#else
#define IS_USER(s) ((s)->user)
#endif
61
62	/ We reuse the same 64-bit temporaries for efficiency. /
63	static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
64	static TCGv_i32 cpu_R[`16`];
65	TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
66	TCGv_i64 cpu_exclusive_addr;
67	TCGv_i64 cpu_exclusive_val;
68
69	#include "exec/gen-icount.h"
70
/* Names given to the sixteen core-register TCG globals, indexed by number. */
static const char * const regnames[] =
    { "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7",
      "r8", "r9", "r10", "r11", "r12", "r13", "r14", "pc" };
74
/* Function prototypes for gen_ functions calling Neon helpers.  */
typedef void NeonGenThreeOpEnvFn(TCGv_i32, TCGv_env, TCGv_i32,
                                 TCGv_i32, TCGv_i32);
/* Function prototypes for gen_ functions for fix point conversions */
typedef void VFPGenFixPointFn(TCGv_i32, TCGv_i32, TCGv_i32, TCGv_ptr);
80
81	/ initialize TCG globals. /
82	void arm_translate_init(void)
83	{
84	int i;
85
86	for (i = `0`; i < `16`; i++) {
87	cpu_R[i] = tcg_global_mem_new_i32(cpu_env,
88	offsetof(CPUARMState, regs[i]),
89	regnames[i]);
90	}
91	cpu_CF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, CF), "CF");
92	cpu_NF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, NF), "NF");
93	cpu_VF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, VF), "VF");
94	cpu_ZF = tcg_global_mem_new_i32(cpu_env, offsetof(CPUARMState, ZF), "ZF");
95
96	cpu_exclusive_addr = tcg_global_mem_new_i64(cpu_env,
97	offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
98	cpu_exclusive_val = tcg_global_mem_new_i64(cpu_env,
99	offsetof(CPUARMState, exclusive_val), "exclusive_val");
100
101	a64_translate_init();
102	}
103
/* Flags for the disas_set_da_iss info argument:
 * lower bits hold the Rt register number, higher bits are flags.
 */
typedef enum ISSInfo {
    ISSNone = 0,
    ISSRegMask = 0x1f,          /* mask extracting the register number */
    ISSInvalid = (1 << 5),      /* do not record any ISS information */
    ISSIsAcqRel = (1 << 6),
    ISSIsWrite = (1 << 7),
    ISSIs16Bit = (1 << 8),
} ISSInfo;
115
116	/ Save the syndrome information for a Data Abort /
117	static void disas_set_da_iss(DisasContext *s, MemOp memop, ISSInfo issinfo)
118	{
119	uint32_t syn;
120	int sas = memop & MO_SIZE;
121	bool sse = memop & MO_SIGN;
122	bool is_acqrel = issinfo & ISSIsAcqRel;
123	bool is_write = issinfo & ISSIsWrite;
124	bool is_16bit = issinfo & ISSIs16Bit;
125	int srt = issinfo & ISSRegMask;
126
127	if (issinfo & ISSInvalid) {
128	/ Some callsites want to conditionally provide ISS info,*
129	* eg "only if this was not a writeback"
130	*/
131	return;
132	}
133
134	if (srt == `15`) {
135	/ For AArch32, insns where the src/dest is R15 never generate*
136	* ISS information. Catching that here saves checking at all
137	* the call sites.
138	*/
139	return;
140	}
141
142	syn = syn_data_abort_with_iss(`0`, sas, sse, srt, `0`, is_acqrel,
143	`0`, `0`, `0`, is_write, `0`, is_16bit);
144	disas_set_insn_syndrome(s, syn);
145	}
146
147	static inline int get_a32_user_mem_index(DisasContext *s)
148	{
149	/ Return the core mmu_idx to use for A32/T32 "unprivileged load/store"*
150	* insns:
151	* if PL2, UNPREDICTABLE (we choose to implement as if PL0)
152	* otherwise, access as if at PL0.
153	*/
154	switch (s->mmu_idx) {
155	case ARMMMUIdx_S1E2: / this one is UNPREDICTABLE /
156	case ARMMMUIdx_S12NSE0:
157	case ARMMMUIdx_S12NSE1:
158	return arm_to_core_mmu_idx(ARMMMUIdx_S12NSE0);
159	case ARMMMUIdx_S1E3:
160	case ARMMMUIdx_S1SE0:
161	case ARMMMUIdx_S1SE1:
162	return arm_to_core_mmu_idx(ARMMMUIdx_S1SE0);
163	case ARMMMUIdx_MUser:
164	case ARMMMUIdx_MPriv:
165	return arm_to_core_mmu_idx(ARMMMUIdx_MUser);
166	case ARMMMUIdx_MUserNegPri:
167	case ARMMMUIdx_MPrivNegPri:
168	return arm_to_core_mmu_idx(ARMMMUIdx_MUserNegPri);
169	case ARMMMUIdx_MSUser:
170	case ARMMMUIdx_MSPriv:
171	return arm_to_core_mmu_idx(ARMMMUIdx_MSUser);
172	case ARMMMUIdx_MSUserNegPri:
173	case ARMMMUIdx_MSPrivNegPri:
174	return arm_to_core_mmu_idx(ARMMMUIdx_MSUserNegPri);
175	case ARMMMUIdx_S2NS:
176	default:
177	g_assert_not_reached();
178	}
179	}
180
181	static inline TCGv_i32 load_cpu_offset(int offset)
182	{
183	TCGv_i32 tmp = tcg_temp_new_i32();
184	tcg_gen_ld_i32(tmp, cpu_env, offset);
185	return tmp;
186	}
187
188	#define load_cpu_field(name) load_cpu_offset(offsetof(CPUARMState, name))
189
190	static inline void store_cpu_offset(TCGv_i32 var, int offset)
191	{
192	tcg_gen_st_i32(var, cpu_env, offset);
193	tcg_temp_free_i32(var);
194	}
195
196	#define store_cpu_field(var, name) \
197	store_cpu_offset(var, offsetof(CPUARMState, name))
198
199	/ The architectural value of PC. /
200	static uint32_t read_pc(DisasContext *s)
201	{
202	return s->pc_curr + (s->thumb ? `4` : `8`);
203	}
204
205	/ Set a variable to the value of a CPU register. /
206	static void load_reg_var(DisasContext s, TCGv_i32 var, int* reg)
207	{
208	if (reg == `15`) {
209	tcg_gen_movi_i32(var, read_pc(s));
210	} else {
211	tcg_gen_mov_i32(var, cpu_R[reg]);
212	}
213	}
214
215	/ Create a new temporary and set it to the value of a CPU register. /
216	static inline TCGv_i32 load_reg(DisasContext s, int* reg)
217	{
218	TCGv_i32 tmp = tcg_temp_new_i32();
219	load_reg_var(s, tmp, reg);
220	return tmp;
221	}
222
223	/*
224	* Create a new temp, REG + OFS, except PC is ALIGN(PC, 4).
225	* This is used for load/store for which use of PC implies (literal),
226	* or ADD that implies ADR.
227	*/
228	static TCGv_i32 add_reg_for_lit(DisasContext s, int* reg, int ofs)
229	{
230	TCGv_i32 tmp = tcg_temp_new_i32();
231
232	if (reg == `15`) {
233	tcg_gen_movi_i32(tmp, (read_pc(s) & ~`3`) + ofs);
234	} else {
235	tcg_gen_addi_i32(tmp, cpu_R[reg], ofs);
236	}
237	return tmp;
238	}
239
240	/ Set a CPU register. The source must be a temporary and will be*
241	marked as dead. /*
242	static void store_reg(DisasContext s, int* reg, TCGv_i32 var)
243	{
244	if (reg == `15`) {
245	/ In Thumb mode, we must ignore bit 0.*
246	* In ARM mode, for ARMv4 and ARMv5, it is UNPREDICTABLE if bits [1:0]
247	* are not 0b00, but for ARMv6 and above, we must ignore bits [1:0].
248	* We choose to ignore [1:0] in ARM mode for all architecture versions.
249	*/
250	tcg_gen_andi_i32(var, var, s->thumb ? ~`1` : ~`3`);
251	s->base.is_jmp = DISAS_JUMP;
252	}
253	tcg_gen_mov_i32(cpu_R[reg], var);
254	tcg_temp_free_i32(var);
255	}
256
257	/*
258	* Variant of store_reg which applies v8M stack-limit checks before updating
259	* SP. If the check fails this will result in an exception being taken.
260	* We disable the stack checks for CONFIG_USER_ONLY because we have
261	* no idea what the stack limits should be in that case.
262	* If stack checking is not being done this just acts like store_reg().
263	*/
264	static void store_sp_checked(DisasContext *s, TCGv_i32 var)
265	{
266	#ifndef CONFIG_USER_ONLY
267	if (s->v8m_stackcheck) {
268	gen_helper_v8m_stackcheck(cpu_env, var);
269	}
270	#endif
271	store_reg(s, `13`, var);
272	}
273
/* Value extensions.  Each macro operates in place on 'var'. */
#define gen_uxtb(var) tcg_gen_ext8u_i32(var, var)
#define gen_uxth(var) tcg_gen_ext16u_i32(var, var)
#define gen_sxtb(var) tcg_gen_ext8s_i32(var, var)
#define gen_sxth(var) tcg_gen_ext16s_i32(var, var)

#define gen_sxtb16(var) gen_helper_sxtb16(var, var)
#define gen_uxtb16(var) gen_helper_uxtb16(var, var)
282
283
284	static inline void gen_set_cpsr(TCGv_i32 var, uint32_t mask)
285	{
286	TCGv_i32 tmp_mask = tcg_const_i32(mask);
287	gen_helper_cpsr_write(cpu_env, var, tmp_mask);
288	tcg_temp_free_i32(tmp_mask);
289	}
290	/ Set NZCV flags from the high 4 bits of var. /
291	#define gen_set_nzcv(var) gen_set_cpsr(var, CPSR_NZCV)
292
293	static void gen_exception_internal(int excp)
294	{
295	TCGv_i32 tcg_excp = tcg_const_i32(excp);
296
297	assert(excp_is_internal(excp));
298	gen_helper_exception_internal(cpu_env, tcg_excp);
299	tcg_temp_free_i32(tcg_excp);
300	}
301
302	static void gen_step_complete_exception(DisasContext *s)
303	{
304	/ We just completed step of an insn. Move from Active-not-pending*
305	* to Active-pending, and then also take the swstep exception.
306	* This corresponds to making the (IMPDEF) choice to prioritize
307	* swstep exceptions over asynchronous exceptions taken to an exception
308	* level where debug is disabled. This choice has the advantage that
309	* we do not need to maintain internal state corresponding to the
310	* ISV/EX syndrome bits between completion of the step and generation
311	* of the exception, and our syndrome information is always correct.
312	*/
313	gen_ss_advance(s);
314	gen_swstep_exception(s, `1`, s->is_ldex);
315	s->base.is_jmp = DISAS_NORETURN;
316	}
317
318	static void gen_singlestep_exception(DisasContext *s)
319	{
320	/ Generate the right kind of exception for singlestep, which is*
321	* either the architectural singlestep or EXCP_DEBUG for QEMU's
322	* gdb singlestepping.
323	*/
324	if (s->ss_active) {
325	gen_step_complete_exception(s);
326	} else {
327	gen_exception_internal(EXCP_DEBUG);
328	}
329	}
330
331	static inline bool is_singlestepping(DisasContext *s)
332	{
333	/ Return true if we are singlestepping either because of*
334	* architectural singlestep or QEMU gdbstub singlestep. This does
335	* not include the command line '-singlestep' mode which is rather
336	* misnamed as it only means "one instruction per TB" and doesn't
337	* affect the code we generate.
338	*/
339	return s->base.singlestep_enabled \|\| s->ss_active;
340	}
341
342	static void gen_smul_dual(TCGv_i32 a, TCGv_i32 b)
343	{
344	TCGv_i32 tmp1 = tcg_temp_new_i32();
345	TCGv_i32 tmp2 = tcg_temp_new_i32();
346	tcg_gen_ext16s_i32(tmp1, a);
347	tcg_gen_ext16s_i32(tmp2, b);
348	tcg_gen_mul_i32(tmp1, tmp1, tmp2);
349	tcg_temp_free_i32(tmp2);
350	tcg_gen_sari_i32(a, a, `16`);
351	tcg_gen_sari_i32(b, b, `16`);
352	tcg_gen_mul_i32(b, b, a);
353	tcg_gen_mov_i32(a, tmp1);
354	tcg_temp_free_i32(tmp1);
355	}
356
357	/ Byteswap each halfword. /
358	static void gen_rev16(TCGv_i32 dest, TCGv_i32 var)
359	{
360	TCGv_i32 tmp = tcg_temp_new_i32();
361	TCGv_i32 mask = tcg_const_i32(`0x00ff00ff`);
362	tcg_gen_shri_i32(tmp, var, `8`);
363	tcg_gen_and_i32(tmp, tmp, mask);
364	tcg_gen_and_i32(var, var, mask);
365	tcg_gen_shli_i32(var, var, `8`);
366	tcg_gen_or_i32(dest, var, tmp);
367	tcg_temp_free_i32(mask);
368	tcg_temp_free_i32(tmp);
369	}
370
371	/ Byteswap low halfword and sign extend. /
372	static void gen_revsh(TCGv_i32 dest, TCGv_i32 var)
373	{
374	tcg_gen_ext16u_i32(var, var);
375	tcg_gen_bswap16_i32(var, var);
376	tcg_gen_ext16s_i32(dest, var);
377	}
378
379	/ 32x32->64 multiply. Marks inputs as dead. /
380	static TCGv_i64 gen_mulu_i64_i32(TCGv_i32 a, TCGv_i32 b)
381	{
382	TCGv_i32 lo = tcg_temp_new_i32();
383	TCGv_i32 hi = tcg_temp_new_i32();
384	TCGv_i64 ret;
385
386	tcg_gen_mulu2_i32(lo, hi, a, b);
387	tcg_temp_free_i32(a);
388	tcg_temp_free_i32(b);
389
390	ret = tcg_temp_new_i64();
391	tcg_gen_concat_i32_i64(ret, lo, hi);
392	tcg_temp_free_i32(lo);
393	tcg_temp_free_i32(hi);
394
395	return ret;
396	}
397
398	static TCGv_i64 gen_muls_i64_i32(TCGv_i32 a, TCGv_i32 b)
399	{
400	TCGv_i32 lo = tcg_temp_new_i32();
401	TCGv_i32 hi = tcg_temp_new_i32();
402	TCGv_i64 ret;
403
404	tcg_gen_muls2_i32(lo, hi, a, b);
405	tcg_temp_free_i32(a);
406	tcg_temp_free_i32(b);
407
408	ret = tcg_temp_new_i64();
409	tcg_gen_concat_i32_i64(ret, lo, hi);
410	tcg_temp_free_i32(lo);
411	tcg_temp_free_i32(hi);
412
413	return ret;
414	}
415
416	/ Swap low and high halfwords. /
417	static void gen_swap_half(TCGv_i32 var)
418	{
419	tcg_gen_rotri_i32(var, var, `16`);
420	}
421
422	/ Dual 16-bit add. Result placed in t0 and t1 is marked as dead.*
423	tmp = (t0 ^ t1) & 0x8000;
424	t0 &= ~0x8000;
425	t1 &= ~0x8000;
426	t0 = (t0 + t1) ^ tmp;
427	*/
428
429	static void gen_add16(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
430	{
431	TCGv_i32 tmp = tcg_temp_new_i32();
432	tcg_gen_xor_i32(tmp, t0, t1);
433	tcg_gen_andi_i32(tmp, tmp, `0x8000`);
434	tcg_gen_andi_i32(t0, t0, ~`0x8000`);
435	tcg_gen_andi_i32(t1, t1, ~`0x8000`);
436	tcg_gen_add_i32(t0, t0, t1);
437	tcg_gen_xor_i32(dest, t0, tmp);
438	tcg_temp_free_i32(tmp);
439	}
440
441	/ Set N and Z flags from var. /
442	static inline void gen_logic_CC(TCGv_i32 var)
443	{
444	tcg_gen_mov_i32(cpu_NF, var);
445	tcg_gen_mov_i32(cpu_ZF, var);
446	}
447
448	/ dest = T0 + T1 + CF. /
449	static void gen_add_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
450	{
451	tcg_gen_add_i32(dest, t0, t1);
452	tcg_gen_add_i32(dest, dest, cpu_CF);
453	}
454
455	/ dest = T0 - T1 + CF - 1. /
456	static void gen_sub_carry(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
457	{
458	tcg_gen_sub_i32(dest, t0, t1);
459	tcg_gen_add_i32(dest, dest, cpu_CF);
460	tcg_gen_subi_i32(dest, dest, `1`);
461	}
462
463	/ dest = T0 + T1. Compute C, N, V and Z flags /
464	static void gen_add_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
465	{
466	TCGv_i32 tmp = tcg_temp_new_i32();
467	tcg_gen_movi_i32(tmp, `0`);
468	tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, t1, tmp);
469	tcg_gen_mov_i32(cpu_ZF, cpu_NF);
470	tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
471	tcg_gen_xor_i32(tmp, t0, t1);
472	tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
473	tcg_temp_free_i32(tmp);
474	tcg_gen_mov_i32(dest, cpu_NF);
475	}
476
477	/ dest = T0 + T1 + CF. Compute C, N, V and Z flags /
478	static void gen_adc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
479	{
480	TCGv_i32 tmp = tcg_temp_new_i32();
481	if (TCG_TARGET_HAS_add2_i32) {
482	tcg_gen_movi_i32(tmp, `0`);
483	tcg_gen_add2_i32(cpu_NF, cpu_CF, t0, tmp, cpu_CF, tmp);
484	tcg_gen_add2_i32(cpu_NF, cpu_CF, cpu_NF, cpu_CF, t1, tmp);
485	} else {
486	TCGv_i64 q0 = tcg_temp_new_i64();
487	TCGv_i64 q1 = tcg_temp_new_i64();
488	tcg_gen_extu_i32_i64(q0, t0);
489	tcg_gen_extu_i32_i64(q1, t1);
490	tcg_gen_add_i64(q0, q0, q1);
491	tcg_gen_extu_i32_i64(q1, cpu_CF);
492	tcg_gen_add_i64(q0, q0, q1);
493	tcg_gen_extr_i64_i32(cpu_NF, cpu_CF, q0);
494	tcg_temp_free_i64(q0);
495	tcg_temp_free_i64(q1);
496	}
497	tcg_gen_mov_i32(cpu_ZF, cpu_NF);
498	tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
499	tcg_gen_xor_i32(tmp, t0, t1);
500	tcg_gen_andc_i32(cpu_VF, cpu_VF, tmp);
501	tcg_temp_free_i32(tmp);
502	tcg_gen_mov_i32(dest, cpu_NF);
503	}
504
505	/ dest = T0 - T1. Compute C, N, V and Z flags /
506	static void gen_sub_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
507	{
508	TCGv_i32 tmp;
509	tcg_gen_sub_i32(cpu_NF, t0, t1);
510	tcg_gen_mov_i32(cpu_ZF, cpu_NF);
511	tcg_gen_setcond_i32(TCG_COND_GEU, cpu_CF, t0, t1);
512	tcg_gen_xor_i32(cpu_VF, cpu_NF, t0);
513	tmp = tcg_temp_new_i32();
514	tcg_gen_xor_i32(tmp, t0, t1);
515	tcg_gen_and_i32(cpu_VF, cpu_VF, tmp);
516	tcg_temp_free_i32(tmp);
517	tcg_gen_mov_i32(dest, cpu_NF);
518	}
519
520	/ dest = T0 + ~T1 + CF. Compute C, N, V and Z flags /
521	static void gen_sbc_CC(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
522	{
523	TCGv_i32 tmp = tcg_temp_new_i32();
524	tcg_gen_not_i32(tmp, t1);
525	gen_adc_CC(dest, t0, tmp);
526	tcg_temp_free_i32(tmp);
527	}
528
529	#define GEN_SHIFT(name) \
530	static void gen_##name(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1) \
531	{ \
532	TCGv_i32 tmp1, tmp2, tmp3; \
533	tmp1 = tcg_temp_new_i32(); \
534	tcg_gen_andi_i32(tmp1, t1, 0xff); \
535	tmp2 = tcg_const_i32(0); \
536	tmp3 = tcg_const_i32(0x1f); \
537	tcg_gen_movcond_i32(TCG_COND_GTU, tmp2, tmp1, tmp3, tmp2, t0); \
538	tcg_temp_free_i32(tmp3); \
539	tcg_gen_andi_i32(tmp1, tmp1, 0x1f); \
540	tcg_gen_##name##_i32(dest, tmp2, tmp1); \
541	tcg_temp_free_i32(tmp2); \
542	tcg_temp_free_i32(tmp1); \
543	}
544	GEN_SHIFT(shl)
545	GEN_SHIFT(shr)
546	#undef GEN_SHIFT
547
548	static void gen_sar(TCGv_i32 dest, TCGv_i32 t0, TCGv_i32 t1)
549	{
550	TCGv_i32 tmp1, tmp2;
551	tmp1 = tcg_temp_new_i32();
552	tcg_gen_andi_i32(tmp1, t1, `0xff`);
553	tmp2 = tcg_const_i32(`0x1f`);
554	tcg_gen_movcond_i32(TCG_COND_GTU, tmp1, tmp1, tmp2, tmp2, tmp1);
555	tcg_temp_free_i32(tmp2);
556	tcg_gen_sar_i32(dest, t0, tmp1);
557	tcg_temp_free_i32(tmp1);
558	}
559
560	static void shifter_out_im(TCGv_i32 var, int shift)
561	{
562	tcg_gen_extract_i32(cpu_CF, var, shift, `1`);
563	}
564
565	/ Shift by immediate. Includes special handling for shift == 0. /
566	static inline void gen_arm_shift_im(TCGv_i32 var, int shiftop,
567	int shift, int flags)
568	{
569	switch (shiftop) {
570	case `0`: / LSL /
571	if (shift != `0`) {
572	if (flags)
573	shifter_out_im(var, `32` - shift);
574	tcg_gen_shli_i32(var, var, shift);
575	}
576	break;
577	case `1`: / LSR /
578	if (shift == `0`) {
579	if (flags) {
580	tcg_gen_shri_i32(cpu_CF, var, `31`);
581	}
582	tcg_gen_movi_i32(var, `0`);
583	} else {
584	if (flags)
585	shifter_out_im(var, shift - `1`);
586	tcg_gen_shri_i32(var, var, shift);
587	}
588	break;
589	case `2`: / ASR /
590	if (shift == `0`)
591	shift = `32`;
592	if (flags)
593	shifter_out_im(var, shift - `1`);
594	if (shift == `32`)
595	shift = `31`;
596	tcg_gen_sari_i32(var, var, shift);
597	break;
598	case `3`: / ROR/RRX /
599	if (shift != `0`) {
600	if (flags)
601	shifter_out_im(var, shift - `1`);
602	tcg_gen_rotri_i32(var, var, shift); break;
603	} else {
604	TCGv_i32 tmp = tcg_temp_new_i32();
605	tcg_gen_shli_i32(tmp, cpu_CF, `31`);
606	if (flags)
607	shifter_out_im(var, `0`);
608	tcg_gen_shri_i32(var, var, `1`);
609	tcg_gen_or_i32(var, var, tmp);
610	tcg_temp_free_i32(tmp);
611	}
612	}
613	};
614
615	static inline void gen_arm_shift_reg(TCGv_i32 var, int shiftop,
616	TCGv_i32 shift, int flags)
617	{
618	if (flags) {
619	switch (shiftop) {
620	case `0`: gen_helper_shl_cc(var, cpu_env, var, shift); break;
621	case `1`: gen_helper_shr_cc(var, cpu_env, var, shift); break;
622	case `2`: gen_helper_sar_cc(var, cpu_env, var, shift); break;
623	case `3`: gen_helper_ror_cc(var, cpu_env, var, shift); break;
624	}
625	} else {
626	switch (shiftop) {
627	case `0`:
628	gen_shl(var, var, shift);
629	break;
630	case `1`:
631	gen_shr(var, var, shift);
632	break;
633	case `2`:
634	gen_sar(var, var, shift);
635	break;
636	case `3`: tcg_gen_andi_i32(shift, shift, `0x1f`);
637	tcg_gen_rotr_i32(var, var, shift); break;
638	}
639	}
640	tcg_temp_free_i32(shift);
641	}
642
643	/*
644	* Generate a conditional based on ARM condition code cc.
645	* This is common between ARM and Aarch64 targets.
646	*/
647	void arm_test_cc(DisasCompare cmp, int* cc)
648	{
649	TCGv_i32 value;
650	TCGCond cond;
651	bool global = true;
652
653	switch (cc) {
654	case `0`: / eq: Z /
655	case `1`: / ne: !Z /
656	cond = TCG_COND_EQ;
657	value = cpu_ZF;
658	break;
659
660	case `2`: / cs: C /
661	case `3`: / cc: !C /
662	cond = TCG_COND_NE;
663	value = cpu_CF;
664	break;
665
666	case `4`: / mi: N /
667	case `5`: / pl: !N /
668	cond = TCG_COND_LT;
669	value = cpu_NF;
670	break;
671
672	case `6`: / vs: V /
673	case `7`: / vc: !V /
674	cond = TCG_COND_LT;
675	value = cpu_VF;
676	break;
677
678	case `8`: / hi: C && !Z /
679	case `9`: / ls: !C \|\| Z -> !(C && !Z) /
680	cond = TCG_COND_NE;
681	value = tcg_temp_new_i32();
682	global = false;
683	/ CF is 1 for C, so -CF is an all-bits-set mask for C;*
684	ZF is non-zero for !Z; so AND the two subexpressions. /*
685	tcg_gen_neg_i32(value, cpu_CF);
686	tcg_gen_and_i32(value, value, cpu_ZF);
687	break;
688
689	case `10`: / ge: N == V -> N ^ V == 0 /
690	case `11`: / lt: N != V -> N ^ V != 0 /
691	/ Since we're only interested in the sign bit, == 0 is >= 0. /
692	cond = TCG_COND_GE;
693	value = tcg_temp_new_i32();
694	global = false;
695	tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
696	break;
697
698	case `12`: / gt: !Z && N == V /
699	case `13`: / le: Z \|\| N != V /
700	cond = TCG_COND_NE;
701	value = tcg_temp_new_i32();
702	global = false;
703	/ (N == V) is equal to the sign bit of ~(NF ^ VF). Propagate*
704	* the sign bit then AND with ZF to yield the result. */
705	tcg_gen_xor_i32(value, cpu_VF, cpu_NF);
706	tcg_gen_sari_i32(value, value, `31`);
707	tcg_gen_andc_i32(value, cpu_ZF, value);
708	break;
709
710	case `14`: / always /
711	case `15`: / always /
712	/ Use the ALWAYS condition, which will fold early.*
713	* It doesn't matter what we use for the value. */
714	cond = TCG_COND_ALWAYS;
715	value = cpu_ZF;
716	goto no_invert;
717
718	default:
719	fprintf(stderr, "Bad condition code 0x%x\n", cc);
720	abort();
721	}
722
723	if (cc & `1`) {
724	cond = tcg_invert_cond(cond);
725	}
726
727	no_invert:
728	cmp->cond = cond;
729	cmp->value = value;
730	cmp->value_global = global;
731	}
732
733	void arm_free_cc(DisasCompare *cmp)
734	{
735	if (!cmp->value_global) {
736	tcg_temp_free_i32(cmp->value);
737	}
738	}
739
740	void arm_jump_cc(DisasCompare cmp, TCGLabel label)
741	{
742	tcg_gen_brcondi_i32(cmp->cond, cmp->value, `0`, label);
743	}
744
745	void arm_gen_test_cc(int cc, TCGLabel *label)
746	{
747	DisasCompare cmp;
748	arm_test_cc(&cmp, cc);
749	arm_jump_cc(&cmp, label);
750	arm_free_cc(&cmp);
751	}
752
753	static inline void gen_set_condexec(DisasContext *s)
754	{
755	if (s->condexec_mask) {
756	uint32_t val = (s->condexec_cond << `4`) \| (s->condexec_mask >> `1`);
757	TCGv_i32 tmp = tcg_temp_new_i32();
758	tcg_gen_movi_i32(tmp, val);
759	store_cpu_field(tmp, condexec_bits);
760	}
761	}
762
763	static inline void gen_set_pc_im(DisasContext *s, target_ulong val)
764	{
765	tcg_gen_movi_i32(cpu_R[`15`], val);
766	}
767
768	/ Set PC and Thumb state from var. var is marked as dead. /
769	static inline void gen_bx(DisasContext *s, TCGv_i32 var)
770	{
771	s->base.is_jmp = DISAS_JUMP;
772	tcg_gen_andi_i32(cpu_R[`15`], var, ~`1`);
773	tcg_gen_andi_i32(var, var, `1`);
774	store_cpu_field(var, thumb);
775	}
776
777	/*
778	* Set PC and Thumb state from var. var is marked as dead.
779	* For M-profile CPUs, include logic to detect exception-return
780	* branches and handle them. This is needed for Thumb POP/LDM to PC, LDR to PC,
781	* and BX reg, and no others, and happens only for code in Handler mode.
782	* The Security Extension also requires us to check for the FNC_RETURN
783	* which signals a function return from non-secure state; this can happen
784	* in both Handler and Thread mode.
785	* To avoid having to do multiple comparisons in inline generated code,
786	* we make the check we do here loose, so it will match for EXC_RETURN
787	* in Thread mode. For system emulation do_v7m_exception_exit() checks
788	* for these spurious cases and returns without doing anything (giving
789	* the same behaviour as for a branch to a non-magic address).
790	*
791	* In linux-user mode it is unclear what the right behaviour for an
792	* attempted FNC_RETURN should be, because in real hardware this will go
793	* directly to Secure code (ie not the Linux kernel) which will then treat
794	* the error in any way it chooses. For QEMU we opt to make the FNC_RETURN
795	* attempt behave the way it would on a CPU without the security extension,
796	* which is to say "like a normal branch". That means we can simply treat
797	* all branches as normal with no magic address behaviour.
798	*/
799	static inline void gen_bx_excret(DisasContext *s, TCGv_i32 var)
800	{
801	/ Generate the same code here as for a simple bx, but flag via*
802	* s->base.is_jmp that we need to do the rest of the work later.
803	*/
804	gen_bx(s, var);
805	#ifndef CONFIG_USER_ONLY
806	if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY) \|\|
807	(s->v7m_handler_mode && arm_dc_feature(s, ARM_FEATURE_M))) {
808	s->base.is_jmp = DISAS_BX_EXCRET;
809	}
810	#endif
811	}
812
813	static inline void gen_bx_excret_final_code(DisasContext *s)
814	{
815	/ Generate the code to finish possible exception return and end the TB /
816	TCGLabel *excret_label = gen_new_label();
817	uint32_t min_magic;
818
819	if (arm_dc_feature(s, ARM_FEATURE_M_SECURITY)) {
820	/ Covers FNC_RETURN and EXC_RETURN magic /
821	min_magic = FNC_RETURN_MIN_MAGIC;
822	} else {
823	/ EXC_RETURN magic only /
824	min_magic = EXC_RETURN_MIN_MAGIC;
825	}
826
827	/ Is the new PC value in the magic range indicating exception return? /
828	tcg_gen_brcondi_i32(TCG_COND_GEU, cpu_R[`15`], min_magic, excret_label);
829	/ No: end the TB as we would for a DISAS_JMP /
830	if (is_singlestepping(s)) {
831	gen_singlestep_exception(s);
832	} else {
833	tcg_gen_exit_tb(NULL, `0`);
834	}
835	gen_set_label(excret_label);
836	/ Yes: this is an exception return.*
837	* At this point in runtime env->regs[15] and env->thumb will hold
838	* the exception-return magic number, which do_v7m_exception_exit()
839	* will read. Nothing else will be able to see those values because
840	* the cpu-exec main loop guarantees that we will always go straight
841	* from raising the exception to the exception-handling code.
842	*
843	* gen_ss_advance(s) does nothing on M profile currently but
844	* calling it is conceptually the right thing as we have executed
845	* this instruction (compare SWI, HVC, SMC handling).
846	*/
847	gen_ss_advance(s);
848	gen_exception_internal(EXCP_EXCEPTION_EXIT);
849	}
850
851	static inline void gen_bxns(DisasContext s, int* rm)
852	{
853	TCGv_i32 var = load_reg(s, rm);
854
855	/ The bxns helper may raise an EXCEPTION_EXIT exception, so in theory*
856	* we need to sync state before calling it, but:
857	* - we don't need to do gen_set_pc_im() because the bxns helper will
858	* always set the PC itself
859	* - we don't need to do gen_set_condexec() because BXNS is UNPREDICTABLE
860	* unless it's outside an IT block or the last insn in an IT block,
861	* so we know that condexec == 0 (already set at the top of the TB)
862	* is correct in the non-UNPREDICTABLE cases, and we can choose
863	* "zeroes the IT bits" as our UNPREDICTABLE behaviour otherwise.
864	*/
865	gen_helper_v7m_bxns(cpu_env, var);
866	tcg_temp_free_i32(var);
867	s->base.is_jmp = DISAS_EXIT;
868	}
869
870	static inline void gen_blxns(DisasContext s, int* rm)
871	{
872	TCGv_i32 var = load_reg(s, rm);
873
874	/ We don't need to sync condexec state, for the same reason as bxns.*
875	* We do however need to set the PC, because the blxns helper reads it.
876	* The blxns helper may throw an exception.
877	*/
878	gen_set_pc_im(s, s->base.pc_next);
879	gen_helper_v7m_blxns(cpu_env, var);
880	tcg_temp_free_i32(var);
881	s->base.is_jmp = DISAS_EXIT;
882	}
883
884	/ Variant of store_reg which uses branch&exchange logic when storing*
885	to r15 in ARM architecture v7 and above. The source must be a temporary
886	and will be marked as dead. /*
887	static inline void store_reg_bx(DisasContext s, int* reg, TCGv_i32 var)
888	{
889	if (reg == `15` && ENABLE_ARCH_7) {
890	gen_bx(s, var);
891	} else {
892	store_reg(s, reg, var);
893	}
894	}
895
896	/ Variant of store_reg which uses branch&exchange logic when storing*
897	* to r15 in ARM architecture v5T and above. This is used for storing
898	* the results of a LDR/LDM/POP into r15, and corresponds to the cases
899	* in the ARM ARM which use the LoadWritePC() pseudocode function. */
900	static inline void store_reg_from_load(DisasContext s, int* reg, TCGv_i32 var)
901	{
902	if (reg == `15` && ENABLE_ARCH_5) {
903	gen_bx_excret(s, var);
904	} else {
905	store_reg(s, reg, var);
906	}
907	}
908
/* Constant 1 in CONFIG_USER_ONLY builds and 0 otherwise. */
#if defined(CONFIG_USER_ONLY)
#  define IS_USER_ONLY 1
#else
#  define IS_USER_ONLY 0
#endif
914
915	/ Abstractions of "generate code to do a guest load/store for*
916	* AArch32", where a vaddr is always 32 bits (and is zero
917	* extended if we're a 64 bit core) and data is also
918	* 32 bits unless specifically doing a 64 bit access.
919	* These functions work like tcg_gen_qemu_{ld,st}* except
920	* that the address argument is TCGv_i32 rather than TCGv.
921	*/
922
923	static inline TCGv gen_aa32_addr(DisasContext *s, TCGv_i32 a32, MemOp op)
924	{
925	TCGv addr = tcg_temp_new();
926	tcg_gen_extu_i32_tl(addr, a32);
927
928	/ Not needed for user-mode BE32, where we use MO_BE instead. /
929	if (!IS_USER_ONLY && s->sctlr_b && (op & MO_SIZE) < MO_32) {
930	tcg_gen_xori_tl(addr, addr, `4` - (`1` << (op & MO_SIZE)));
931	}
932	return addr;
933	}
934
935	static void gen_aa32_ld_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
936	int index, MemOp opc)
937	{
938	TCGv addr;
939
940	if (arm_dc_feature(s, ARM_FEATURE_M) &&
941	!arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
942	opc \|= MO_ALIGN;
943	}
944
945	addr = gen_aa32_addr(s, a32, opc);
946	tcg_gen_qemu_ld_i32(val, addr, index, opc);
947	tcg_temp_free(addr);
948	}
949
950	static void gen_aa32_st_i32(DisasContext *s, TCGv_i32 val, TCGv_i32 a32,
951	int index, MemOp opc)
952	{
953	TCGv addr;
954
955	if (arm_dc_feature(s, ARM_FEATURE_M) &&
956	!arm_dc_feature(s, ARM_FEATURE_M_MAIN)) {
957	opc \|= MO_ALIGN;
958	}
959
960	addr = gen_aa32_addr(s, a32, opc);
961	tcg_gen_qemu_st_i32(val, addr, index, opc);
962	tcg_temp_free(addr);
963	}
964
/*
 * Define gen_aa32_ld<SUFF>(): a load with memory operation OPC combined
 * with the context's data endianness (s->be_data).
 */
#define DO_GEN_LD(SUFF, OPC)                                             \
static inline void gen_aa32_ld##SUFF(DisasContext *s, TCGv_i32 val,      \
                                     TCGv_i32 a32, int index)            \
{                                                                        \
    gen_aa32_ld_i32(s, val, a32, index, OPC | s->be_data);               \
}

/*
 * Define gen_aa32_st<SUFF>(): a store with memory operation OPC combined
 * with the context's data endianness (s->be_data).
 */
#define DO_GEN_ST(SUFF, OPC)                                             \
static inline void gen_aa32_st##SUFF(DisasContext *s, TCGv_i32 val,      \
                                     TCGv_i32 a32, int index)            \
{                                                                        \
    gen_aa32_st_i32(s, val, a32, index, OPC | s->be_data);               \
}
978
979	static inline void gen_aa32_frob64(DisasContext *s, TCGv_i64 val)
980	{
981	/ Not needed for user-mode BE32, where we use MO_BE instead. /
982	if (!IS_USER_ONLY && s->sctlr_b) {
983	tcg_gen_rotri_i64(val, val, `32`);
984	}
985	}
986
987	static void gen_aa32_ld_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
988	int index, MemOp opc)
989	{
990	TCGv addr = gen_aa32_addr(s, a32, opc);
991	tcg_gen_qemu_ld_i64(val, addr, index, opc);
992	gen_aa32_frob64(s, val);
993	tcg_temp_free(addr);
994	}
995
996	static inline void gen_aa32_ld64(DisasContext *s, TCGv_i64 val,
997	TCGv_i32 a32, int index)
998	{
999	gen_aa32_ld_i64(s, val, a32, index, MO_Q \| s->be_data);
1000	}
1001
1002	static void gen_aa32_st_i64(DisasContext *s, TCGv_i64 val, TCGv_i32 a32,
1003	int index, MemOp opc)
1004	{
1005	TCGv addr = gen_aa32_addr(s, a32, opc);
1006
1007	/ Not needed for user-mode BE32, where we use MO_BE instead. /
1008	if (!IS_USER_ONLY && s->sctlr_b) {
1009	TCGv_i64 tmp = tcg_temp_new_i64();
1010	tcg_gen_rotri_i64(tmp, val, `32`);
1011	tcg_gen_qemu_st_i64(tmp, addr, index, opc);
1012	tcg_temp_free_i64(tmp);
1013	} else {
1014	tcg_gen_qemu_st_i64(val, addr, index, opc);
1015	}
1016	tcg_temp_free(addr);
1017	}
1018
1019	static inline void gen_aa32_st64(DisasContext *s, TCGv_i64 val,
1020	TCGv_i32 a32, int index)
1021	{
1022	gen_aa32_st_i64(s, val, a32, index, MO_Q \| s->be_data);
1023	}
1024
1025	DO_GEN_LD(`8u`, MO_UB)
1026	DO_GEN_LD(`16u`, MO_UW)
1027	DO_GEN_LD(`32u`, MO_UL)
1028	DO_GEN_ST(`8`, MO_UB)
1029	DO_GEN_ST(`16`, MO_UW)
1030	DO_GEN_ST(`32`, MO_UL)
1031
1032	static inline void gen_hvc(DisasContext s, int* imm16)
1033	{
1034	/ The pre HVC helper handles cases when HVC gets trapped*
1035	* as an undefined insn by runtime configuration (ie before
1036	* the insn really executes).
1037	*/
1038	gen_set_pc_im(s, s->pc_curr);
1039	gen_helper_pre_hvc(cpu_env);
1040	/ Otherwise we will treat this as a real exception which*
1041	* happens after execution of the insn. (The distinction matters
1042	* for the PC value reported to the exception handler and also
1043	* for single stepping.)
1044	*/
1045	s->svc_imm = imm16;
1046	gen_set_pc_im(s, s->base.pc_next);
1047	s->base.is_jmp = DISAS_HVC;
1048	}
1049
1050	static inline void gen_smc(DisasContext *s)
1051	{
1052	/ As with HVC, we may take an exception either before or after*
1053	* the insn executes.
1054	*/
1055	TCGv_i32 tmp;
1056
1057	gen_set_pc_im(s, s->pc_curr);
1058	tmp = tcg_const_i32(syn_aa32_smc());
1059	gen_helper_pre_smc(cpu_env, tmp);
1060	tcg_temp_free_i32(tmp);
1061	gen_set_pc_im(s, s->base.pc_next);
1062	s->base.is_jmp = DISAS_SMC;
1063	}
1064
1065	static void gen_exception_internal_insn(DisasContext s, uint32_t pc, int* excp)
1066	{
1067	gen_set_condexec(s);
1068	gen_set_pc_im(s, pc);
1069	gen_exception_internal(excp);
1070	s->base.is_jmp = DISAS_NORETURN;
1071	}
1072
1073	static void gen_exception_insn(DisasContext s, uint32_t pc, int* excp,
1074	int syn, uint32_t target_el)
1075	{
1076	gen_set_condexec(s);
1077	gen_set_pc_im(s, pc);
1078	gen_exception(excp, syn, target_el);
1079	s->base.is_jmp = DISAS_NORETURN;
1080	}
1081
1082	static void gen_exception_bkpt_insn(DisasContext *s, uint32_t syn)
1083	{
1084	TCGv_i32 tcg_syn;
1085
1086	gen_set_condexec(s);
1087	gen_set_pc_im(s, s->pc_curr);
1088	tcg_syn = tcg_const_i32(syn);
1089	gen_helper_exception_bkpt_insn(cpu_env, tcg_syn);
1090	tcg_temp_free_i32(tcg_syn);
1091	s->base.is_jmp = DISAS_NORETURN;
1092	}
1093
1094	static void unallocated_encoding(DisasContext *s)
1095	{
1096	/ Unallocated and reserved encodings are uncategorized /
1097	gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(),
1098	default_exception_el(s));
1099	}
1100
1101	/ Force a TB lookup after an instruction that changes the CPU state. /
1102	static inline void gen_lookup_tb(DisasContext *s)
1103	{
1104	tcg_gen_movi_i32(cpu_R[`15`], s->base.pc_next);
1105	s->base.is_jmp = DISAS_EXIT;
1106	}
1107
1108	static inline void gen_hlt(DisasContext s, int* imm)
1109	{
1110	/ HLT. This has two purposes.*
1111	* Architecturally, it is an external halting debug instruction.
1112	* Since QEMU doesn't implement external debug, we treat this as
1113	* it is required for halting debug disabled: it will UNDEF.
1114	* Secondly, "HLT 0x3C" is a T32 semihosting trap instruction,
1115	* and "HLT 0xF000" is an A32 semihosting syscall. These traps
1116	* must trigger semihosting even for ARMv7 and earlier, where
1117	* HLT was an undefined encoding.
1118	* In system mode, we don't allow userspace access to
1119	* semihosting, to provide some semblance of security
1120	* (and for consistency with our 32-bit semihosting).
1121	*/
1122	if (semihosting_enabled() &&
1123	#ifndef CONFIG_USER_ONLY
1124	s->current_el != `0` &&
1125	#endif
1126	(imm == (s->thumb ? `0x3c` : `0xf000`))) {
1127	gen_exception_internal_insn(s, s->base.pc_next, EXCP_SEMIHOST);
1128	return;
1129	}
1130
1131	unallocated_encoding(s);
1132	}
1133
1134	static TCGv_ptr get_fpstatus_ptr(int neon)
1135	{
1136	TCGv_ptr statusptr = tcg_temp_new_ptr();
1137	int offset;
1138	if (neon) {
1139	offset = offsetof(CPUARMState, vfp.standard_fp_status);
1140	} else {
1141	offset = offsetof(CPUARMState, vfp.fp_status);
1142	}
1143	tcg_gen_addi_ptr(statusptr, cpu_env, offset);
1144	return statusptr;
1145	}
1146
1147	static inline long vfp_reg_offset(bool dp, unsigned reg)
1148	{
1149	if (dp) {
1150	return offsetof(CPUARMState, vfp.zregs[reg >> `1`].d[reg & `1`]);
1151	} else {
1152	long ofs = offsetof(CPUARMState, vfp.zregs[reg >> `2`].d[(reg >> `1`) & `1`]);
1153	if (reg & `1`) {
1154	ofs += offsetof(CPU_DoubleU, l.upper);
1155	} else {
1156	ofs += offsetof(CPU_DoubleU, l.lower);
1157	}
1158	return ofs;
1159	}
1160	}
1161
/*
 * Return the offset of a 32-bit piece of a NEON register.
 * Zero is the least significant end of the register.
 *
 * Piece N of register REG is single-precision register REG * 2 + N.
 */
static inline long neon_reg_offset(int reg, int n)
{
    return vfp_reg_offset(false, reg * 2 + n);
}
1171
1172	/ Return the offset of a 2*SIZE piece of a NEON register, at index ELE,
1173	* where 0 is the least significant end of the register.
1174	*/
1175	static inline long
1176	neon_element_offset(int reg, int element, MemOp size)
1177	{
1178	int element_size = `1` << size;
1179	int ofs = element * element_size;
1180	#ifdef HOST_WORDS_BIGENDIAN
1181	/ Calculate the offset assuming fully little-endian,*
1182	* then XOR to account for the order of the 8-byte units.
1183	*/
1184	if (element_size < `8`) {
1185	ofs ^= `8` - element_size;
1186	}
1187	#endif
1188	return neon_reg_offset(reg, `0`) + ofs;
1189	}
1190
1191	static TCGv_i32 neon_load_reg(int reg, int pass)
1192	{
1193	TCGv_i32 tmp = tcg_temp_new_i32();
1194	tcg_gen_ld_i32(tmp, cpu_env, neon_reg_offset(reg, pass));
1195	return tmp;
1196	}
1197
1198	static void neon_load_element(TCGv_i32 var, int reg, int ele, MemOp mop)
1199	{
1200	long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1201
1202	switch (mop) {
1203	case MO_UB:
1204	tcg_gen_ld8u_i32(var, cpu_env, offset);
1205	break;
1206	case MO_UW:
1207	tcg_gen_ld16u_i32(var, cpu_env, offset);
1208	break;
1209	case MO_UL:
1210	tcg_gen_ld_i32(var, cpu_env, offset);
1211	break;
1212	default:
1213	g_assert_not_reached();
1214	}
1215	}
1216
1217	static void neon_load_element64(TCGv_i64 var, int reg, int ele, MemOp mop)
1218	{
1219	long offset = neon_element_offset(reg, ele, mop & MO_SIZE);
1220
1221	switch (mop) {
1222	case MO_UB:
1223	tcg_gen_ld8u_i64(var, cpu_env, offset);
1224	break;
1225	case MO_UW:
1226	tcg_gen_ld16u_i64(var, cpu_env, offset);
1227	break;
1228	case MO_UL:
1229	tcg_gen_ld32u_i64(var, cpu_env, offset);
1230	break;
1231	case MO_Q:
1232	tcg_gen_ld_i64(var, cpu_env, offset);
1233	break;
1234	default:
1235	g_assert_not_reached();
1236	}
1237	}
1238
1239	static void neon_store_reg(int reg, int pass, TCGv_i32 var)
1240	{
1241	tcg_gen_st_i32(var, cpu_env, neon_reg_offset(reg, pass));
1242	tcg_temp_free_i32(var);
1243	}
1244
1245	static void neon_store_element(int reg, int ele, MemOp size, TCGv_i32 var)
1246	{
1247	long offset = neon_element_offset(reg, ele, size);
1248
1249	switch (size) {
1250	case MO_8:
1251	tcg_gen_st8_i32(var, cpu_env, offset);
1252	break;
1253	case MO_16:
1254	tcg_gen_st16_i32(var, cpu_env, offset);
1255	break;
1256	case MO_32:
1257	tcg_gen_st_i32(var, cpu_env, offset);
1258	break;
1259	default:
1260	g_assert_not_reached();
1261	}
1262	}
1263
1264	static void neon_store_element64(int reg, int ele, MemOp size, TCGv_i64 var)
1265	{
1266	long offset = neon_element_offset(reg, ele, size);
1267
1268	switch (size) {
1269	case MO_8:
1270	tcg_gen_st8_i64(var, cpu_env, offset);
1271	break;
1272	case MO_16:
1273	tcg_gen_st16_i64(var, cpu_env, offset);
1274	break;
1275	case MO_32:
1276	tcg_gen_st32_i64(var, cpu_env, offset);
1277	break;
1278	case MO_64:
1279	tcg_gen_st_i64(var, cpu_env, offset);
1280	break;
1281	default:
1282	g_assert_not_reached();
1283	}
1284	}
1285
1286	static inline void neon_load_reg64(TCGv_i64 var, int reg)
1287	{
1288	tcg_gen_ld_i64(var, cpu_env, vfp_reg_offset(`1`, reg));
1289	}
1290
1291	static inline void neon_store_reg64(TCGv_i64 var, int reg)
1292	{
1293	tcg_gen_st_i64(var, cpu_env, vfp_reg_offset(`1`, reg));
1294	}
1295
1296	static inline void neon_load_reg32(TCGv_i32 var, int reg)
1297	{
1298	tcg_gen_ld_i32(var, cpu_env, vfp_reg_offset(false, reg));
1299	}
1300
1301	static inline void neon_store_reg32(TCGv_i32 var, int reg)
1302	{
1303	tcg_gen_st_i32(var, cpu_env, vfp_reg_offset(false, reg));
1304	}
1305
1306	static TCGv_ptr vfp_reg_ptr(bool dp, int reg)
1307	{
1308	TCGv_ptr ret = tcg_temp_new_ptr();
1309	tcg_gen_addi_ptr(ret, cpu_env, vfp_reg_offset(dp, reg));
1310	return ret;
1311	}
1312
/*
 * Bit 20 of a coprocessor instruction word.  The iwMMXt decoder tests it
 * to tell the forms that read a coprocessor register or load from memory
 * (bit set) from the forms that write one or store to memory (bit clear).
 */
#define ARM_CP_RW_BIT (1 << 20)
1314
/* Include the VFP decoder */
1316	#include "translate-vfp.inc.c"
1317
1318	static inline void iwmmxt_load_reg(TCGv_i64 var, int reg)
1319	{
1320	tcg_gen_ld_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1321	}
1322
1323	static inline void iwmmxt_store_reg(TCGv_i64 var, int reg)
1324	{
1325	tcg_gen_st_i64(var, cpu_env, offsetof(CPUARMState, iwmmxt.regs[reg]));
1326	}
1327
1328	static inline TCGv_i32 iwmmxt_load_creg(int reg)
1329	{
1330	TCGv_i32 var = tcg_temp_new_i32();
1331	tcg_gen_ld_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1332	return var;
1333	}
1334
1335	static inline void iwmmxt_store_creg(int reg, TCGv_i32 var)
1336	{
1337	tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, iwmmxt.cregs[reg]));
1338	tcg_temp_free_i32(var);
1339	}
1340
1341	static inline void gen_op_iwmmxt_movq_wRn_M0(int rn)
1342	{
1343	iwmmxt_store_reg(cpu_M0, rn);
1344	}
1345
1346	static inline void gen_op_iwmmxt_movq_M0_wRn(int rn)
1347	{
1348	iwmmxt_load_reg(cpu_M0, rn);
1349	}
1350
1351	static inline void gen_op_iwmmxt_orq_M0_wRn(int rn)
1352	{
1353	iwmmxt_load_reg(cpu_V1, rn);
1354	tcg_gen_or_i64(cpu_M0, cpu_M0, cpu_V1);
1355	}
1356
1357	static inline void gen_op_iwmmxt_andq_M0_wRn(int rn)
1358	{
1359	iwmmxt_load_reg(cpu_V1, rn);
1360	tcg_gen_and_i64(cpu_M0, cpu_M0, cpu_V1);
1361	}
1362
1363	static inline void gen_op_iwmmxt_xorq_M0_wRn(int rn)
1364	{
1365	iwmmxt_load_reg(cpu_V1, rn);
1366	tcg_gen_xor_i64(cpu_M0, cpu_M0, cpu_V1);
1367	}
1368
/*
 * Define gen_op_iwmmxt_<name>_M0_wRn(rn): load wR[rn] into cpu_V1 and
 * call gen_helper_iwmmxt_<name>(cpu_M0, cpu_M0, cpu_V1).
 */
#define IWMMXT_OP(name)                                                 \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn)                \
{                                                                       \
    iwmmxt_load_reg(cpu_V1, rn);                                        \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_M0, cpu_V1);                   \
}
1375
/*
 * Define gen_op_iwmmxt_<name>_M0_wRn(rn): load wR[rn] into cpu_V1 and
 * call gen_helper_iwmmxt_<name>(cpu_M0, cpu_env, cpu_M0, cpu_V1).
 * Same as IWMMXT_OP except that the helper also receives cpu_env.
 */
#define IWMMXT_OP_ENV(name)                                             \
static inline void gen_op_iwmmxt_##name##_M0_wRn(int rn)                \
{                                                                       \
    iwmmxt_load_reg(cpu_V1, rn);                                        \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0, cpu_V1);          \
}
1382
/* Instantiate IWMMXT_OP_ENV three times, for <name>b, <name>w and <name>l. */
#define IWMMXT_OP_ENV_SIZE(name)                                        \
IWMMXT_OP_ENV(name##b)                                                  \
IWMMXT_OP_ENV(name##w)                                                  \
IWMMXT_OP_ENV(name##l)
1387
/*
 * Define gen_op_iwmmxt_<name>_M0(): call
 * gen_helper_iwmmxt_<name>(cpu_M0, cpu_env, cpu_M0), i.e. a helper that
 * takes cpu_M0 as its only data operand.
 */
#define IWMMXT_OP_ENV1(name)                                            \
static inline void gen_op_iwmmxt_##name##_M0(void)                      \
{                                                                       \
    gen_helper_iwmmxt_##name(cpu_M0, cpu_env, cpu_M0);                  \
}
1393
/*
 * Instantiate the gen_op_iwmmxt_* wrappers.  Each line below expands to
 * one static inline function (three for IWMMXT_OP_ENV_SIZE).
 */

/* M0 = helper(M0, wRn); helpers do not take cpu_env. */
IWMMXT_OP(maddsq)
IWMMXT_OP(madduq)
IWMMXT_OP(sadb)
IWMMXT_OP(sadw)
IWMMXT_OP(mulslw)
IWMMXT_OP(mulshw)
IWMMXT_OP(mululw)
IWMMXT_OP(muluhw)
IWMMXT_OP(macsw)
IWMMXT_OP(macuw)

/* M0 = helper(env, M0, wRn), in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(unpackl)
IWMMXT_OP_ENV_SIZE(unpackh)

/* M0 = helper(env, M0): single-operand unpack variants. */
IWMMXT_OP_ENV1(unpacklub)
IWMMXT_OP_ENV1(unpackluw)
IWMMXT_OP_ENV1(unpacklul)
IWMMXT_OP_ENV1(unpackhub)
IWMMXT_OP_ENV1(unpackhuw)
IWMMXT_OP_ENV1(unpackhul)
IWMMXT_OP_ENV1(unpacklsb)
IWMMXT_OP_ENV1(unpacklsw)
IWMMXT_OP_ENV1(unpacklsl)
IWMMXT_OP_ENV1(unpackhsb)
IWMMXT_OP_ENV1(unpackhsw)
IWMMXT_OP_ENV1(unpackhsl)

/* Compare wrappers, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(cmpeq)
IWMMXT_OP_ENV_SIZE(cmpgtu)
IWMMXT_OP_ENV_SIZE(cmpgts)

/* Min/max wrappers, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(mins)
IWMMXT_OP_ENV_SIZE(minu)
IWMMXT_OP_ENV_SIZE(maxs)
IWMMXT_OP_ENV_SIZE(maxu)

/* Add/subtract wrappers, in b/w/l variants. */
IWMMXT_OP_ENV_SIZE(subn)
IWMMXT_OP_ENV_SIZE(addn)
IWMMXT_OP_ENV_SIZE(subu)
IWMMXT_OP_ENV_SIZE(addu)
IWMMXT_OP_ENV_SIZE(subs)
IWMMXT_OP_ENV_SIZE(adds)

/* Average wrappers. */
IWMMXT_OP_ENV(avgb0)
IWMMXT_OP_ENV(avgb1)
IWMMXT_OP_ENV(avgw0)
IWMMXT_OP_ENV(avgw1)

/* Pack wrappers. */
IWMMXT_OP_ENV(packuw)
IWMMXT_OP_ENV(packul)
IWMMXT_OP_ENV(packuq)
IWMMXT_OP_ENV(packsw)
IWMMXT_OP_ENV(packsl)
IWMMXT_OP_ENV(packsq)
1448
1449	static void gen_op_iwmmxt_set_mup(void)
1450	{
1451	TCGv_i32 tmp;
1452	tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1453	tcg_gen_ori_i32(tmp, tmp, `2`);
1454	store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1455	}
1456
1457	static void gen_op_iwmmxt_set_cup(void)
1458	{
1459	TCGv_i32 tmp;
1460	tmp = load_cpu_field(iwmmxt.cregs[ARM_IWMMXT_wCon]);
1461	tcg_gen_ori_i32(tmp, tmp, `1`);
1462	store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCon]);
1463	}
1464
1465	static void gen_op_iwmmxt_setpsr_nz(void)
1466	{
1467	TCGv_i32 tmp = tcg_temp_new_i32();
1468	gen_helper_iwmmxt_setpsr_nz(tmp, cpu_M0);
1469	store_cpu_field(tmp, iwmmxt.cregs[ARM_IWMMXT_wCASF]);
1470	}
1471
1472	static inline void gen_op_iwmmxt_addl_M0_wRn(int rn)
1473	{
1474	iwmmxt_load_reg(cpu_V1, rn);
1475	tcg_gen_ext32u_i64(cpu_V1, cpu_V1);
1476	tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1477	}
1478
1479	static inline int gen_iwmmxt_address(DisasContext *s, uint32_t insn,
1480	TCGv_i32 dest)
1481	{
1482	int rd;
1483	uint32_t offset;
1484	TCGv_i32 tmp;
1485
1486	rd = (insn >> `16`) & `0xf`;
1487	tmp = load_reg(s, rd);
1488
1489	offset = (insn & `0xff`) << ((insn >> `7`) & `2`);
1490	if (insn & (`1` << `24`)) {
1491	/ Pre indexed /
1492	if (insn & (`1` << `23`))
1493	tcg_gen_addi_i32(tmp, tmp, offset);
1494	else
1495	tcg_gen_addi_i32(tmp, tmp, -offset);
1496	tcg_gen_mov_i32(dest, tmp);
1497	if (insn & (`1` << `21`))
1498	store_reg(s, rd, tmp);
1499	else
1500	tcg_temp_free_i32(tmp);
1501	} else if (insn & (`1` << `21`)) {
1502	/ Post indexed /
1503	tcg_gen_mov_i32(dest, tmp);
1504	if (insn & (`1` << `23`))
1505	tcg_gen_addi_i32(tmp, tmp, offset);
1506	else
1507	tcg_gen_addi_i32(tmp, tmp, -offset);
1508	store_reg(s, rd, tmp);
1509	} else if (!(insn & (`1` << `23`)))
1510	return `1`;
1511	return `0`;
1512	}
1513
1514	static inline int gen_iwmmxt_shift(uint32_t insn, uint32_t mask, TCGv_i32 dest)
1515	{
1516	int rd = (insn >> `0`) & `0xf`;
1517	TCGv_i32 tmp;
1518
1519	if (insn & (`1` << `8`)) {
1520	if (rd < ARM_IWMMXT_wCGR0 \|\| rd > ARM_IWMMXT_wCGR3) {
1521	return `1`;
1522	} else {
1523	tmp = iwmmxt_load_creg(rd);
1524	}
1525	} else {
1526	tmp = tcg_temp_new_i32();
1527	iwmmxt_load_reg(cpu_V0, rd);
1528	tcg_gen_extrl_i64_i32(tmp, cpu_V0);
1529	}
1530	tcg_gen_andi_i32(tmp, tmp, mask);
1531	tcg_gen_mov_i32(dest, tmp);
1532	tcg_temp_free_i32(tmp);
1533	return `0`;
1534	}
1535
/* Disassemble an iwMMXt instruction.  Returns nonzero if an error occurred
   (ie. an undefined instruction).  */
1538	static int disas_iwmmxt_insn(DisasContext *s, uint32_t insn)
1539	{
1540	int rd, wrd;
1541	int rdhi, rdlo, rd0, rd1, i;
1542	TCGv_i32 addr;
1543	TCGv_i32 tmp, tmp2, tmp3;
1544
1545	if ((insn & `0x0e000e00`) == `0x0c000000`) {
1546	if ((insn & `0x0fe00ff0`) == `0x0c400000`) {
1547	wrd = insn & `0xf`;
1548	rdlo = (insn >> `12`) & `0xf`;
1549	rdhi = (insn >> `16`) & `0xf`;
1550	if (insn & ARM_CP_RW_BIT) { / TMRRC /
1551	iwmmxt_load_reg(cpu_V0, wrd);
1552	tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
1553	tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
1554	} else { / TMCRR /
1555	tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
1556	iwmmxt_store_reg(cpu_V0, wrd);
1557	gen_op_iwmmxt_set_mup();
1558	}
1559	return `0`;
1560	}
1561
1562	wrd = (insn >> `12`) & `0xf`;
1563	addr = tcg_temp_new_i32();
1564	if (gen_iwmmxt_address(s, insn, addr)) {
1565	tcg_temp_free_i32(addr);
1566	return `1`;
1567	}
1568	if (insn & ARM_CP_RW_BIT) {
1569	if ((insn >> `28`) == `0xf`) { / WLDRW wCx /
1570	tmp = tcg_temp_new_i32();
1571	gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1572	iwmmxt_store_creg(wrd, tmp);
1573	} else {
1574	i = `1`;
1575	if (insn & (`1` << `8`)) {
1576	if (insn & (`1` << `22`)) { / WLDRD /
1577	gen_aa32_ld64(s, cpu_M0, addr, get_mem_index(s));
1578	i = `0`;
1579	} else { / WLDRW wRd /
1580	tmp = tcg_temp_new_i32();
1581	gen_aa32_ld32u(s, tmp, addr, get_mem_index(s));
1582	}
1583	} else {
1584	tmp = tcg_temp_new_i32();
1585	if (insn & (`1` << `22`)) { / WLDRH /
1586	gen_aa32_ld16u(s, tmp, addr, get_mem_index(s));
1587	} else { / WLDRB /
1588	gen_aa32_ld8u(s, tmp, addr, get_mem_index(s));
1589	}
1590	}
1591	if (i) {
1592	tcg_gen_extu_i32_i64(cpu_M0, tmp);
1593	tcg_temp_free_i32(tmp);
1594	}
1595	gen_op_iwmmxt_movq_wRn_M0(wrd);
1596	}
1597	} else {
1598	if ((insn >> `28`) == `0xf`) { / WSTRW wCx /
1599	tmp = iwmmxt_load_creg(wrd);
1600	gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1601	} else {
1602	gen_op_iwmmxt_movq_M0_wRn(wrd);
1603	tmp = tcg_temp_new_i32();
1604	if (insn & (`1` << `8`)) {
1605	if (insn & (`1` << `22`)) { / WSTRD /
1606	gen_aa32_st64(s, cpu_M0, addr, get_mem_index(s));
1607	} else { / WSTRW wRd /
1608	tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1609	gen_aa32_st32(s, tmp, addr, get_mem_index(s));
1610	}
1611	} else {
1612	if (insn & (`1` << `22`)) { / WSTRH /
1613	tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1614	gen_aa32_st16(s, tmp, addr, get_mem_index(s));
1615	} else { / WSTRB /
1616	tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1617	gen_aa32_st8(s, tmp, addr, get_mem_index(s));
1618	}
1619	}
1620	}
1621	tcg_temp_free_i32(tmp);
1622	}
1623	tcg_temp_free_i32(addr);
1624	return `0`;
1625	}
1626
1627	if ((insn & `0x0f000000`) != `0x0e000000`)
1628	return `1`;
1629
1630	switch (((insn >> `12`) & `0xf00`) \| ((insn >> `4`) & `0xff`)) {
1631	case `0x000`: / WOR /
1632	wrd = (insn >> `12`) & `0xf`;
1633	rd0 = (insn >> `0`) & `0xf`;
1634	rd1 = (insn >> `16`) & `0xf`;
1635	gen_op_iwmmxt_movq_M0_wRn(rd0);
1636	gen_op_iwmmxt_orq_M0_wRn(rd1);
1637	gen_op_iwmmxt_setpsr_nz();
1638	gen_op_iwmmxt_movq_wRn_M0(wrd);
1639	gen_op_iwmmxt_set_mup();
1640	gen_op_iwmmxt_set_cup();
1641	break;
1642	case `0x011`: / TMCR /
1643	if (insn & `0xf`)
1644	return `1`;
1645	rd = (insn >> `12`) & `0xf`;
1646	wrd = (insn >> `16`) & `0xf`;
1647	switch (wrd) {
1648	case ARM_IWMMXT_wCID:
1649	case ARM_IWMMXT_wCASF:
1650	break;
1651	case ARM_IWMMXT_wCon:
1652	gen_op_iwmmxt_set_cup();
1653	/ Fall through. /
1654	case ARM_IWMMXT_wCSSF:
1655	tmp = iwmmxt_load_creg(wrd);
1656	tmp2 = load_reg(s, rd);
1657	tcg_gen_andc_i32(tmp, tmp, tmp2);
1658	tcg_temp_free_i32(tmp2);
1659	iwmmxt_store_creg(wrd, tmp);
1660	break;
1661	case ARM_IWMMXT_wCGR0:
1662	case ARM_IWMMXT_wCGR1:
1663	case ARM_IWMMXT_wCGR2:
1664	case ARM_IWMMXT_wCGR3:
1665	gen_op_iwmmxt_set_cup();
1666	tmp = load_reg(s, rd);
1667	iwmmxt_store_creg(wrd, tmp);
1668	break;
1669	default:
1670	return `1`;
1671	}
1672	break;
1673	case `0x100`: / WXOR /
1674	wrd = (insn >> `12`) & `0xf`;
1675	rd0 = (insn >> `0`) & `0xf`;
1676	rd1 = (insn >> `16`) & `0xf`;
1677	gen_op_iwmmxt_movq_M0_wRn(rd0);
1678	gen_op_iwmmxt_xorq_M0_wRn(rd1);
1679	gen_op_iwmmxt_setpsr_nz();
1680	gen_op_iwmmxt_movq_wRn_M0(wrd);
1681	gen_op_iwmmxt_set_mup();
1682	gen_op_iwmmxt_set_cup();
1683	break;
1684	case `0x111`: / TMRC /
1685	if (insn & `0xf`)
1686	return `1`;
1687	rd = (insn >> `12`) & `0xf`;
1688	wrd = (insn >> `16`) & `0xf`;
1689	tmp = iwmmxt_load_creg(wrd);
1690	store_reg(s, rd, tmp);
1691	break;
1692	case `0x300`: / WANDN /
1693	wrd = (insn >> `12`) & `0xf`;
1694	rd0 = (insn >> `0`) & `0xf`;
1695	rd1 = (insn >> `16`) & `0xf`;
1696	gen_op_iwmmxt_movq_M0_wRn(rd0);
1697	tcg_gen_neg_i64(cpu_M0, cpu_M0);
1698	gen_op_iwmmxt_andq_M0_wRn(rd1);
1699	gen_op_iwmmxt_setpsr_nz();
1700	gen_op_iwmmxt_movq_wRn_M0(wrd);
1701	gen_op_iwmmxt_set_mup();
1702	gen_op_iwmmxt_set_cup();
1703	break;
1704	case `0x200`: / WAND /
1705	wrd = (insn >> `12`) & `0xf`;
1706	rd0 = (insn >> `0`) & `0xf`;
1707	rd1 = (insn >> `16`) & `0xf`;
1708	gen_op_iwmmxt_movq_M0_wRn(rd0);
1709	gen_op_iwmmxt_andq_M0_wRn(rd1);
1710	gen_op_iwmmxt_setpsr_nz();
1711	gen_op_iwmmxt_movq_wRn_M0(wrd);
1712	gen_op_iwmmxt_set_mup();
1713	gen_op_iwmmxt_set_cup();
1714	break;
1715	case `0x810`: case `0xa10`: / WMADD /
1716	wrd = (insn >> `12`) & `0xf`;
1717	rd0 = (insn >> `0`) & `0xf`;
1718	rd1 = (insn >> `16`) & `0xf`;
1719	gen_op_iwmmxt_movq_M0_wRn(rd0);
1720	if (insn & (`1` << `21`))
1721	gen_op_iwmmxt_maddsq_M0_wRn(rd1);
1722	else
1723	gen_op_iwmmxt_madduq_M0_wRn(rd1);
1724	gen_op_iwmmxt_movq_wRn_M0(wrd);
1725	gen_op_iwmmxt_set_mup();
1726	break;
1727	case `0x10e`: case `0x50e`: case `0x90e`: case `0xd0e`: / WUNPCKIL /
1728	wrd = (insn >> `12`) & `0xf`;
1729	rd0 = (insn >> `16`) & `0xf`;
1730	rd1 = (insn >> `0`) & `0xf`;
1731	gen_op_iwmmxt_movq_M0_wRn(rd0);
1732	switch ((insn >> `22`) & `3`) {
1733	case `0`:
1734	gen_op_iwmmxt_unpacklb_M0_wRn(rd1);
1735	break;
1736	case `1`:
1737	gen_op_iwmmxt_unpacklw_M0_wRn(rd1);
1738	break;
1739	case `2`:
1740	gen_op_iwmmxt_unpackll_M0_wRn(rd1);
1741	break;
1742	case `3`:
1743	return `1`;
1744	}
1745	gen_op_iwmmxt_movq_wRn_M0(wrd);
1746	gen_op_iwmmxt_set_mup();
1747	gen_op_iwmmxt_set_cup();
1748	break;
1749	case `0x10c`: case `0x50c`: case `0x90c`: case `0xd0c`: / WUNPCKIH /
1750	wrd = (insn >> `12`) & `0xf`;
1751	rd0 = (insn >> `16`) & `0xf`;
1752	rd1 = (insn >> `0`) & `0xf`;
1753	gen_op_iwmmxt_movq_M0_wRn(rd0);
1754	switch ((insn >> `22`) & `3`) {
1755	case `0`:
1756	gen_op_iwmmxt_unpackhb_M0_wRn(rd1);
1757	break;
1758	case `1`:
1759	gen_op_iwmmxt_unpackhw_M0_wRn(rd1);
1760	break;
1761	case `2`:
1762	gen_op_iwmmxt_unpackhl_M0_wRn(rd1);
1763	break;
1764	case `3`:
1765	return `1`;
1766	}
1767	gen_op_iwmmxt_movq_wRn_M0(wrd);
1768	gen_op_iwmmxt_set_mup();
1769	gen_op_iwmmxt_set_cup();
1770	break;
1771	case `0x012`: case `0x112`: case `0x412`: case `0x512`: / WSAD /
1772	wrd = (insn >> `12`) & `0xf`;
1773	rd0 = (insn >> `16`) & `0xf`;
1774	rd1 = (insn >> `0`) & `0xf`;
1775	gen_op_iwmmxt_movq_M0_wRn(rd0);
1776	if (insn & (`1` << `22`))
1777	gen_op_iwmmxt_sadw_M0_wRn(rd1);
1778	else
1779	gen_op_iwmmxt_sadb_M0_wRn(rd1);
1780	if (!(insn & (`1` << `20`)))
1781	gen_op_iwmmxt_addl_M0_wRn(wrd);
1782	gen_op_iwmmxt_movq_wRn_M0(wrd);
1783	gen_op_iwmmxt_set_mup();
1784	break;
1785	case `0x010`: case `0x110`: case `0x210`: case `0x310`: / WMUL /
1786	wrd = (insn >> `12`) & `0xf`;
1787	rd0 = (insn >> `16`) & `0xf`;
1788	rd1 = (insn >> `0`) & `0xf`;
1789	gen_op_iwmmxt_movq_M0_wRn(rd0);
1790	if (insn & (`1` << `21`)) {
1791	if (insn & (`1` << `20`))
1792	gen_op_iwmmxt_mulshw_M0_wRn(rd1);
1793	else
1794	gen_op_iwmmxt_mulslw_M0_wRn(rd1);
1795	} else {
1796	if (insn & (`1` << `20`))
1797	gen_op_iwmmxt_muluhw_M0_wRn(rd1);
1798	else
1799	gen_op_iwmmxt_mululw_M0_wRn(rd1);
1800	}
1801	gen_op_iwmmxt_movq_wRn_M0(wrd);
1802	gen_op_iwmmxt_set_mup();
1803	break;
1804	case `0x410`: case `0x510`: case `0x610`: case `0x710`: / WMAC /
1805	wrd = (insn >> `12`) & `0xf`;
1806	rd0 = (insn >> `16`) & `0xf`;
1807	rd1 = (insn >> `0`) & `0xf`;
1808	gen_op_iwmmxt_movq_M0_wRn(rd0);
1809	if (insn & (`1` << `21`))
1810	gen_op_iwmmxt_macsw_M0_wRn(rd1);
1811	else
1812	gen_op_iwmmxt_macuw_M0_wRn(rd1);
1813	if (!(insn & (`1` << `20`))) {
1814	iwmmxt_load_reg(cpu_V1, wrd);
1815	tcg_gen_add_i64(cpu_M0, cpu_M0, cpu_V1);
1816	}
1817	gen_op_iwmmxt_movq_wRn_M0(wrd);
1818	gen_op_iwmmxt_set_mup();
1819	break;
1820	case `0x006`: case `0x406`: case `0x806`: case `0xc06`: / WCMPEQ /
1821	wrd = (insn >> `12`) & `0xf`;
1822	rd0 = (insn >> `16`) & `0xf`;
1823	rd1 = (insn >> `0`) & `0xf`;
1824	gen_op_iwmmxt_movq_M0_wRn(rd0);
1825	switch ((insn >> `22`) & `3`) {
1826	case `0`:
1827	gen_op_iwmmxt_cmpeqb_M0_wRn(rd1);
1828	break;
1829	case `1`:
1830	gen_op_iwmmxt_cmpeqw_M0_wRn(rd1);
1831	break;
1832	case `2`:
1833	gen_op_iwmmxt_cmpeql_M0_wRn(rd1);
1834	break;
1835	case `3`:
1836	return `1`;
1837	}
1838	gen_op_iwmmxt_movq_wRn_M0(wrd);
1839	gen_op_iwmmxt_set_mup();
1840	gen_op_iwmmxt_set_cup();
1841	break;
1842	case `0x800`: case `0x900`: case `0xc00`: case `0xd00`: / WAVG2 /
1843	wrd = (insn >> `12`) & `0xf`;
1844	rd0 = (insn >> `16`) & `0xf`;
1845	rd1 = (insn >> `0`) & `0xf`;
1846	gen_op_iwmmxt_movq_M0_wRn(rd0);
1847	if (insn & (`1` << `22`)) {
1848	if (insn & (`1` << `20`))
1849	gen_op_iwmmxt_avgw1_M0_wRn(rd1);
1850	else
1851	gen_op_iwmmxt_avgw0_M0_wRn(rd1);
1852	} else {
1853	if (insn & (`1` << `20`))
1854	gen_op_iwmmxt_avgb1_M0_wRn(rd1);
1855	else
1856	gen_op_iwmmxt_avgb0_M0_wRn(rd1);
1857	}
1858	gen_op_iwmmxt_movq_wRn_M0(wrd);
1859	gen_op_iwmmxt_set_mup();
1860	gen_op_iwmmxt_set_cup();
1861	break;
1862	case `0x802`: case `0x902`: case `0xa02`: case `0xb02`: / WALIGNR /
1863	wrd = (insn >> `12`) & `0xf`;
1864	rd0 = (insn >> `16`) & `0xf`;
1865	rd1 = (insn >> `0`) & `0xf`;
1866	gen_op_iwmmxt_movq_M0_wRn(rd0);
1867	tmp = iwmmxt_load_creg(ARM_IWMMXT_wCGR0 + ((insn >> `20`) & `3`));
1868	tcg_gen_andi_i32(tmp, tmp, `7`);
1869	iwmmxt_load_reg(cpu_V1, rd1);
1870	gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
1871	tcg_temp_free_i32(tmp);
1872	gen_op_iwmmxt_movq_wRn_M0(wrd);
1873	gen_op_iwmmxt_set_mup();
1874	break;
1875	case `0x601`: case `0x605`: case `0x609`: case `0x60d`: / TINSR /
1876	if (((insn >> `6`) & `3`) == `3`)
1877	return `1`;
1878	rd = (insn >> `12`) & `0xf`;
1879	wrd = (insn >> `16`) & `0xf`;
1880	tmp = load_reg(s, rd);
1881	gen_op_iwmmxt_movq_M0_wRn(wrd);
1882	switch ((insn >> `6`) & `3`) {
1883	case `0`:
1884	tmp2 = tcg_const_i32(`0xff`);
1885	tmp3 = tcg_const_i32((insn & `7`) << `3`);
1886	break;
1887	case `1`:
1888	tmp2 = tcg_const_i32(`0xffff`);
1889	tmp3 = tcg_const_i32((insn & `3`) << `4`);
1890	break;
1891	case `2`:
1892	tmp2 = tcg_const_i32(`0xffffffff`);
1893	tmp3 = tcg_const_i32((insn & `1`) << `5`);
1894	break;
1895	default:
1896	tmp2 = NULL;
1897	tmp3 = NULL;
1898	}
1899	gen_helper_iwmmxt_insr(cpu_M0, cpu_M0, tmp, tmp2, tmp3);
1900	tcg_temp_free_i32(tmp3);
1901	tcg_temp_free_i32(tmp2);
1902	tcg_temp_free_i32(tmp);
1903	gen_op_iwmmxt_movq_wRn_M0(wrd);
1904	gen_op_iwmmxt_set_mup();
1905	break;
1906	case `0x107`: case `0x507`: case `0x907`: case `0xd07`: / TEXTRM /
1907	rd = (insn >> `12`) & `0xf`;
1908	wrd = (insn >> `16`) & `0xf`;
1909	if (rd == `15` \|\| ((insn >> `22`) & `3`) == `3`)
1910	return `1`;
1911	gen_op_iwmmxt_movq_M0_wRn(wrd);
1912	tmp = tcg_temp_new_i32();
1913	switch ((insn >> `22`) & `3`) {
1914	case `0`:
1915	tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & `7`) << `3`);
1916	tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1917	if (insn & `8`) {
1918	tcg_gen_ext8s_i32(tmp, tmp);
1919	} else {
1920	tcg_gen_andi_i32(tmp, tmp, `0xff`);
1921	}
1922	break;
1923	case `1`:
1924	tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & `3`) << `4`);
1925	tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1926	if (insn & `8`) {
1927	tcg_gen_ext16s_i32(tmp, tmp);
1928	} else {
1929	tcg_gen_andi_i32(tmp, tmp, `0xffff`);
1930	}
1931	break;
1932	case `2`:
1933	tcg_gen_shri_i64(cpu_M0, cpu_M0, (insn & `1`) << `5`);
1934	tcg_gen_extrl_i64_i32(tmp, cpu_M0);
1935	break;
1936	}
1937	store_reg(s, rd, tmp);
1938	break;
1939	case `0x117`: case `0x517`: case `0x917`: case `0xd17`: / TEXTRC /
1940	if ((insn & `0x000ff008`) != `0x0003f000` \|\| ((insn >> `22`) & `3`) == `3`)
1941	return `1`;
1942	tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1943	switch ((insn >> `22`) & `3`) {
1944	case `0`:
1945	tcg_gen_shri_i32(tmp, tmp, ((insn & `7`) << `2`) + `0`);
1946	break;
1947	case `1`:
1948	tcg_gen_shri_i32(tmp, tmp, ((insn & `3`) << `3`) + `4`);
1949	break;
1950	case `2`:
1951	tcg_gen_shri_i32(tmp, tmp, ((insn & `1`) << `4`) + `12`);
1952	break;
1953	}
1954	tcg_gen_shli_i32(tmp, tmp, `28`);
1955	gen_set_nzcv(tmp);
1956	tcg_temp_free_i32(tmp);
1957	break;
1958	case `0x401`: case `0x405`: case `0x409`: case `0x40d`: / TBCST /
1959	if (((insn >> `6`) & `3`) == `3`)
1960	return `1`;
1961	rd = (insn >> `12`) & `0xf`;
1962	wrd = (insn >> `16`) & `0xf`;
1963	tmp = load_reg(s, rd);
1964	switch ((insn >> `6`) & `3`) {
1965	case `0`:
1966	gen_helper_iwmmxt_bcstb(cpu_M0, tmp);
1967	break;
1968	case `1`:
1969	gen_helper_iwmmxt_bcstw(cpu_M0, tmp);
1970	break;
1971	case `2`:
1972	gen_helper_iwmmxt_bcstl(cpu_M0, tmp);
1973	break;
1974	}
1975	tcg_temp_free_i32(tmp);
1976	gen_op_iwmmxt_movq_wRn_M0(wrd);
1977	gen_op_iwmmxt_set_mup();
1978	break;
1979	case `0x113`: case `0x513`: case `0x913`: case `0xd13`: / TANDC /
1980	if ((insn & `0x000ff00f`) != `0x0003f000` \|\| ((insn >> `22`) & `3`) == `3`)
1981	return `1`;
1982	tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
1983	tmp2 = tcg_temp_new_i32();
1984	tcg_gen_mov_i32(tmp2, tmp);
1985	switch ((insn >> `22`) & `3`) {
1986	case `0`:
1987	for (i = `0`; i < `7`; i ++) {
1988	tcg_gen_shli_i32(tmp2, tmp2, `4`);
1989	tcg_gen_and_i32(tmp, tmp, tmp2);
1990	}
1991	break;
1992	case `1`:
1993	for (i = `0`; i < `3`; i ++) {
1994	tcg_gen_shli_i32(tmp2, tmp2, `8`);
1995	tcg_gen_and_i32(tmp, tmp, tmp2);
1996	}
1997	break;
1998	case `2`:
1999	tcg_gen_shli_i32(tmp2, tmp2, `16`);
2000	tcg_gen_and_i32(tmp, tmp, tmp2);
2001	break;
2002	}
2003	gen_set_nzcv(tmp);
2004	tcg_temp_free_i32(tmp2);
2005	tcg_temp_free_i32(tmp);
2006	break;
2007	case `0x01c`: case `0x41c`: case `0x81c`: case `0xc1c`: / WACC /
2008	wrd = (insn >> `12`) & `0xf`;
2009	rd0 = (insn >> `16`) & `0xf`;
2010	gen_op_iwmmxt_movq_M0_wRn(rd0);
2011	switch ((insn >> `22`) & `3`) {
2012	case `0`:
2013	gen_helper_iwmmxt_addcb(cpu_M0, cpu_M0);
2014	break;
2015	case `1`:
2016	gen_helper_iwmmxt_addcw(cpu_M0, cpu_M0);
2017	break;
2018	case `2`:
2019	gen_helper_iwmmxt_addcl(cpu_M0, cpu_M0);
2020	break;
2021	case `3`:
2022	return `1`;
2023	}
2024	gen_op_iwmmxt_movq_wRn_M0(wrd);
2025	gen_op_iwmmxt_set_mup();
2026	break;
2027	case `0x115`: case `0x515`: case `0x915`: case `0xd15`: / TORC /
2028	if ((insn & `0x000ff00f`) != `0x0003f000` \|\| ((insn >> `22`) & `3`) == `3`)
2029	return `1`;
2030	tmp = iwmmxt_load_creg(ARM_IWMMXT_wCASF);
2031	tmp2 = tcg_temp_new_i32();
2032	tcg_gen_mov_i32(tmp2, tmp);
2033	switch ((insn >> `22`) & `3`) {
2034	case `0`:
2035	for (i = `0`; i < `7`; i ++) {
2036	tcg_gen_shli_i32(tmp2, tmp2, `4`);
2037	tcg_gen_or_i32(tmp, tmp, tmp2);
2038	}
2039	break;
2040	case `1`:
2041	for (i = `0`; i < `3`; i ++) {
2042	tcg_gen_shli_i32(tmp2, tmp2, `8`);
2043	tcg_gen_or_i32(tmp, tmp, tmp2);
2044	}
2045	break;
2046	case `2`:
2047	tcg_gen_shli_i32(tmp2, tmp2, `16`);
2048	tcg_gen_or_i32(tmp, tmp, tmp2);
2049	break;
2050	}
2051	gen_set_nzcv(tmp);
2052	tcg_temp_free_i32(tmp2);
2053	tcg_temp_free_i32(tmp);
2054	break;
2055	case `0x103`: case `0x503`: case `0x903`: case `0xd03`: / TMOVMSK /
2056	rd = (insn >> `12`) & `0xf`;
2057	rd0 = (insn >> `16`) & `0xf`;
2058	if ((insn & `0xf`) != `0` \|\| ((insn >> `22`) & `3`) == `3`)
2059	return `1`;
2060	gen_op_iwmmxt_movq_M0_wRn(rd0);
2061	tmp = tcg_temp_new_i32();
2062	switch ((insn >> `22`) & `3`) {
2063	case `0`:
2064	gen_helper_iwmmxt_msbb(tmp, cpu_M0);
2065	break;
2066	case `1`:
2067	gen_helper_iwmmxt_msbw(tmp, cpu_M0);
2068	break;
2069	case `2`:
2070	gen_helper_iwmmxt_msbl(tmp, cpu_M0);
2071	break;
2072	}
2073	store_reg(s, rd, tmp);
2074	break;
2075	case `0x106`: case `0x306`: case `0x506`: case `0x706`: / WCMPGT /
2076	case `0x906`: case `0xb06`: case `0xd06`: case `0xf06`:
2077	wrd = (insn >> `12`) & `0xf`;
2078	rd0 = (insn >> `16`) & `0xf`;
2079	rd1 = (insn >> `0`) & `0xf`;
2080	gen_op_iwmmxt_movq_M0_wRn(rd0);
2081	switch ((insn >> `22`) & `3`) {
2082	case `0`:
2083	if (insn & (`1` << `21`))
2084	gen_op_iwmmxt_cmpgtsb_M0_wRn(rd1);
2085	else
2086	gen_op_iwmmxt_cmpgtub_M0_wRn(rd1);
2087	break;
2088	case `1`:
2089	if (insn & (`1` << `21`))
2090	gen_op_iwmmxt_cmpgtsw_M0_wRn(rd1);
2091	else
2092	gen_op_iwmmxt_cmpgtuw_M0_wRn(rd1);
2093	break;
2094	case `2`:
2095	if (insn & (`1` << `21`))
2096	gen_op_iwmmxt_cmpgtsl_M0_wRn(rd1);
2097	else
2098	gen_op_iwmmxt_cmpgtul_M0_wRn(rd1);
2099	break;
2100	case `3`:
2101	return `1`;
2102	}
2103	gen_op_iwmmxt_movq_wRn_M0(wrd);
2104	gen_op_iwmmxt_set_mup();
2105	gen_op_iwmmxt_set_cup();
2106	break;
2107	case `0x00e`: case `0x20e`: case `0x40e`: case `0x60e`: / WUNPCKEL /
2108	case `0x80e`: case `0xa0e`: case `0xc0e`: case `0xe0e`:
2109	wrd = (insn >> `12`) & `0xf`;
2110	rd0 = (insn >> `16`) & `0xf`;
2111	gen_op_iwmmxt_movq_M0_wRn(rd0);
2112	switch ((insn >> `22`) & `3`) {
2113	case `0`:
2114	if (insn & (`1` << `21`))
2115	gen_op_iwmmxt_unpacklsb_M0();
2116	else
2117	gen_op_iwmmxt_unpacklub_M0();
2118	break;
2119	case `1`:
2120	if (insn & (`1` << `21`))
2121	gen_op_iwmmxt_unpacklsw_M0();
2122	else
2123	gen_op_iwmmxt_unpackluw_M0();
2124	break;
2125	case `2`:
2126	if (insn & (`1` << `21`))
2127	gen_op_iwmmxt_unpacklsl_M0();
2128	else
2129	gen_op_iwmmxt_unpacklul_M0();
2130	break;
2131	case `3`:
2132	return `1`;
2133	}
2134	gen_op_iwmmxt_movq_wRn_M0(wrd);
2135	gen_op_iwmmxt_set_mup();
2136	gen_op_iwmmxt_set_cup();
2137	break;
2138	case `0x00c`: case `0x20c`: case `0x40c`: case `0x60c`: / WUNPCKEH /
2139	case `0x80c`: case `0xa0c`: case `0xc0c`: case `0xe0c`:
2140	wrd = (insn >> `12`) & `0xf`;
2141	rd0 = (insn >> `16`) & `0xf`;
2142	gen_op_iwmmxt_movq_M0_wRn(rd0);
2143	switch ((insn >> `22`) & `3`) {
2144	case `0`:
2145	if (insn & (`1` << `21`))
2146	gen_op_iwmmxt_unpackhsb_M0();
2147	else
2148	gen_op_iwmmxt_unpackhub_M0();
2149	break;
2150	case `1`:
2151	if (insn & (`1` << `21`))
2152	gen_op_iwmmxt_unpackhsw_M0();
2153	else
2154	gen_op_iwmmxt_unpackhuw_M0();
2155	break;
2156	case `2`:
2157	if (insn & (`1` << `21`))
2158	gen_op_iwmmxt_unpackhsl_M0();
2159	else
2160	gen_op_iwmmxt_unpackhul_M0();
2161	break;
2162	case `3`:
2163	return `1`;
2164	}
2165	gen_op_iwmmxt_movq_wRn_M0(wrd);
2166	gen_op_iwmmxt_set_mup();
2167	gen_op_iwmmxt_set_cup();
2168	break;
2169	case `0x204`: case `0x604`: case `0xa04`: case `0xe04`: / WSRL /
2170	case `0x214`: case `0x614`: case `0xa14`: case `0xe14`:
2171	if (((insn >> `22`) & `3`) == `0`)
2172	return `1`;
2173	wrd = (insn >> `12`) & `0xf`;
2174	rd0 = (insn >> `16`) & `0xf`;
2175	gen_op_iwmmxt_movq_M0_wRn(rd0);
2176	tmp = tcg_temp_new_i32();
2177	if (gen_iwmmxt_shift(insn, `0xff`, tmp)) {
2178	tcg_temp_free_i32(tmp);
2179	return `1`;
2180	}
2181	switch ((insn >> `22`) & `3`) {
2182	case `1`:
2183	gen_helper_iwmmxt_srlw(cpu_M0, cpu_env, cpu_M0, tmp);
2184	break;
2185	case `2`:
2186	gen_helper_iwmmxt_srll(cpu_M0, cpu_env, cpu_M0, tmp);
2187	break;
2188	case `3`:
2189	gen_helper_iwmmxt_srlq(cpu_M0, cpu_env, cpu_M0, tmp);
2190	break;
2191	}
2192	tcg_temp_free_i32(tmp);
2193	gen_op_iwmmxt_movq_wRn_M0(wrd);
2194	gen_op_iwmmxt_set_mup();
2195	gen_op_iwmmxt_set_cup();
2196	break;
2197	case `0x004`: case `0x404`: case `0x804`: case `0xc04`: / WSRA /
2198	case `0x014`: case `0x414`: case `0x814`: case `0xc14`:
2199	if (((insn >> `22`) & `3`) == `0`)
2200	return `1`;
2201	wrd = (insn >> `12`) & `0xf`;
2202	rd0 = (insn >> `16`) & `0xf`;
2203	gen_op_iwmmxt_movq_M0_wRn(rd0);
2204	tmp = tcg_temp_new_i32();
2205	if (gen_iwmmxt_shift(insn, `0xff`, tmp)) {
2206	tcg_temp_free_i32(tmp);
2207	return `1`;
2208	}
2209	switch ((insn >> `22`) & `3`) {
2210	case `1`:
2211	gen_helper_iwmmxt_sraw(cpu_M0, cpu_env, cpu_M0, tmp);
2212	break;
2213	case `2`:
2214	gen_helper_iwmmxt_sral(cpu_M0, cpu_env, cpu_M0, tmp);
2215	break;
2216	case `3`:
2217	gen_helper_iwmmxt_sraq(cpu_M0, cpu_env, cpu_M0, tmp);
2218	break;
2219	}
2220	tcg_temp_free_i32(tmp);
2221	gen_op_iwmmxt_movq_wRn_M0(wrd);
2222	gen_op_iwmmxt_set_mup();
2223	gen_op_iwmmxt_set_cup();
2224	break;
2225	case `0x104`: case `0x504`: case `0x904`: case `0xd04`: / WSLL /
2226	case `0x114`: case `0x514`: case `0x914`: case `0xd14`:
2227	if (((insn >> `22`) & `3`) == `0`)
2228	return `1`;
2229	wrd = (insn >> `12`) & `0xf`;
2230	rd0 = (insn >> `16`) & `0xf`;
2231	gen_op_iwmmxt_movq_M0_wRn(rd0);
2232	tmp = tcg_temp_new_i32();
2233	if (gen_iwmmxt_shift(insn, `0xff`, tmp)) {
2234	tcg_temp_free_i32(tmp);
2235	return `1`;
2236	}
2237	switch ((insn >> `22`) & `3`) {
2238	case `1`:
2239	gen_helper_iwmmxt_sllw(cpu_M0, cpu_env, cpu_M0, tmp);
2240	break;
2241	case `2`:
2242	gen_helper_iwmmxt_slll(cpu_M0, cpu_env, cpu_M0, tmp);
2243	break;
2244	case `3`:
2245	gen_helper_iwmmxt_sllq(cpu_M0, cpu_env, cpu_M0, tmp);
2246	break;
2247	}
2248	tcg_temp_free_i32(tmp);
2249	gen_op_iwmmxt_movq_wRn_M0(wrd);
2250	gen_op_iwmmxt_set_mup();
2251	gen_op_iwmmxt_set_cup();
2252	break;
2253	case `0x304`: case `0x704`: case `0xb04`: case `0xf04`: / WROR /
2254	case `0x314`: case `0x714`: case `0xb14`: case `0xf14`:
2255	if (((insn >> `22`) & `3`) == `0`)
2256	return `1`;
2257	wrd = (insn >> `12`) & `0xf`;
2258	rd0 = (insn >> `16`) & `0xf`;
2259	gen_op_iwmmxt_movq_M0_wRn(rd0);
2260	tmp = tcg_temp_new_i32();
2261	switch ((insn >> `22`) & `3`) {
2262	case `1`:
2263	if (gen_iwmmxt_shift(insn, `0xf`, tmp)) {
2264	tcg_temp_free_i32(tmp);
2265	return `1`;
2266	}
2267	gen_helper_iwmmxt_rorw(cpu_M0, cpu_env, cpu_M0, tmp);
2268	break;
2269	case `2`:
2270	if (gen_iwmmxt_shift(insn, `0x1f`, tmp)) {
2271	tcg_temp_free_i32(tmp);
2272	return `1`;
2273	}
2274	gen_helper_iwmmxt_rorl(cpu_M0, cpu_env, cpu_M0, tmp);
2275	break;
2276	case `3`:
2277	if (gen_iwmmxt_shift(insn, `0x3f`, tmp)) {
2278	tcg_temp_free_i32(tmp);
2279	return `1`;
2280	}
2281	gen_helper_iwmmxt_rorq(cpu_M0, cpu_env, cpu_M0, tmp);
2282	break;
2283	}
2284	tcg_temp_free_i32(tmp);
2285	gen_op_iwmmxt_movq_wRn_M0(wrd);
2286	gen_op_iwmmxt_set_mup();
2287	gen_op_iwmmxt_set_cup();
2288	break;
2289	case `0x116`: case `0x316`: case `0x516`: case `0x716`: / WMIN /
2290	case `0x916`: case `0xb16`: case `0xd16`: case `0xf16`:
2291	wrd = (insn >> `12`) & `0xf`;
2292	rd0 = (insn >> `16`) & `0xf`;
2293	rd1 = (insn >> `0`) & `0xf`;
2294	gen_op_iwmmxt_movq_M0_wRn(rd0);
2295	switch ((insn >> `22`) & `3`) {
2296	case `0`:
2297	if (insn & (`1` << `21`))
2298	gen_op_iwmmxt_minsb_M0_wRn(rd1);
2299	else
2300	gen_op_iwmmxt_minub_M0_wRn(rd1);
2301	break;
2302	case `1`:
2303	if (insn & (`1` << `21`))
2304	gen_op_iwmmxt_minsw_M0_wRn(rd1);
2305	else
2306	gen_op_iwmmxt_minuw_M0_wRn(rd1);
2307	break;
2308	case `2`:
2309	if (insn & (`1` << `21`))
2310	gen_op_iwmmxt_minsl_M0_wRn(rd1);
2311	else
2312	gen_op_iwmmxt_minul_M0_wRn(rd1);
2313	break;
2314	case `3`:
2315	return `1`;
2316	}
2317	gen_op_iwmmxt_movq_wRn_M0(wrd);
2318	gen_op_iwmmxt_set_mup();
2319	break;
2320	case `0x016`: case `0x216`: case `0x416`: case `0x616`: / WMAX /
2321	case `0x816`: case `0xa16`: case `0xc16`: case `0xe16`:
2322	wrd = (insn >> `12`) & `0xf`;
2323	rd0 = (insn >> `16`) & `0xf`;
2324	rd1 = (insn >> `0`) & `0xf`;
2325	gen_op_iwmmxt_movq_M0_wRn(rd0);
2326	switch ((insn >> `22`) & `3`) {
2327	case `0`:
2328	if (insn & (`1` << `21`))
2329	gen_op_iwmmxt_maxsb_M0_wRn(rd1);
2330	else
2331	gen_op_iwmmxt_maxub_M0_wRn(rd1);
2332	break;
2333	case `1`:
2334	if (insn & (`1` << `21`))
2335	gen_op_iwmmxt_maxsw_M0_wRn(rd1);
2336	else
2337	gen_op_iwmmxt_maxuw_M0_wRn(rd1);
2338	break;
2339	case `2`:
2340	if (insn & (`1` << `21`))
2341	gen_op_iwmmxt_maxsl_M0_wRn(rd1);
2342	else
2343	gen_op_iwmmxt_maxul_M0_wRn(rd1);
2344	break;
2345	case `3`:
2346	return `1`;
2347	}
2348	gen_op_iwmmxt_movq_wRn_M0(wrd);
2349	gen_op_iwmmxt_set_mup();
2350	break;
2351	case `0x002`: case `0x102`: case `0x202`: case `0x302`: / WALIGNI /
2352	case `0x402`: case `0x502`: case `0x602`: case `0x702`:
2353	wrd = (insn >> `12`) & `0xf`;
2354	rd0 = (insn >> `16`) & `0xf`;
2355	rd1 = (insn >> `0`) & `0xf`;
2356	gen_op_iwmmxt_movq_M0_wRn(rd0);
2357	tmp = tcg_const_i32((insn >> `20`) & `3`);
2358	iwmmxt_load_reg(cpu_V1, rd1);
2359	gen_helper_iwmmxt_align(cpu_M0, cpu_M0, cpu_V1, tmp);
2360	tcg_temp_free_i32(tmp);
2361	gen_op_iwmmxt_movq_wRn_M0(wrd);
2362	gen_op_iwmmxt_set_mup();
2363	break;
2364	case `0x01a`: case `0x11a`: case `0x21a`: case `0x31a`: / WSUB /
2365	case `0x41a`: case `0x51a`: case `0x61a`: case `0x71a`:
2366	case `0x81a`: case `0x91a`: case `0xa1a`: case `0xb1a`:
2367	case `0xc1a`: case `0xd1a`: case `0xe1a`: case `0xf1a`:
2368	wrd = (insn >> `12`) & `0xf`;
2369	rd0 = (insn >> `16`) & `0xf`;
2370	rd1 = (insn >> `0`) & `0xf`;
2371	gen_op_iwmmxt_movq_M0_wRn(rd0);
2372	switch ((insn >> `20`) & `0xf`) {
2373	case `0x0`:
2374	gen_op_iwmmxt_subnb_M0_wRn(rd1);
2375	break;
2376	case `0x1`:
2377	gen_op_iwmmxt_subub_M0_wRn(rd1);
2378	break;
2379	case `0x3`:
2380	gen_op_iwmmxt_subsb_M0_wRn(rd1);
2381	break;
2382	case `0x4`:
2383	gen_op_iwmmxt_subnw_M0_wRn(rd1);
2384	break;
2385	case `0x5`:
2386	gen_op_iwmmxt_subuw_M0_wRn(rd1);
2387	break;
2388	case `0x7`:
2389	gen_op_iwmmxt_subsw_M0_wRn(rd1);
2390	break;
2391	case `0x8`:
2392	gen_op_iwmmxt_subnl_M0_wRn(rd1);
2393	break;
2394	case `0x9`:
2395	gen_op_iwmmxt_subul_M0_wRn(rd1);
2396	break;
2397	case `0xb`:
2398	gen_op_iwmmxt_subsl_M0_wRn(rd1);
2399	break;
2400	default:
2401	return `1`;
2402	}
2403	gen_op_iwmmxt_movq_wRn_M0(wrd);
2404	gen_op_iwmmxt_set_mup();
2405	gen_op_iwmmxt_set_cup();
2406	break;
2407	case `0x01e`: case `0x11e`: case `0x21e`: case `0x31e`: / WSHUFH /
2408	case `0x41e`: case `0x51e`: case `0x61e`: case `0x71e`:
2409	case `0x81e`: case `0x91e`: case `0xa1e`: case `0xb1e`:
2410	case `0xc1e`: case `0xd1e`: case `0xe1e`: case `0xf1e`:
2411	wrd = (insn >> `12`) & `0xf`;
2412	rd0 = (insn >> `16`) & `0xf`;
2413	gen_op_iwmmxt_movq_M0_wRn(rd0);
2414	tmp = tcg_const_i32(((insn >> `16`) & `0xf0`) \| (insn & `0x0f`));
2415	gen_helper_iwmmxt_shufh(cpu_M0, cpu_env, cpu_M0, tmp);
2416	tcg_temp_free_i32(tmp);
2417	gen_op_iwmmxt_movq_wRn_M0(wrd);
2418	gen_op_iwmmxt_set_mup();
2419	gen_op_iwmmxt_set_cup();
2420	break;
2421	case `0x018`: case `0x118`: case `0x218`: case `0x318`: / WADD /
2422	case `0x418`: case `0x518`: case `0x618`: case `0x718`:
2423	case `0x818`: case `0x918`: case `0xa18`: case `0xb18`:
2424	case `0xc18`: case `0xd18`: case `0xe18`: case `0xf18`:
2425	wrd = (insn >> `12`) & `0xf`;
2426	rd0 = (insn >> `16`) & `0xf`;
2427	rd1 = (insn >> `0`) & `0xf`;
2428	gen_op_iwmmxt_movq_M0_wRn(rd0);
2429	switch ((insn >> `20`) & `0xf`) {
2430	case `0x0`:
2431	gen_op_iwmmxt_addnb_M0_wRn(rd1);
2432	break;
2433	case `0x1`:
2434	gen_op_iwmmxt_addub_M0_wRn(rd1);
2435	break;
2436	case `0x3`:
2437	gen_op_iwmmxt_addsb_M0_wRn(rd1);
2438	break;
2439	case `0x4`:
2440	gen_op_iwmmxt_addnw_M0_wRn(rd1);
2441	break;
2442	case `0x5`:
2443	gen_op_iwmmxt_adduw_M0_wRn(rd1);
2444	break;
2445	case `0x7`:
2446	gen_op_iwmmxt_addsw_M0_wRn(rd1);
2447	break;
2448	case `0x8`:
2449	gen_op_iwmmxt_addnl_M0_wRn(rd1);
2450	break;
2451	case `0x9`:
2452	gen_op_iwmmxt_addul_M0_wRn(rd1);
2453	break;
2454	case `0xb`:
2455	gen_op_iwmmxt_addsl_M0_wRn(rd1);
2456	break;
2457	default:
2458	return `1`;
2459	}
2460	gen_op_iwmmxt_movq_wRn_M0(wrd);
2461	gen_op_iwmmxt_set_mup();
2462	gen_op_iwmmxt_set_cup();
2463	break;
2464	case `0x008`: case `0x108`: case `0x208`: case `0x308`: / WPACK /
2465	case `0x408`: case `0x508`: case `0x608`: case `0x708`:
2466	case `0x808`: case `0x908`: case `0xa08`: case `0xb08`:
2467	case `0xc08`: case `0xd08`: case `0xe08`: case `0xf08`:
2468	if (!(insn & (`1` << `20`)) \|\| ((insn >> `22`) & `3`) == `0`)
2469	return `1`;
2470	wrd = (insn >> `12`) & `0xf`;
2471	rd0 = (insn >> `16`) & `0xf`;
2472	rd1 = (insn >> `0`) & `0xf`;
2473	gen_op_iwmmxt_movq_M0_wRn(rd0);
2474	switch ((insn >> `22`) & `3`) {
2475	case `1`:
2476	if (insn & (`1` << `21`))
2477	gen_op_iwmmxt_packsw_M0_wRn(rd1);
2478	else
2479	gen_op_iwmmxt_packuw_M0_wRn(rd1);
2480	break;
2481	case `2`:
2482	if (insn & (`1` << `21`))
2483	gen_op_iwmmxt_packsl_M0_wRn(rd1);
2484	else
2485	gen_op_iwmmxt_packul_M0_wRn(rd1);
2486	break;
2487	case `3`:
2488	if (insn & (`1` << `21`))
2489	gen_op_iwmmxt_packsq_M0_wRn(rd1);
2490	else
2491	gen_op_iwmmxt_packuq_M0_wRn(rd1);
2492	break;
2493	}
2494	gen_op_iwmmxt_movq_wRn_M0(wrd);
2495	gen_op_iwmmxt_set_mup();
2496	gen_op_iwmmxt_set_cup();
2497	break;
2498	case `0x201`: case `0x203`: case `0x205`: case `0x207`:
2499	case `0x209`: case `0x20b`: case `0x20d`: case `0x20f`:
2500	case `0x211`: case `0x213`: case `0x215`: case `0x217`:
2501	case `0x219`: case `0x21b`: case `0x21d`: case `0x21f`:
2502	wrd = (insn >> `5`) & `0xf`;
2503	rd0 = (insn >> `12`) & `0xf`;
2504	rd1 = (insn >> `0`) & `0xf`;
2505	if (rd0 == `0xf` \|\| rd1 == `0xf`)
2506	return `1`;
2507	gen_op_iwmmxt_movq_M0_wRn(wrd);
2508	tmp = load_reg(s, rd0);
2509	tmp2 = load_reg(s, rd1);
2510	switch ((insn >> `16`) & `0xf`) {
2511	case `0x0`: / TMIA /
2512	gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2513	break;
2514	case `0x8`: / TMIAPH /
2515	gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2516	break;
2517	case `0xc`: case `0xd`: case `0xe`: case `0xf`: / TMIAxy /
2518	if (insn & (`1` << `16`))
2519	tcg_gen_shri_i32(tmp, tmp, `16`);
2520	if (insn & (`1` << `17`))
2521	tcg_gen_shri_i32(tmp2, tmp2, `16`);
2522	gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2523	break;
2524	default:
2525	tcg_temp_free_i32(tmp2);
2526	tcg_temp_free_i32(tmp);
2527	return `1`;
2528	}
2529	tcg_temp_free_i32(tmp2);
2530	tcg_temp_free_i32(tmp);
2531	gen_op_iwmmxt_movq_wRn_M0(wrd);
2532	gen_op_iwmmxt_set_mup();
2533	break;
2534	default:
2535	return `1`;
2536	}
2537
2538	return `0`;
2539	}
2540
2541	/ Disassemble an XScale DSP instruction. Returns nonzero if an error occurred*
2542	(ie. an undefined instruction). /*
2543	static int disas_dsp_insn(DisasContext *s, uint32_t insn)
2544	{
2545	int acc, rd0, rd1, rdhi, rdlo;
2546	TCGv_i32 tmp, tmp2;
2547
2548	if ((insn & `0x0ff00f10`) == `0x0e200010`) {
2549	/ Multiply with Internal Accumulate Format /
2550	rd0 = (insn >> `12`) & `0xf`;
2551	rd1 = insn & `0xf`;
2552	acc = (insn >> `5`) & `7`;
2553
2554	if (acc != `0`)
2555	return `1`;
2556
2557	tmp = load_reg(s, rd0);
2558	tmp2 = load_reg(s, rd1);
2559	switch ((insn >> `16`) & `0xf`) {
2560	case `0x0`: / MIA /
2561	gen_helper_iwmmxt_muladdsl(cpu_M0, cpu_M0, tmp, tmp2);
2562	break;
2563	case `0x8`: / MIAPH /
2564	gen_helper_iwmmxt_muladdsw(cpu_M0, cpu_M0, tmp, tmp2);
2565	break;
2566	case `0xc`: / MIABB /
2567	case `0xd`: / MIABT /
2568	case `0xe`: / MIATB /
2569	case `0xf`: / MIATT /
2570	if (insn & (`1` << `16`))
2571	tcg_gen_shri_i32(tmp, tmp, `16`);
2572	if (insn & (`1` << `17`))
2573	tcg_gen_shri_i32(tmp2, tmp2, `16`);
2574	gen_helper_iwmmxt_muladdswl(cpu_M0, cpu_M0, tmp, tmp2);
2575	break;
2576	default:
2577	return `1`;
2578	}
2579	tcg_temp_free_i32(tmp2);
2580	tcg_temp_free_i32(tmp);
2581
2582	gen_op_iwmmxt_movq_wRn_M0(acc);
2583	return `0`;
2584	}
2585
2586	if ((insn & `0x0fe00ff8`) == `0x0c400000`) {
2587	/ Internal Accumulator Access Format /
2588	rdhi = (insn >> `16`) & `0xf`;
2589	rdlo = (insn >> `12`) & `0xf`;
2590	acc = insn & `7`;
2591
2592	if (acc != `0`)
2593	return `1`;
2594
2595	if (insn & ARM_CP_RW_BIT) { / MRA /
2596	iwmmxt_load_reg(cpu_V0, acc);
2597	tcg_gen_extrl_i64_i32(cpu_R[rdlo], cpu_V0);
2598	tcg_gen_extrh_i64_i32(cpu_R[rdhi], cpu_V0);
2599	tcg_gen_andi_i32(cpu_R[rdhi], cpu_R[rdhi], (`1` << (`40` - `32`)) - `1`);
2600	} else { / MAR /
2601	tcg_gen_concat_i32_i64(cpu_V0, cpu_R[rdlo], cpu_R[rdhi]);
2602	iwmmxt_store_reg(cpu_V0, acc);
2603	}
2604	return `0`;
2605	}
2606
2607	return `1`;
2608	}
2609
/*
 * Helpers for extracting VFP register numbers from an instruction word.
 *
 * VFP_REG_SHR(x, n): shift x right by n bits when n is positive,
 * otherwise shift it left by -n bits.
 */
#define VFP_REG_SHR(x, n) (((n) > 0) ? (x) >> (n) : (x) << -(n))

/*
 * Single-precision register number: the four bits starting at 'bigbit'
 * become bits [4:1] of the result and bit 'smallbit' becomes bit 0.
 */
#define VFP_SREG(insn, bigbit, smallbit) \
    ((VFP_REG_SHR(insn, (bigbit) - 1) & 0x1e) | (((insn) >> (smallbit)) & 1))

/*
 * Double-precision register number, stored into 'reg'.
 * With ARM_FEATURE_VFP3 bit 'smallbit' supplies bit 4 of the register
 * number; without it that bit must be clear, otherwise the enclosing
 * function returns 1.  Relies on a DisasContext named 's' being in
 * scope at the expansion site.
 */
#define VFP_DREG(reg, insn, bigbit, smallbit) do { \
    if (arm_dc_feature(s, ARM_FEATURE_VFP3)) { \
        reg = (((insn) >> (bigbit)) & 0x0f) \
              | (((insn) >> ((smallbit) - 4)) & 0x10); \
    } else { \
        if ((insn) & (1 << (smallbit))) { \
            return 1; \
        } \
        reg = ((insn) >> (bigbit)) & 0x0f; \
    }} while (0)

/* Field positions for the D, N and M register operands. */
#define VFP_SREG_D(insn) VFP_SREG(insn, 12, 22)
#define VFP_DREG_D(reg, insn) VFP_DREG(reg, insn, 12, 22)
#define VFP_SREG_N(insn) VFP_SREG(insn, 16,  7)
#define VFP_DREG_N(reg, insn) VFP_DREG(reg, insn, 16,  7)
#define VFP_SREG_M(insn) VFP_SREG(insn,  0,  5)
#define VFP_DREG_M(reg, insn) VFP_DREG(reg, insn,  0,  5)
2629
2630	static void gen_neon_dup_low16(TCGv_i32 var)
2631	{
2632	TCGv_i32 tmp = tcg_temp_new_i32();
2633	tcg_gen_ext16u_i32(var, var);
2634	tcg_gen_shli_i32(tmp, var, `16`);
2635	tcg_gen_or_i32(var, var, tmp);
2636	tcg_temp_free_i32(tmp);
2637	}
2638
2639	static void gen_neon_dup_high16(TCGv_i32 var)
2640	{
2641	TCGv_i32 tmp = tcg_temp_new_i32();
2642	tcg_gen_andi_i32(var, var, `0xffff0000`);
2643	tcg_gen_shri_i32(tmp, var, `16`);
2644	tcg_gen_or_i32(var, var, tmp);
2645	tcg_temp_free_i32(tmp);
2646	}
2647
2648	/*
2649	* Disassemble a VFP instruction. Returns nonzero if an error occurred
2650	* (ie. an undefined instruction).
2651	*/
2652	static int disas_vfp_insn(DisasContext *s, uint32_t insn)
2653	{
2654	if (!arm_dc_feature(s, ARM_FEATURE_VFP)) {
2655	return `1`;
2656	}
2657
2658	/*
2659	* If the decodetree decoder handles this insn it will always
2660	* emit code to either execute the insn or generate an appropriate
2661	* exception; so we don't need to ever return non-zero to tell
2662	* the calling code to emit an UNDEF exception.
2663	*/
2664	if (extract32(insn, `28`, `4`) == `0xf`) {
2665	if (disas_vfp_uncond(s, insn)) {
2666	return `0`;
2667	}
2668	} else {
2669	if (disas_vfp(s, insn)) {
2670	return `0`;
2671	}
2672	}
2673	/ If the decodetree decoder didn't handle this insn, it must be UNDEF /
2674	return `1`;
2675	}
2676
2677	static inline bool use_goto_tb(DisasContext *s, target_ulong dest)
2678	{
2679	#ifndef CONFIG_USER_ONLY
2680	return (s->base.tb->pc & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK) \|\|
2681	((s->base.pc_next - `1`) & TARGET_PAGE_MASK) == (dest & TARGET_PAGE_MASK);
2682	#else
2683	return true;
2684	#endif
2685	}
2686
/* Thin wrapper: emit the TCG lookup-and-goto-ptr operation. */
static void gen_goto_ptr(void)
{
    tcg_gen_lookup_and_goto_ptr();
}
2691
2692	/ This will end the TB but doesn't guarantee we'll return to*
2693	* cpu_loop_exec. Any live exit_requests will be processed as we
2694	* enter the next TB.
2695	*/
2696	static void gen_goto_tb(DisasContext s, int* n, target_ulong dest)
2697	{
2698	if (use_goto_tb(s, dest)) {
2699	tcg_gen_goto_tb(n);
2700	gen_set_pc_im(s, dest);
2701	tcg_gen_exit_tb(s->base.tb, n);
2702	} else {
2703	gen_set_pc_im(s, dest);
2704	gen_goto_ptr();
2705	}
2706	s->base.is_jmp = DISAS_NORETURN;
2707	}
2708
2709	static inline void gen_jmp (DisasContext *s, uint32_t dest)
2710	{
2711	if (unlikely(is_singlestepping(s))) {
2712	/ An indirect jump so that we still trigger the debug exception. /
2713	gen_set_pc_im(s, dest);
2714	s->base.is_jmp = DISAS_JUMP;
2715	} else {
2716	gen_goto_tb(s, `0`, dest);
2717	}
2718	}
2719
2720	static inline void gen_mulxy(TCGv_i32 t0, TCGv_i32 t1, int x, int y)
2721	{
2722	if (x)
2723	tcg_gen_sari_i32(t0, t0, `16`);
2724	else
2725	gen_sxth(t0);
2726	if (y)
2727	tcg_gen_sari_i32(t1, t1, `16`);
2728	else
2729	gen_sxth(t1);
2730	tcg_gen_mul_i32(t0, t0, t1);
2731	}
2732
2733	/ Return the mask of PSR bits set by a MSR instruction. /
2734	static uint32_t msr_mask(DisasContext s, int* flags, int spsr)
2735	{
2736	uint32_t mask;
2737
2738	mask = `0`;
2739	if (flags & (`1` << `0`))
2740	mask \|= `0xff`;
2741	if (flags & (`1` << `1`))
2742	mask \|= `0xff00`;
2743	if (flags & (`1` << `2`))
2744	mask \|= `0xff0000`;
2745	if (flags & (`1` << `3`))
2746	mask \|= `0xff000000`;
2747
2748	/ Mask out undefined bits. /
2749	mask &= ~CPSR_RESERVED;
2750	if (!arm_dc_feature(s, ARM_FEATURE_V4T)) {
2751	mask &= ~CPSR_T;
2752	}
2753	if (!arm_dc_feature(s, ARM_FEATURE_V5)) {
2754	mask &= ~CPSR_Q; / V5TE in reality/
2755	}
2756	if (!arm_dc_feature(s, ARM_FEATURE_V6)) {
2757	mask &= ~(CPSR_E \| CPSR_GE);
2758	}
2759	if (!arm_dc_feature(s, ARM_FEATURE_THUMB2)) {
2760	mask &= ~CPSR_IT;
2761	}
2762	/ Mask out execution state and reserved bits. /
2763	if (!spsr) {
2764	mask &= ~(CPSR_EXEC \| CPSR_RESERVED);
2765	}
2766	/ Mask out privileged bits. /
2767	if (IS_USER(s))
2768	mask &= CPSR_USER;
2769	return mask;
2770	}
2771
2772	/ Returns nonzero if access to the PSR is not permitted. Marks t0 as dead. /
2773	static int gen_set_psr(DisasContext s, uint32_t mask, int* spsr, TCGv_i32 t0)
2774	{
2775	TCGv_i32 tmp;
2776	if (spsr) {
2777	/ ??? This is also undefined in system mode. /
2778	if (IS_USER(s))
2779	return `1`;
2780
2781	tmp = load_cpu_field(spsr);
2782	tcg_gen_andi_i32(tmp, tmp, ~mask);
2783	tcg_gen_andi_i32(t0, t0, mask);
2784	tcg_gen_or_i32(tmp, tmp, t0);
2785	store_cpu_field(tmp, spsr);
2786	} else {
2787	gen_set_cpsr(t0, mask);
2788	}
2789	tcg_temp_free_i32(t0);
2790	gen_lookup_tb(s);
2791	return `0`;
2792	}
2793
2794	/ Returns nonzero if access to the PSR is not permitted. /
2795	static int gen_set_psr_im(DisasContext s, uint32_t mask, int* spsr, uint32_t val)
2796	{
2797	TCGv_i32 tmp;
2798	tmp = tcg_temp_new_i32();
2799	tcg_gen_movi_i32(tmp, val);
2800	return gen_set_psr(s, mask, spsr, tmp);
2801	}
2802
2803	static bool msr_banked_access_decode(DisasContext s, int* r, int sysm, int rn,
2804	int tgtmode, int* *regno)
2805	{
2806	/ Decode the r and sysm fields of MSR/MRS banked accesses into*
2807	* the target mode and register number, and identify the various
2808	* unpredictable cases.
2809	* MSR (banked) and MRS (banked) are CONSTRAINED UNPREDICTABLE if:
2810	* + executed in user mode
2811	* + using R15 as the src/dest register
2812	* + accessing an unimplemented register
2813	* + accessing a register that's inaccessible at current PL/security state*
2814	* + accessing a register that you could access with a different insn
2815	* We choose to UNDEF in all these cases.
2816	* Since we don't know which of the various AArch32 modes we are in
2817	* we have to defer some checks to runtime.
2818	* Accesses to Monitor mode registers from Secure EL1 (which implies
2819	* that EL3 is AArch64) must trap to EL3.
2820	*
2821	* If the access checks fail this function will emit code to take
2822	* an exception and return false. Otherwise it will return true,
2823	* and set tgtmode and regno appropriately.
2824	*/
2825	int exc_target = default_exception_el(s);
2826
2827	/ These instructions are present only in ARMv8, or in ARMv7 with the*
2828	* Virtualization Extensions.
2829	*/
2830	if (!arm_dc_feature(s, ARM_FEATURE_V8) &&
2831	!arm_dc_feature(s, ARM_FEATURE_EL2)) {
2832	goto undef;
2833	}
2834
2835	if (IS_USER(s) \|\| rn == `15`) {
2836	goto undef;
2837	}
2838
2839	/ The table in the v8 ARM ARM section F5.2.3 describes the encoding*
2840	* of registers into (r, sysm).
2841	*/
2842	if (r) {
2843	/ SPSRs for other modes /
2844	switch (sysm) {
2845	case `0xe`: / SPSR_fiq /
2846	*tgtmode = ARM_CPU_MODE_FIQ;
2847	break;
2848	case `0x10`: / SPSR_irq /
2849	*tgtmode = ARM_CPU_MODE_IRQ;
2850	break;
2851	case `0x12`: / SPSR_svc /
2852	*tgtmode = ARM_CPU_MODE_SVC;
2853	break;
2854	case `0x14`: / SPSR_abt /
2855	*tgtmode = ARM_CPU_MODE_ABT;
2856	break;
2857	case `0x16`: / SPSR_und /
2858	*tgtmode = ARM_CPU_MODE_UND;
2859	break;
2860	case `0x1c`: / SPSR_mon /
2861	*tgtmode = ARM_CPU_MODE_MON;
2862	break;
2863	case `0x1e`: / SPSR_hyp /
2864	*tgtmode = ARM_CPU_MODE_HYP;
2865	break;
2866	default: / unallocated /
2867	goto undef;
2868	}
2869	/ We arbitrarily assign SPSR a register number of 16. /
2870	*regno = `16`;
2871	} else {
2872	/ general purpose registers for other modes /
2873	switch (sysm) {
2874	case `0x0` ... `0x6`: / 0b00xxx : r8_usr ... r14_usr /
2875	*tgtmode = ARM_CPU_MODE_USR;
2876	*regno = sysm + `8`;
2877	break;
2878	case `0x8` ... `0xe`: / 0b01xxx : r8_fiq ... r14_fiq /
2879	*tgtmode = ARM_CPU_MODE_FIQ;
2880	*regno = sysm;
2881	break;
2882	case `0x10` ... `0x11`: / 0b1000x : r14_irq, r13_irq /
2883	*tgtmode = ARM_CPU_MODE_IRQ;
2884	*regno = sysm & `1` ? `13` : `14`;
2885	break;
2886	case `0x12` ... `0x13`: / 0b1001x : r14_svc, r13_svc /
2887	*tgtmode = ARM_CPU_MODE_SVC;
2888	*regno = sysm & `1` ? `13` : `14`;
2889	break;
2890	case `0x14` ... `0x15`: / 0b1010x : r14_abt, r13_abt /
2891	*tgtmode = ARM_CPU_MODE_ABT;
2892	*regno = sysm & `1` ? `13` : `14`;
2893	break;
2894	case `0x16` ... `0x17`: / 0b1011x : r14_und, r13_und /
2895	*tgtmode = ARM_CPU_MODE_UND;
2896	*regno = sysm & `1` ? `13` : `14`;
2897	break;
2898	case `0x1c` ... `0x1d`: / 0b1110x : r14_mon, r13_mon /
2899	*tgtmode = ARM_CPU_MODE_MON;
2900	*regno = sysm & `1` ? `13` : `14`;
2901	break;
2902	case `0x1e` ... `0x1f`: / 0b1111x : elr_hyp, r13_hyp /
2903	*tgtmode = ARM_CPU_MODE_HYP;
2904	/ Arbitrarily pick 17 for ELR_Hyp (which is not a banked LR!) /
2905	*regno = sysm & `1` ? `13` : `17`;
2906	break;
2907	default: / unallocated /
2908	goto undef;
2909	}
2910	}
2911
2912	/ Catch the 'accessing inaccessible register' cases we can detect*
2913	* at translate time.
2914	*/
2915	switch (*tgtmode) {
2916	case ARM_CPU_MODE_MON:
2917	if (!arm_dc_feature(s, ARM_FEATURE_EL3) \|\| s->ns) {
2918	goto undef;
2919	}
2920	if (s->current_el == `1`) {
2921	/ If we're in Secure EL1 (which implies that EL3 is AArch64)*
2922	* then accesses to Mon registers trap to EL3
2923	*/
2924	exc_target = `3`;
2925	goto undef;
2926	}
2927	break;
2928	case ARM_CPU_MODE_HYP:
2929	/*
2930	* SPSR_hyp and r13_hyp can only be accessed from Monitor mode
2931	* (and so we can forbid accesses from EL2 or below). elr_hyp
2932	* can be accessed also from Hyp mode, so forbid accesses from
2933	* EL0 or EL1.
2934	*/
2935	if (!arm_dc_feature(s, ARM_FEATURE_EL2) \|\| s->current_el < `2` \|\|
2936	(s->current_el < `3` && *regno != `17`)) {
2937	goto undef;
2938	}
2939	break;
2940	default:
2941	break;
2942	}
2943
2944	return true;
2945
2946	undef:
2947	/ If we get here then some access check did not pass /
2948	gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
2949	syn_uncategorized(), exc_target);
2950	return false;
2951	}
2952
2953	static void gen_msr_banked(DisasContext s, int* r, int sysm, int rn)
2954	{
2955	TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2956	int tgtmode = `0`, regno = `0`;
2957
2958	if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2959	return;
2960	}
2961
2962	/ Sync state because msr_banked() can raise exceptions /
2963	gen_set_condexec(s);
2964	gen_set_pc_im(s, s->pc_curr);
2965	tcg_reg = load_reg(s, rn);
2966	tcg_tgtmode = tcg_const_i32(tgtmode);
2967	tcg_regno = tcg_const_i32(regno);
2968	gen_helper_msr_banked(cpu_env, tcg_reg, tcg_tgtmode, tcg_regno);
2969	tcg_temp_free_i32(tcg_tgtmode);
2970	tcg_temp_free_i32(tcg_regno);
2971	tcg_temp_free_i32(tcg_reg);
2972	s->base.is_jmp = DISAS_UPDATE;
2973	}
2974
2975	static void gen_mrs_banked(DisasContext s, int* r, int sysm, int rn)
2976	{
2977	TCGv_i32 tcg_reg, tcg_tgtmode, tcg_regno;
2978	int tgtmode = `0`, regno = `0`;
2979
2980	if (!msr_banked_access_decode(s, r, sysm, rn, &tgtmode, &regno)) {
2981	return;
2982	}
2983
2984	/ Sync state because mrs_banked() can raise exceptions /
2985	gen_set_condexec(s);
2986	gen_set_pc_im(s, s->pc_curr);
2987	tcg_reg = tcg_temp_new_i32();
2988	tcg_tgtmode = tcg_const_i32(tgtmode);
2989	tcg_regno = tcg_const_i32(regno);
2990	gen_helper_mrs_banked(tcg_reg, cpu_env, tcg_tgtmode, tcg_regno);
2991	tcg_temp_free_i32(tcg_tgtmode);
2992	tcg_temp_free_i32(tcg_regno);
2993	store_reg(s, rn, tcg_reg);
2994	s->base.is_jmp = DISAS_UPDATE;
2995	}
2996
2997	/ Store value to PC as for an exception return (ie don't*
2998	* mask bits). The subsequent call to gen_helper_cpsr_write_eret()
2999	* will do the masking based on the new value of the Thumb bit.
3000	*/
3001	static void store_pc_exc_ret(DisasContext *s, TCGv_i32 pc)
3002	{
3003	tcg_gen_mov_i32(cpu_R[`15`], pc);
3004	tcg_temp_free_i32(pc);
3005	}
3006
3007	/ Generate a v6 exception return. Marks both values as dead. /
3008	static void gen_rfe(DisasContext *s, TCGv_i32 pc, TCGv_i32 cpsr)
3009	{
3010	store_pc_exc_ret(s, pc);
3011	/ The cpsr_write_eret helper will mask the low bits of PC*
3012	* appropriately depending on the new Thumb bit, so it must
3013	* be called after storing the new PC.
3014	*/
3015	if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
3016	gen_io_start();
3017	}
3018	gen_helper_cpsr_write_eret(cpu_env, cpsr);
3019	tcg_temp_free_i32(cpsr);
3020	/ Must exit loop to check un-masked IRQs /
3021	s->base.is_jmp = DISAS_EXIT;
3022	}
3023
3024	/ Generate an old-style exception return. Marks pc as dead. /
3025	static void gen_exception_return(DisasContext *s, TCGv_i32 pc)
3026	{
3027	gen_rfe(s, pc, load_cpu_field(spsr));
3028	}
3029
/*
 * Argument-list shorthand: expands to "cpu_V0, cpu_V0, cpu_V1".
 * Presumably used for helpers of the form V0 = op(V0, V1) -- confirm at
 * the call sites.
 */
#define CPU_V001 cpu_V0, cpu_V0, cpu_V1
3031
3032	static inline void gen_neon_add(int size, TCGv_i32 t0, TCGv_i32 t1)
3033	{
3034	switch (size) {
3035	case `0`: gen_helper_neon_add_u8(t0, t0, t1); break;
3036	case `1`: gen_helper_neon_add_u16(t0, t0, t1); break;
3037	case `2`: tcg_gen_add_i32(t0, t0, t1); break;
3038	default: abort();
3039	}
3040	}
3041
3042	static inline void gen_neon_rsb(int size, TCGv_i32 t0, TCGv_i32 t1)
3043	{
3044	switch (size) {
3045	case `0`: gen_helper_neon_sub_u8(t0, t1, t0); break;
3046	case `1`: gen_helper_neon_sub_u16(t0, t1, t0); break;
3047	case `2`: tcg_gen_sub_i32(t0, t1, t0); break;
3048	default: return;
3049	}
3050	}
3051
/*
 * 32-bit pairwise ops end up the same as the elementwise versions.
 * These aliases map the 32-bit pairwise max/min helper names straight
 * onto the inline TCG signed/unsigned max/min ops.
 */
#define gen_helper_neon_pmax_s32  tcg_gen_smax_i32
#define gen_helper_neon_pmax_u32  tcg_gen_umax_i32
#define gen_helper_neon_pmin_s32  tcg_gen_smin_i32
#define gen_helper_neon_pmin_u32  tcg_gen_umin_i32
3057
/*
 * Emit gen_helper_neon_<name>_{s,u}{8,16,32}(tmp, cpu_env, tmp, tmp2).
 *
 * The variant is chosen by (size << 1) | u: even selector values pick the
 * signed (_s) helper, odd ones the unsigned (_u) helper, for element
 * widths 8, 16 and 32.  'size', 'u', 'tmp' and 'tmp2' are taken from the
 * scope in which the macro is expanded.
 *
 * Any other selector value makes the ENCLOSING function return 1, so the
 * macro may only be used in functions returning an integer status.
 */
#define GEN_NEON_INTEGER_OP_ENV(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(tmp, cpu_env, tmp, tmp2); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(tmp, cpu_env, tmp, tmp2); \
        break; \
    default: return 1; \
    }} while (0)
3080
/*
 * Emit gen_helper_neon_<name>_{s,u}{8,16,32}(tmp, tmp, tmp2).
 *
 * Same dispatch as GEN_NEON_INTEGER_OP_ENV -- selector (size << 1) | u,
 * even values signed, odd values unsigned -- but the helpers are called
 * without cpu_env.  'size', 'u', 'tmp' and 'tmp2' are taken from the
 * scope in which the macro is expanded.
 *
 * Any other selector value makes the ENCLOSING function return 1.
 */
#define GEN_NEON_INTEGER_OP(name) do { \
    switch ((size << 1) | u) { \
    case 0: \
        gen_helper_neon_##name##_s8(tmp, tmp, tmp2); \
        break; \
    case 1: \
        gen_helper_neon_##name##_u8(tmp, tmp, tmp2); \
        break; \
    case 2: \
        gen_helper_neon_##name##_s16(tmp, tmp, tmp2); \
        break; \
    case 3: \
        gen_helper_neon_##name##_u16(tmp, tmp, tmp2); \
        break; \
    case 4: \
        gen_helper_neon_##name##_s32(tmp, tmp, tmp2); \
        break; \
    case 5: \
        gen_helper_neon_##name##_u32(tmp, tmp, tmp2); \
        break; \
    default: return 1; \
    }} while (0)
3103
3104	static TCGv_i32 neon_load_scratch(int scratch)
3105	{
3106	TCGv_i32 tmp = tcg_temp_new_i32();
3107	tcg_gen_ld_i32(tmp, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3108	return tmp;
3109	}
3110
3111	static void neon_store_scratch(int scratch, TCGv_i32 var)
3112	{
3113	tcg_gen_st_i32(var, cpu_env, offsetof(CPUARMState, vfp.scratch[scratch]));
3114	tcg_temp_free_i32(var);
3115	}
3116
3117	static inline TCGv_i32 neon_get_scalar(int size, int reg)
3118	{
3119	TCGv_i32 tmp;
3120	if (size == `1`) {
3121	tmp = neon_load_reg(reg & `7`, reg >> `4`);
3122	if (reg & `8`) {
3123	gen_neon_dup_high16(tmp);
3124	} else {
3125	gen_neon_dup_low16(tmp);
3126	}
3127	} else {
3128	tmp = neon_load_reg(reg & `15`, reg >> `4`);
3129	}
3130	return tmp;
3131	}
3132
3133	static int gen_neon_unzip(int rd, int rm, int size, int q)
3134	{
3135	TCGv_ptr pd, pm;
3136
3137	if (!q && size == `2`) {
3138	return `1`;
3139	}
3140	pd = vfp_reg_ptr(true, rd);
3141	pm = vfp_reg_ptr(true, rm);
3142	if (q) {
3143	switch (size) {
3144	case `0`:
3145	gen_helper_neon_qunzip8(pd, pm);
3146	break;
3147	case `1`:
3148	gen_helper_neon_qunzip16(pd, pm);
3149	break;
3150	case `2`:
3151	gen_helper_neon_qunzip32(pd, pm);
3152	break;
3153	default:
3154	abort();
3155	}
3156	} else {
3157	switch (size) {
3158	case `0`:
3159	gen_helper_neon_unzip8(pd, pm);
3160	break;
3161	case `1`:
3162	gen_helper_neon_unzip16(pd, pm);
3163	break;
3164	default:
3165	abort();
3166	}
3167	}
3168	tcg_temp_free_ptr(pd);
3169	tcg_temp_free_ptr(pm);
3170	return `0`;
3171	}
3172
3173	static int gen_neon_zip(int rd, int rm, int size, int q)
3174	{
3175	TCGv_ptr pd, pm;
3176
3177	if (!q && size == `2`) {
3178	return `1`;
3179	}
3180	pd = vfp_reg_ptr(true, rd);
3181	pm = vfp_reg_ptr(true, rm);
3182	if (q) {
3183	switch (size) {
3184	case `0`:
3185	gen_helper_neon_qzip8(pd, pm);
3186	break;
3187	case `1`:
3188	gen_helper_neon_qzip16(pd, pm);
3189	break;
3190	case `2`:
3191	gen_helper_neon_qzip32(pd, pm);
3192	break;
3193	default:
3194	abort();
3195	}
3196	} else {
3197	switch (size) {
3198	case `0`:
3199	gen_helper_neon_zip8(pd, pm);
3200	break;
3201	case `1`:
3202	gen_helper_neon_zip16(pd, pm);
3203	break;
3204	default:
3205	abort();
3206	}
3207	}
3208	tcg_temp_free_ptr(pd);
3209	tcg_temp_free_ptr(pm);
3210	return `0`;
3211	}
3212
3213	static void gen_neon_trn_u8(TCGv_i32 t0, TCGv_i32 t1)
3214	{
3215	TCGv_i32 rd, tmp;
3216
3217	rd = tcg_temp_new_i32();
3218	tmp = tcg_temp_new_i32();
3219
3220	tcg_gen_shli_i32(rd, t0, `8`);
3221	tcg_gen_andi_i32(rd, rd, `0xff00ff00`);
3222	tcg_gen_andi_i32(tmp, t1, `0x00ff00ff`);
3223	tcg_gen_or_i32(rd, rd, tmp);
3224
3225	tcg_gen_shri_i32(t1, t1, `8`);
3226	tcg_gen_andi_i32(t1, t1, `0x00ff00ff`);
3227	tcg_gen_andi_i32(tmp, t0, `0xff00ff00`);
3228	tcg_gen_or_i32(t1, t1, tmp);
3229	tcg_gen_mov_i32(t0, rd);
3230
3231	tcg_temp_free_i32(tmp);
3232	tcg_temp_free_i32(rd);
3233	}
3234
3235	static void gen_neon_trn_u16(TCGv_i32 t0, TCGv_i32 t1)
3236	{
3237	TCGv_i32 rd, tmp;
3238
3239	rd = tcg_temp_new_i32();
3240	tmp = tcg_temp_new_i32();
3241
3242	tcg_gen_shli_i32(rd, t0, `16`);
3243	tcg_gen_andi_i32(tmp, t1, `0xffff`);
3244	tcg_gen_or_i32(rd, rd, tmp);
3245	tcg_gen_shri_i32(t1, t1, `16`);
3246	tcg_gen_andi_i32(tmp, t0, `0xffff0000`);
3247	tcg_gen_or_i32(t1, t1, tmp);
3248	tcg_gen_mov_i32(t0, rd);
3249
3250	tcg_temp_free_i32(tmp);
3251	tcg_temp_free_i32(rd);
3252	}
3253
3254
/*
 * Parameters of the Neon "load/store all elements" forms.  The table has
 * 11 entries and is indexed by the op value that disas_neon_ls_insn()
 * extracts from insn bits [11:8] (values above 10 are rejected there).
 */
static const struct {
    int nregs;
    int interleave;
    int spacing;
} neon_ls_element_type[11] = {
    [0]  = { .nregs = 1, .interleave = 4, .spacing = 1 },
    [1]  = { .nregs = 1, .interleave = 4, .spacing = 2 },
    [2]  = { .nregs = 4, .interleave = 1, .spacing = 1 },
    [3]  = { .nregs = 2, .interleave = 2, .spacing = 2 },
    [4]  = { .nregs = 1, .interleave = 3, .spacing = 1 },
    [5]  = { .nregs = 1, .interleave = 3, .spacing = 2 },
    [6]  = { .nregs = 3, .interleave = 1, .spacing = 1 },
    [7]  = { .nregs = 1, .interleave = 1, .spacing = 1 },
    [8]  = { .nregs = 1, .interleave = 2, .spacing = 1 },
    [9]  = { .nregs = 1, .interleave = 2, .spacing = 2 },
    [10] = { .nregs = 2, .interleave = 1, .spacing = 1 },
};
3272
/*
 * Translate a NEON load/store element instruction.
 *
 * Three encodings are handled, selected by insn bit 23 and bits [11:10]:
 *   - bit 23 clear: load/store all elements;
 *   - bit 23 set, bits [11:10] == 3: load single element to all lanes;
 *   - bit 23 set, otherwise: load/store a single element.
 * After the transfer, base register rn may be updated depending on rm
 * (see the end of the function).
 *
 * Returns nonzero if the instruction is invalid, and zero if code was
 * generated -- including the case where only the FP access-trap exception
 * was generated.
 */
static int disas_neon_ls_insn(DisasContext *s, uint32_t insn)
{
    int rd, rn, rm;
    int op;
    int nregs;
    int interleave;
    int spacing;
    int stride;
    int size;
    int reg;
    int load;
    int n;
    int vec_size;
    int mmu_idx;
    MemOp endian;
    TCGv_i32 addr;
    TCGv_i32 tmp;
    TCGv_i32 tmp2;
    TCGv_i64 tmp64;

    /* FIXME: this access check should not take precedence over UNDEF
     * for invalid encodings; we will generate incorrect syndrome information
     * for attempts to execute invalid vfp/neon encodings with FP disabled.
     */
    if (s->fp_excp_el) {
        gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
                           syn_simd_access_trap(1, 0xe, false), s->fp_excp_el);
        return 0;
    }

    if (!s->vfp_enabled)
      return 1;
    /* NOTE(review): VFP_DREG_D is a macro defined elsewhere in the file; it
     * sets rd from insn and presumably may itself return from this
     * function -- confirm against its definition.
     */
    VFP_DREG_D(rd, insn);
    rn = (insn >> 16) & 0xf;
    rm = insn & 0xf;
    /* Bit 21 distinguishes loads (set) from stores (clear). */
    load = (insn & (1 << 21)) != 0;
    endian = s->be_data;
    mmu_idx = get_mem_index(s);
    if ((insn & (1 << 23)) == 0) {
        /* Load store all elements.  */
        op = (insn >> 8) & 0xf;
        size = (insn >> 6) & 3;
        /* neon_ls_element_type[] only has entries for op 0..10. */
        if (op > 10)
            return 1;
        /* Catch UNDEF cases for bad values of align field */
        switch (op & 0xc) {
        case 4:
            if (((insn >> 5) & 1) == 1) {
                return 1;
            }
            break;
        case 8:
            if (((insn >> 4) & 3) == 3) {
                return 1;
            }
            break;
        default:
            break;
        }
        nregs = neon_ls_element_type[op].nregs;
        interleave = neon_ls_element_type[op].interleave;
        spacing = neon_ls_element_type[op].spacing;
        if (size == 3 && (interleave | spacing) != 1) {
            return 1;
        }
        /* For our purposes, bytes are always little-endian.  */
        if (size == 0) {
            endian = MO_LE;
        }
        /* Consecutive little-endian elements from a single register
         * can be promoted to a larger little-endian operation.
         */
        if (interleave == 1 && endian == MO_LE) {
            size = 3;
        }
        tmp64 = tcg_temp_new_i64();
        addr = tcg_temp_new_i32();
        /* tmp2 holds the address step per access: 1 << size bytes. */
        tmp2 = tcg_const_i32(1 << size);
        load_reg_var(s, addr, rn);
        for (reg = 0; reg < nregs; reg++) {
            /* 8 >> size elements of (1 << size) bytes fill 8 bytes. */
            for (n = 0; n < 8 >> size; n++) {
                int xs;
                for (xs = 0; xs < interleave; xs++) {
                    /* Register accessed for this interleave step. */
                    int tt = rd + reg + spacing * xs;

                    if (load) {
                        gen_aa32_ld_i64(s, tmp64, addr, mmu_idx, endian | size);
                        neon_store_element64(tt, n, size, tmp64);
                    } else {
                        neon_load_element64(tmp64, tt, n, size);
                        gen_aa32_st_i64(s, tmp64, addr, mmu_idx, endian | size);
                    }
                    tcg_gen_add_i32(addr, addr, tmp2);
                }
            }
        }
        tcg_temp_free_i32(addr);
        tcg_temp_free_i32(tmp2);
        tcg_temp_free_i64(tmp64);
        /* From here on stride is the immediate used for the rm == 13
         * base register update at the end of the function.
         */
        stride = nregs * interleave * 8;
    } else {
        size = (insn >> 10) & 3;
        if (size == 3) {
            /* Load single element to all lanes.  */
            int a = (insn >> 4) & 1;
            if (!load) {
                return 1;
            }
            size = (insn >> 6) & 3;
            nregs = ((insn >> 8) & 3) + 1;

            if (size == 3) {
                if (nregs != 4 || a == 0) {
                    return 1;
                }
                /* For VLD4 size==3 a == 1 means 32 bits at 16 byte alignment */
                size = 2;
            }
            if (nregs == 1 && a == 1 && size == 0) {
                return 1;
            }
            if (nregs == 3 && a == 1) {
                return 1;
            }
            addr = tcg_temp_new_i32();
            load_reg_var(s, addr, rn);

            /* VLD1 to all lanes: bit 5 indicates how many Dregs to write.
             * VLD2/3/4 to all lanes: bit 5 indicates register stride.
             */
            stride = (insn & (1 << 5)) ? 2 : 1;
            vec_size = nregs == 1 ? stride * 8 : 8;

            tmp = tcg_temp_new_i32();
            for (reg = 0; reg < nregs; reg++) {
                gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                                s->be_data | size);
                if ((rd & 1) && vec_size == 16) {
                    /* We cannot write 16 bytes at once because the
                     * destination is unaligned.
                     */
                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
                                         8, 8, tmp);
                    tcg_gen_gvec_mov(0, neon_reg_offset(rd + 1, 0),
                                     neon_reg_offset(rd, 0), 8, 8);
                } else {
                    tcg_gen_gvec_dup_i32(size, neon_reg_offset(rd, 0),
                                         vec_size, vec_size, tmp);
                }
                tcg_gen_addi_i32(addr, addr, 1 << size);
                rd += stride;
            }
            tcg_temp_free_i32(tmp);
            tcg_temp_free_i32(addr);
            /* Reuse stride as the rm == 13 base update immediate. */
            stride = (1 << size) * nregs;
        } else {
            /* Single element.  */
            int idx = (insn >> 4) & 0xf;
            int reg_idx;
            /* reg_idx and the register stride are taken from different
             * insn bits depending on the element size.
             */
            switch (size) {
            case 0:
                reg_idx = (insn >> 5) & 7;
                stride = 1;
                break;
            case 1:
                reg_idx = (insn >> 6) & 3;
                stride = (insn & (1 << 5)) ? 2 : 1;
                break;
            case 2:
                reg_idx = (insn >> 7) & 1;
                stride = (insn & (1 << 6)) ? 2 : 1;
                break;
            default:
                abort();
            }
            nregs = ((insn >> 8) & 3) + 1;
            /* Catch the UNDEF cases. This is unavoidably a bit messy. */
            switch (nregs) {
            case 1:
                if (((idx & (1 << size)) != 0) ||
                    (size == 2 && ((idx & 3) == 1 || (idx & 3) == 2))) {
                    return 1;
                }
                break;
            case 3:
                if ((idx & 1) != 0) {
                    return 1;
                }
                /* fall through */
            case 2:
                if (size == 2 && (idx & 2) != 0) {
                    return 1;
                }
                break;
            case 4:
                if ((size == 2) && ((idx & 3) == 3)) {
                    return 1;
                }
                break;
            default:
                abort();
            }
            if ((rd + stride * (nregs - 1)) > 31) {
                /* Attempts to write off the end of the register file
                 * are UNPREDICTABLE; we choose to UNDEF because otherwise
                 * the neon_load_reg() would write off the end of the array.
                 */
                return 1;
            }
            tmp = tcg_temp_new_i32();
            addr = tcg_temp_new_i32();
            load_reg_var(s, addr, rn);
            for (reg = 0; reg < nregs; reg++) {
                if (load) {
                    gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
                                    s->be_data | size);
                    neon_store_element(rd, reg_idx, size, tmp);
                } else { /* Store */
                    neon_load_element(tmp, rd, reg_idx, size);
                    gen_aa32_st_i32(s, tmp, addr, get_mem_index(s),
                                    s->be_data | size);
                }
                rd += stride;
                tcg_gen_addi_i32(addr, addr, 1 << size);
            }
            tcg_temp_free_i32(addr);
            tcg_temp_free_i32(tmp);
            /* Reuse stride as the rm == 13 base update immediate. */
            stride = nregs * (1 << size);
        }
    }
    /* Base register update: none when rm == 15; when rm == 13 add the
     * immediate computed above; otherwise add the value of register rm.
     */
    if (rm != 15) {
        TCGv_i32 base;

        base = load_reg(s, rn);
        if (rm == 13) {
            tcg_gen_addi_i32(base, base, stride);
        } else {
            TCGv_i32 index;
            index = load_reg(s, rm);
            tcg_gen_add_i32(base, base, index);
            tcg_temp_free_i32(index);
        }
        store_reg(s, rn, base);
    }
    return 0;
}
3521
3522	static inline void gen_neon_narrow(int size, TCGv_i32 dest, TCGv_i64 src)
3523	{
3524	switch (size) {
3525	case `0`: gen_helper_neon_narrow_u8(dest, src); break;
3526	case `1`: gen_helper_neon_narrow_u16(dest, src); break;
3527	case `2`: tcg_gen_extrl_i64_i32(dest, src); break;
3528	default: abort();
3529	}
3530	}
3531
3532	static inline void gen_neon_narrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3533	{
3534	switch (size) {
3535	case `0`: gen_helper_neon_narrow_sat_s8(dest, cpu_env, src); break;
3536	case `1`: gen_helper_neon_narrow_sat_s16(dest, cpu_env, src); break;
3537	case `2`: gen_helper_neon_narrow_sat_s32(dest, cpu_env, src); break;
3538	default: abort();
3539	}
3540	}
3541
3542	static inline void gen_neon_narrow_satu(int size, TCGv_i32 dest, TCGv_i64 src)
3543	{
3544	switch (size) {
3545	case `0`: gen_helper_neon_narrow_sat_u8(dest, cpu_env, src); break;
3546	case `1`: gen_helper_neon_narrow_sat_u16(dest, cpu_env, src); break;
3547	case `2`: gen_helper_neon_narrow_sat_u32(dest, cpu_env, src); break;
3548	default: abort();
3549	}
3550	}
3551
3552	static inline void gen_neon_unarrow_sats(int size, TCGv_i32 dest, TCGv_i64 src)
3553	{
3554	switch (size) {
3555	case `0`: gen_helper_neon_unarrow_sat8(dest, cpu_env, src); break;
3556	case `1`: gen_helper_neon_unarrow_sat16(dest, cpu_env, src); break;
3557	case `2`: gen_helper_neon_unarrow_sat32(dest, cpu_env, src); break;
3558	default: abort();
3559	}
3560	}
3561
3562	static inline void gen_neon_shift_narrow(int size, TCGv_i32 var, TCGv_i32 shift,
3563	int q, int u)
3564	{
3565	if (q) {
3566	if (u) {
3567	switch (size) {
3568	case `1`: gen_helper_neon_rshl_u16(var, var, shift); break;
3569	case `2`: gen_helper_neon_rshl_u32(var, var, shift); break;
3570	default: abort();
3571	}
3572	} else {
3573	switch (size) {
3574	case `1`: gen_helper_neon_rshl_s16(var, var, shift); break;
3575	case `2`: gen_helper_neon_rshl_s32(var, var, shift); break;
3576	default: abort();
3577	}
3578	}
3579	} else {
3580	if (u) {
3581	switch (size) {
3582	case `1`: gen_helper_neon_shl_u16(var, var, shift); break;
3583	case `2`: gen_helper_neon_shl_u32(var, var, shift); break;
3584	default: abort();
3585	}
3586	} else {
3587	switch (size) {
3588	case `1`: gen_helper_neon_shl_s16(var, var, shift); break;
3589	case `2`: gen_helper_neon_shl_s32(var, var, shift); break;
3590	default: abort();
3591	}
3592	}
3593	}
3594	}
3595
3596	static inline void gen_neon_widen(TCGv_i64 dest, TCGv_i32 src, int size, int u)
3597	{
3598	if (u) {
3599	switch (size) {
3600	case `0`: gen_helper_neon_widen_u8(dest, src); break;
3601	case `1`: gen_helper_neon_widen_u16(dest, src); break;
3602	case `2`: tcg_gen_extu_i32_i64(dest, src); break;
3603	default: abort();
3604	}
3605	} else {
3606	switch (size) {
3607	case `0`: gen_helper_neon_widen_s8(dest, src); break;
3608	case `1`: gen_helper_neon_widen_s16(dest, src); break;
3609	case `2`: tcg_gen_ext_i32_i64(dest, src); break;
3610	default: abort();
3611	}
3612	}
3613	tcg_temp_free_i32(src);
3614	}
3615
3616	static inline void gen_neon_addl(int size)
3617	{
3618	switch (size) {
3619	case `0`: gen_helper_neon_addl_u16(CPU_V001); break;
3620	case `1`: gen_helper_neon_addl_u32(CPU_V001); break;
3621	case `2`: tcg_gen_add_i64(CPU_V001); break;
3622	default: abort();
3623	}
3624	}
3625
3626	static inline void gen_neon_subl(int size)
3627	{
3628	switch (size) {
3629	case `0`: gen_helper_neon_subl_u16(CPU_V001); break;
3630	case `1`: gen_helper_neon_subl_u32(CPU_V001); break;
3631	case `2`: tcg_gen_sub_i64(CPU_V001); break;
3632	default: abort();
3633	}
3634	}
3635
3636	static inline void gen_neon_negl(TCGv_i64 var, int size)
3637	{
3638	switch (size) {
3639	case `0`: gen_helper_neon_negl_u16(var, var); break;
3640	case `1`: gen_helper_neon_negl_u32(var, var); break;
3641	case `2`:
3642	tcg_gen_neg_i64(var, var);
3643	break;
3644	default: abort();
3645	}
3646	}
3647
3648	static inline void gen_neon_addl_saturate(TCGv_i64 op0, TCGv_i64 op1, int size)
3649	{
3650	switch (size) {
3651	case `1`: gen_helper_neon_addl_saturate_s32(op0, cpu_env, op0, op1); break;
3652	case `2`: gen_helper_neon_addl_saturate_s64(op0, cpu_env, op0, op1); break;
3653	default: abort();
3654	}
3655	}
3656
3657	static inline void gen_neon_mull(TCGv_i64 dest, TCGv_i32 a, TCGv_i32 b,
3658	int size, int u)
3659	{
3660	TCGv_i64 tmp;
3661
3662	switch ((size << `1`) \| u) {
3663	case `0`: gen_helper_neon_mull_s8(dest, a, b); break;
3664	case `1`: gen_helper_neon_mull_u8(dest, a, b); break;
3665	case `2`: gen_helper_neon_mull_s16(dest, a, b); break;
3666	case `3`: gen_helper_neon_mull_u16(dest, a, b); break;
3667	case `4`:
3668	tmp = gen_muls_i64_i32(a, b);
3669	tcg_gen_mov_i64(dest, tmp);
3670	tcg_temp_free_i64(tmp);
3671	break;
3672	case `5`:
3673	tmp = gen_mulu_i64_i32(a, b);
3674	tcg_gen_mov_i64(dest, tmp);
3675	tcg_temp_free_i64(tmp);
3676	break;
3677	default: abort();
3678	}
3679
3680	/ gen_helper_neon_mull_[su]{8\|16} do not free their parameters.*
3681	Don't forget to clean them now. /*
3682	if (size < `2`) {
3683	tcg_temp_free_i32(a);
3684	tcg_temp_free_i32(b);
3685	}
3686	}
3687
3688	static void gen_neon_narrow_op(int op, int u, int size,
3689	TCGv_i32 dest, TCGv_i64 src)
3690	{
3691	if (op) {
3692	if (u) {
3693	gen_neon_unarrow_sats(size, dest, src);
3694	} else {
3695	gen_neon_narrow(size, dest, src);
3696	}
3697	} else {
3698	if (u) {
3699	gen_neon_narrow_satu(size, dest, src);
3700	} else {
3701	gen_neon_narrow_sats(size, dest, src);
3702	}
3703	}
3704	}
3705
/* Symbolic constants for op fields for Neon 3-register same-length.
 * The values correspond to bits [11:8,4]; see the ARM ARM DDI0406B
 * table A7-9.
 *
 * The constants cover every value 0..31 and are used as the indices of
 * neon_3r_sizes[].
 */
#define NEON_3R_VHADD 0
#define NEON_3R_VQADD 1
#define NEON_3R_VRHADD 2
#define NEON_3R_LOGIC 3 /* VAND,VBIC,VORR,VMOV,VORN,VEOR,VBIF,VBIT,VBSL */
#define NEON_3R_VHSUB 4
#define NEON_3R_VQSUB 5
#define NEON_3R_VCGT 6
#define NEON_3R_VCGE 7
#define NEON_3R_VSHL 8
#define NEON_3R_VQSHL 9
#define NEON_3R_VRSHL 10
#define NEON_3R_VQRSHL 11
#define NEON_3R_VMAX 12
#define NEON_3R_VMIN 13
#define NEON_3R_VABD 14
#define NEON_3R_VABA 15
#define NEON_3R_VADD_VSUB 16
#define NEON_3R_VTST_VCEQ 17
#define NEON_3R_VML 18 /* VMLA, VMLS */
#define NEON_3R_VMUL 19
#define NEON_3R_VPMAX 20
#define NEON_3R_VPMIN 21
#define NEON_3R_VQDMULH_VQRDMULH 22
#define NEON_3R_VPADD_VQRDMLAH 23
#define NEON_3R_SHA 24 /* SHA1C,SHA1P,SHA1M,SHA1SU0,SHA256H{2},SHA256SU1 */
#define NEON_3R_VFM_VQRDMLSH 25 /* VFMA, VFMS, VQRDMLSH */
#define NEON_3R_FLOAT_ARITH 26 /* float VADD, VSUB, VPADD, VABD */
#define NEON_3R_FLOAT_MULTIPLY 27 /* float VMLA, VMLS, VMUL */
#define NEON_3R_FLOAT_CMP 28 /* float VCEQ, VCGE, VCGT */
#define NEON_3R_FLOAT_ACMP 29 /* float VACGE, VACGT, VACLE, VACLT */
#define NEON_3R_FLOAT_MINMAX 30 /* float VMIN, VMAX */
#define NEON_3R_FLOAT_MISC 31 /* float VRECPS, VRSQRTS, VMAXNM/MINNM */
3742
/*
 * Per-op bitmask of size field values, indexed by the NEON_3R_* constants.
 * NOTE(review): presumably this follows the same convention as
 * neon_2rm_sizes[] (bit n set if size value n is allowed, otherwise
 * UNDEF) -- confirm against the code that consults this table.
 */
static const uint8_t neon_3r_sizes[] = {
    [NEON_3R_VHADD] = 0x7,
    [NEON_3R_VQADD] = 0xf,
    [NEON_3R_VRHADD] = 0x7,
    [NEON_3R_LOGIC] = 0xf, /* size field encodes op type */
    [NEON_3R_VHSUB] = 0x7,
    [NEON_3R_VQSUB] = 0xf,
    [NEON_3R_VCGT] = 0x7,
    [NEON_3R_VCGE] = 0x7,
    [NEON_3R_VSHL] = 0xf,
    [NEON_3R_VQSHL] = 0xf,
    [NEON_3R_VRSHL] = 0xf,
    [NEON_3R_VQRSHL] = 0xf,
    [NEON_3R_VMAX] = 0x7,
    [NEON_3R_VMIN] = 0x7,
    [NEON_3R_VABD] = 0x7,
    [NEON_3R_VABA] = 0x7,
    [NEON_3R_VADD_VSUB] = 0xf,
    [NEON_3R_VTST_VCEQ] = 0x7,
    [NEON_3R_VML] = 0x7,
    [NEON_3R_VMUL] = 0x7,
    [NEON_3R_VPMAX] = 0x7,
    [NEON_3R_VPMIN] = 0x7,
    [NEON_3R_VQDMULH_VQRDMULH] = 0x6,
    [NEON_3R_VPADD_VQRDMLAH] = 0x7,
    [NEON_3R_SHA] = 0xf, /* size field encodes op type */
    [NEON_3R_VFM_VQRDMLSH] = 0x7, /* For VFM, size bit 1 encodes op */
    [NEON_3R_FLOAT_ARITH] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MULTIPLY] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_CMP] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_ACMP] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MINMAX] = 0x5, /* size bit 1 encodes op */
    [NEON_3R_FLOAT_MISC] = 0x5, /* size bit 1 encodes op */
};
3777
/* Symbolic constants for op fields for Neon 2-register miscellaneous.
 * The values correspond to bits [17:16,10:7]; see the ARM ARM DDI0406B
 * table A7-13.
 *
 * Values 3 and 29 have no constant here.  The constants are used as the
 * indices of neon_2rm_sizes[].
 */
#define NEON_2RM_VREV64 0
#define NEON_2RM_VREV32 1
#define NEON_2RM_VREV16 2
#define NEON_2RM_VPADDL 4
#define NEON_2RM_VPADDL_U 5
#define NEON_2RM_AESE 6 /* Includes AESD */
#define NEON_2RM_AESMC 7 /* Includes AESIMC */
#define NEON_2RM_VCLS 8
#define NEON_2RM_VCLZ 9
#define NEON_2RM_VCNT 10
#define NEON_2RM_VMVN 11
#define NEON_2RM_VPADAL 12
#define NEON_2RM_VPADAL_U 13
#define NEON_2RM_VQABS 14
#define NEON_2RM_VQNEG 15
#define NEON_2RM_VCGT0 16
#define NEON_2RM_VCGE0 17
#define NEON_2RM_VCEQ0 18
#define NEON_2RM_VCLE0 19
#define NEON_2RM_VCLT0 20
#define NEON_2RM_SHA1H 21
#define NEON_2RM_VABS 22
#define NEON_2RM_VNEG 23
#define NEON_2RM_VCGT0_F 24
#define NEON_2RM_VCGE0_F 25
#define NEON_2RM_VCEQ0_F 26
#define NEON_2RM_VCLE0_F 27
#define NEON_2RM_VCLT0_F 28
#define NEON_2RM_VABS_F 30
#define NEON_2RM_VNEG_F 31
#define NEON_2RM_VSWP 32
#define NEON_2RM_VTRN 33
#define NEON_2RM_VUZP 34
#define NEON_2RM_VZIP 35
#define NEON_2RM_VMOVN 36 /* Includes VQMOVN, VQMOVUN */
#define NEON_2RM_VQMOVN 37 /* Includes VQMOVUN */
#define NEON_2RM_VSHLL 38
#define NEON_2RM_SHA1SU1 39 /* Includes SHA256SU0 */
#define NEON_2RM_VRINTN 40
#define NEON_2RM_VRINTX 41
#define NEON_2RM_VRINTA 42
#define NEON_2RM_VRINTZ 43
#define NEON_2RM_VCVT_F16_F32 44
#define NEON_2RM_VRINTM 45
#define NEON_2RM_VCVT_F32_F16 46
#define NEON_2RM_VRINTP 47
#define NEON_2RM_VCVTAU 48
#define NEON_2RM_VCVTAS 49
#define NEON_2RM_VCVTNU 50
#define NEON_2RM_VCVTNS 51
#define NEON_2RM_VCVTPU 52
#define NEON_2RM_VCVTPS 53
#define NEON_2RM_VCVTMU 54
#define NEON_2RM_VCVTMS 55
#define NEON_2RM_VRECPE 56
#define NEON_2RM_VRSQRTE 57
#define NEON_2RM_VRECPE_F 58
#define NEON_2RM_VRSQRTE_F 59
#define NEON_2RM_VCVT_FS 60
#define NEON_2RM_VCVT_FU 61
#define NEON_2RM_VCVT_SF 62
#define NEON_2RM_VCVT_UF 63
3844
3845	static bool neon_2rm_is_v8_op(int op)
3846	{
3847	/ Return true if this neon 2reg-misc op is ARMv8 and up /
3848	switch (op) {
3849	case NEON_2RM_VRINTN:
3850	case NEON_2RM_VRINTA:
3851	case NEON_2RM_VRINTM:
3852	case NEON_2RM_VRINTP:
3853	case NEON_2RM_VRINTZ:
3854	case NEON_2RM_VRINTX:
3855	case NEON_2RM_VCVTAU:
3856	case NEON_2RM_VCVTAS:
3857	case NEON_2RM_VCVTNU:
3858	case NEON_2RM_VCVTNS:
3859	case NEON_2RM_VCVTPU:
3860	case NEON_2RM_VCVTPS:
3861	case NEON_2RM_VCVTMU:
3862	case NEON_2RM_VCVTMS:
3863	return true;
3864	default:
3865	return false;
3866	}
3867	}
3868
/* Each entry in this array has bit n set if the insn allows
 * size value n (otherwise it will UNDEF). Since unallocated
 * op values will have no bits set they always UNDEF.
 *
 * The array is indexed by the NEON_2RM_* constants.
 */
static const uint8_t neon_2rm_sizes[] = {
    [NEON_2RM_VREV64] = 0x7,
    [NEON_2RM_VREV32] = 0x3,
    [NEON_2RM_VREV16] = 0x1,
    [NEON_2RM_VPADDL] = 0x7,
    [NEON_2RM_VPADDL_U] = 0x7,
    [NEON_2RM_AESE] = 0x1,
    [NEON_2RM_AESMC] = 0x1,
    [NEON_2RM_VCLS] = 0x7,
    [NEON_2RM_VCLZ] = 0x7,
    [NEON_2RM_VCNT] = 0x1,
    [NEON_2RM_VMVN] = 0x1,
    [NEON_2RM_VPADAL] = 0x7,
    [NEON_2RM_VPADAL_U] = 0x7,
    [NEON_2RM_VQABS] = 0x7,
    [NEON_2RM_VQNEG] = 0x7,
    [NEON_2RM_VCGT0] = 0x7,
    [NEON_2RM_VCGE0] = 0x7,
    [NEON_2RM_VCEQ0] = 0x7,
    [NEON_2RM_VCLE0] = 0x7,
    [NEON_2RM_VCLT0] = 0x7,
    [NEON_2RM_SHA1H] = 0x4,
    [NEON_2RM_VABS] = 0x7,
    [NEON_2RM_VNEG] = 0x7,
    [NEON_2RM_VCGT0_F] = 0x4,
    [NEON_2RM_VCGE0_F] = 0x4,
    [NEON_2RM_VCEQ0_F] = 0x4,
    [NEON_2RM_VCLE0_F] = 0x4,
    [NEON_2RM_VCLT0_F] = 0x4,
    [NEON_2RM_VABS_F] = 0x4,
    [NEON_2RM_VNEG_F] = 0x4,
    [NEON_2RM_VSWP] = 0x1,
    [NEON_2RM_VTRN] = 0x7,
    [NEON_2RM_VUZP] = 0x7,
    [NEON_2RM_VZIP] = 0x7,
    [NEON_2RM_VMOVN] = 0x7,
    [NEON_2RM_VQMOVN] = 0x7,
    [NEON_2RM_VSHLL] = 0x7,
    [NEON_2RM_SHA1SU1] = 0x4,
    [NEON_2RM_VRINTN] = 0x4,
    [NEON_2RM_VRINTX] = 0x4,
    [NEON_2RM_VRINTA] = 0x4,
    [NEON_2RM_VRINTZ] = 0x4,
    [NEON_2RM_VCVT_F16_F32] = 0x2,
    [NEON_2RM_VRINTM] = 0x4,
    [NEON_2RM_VCVT_F32_F16] = 0x2,
    [NEON_2RM_VRINTP] = 0x4,
    [NEON_2RM_VCVTAU] = 0x4,
    [NEON_2RM_VCVTAS] = 0x4,
    [NEON_2RM_VCVTNU] = 0x4,
    [NEON_2RM_VCVTNS] = 0x4,
    [NEON_2RM_VCVTPU] = 0x4,
    [NEON_2RM_VCVTPS] = 0x4,
    [NEON_2RM_VCVTMU] = 0x4,
    [NEON_2RM_VCVTMS] = 0x4,
    [NEON_2RM_VRECPE] = 0x4,
    [NEON_2RM_VRSQRTE] = 0x4,
    [NEON_2RM_VRECPE_F] = 0x4,
    [NEON_2RM_VRSQRTE_F] = 0x4,
    [NEON_2RM_VCVT_FS] = 0x4,
    [NEON_2RM_VCVT_FU] = 0x4,
    [NEON_2RM_VCVT_SF] = 0x4,
    [NEON_2RM_VCVT_UF] = 0x4,
};
3937
3938
3939	/ Expand v8.1 simd helper. /
3940	static int do_v81_helper(DisasContext s, gen_helper_gvec_3_ptr fn,
3941	int q, int rd, int rn, int rm)
3942	{
3943	if (dc_isar_feature(aa32_rdm, s)) {
3944	int opr_sz = (`1` + q) * `8`;
3945	tcg_gen_gvec_3_ptr(vfp_reg_offset(`1`, rd),
3946	vfp_reg_offset(`1`, rn),
3947	vfp_reg_offset(`1`, rm), cpu_env,
3948	opr_sz, opr_sz, `0`, fn);
3949	return `0`;
3950	}
3951	return `1`;
3952	}
3953
/*
 * 8-bit-lane expander used by ssra_op[]: shift a right by @shift with
 * tcg_gen_vec_sar8i_i64, then add the result into d with
 * tcg_gen_vec_add8_i64.  Note that a is overwritten.
 */
static void gen_ssra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
3959
/*
 * 16-bit-lane expander used by ssra_op[]: shift a right by @shift with
 * tcg_gen_vec_sar16i_i64, then add the result into d with
 * tcg_gen_vec_add16_i64.  Note that a is overwritten.
 */
static void gen_ssra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_sar16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
3965
/*
 * 32-bit expander used by ssra_op[]: a = a >> shift (tcg_gen_sari_i32),
 * then d = d + a.  Note that a is overwritten.
 */
static void gen_ssra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_sari_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
3971
/*
 * 64-bit expander used by ssra_op[]: a = a >> shift (tcg_gen_sari_i64),
 * then d = d + a.  Note that a is overwritten.
 */
static void gen_ssra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_sari_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
3977
/*
 * Host-vector expander used by ssra_op[] for element size @vece:
 * a = a >> sh (tcg_gen_sari_vec), then d = d + a.  Note that a is
 * overwritten.
 */
static void gen_ssra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_sari_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
3983
3984	static const TCGOpcode vecop_list_ssra[] = {
3985	INDEX_op_sari_vec, INDEX_op_add_vec, `0`
3986	};
3987
3988	const GVecGen2i ssra_op[`4`] = {
3989	{ .fni8 = gen_ssra8_i64,
3990	.fniv = gen_ssra_vec,
3991	.load_dest = true,
3992	.opt_opc = vecop_list_ssra,
3993	.vece = MO_8 },
3994	{ .fni8 = gen_ssra16_i64,
3995	.fniv = gen_ssra_vec,
3996	.load_dest = true,
3997	.opt_opc = vecop_list_ssra,
3998	.vece = MO_16 },
3999	{ .fni4 = gen_ssra32_i32,
4000	.fniv = gen_ssra_vec,
4001	.load_dest = true,
4002	.opt_opc = vecop_list_ssra,
4003	.vece = MO_32 },
4004	{ .fni8 = gen_ssra64_i64,
4005	.fniv = gen_ssra_vec,
4006	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
4007	.opt_opc = vecop_list_ssra,
4008	.load_dest = true,
4009	.vece = MO_64 },
4010	};
4011
/*
 * 8-bit-lane expander used by usra_op[]: shift a right by @shift with
 * tcg_gen_vec_shr8i_i64, then add the result into d with
 * tcg_gen_vec_add8_i64.  Note that a is overwritten.
 */
static void gen_usra8_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr8i_i64(a, a, shift);
    tcg_gen_vec_add8_i64(d, d, a);
}
4017
/*
 * 16-bit-lane expander used by usra_op[]: shift a right by @shift with
 * tcg_gen_vec_shr16i_i64, then add the result into d with
 * tcg_gen_vec_add16_i64.  Note that a is overwritten.
 */
static void gen_usra16_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_vec_shr16i_i64(a, a, shift);
    tcg_gen_vec_add16_i64(d, d, a);
}
4023
/*
 * 32-bit expander used by usra_op[]: a = a >> shift (tcg_gen_shri_i32),
 * then d = d + a.  Note that a is overwritten.
 */
static void gen_usra32_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_add_i32(d, d, a);
}
4029
/*
 * 64-bit expander used by usra_op[]: a = a >> shift (tcg_gen_shri_i64),
 * then d = d + a.  Note that a is overwritten.
 */
static void gen_usra64_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_add_i64(d, d, a);
}
4035
/*
 * Host-vector expander used by usra_op[] for element size @vece:
 * a = a >> sh (tcg_gen_shri_vec), then d = d + a.  Note that a is
 * overwritten.
 */
static void gen_usra_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
{
    tcg_gen_shri_vec(vece, a, a, sh);
    tcg_gen_add_vec(vece, d, d, a);
}
4041
4042	static const TCGOpcode vecop_list_usra[] = {
4043	INDEX_op_shri_vec, INDEX_op_add_vec, `0`
4044	};
4045
4046	const GVecGen2i usra_op[`4`] = {
4047	{ .fni8 = gen_usra8_i64,
4048	.fniv = gen_usra_vec,
4049	.load_dest = true,
4050	.opt_opc = vecop_list_usra,
4051	.vece = MO_8, },
4052	{ .fni8 = gen_usra16_i64,
4053	.fniv = gen_usra_vec,
4054	.load_dest = true,
4055	.opt_opc = vecop_list_usra,
4056	.vece = MO_16, },
4057	{ .fni4 = gen_usra32_i32,
4058	.fniv = gen_usra_vec,
4059	.load_dest = true,
4060	.opt_opc = vecop_list_usra,
4061	.vece = MO_32, },
4062	{ .fni8 = gen_usra64_i64,
4063	.fniv = gen_usra_vec,
4064	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
4065	.load_dest = true,
4066	.opt_opc = vecop_list_usra,
4067	.vece = MO_64, },
4068	};
4069
4070	static void gen_shr8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4071	{
4072	uint64_t mask = dup_const(MO_8, `0xff` >> shift);
4073	TCGv_i64 t = tcg_temp_new_i64();
4074
4075	tcg_gen_shri_i64(t, a, shift);
4076	tcg_gen_andi_i64(t, t, mask);
4077	tcg_gen_andi_i64(d, d, ~mask);
4078	tcg_gen_or_i64(d, d, t);
4079	tcg_temp_free_i64(t);
4080	}
4081
4082	static void gen_shr16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4083	{
4084	uint64_t mask = dup_const(MO_16, `0xffff` >> shift);
4085	TCGv_i64 t = tcg_temp_new_i64();
4086
4087	tcg_gen_shri_i64(t, a, shift);
4088	tcg_gen_andi_i64(t, t, mask);
4089	tcg_gen_andi_i64(d, d, ~mask);
4090	tcg_gen_or_i64(d, d, t);
4091	tcg_temp_free_i64(t);
4092	}
4093
/*
 * 32-bit shift-right-and-insert: a is shifted right by @shift in place,
 * then its low (32 - shift) bits are deposited into d at bit 0, leaving
 * the top @shift bits of d unchanged.  Note that a is overwritten.
 */
static void gen_shr32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_shri_i32(a, a, shift);
    tcg_gen_deposit_i32(d, d, a, 0, 32 - shift);
}
4099
/*
 * 64-bit shift-right-and-insert: a is shifted right by @shift in place,
 * then its low (64 - shift) bits are deposited into d at bit 0, leaving
 * the top @shift bits of d unchanged.  Note that a is overwritten.
 */
static void gen_shr64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_shri_i64(a, a, shift);
    tcg_gen_deposit_i64(d, d, a, 0, 64 - shift);
}
4105
4106	static void gen_shr_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4107	{
4108	if (sh == `0`) {
4109	tcg_gen_mov_vec(d, a);
4110	} else {
4111	TCGv_vec t = tcg_temp_new_vec_matching(d);
4112	TCGv_vec m = tcg_temp_new_vec_matching(d);
4113
4114	tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK((`8` << vece) - sh, sh));
4115	tcg_gen_shri_vec(vece, t, a, sh);
4116	tcg_gen_and_vec(vece, d, d, m);
4117	tcg_gen_or_vec(vece, d, d, t);
4118
4119	tcg_temp_free_vec(t);
4120	tcg_temp_free_vec(m);
4121	}
4122	}
4123
4124	static const TCGOpcode vecop_list_sri[] = { INDEX_op_shri_vec, `0` };
4125
4126	const GVecGen2i sri_op[`4`] = {
4127	{ .fni8 = gen_shr8_ins_i64,
4128	.fniv = gen_shr_ins_vec,
4129	.load_dest = true,
4130	.opt_opc = vecop_list_sri,
4131	.vece = MO_8 },
4132	{ .fni8 = gen_shr16_ins_i64,
4133	.fniv = gen_shr_ins_vec,
4134	.load_dest = true,
4135	.opt_opc = vecop_list_sri,
4136	.vece = MO_16 },
4137	{ .fni4 = gen_shr32_ins_i32,
4138	.fniv = gen_shr_ins_vec,
4139	.load_dest = true,
4140	.opt_opc = vecop_list_sri,
4141	.vece = MO_32 },
4142	{ .fni8 = gen_shr64_ins_i64,
4143	.fniv = gen_shr_ins_vec,
4144	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
4145	.load_dest = true,
4146	.opt_opc = vecop_list_sri,
4147	.vece = MO_64 },
4148	};
4149
4150	static void gen_shl8_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4151	{
4152	uint64_t mask = dup_const(MO_8, `0xff` << shift);
4153	TCGv_i64 t = tcg_temp_new_i64();
4154
4155	tcg_gen_shli_i64(t, a, shift);
4156	tcg_gen_andi_i64(t, t, mask);
4157	tcg_gen_andi_i64(d, d, ~mask);
4158	tcg_gen_or_i64(d, d, t);
4159	tcg_temp_free_i64(t);
4160	}
4161
4162	static void gen_shl16_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
4163	{
4164	uint64_t mask = dup_const(MO_16, `0xffff` << shift);
4165	TCGv_i64 t = tcg_temp_new_i64();
4166
4167	tcg_gen_shli_i64(t, a, shift);
4168	tcg_gen_andi_i64(t, t, mask);
4169	tcg_gen_andi_i64(d, d, ~mask);
4170	tcg_gen_or_i64(d, d, t);
4171	tcg_temp_free_i64(t);
4172	}
4173
/*
 * 32-bit shift-left-and-insert: the low (32 - shift) bits of a are
 * deposited into d starting at bit @shift, leaving the low @shift bits of
 * d unchanged.  a is not modified.
 */
static void gen_shl32_ins_i32(TCGv_i32 d, TCGv_i32 a, int32_t shift)
{
    tcg_gen_deposit_i32(d, d, a, shift, 32 - shift);
}
4178
/*
 * 64-bit shift-left-and-insert: the low (64 - shift) bits of a are
 * deposited into d starting at bit @shift, leaving the low @shift bits of
 * d unchanged.  a is not modified.
 */
static void gen_shl64_ins_i64(TCGv_i64 d, TCGv_i64 a, int64_t shift)
{
    tcg_gen_deposit_i64(d, d, a, shift, 64 - shift);
}
4183
4184	static void gen_shl_ins_vec(unsigned vece, TCGv_vec d, TCGv_vec a, int64_t sh)
4185	{
4186	if (sh == `0`) {
4187	tcg_gen_mov_vec(d, a);
4188	} else {
4189	TCGv_vec t = tcg_temp_new_vec_matching(d);
4190	TCGv_vec m = tcg_temp_new_vec_matching(d);
4191
4192	tcg_gen_dupi_vec(vece, m, MAKE_64BIT_MASK(`0`, sh));
4193	tcg_gen_shli_vec(vece, t, a, sh);
4194	tcg_gen_and_vec(vece, d, d, m);
4195	tcg_gen_or_vec(vece, d, d, t);
4196
4197	tcg_temp_free_vec(t);
4198	tcg_temp_free_vec(m);
4199	}
4200	}
4201
4202	static const TCGOpcode vecop_list_sli[] = { INDEX_op_shli_vec, `0` };
4203
4204	const GVecGen2i sli_op[`4`] = {
4205	{ .fni8 = gen_shl8_ins_i64,
4206	.fniv = gen_shl_ins_vec,
4207	.load_dest = true,
4208	.opt_opc = vecop_list_sli,
4209	.vece = MO_8 },
4210	{ .fni8 = gen_shl16_ins_i64,
4211	.fniv = gen_shl_ins_vec,
4212	.load_dest = true,
4213	.opt_opc = vecop_list_sli,
4214	.vece = MO_16 },
4215	{ .fni4 = gen_shl32_ins_i32,
4216	.fniv = gen_shl_ins_vec,
4217	.load_dest = true,
4218	.opt_opc = vecop_list_sli,
4219	.vece = MO_32 },
4220	{ .fni8 = gen_shl64_ins_i64,
4221	.fniv = gen_shl_ins_vec,
4222	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
4223	.load_dest = true,
4224	.opt_opc = vecop_list_sli,
4225	.vece = MO_64 },
4226	};
4227
/*
 * 8-bit-lane multiply-accumulate on 32-bit values, via the neon mul_u8
 * and add_u8 helpers: a = a * b, then d = d + a.  Note that a is
 * overwritten.
 */
static void gen_mla8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_add_u8(d, d, a);
}
4233
/*
 * 8-bit-lane multiply-subtract on 32-bit values, via the neon mul_u8
 * and sub_u8 helpers: a = a * b, then d = d - a.  Note that a is
 * overwritten.
 */
static void gen_mls8_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u8(a, a, b);
    gen_helper_neon_sub_u8(d, d, a);
}
4239
/*
 * 16-bit-lane multiply-accumulate on 32-bit values, via the neon mul_u16
 * and add_u16 helpers: a = a * b, then d = d + a.  Note that a is
 * overwritten.
 */
static void gen_mla16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_add_u16(d, d, a);
}
4245
/*
 * 16-bit-lane multiply-subtract on 32-bit values, via the neon mul_u16
 * and sub_u16 helpers: a = a * b, then d = d - a.  Note that a is
 * overwritten.
 */
static void gen_mls16_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    gen_helper_neon_mul_u16(a, a, b);
    gen_helper_neon_sub_u16(d, d, a);
}
4251
/*
 * 32-bit multiply-accumulate: a = a * b, then d = d + a.
 * Note that a is overwritten.
 */
static void gen_mla32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_add_i32(d, d, a);
}
4257
/*
 * 32-bit multiply-subtract: a = a * b, then d = d - a.
 * Note that a is overwritten.
 */
static void gen_mls32_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
{
    tcg_gen_mul_i32(a, a, b);
    tcg_gen_sub_i32(d, d, a);
}
4263
/*
 * 64-bit multiply-accumulate: a = a * b, then d = d + a.
 * Note that a is overwritten.
 */
static void gen_mla64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_add_i64(d, d, a);
}
4269
/*
 * 64-bit multiply-subtract: a = a * b, then d = d - a.
 * Note that a is overwritten.
 */
static void gen_mls64_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
{
    tcg_gen_mul_i64(a, a, b);
    tcg_gen_sub_i64(d, d, a);
}
4275
/*
 * Host-vector multiply-accumulate for element size @vece:
 * a = a * b, then d = d + a.  Note that a is overwritten.
 */
static void gen_mla_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_add_vec(vece, d, d, a);
}
4281
/*
 * Host-vector multiply-subtract for element size @vece:
 * a = a * b, then d = d - a.  Note that a is overwritten.
 */
static void gen_mls_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
{
    tcg_gen_mul_vec(vece, a, a, b);
    tcg_gen_sub_vec(vece, d, d, a);
}
4287
/*
 * Note that while NEON does not support VMLA and VMLS as 64-bit ops,
 * these tables are shared with AArch64 which does support them.
 */
4291
4292	static const TCGOpcode vecop_list_mla[] = {
4293	INDEX_op_mul_vec, INDEX_op_add_vec, `0`
4294	};
4295
4296	static const TCGOpcode vecop_list_mls[] = {
4297	INDEX_op_mul_vec, INDEX_op_sub_vec, `0`
4298	};
4299
4300	const GVecGen3 mla_op[`4`] = {
4301	{ .fni4 = gen_mla8_i32,
4302	.fniv = gen_mla_vec,
4303	.load_dest = true,
4304	.opt_opc = vecop_list_mla,
4305	.vece = MO_8 },
4306	{ .fni4 = gen_mla16_i32,
4307	.fniv = gen_mla_vec,
4308	.load_dest = true,
4309	.opt_opc = vecop_list_mla,
4310	.vece = MO_16 },
4311	{ .fni4 = gen_mla32_i32,
4312	.fniv = gen_mla_vec,
4313	.load_dest = true,
4314	.opt_opc = vecop_list_mla,
4315	.vece = MO_32 },
4316	{ .fni8 = gen_mla64_i64,
4317	.fniv = gen_mla_vec,
4318	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
4319	.load_dest = true,
4320	.opt_opc = vecop_list_mla,
4321	.vece = MO_64 },
4322	};
4323
4324	const GVecGen3 mls_op[`4`] = {
4325	{ .fni4 = gen_mls8_i32,
4326	.fniv = gen_mls_vec,
4327	.load_dest = true,
4328	.opt_opc = vecop_list_mls,
4329	.vece = MO_8 },
4330	{ .fni4 = gen_mls16_i32,
4331	.fniv = gen_mls_vec,
4332	.load_dest = true,
4333	.opt_opc = vecop_list_mls,
4334	.vece = MO_16 },
4335	{ .fni4 = gen_mls32_i32,
4336	.fniv = gen_mls_vec,
4337	.load_dest = true,
4338	.opt_opc = vecop_list_mls,
4339	.vece = MO_32 },
4340	{ .fni8 = gen_mls64_i64,
4341	.fniv = gen_mls_vec,
4342	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
4343	.load_dest = true,
4344	.opt_opc = vecop_list_mls,
4345	.vece = MO_64 },
4346	};
4347
4348	/ CMTST : test is "if (X & Y != 0)". /
4349	static void gen_cmtst_i32(TCGv_i32 d, TCGv_i32 a, TCGv_i32 b)
4350	{
4351	tcg_gen_and_i32(d, a, b);
4352	tcg_gen_setcondi_i32(TCG_COND_NE, d, d, `0`);
4353	tcg_gen_neg_i32(d, d);
4354	}
4355
4356	void gen_cmtst_i64(TCGv_i64 d, TCGv_i64 a, TCGv_i64 b)
4357	{
4358	tcg_gen_and_i64(d, a, b);
4359	tcg_gen_setcondi_i64(TCG_COND_NE, d, d, `0`);
4360	tcg_gen_neg_i64(d, d);
4361	}
4362
4363	static void gen_cmtst_vec(unsigned vece, TCGv_vec d, TCGv_vec a, TCGv_vec b)
4364	{
4365	tcg_gen_and_vec(vece, d, a, b);
4366	tcg_gen_dupi_vec(vece, a, `0`);
4367	tcg_gen_cmp_vec(TCG_COND_NE, vece, d, d, a);
4368	}
4369
4370	static const TCGOpcode vecop_list_cmtst[] = { INDEX_op_cmp_vec, `0` };
4371
4372	const GVecGen3 cmtst_op[`4`] = {
4373	{ .fni4 = gen_helper_neon_tst_u8,
4374	.fniv = gen_cmtst_vec,
4375	.opt_opc = vecop_list_cmtst,
4376	.vece = MO_8 },
4377	{ .fni4 = gen_helper_neon_tst_u16,
4378	.fniv = gen_cmtst_vec,
4379	.opt_opc = vecop_list_cmtst,
4380	.vece = MO_16 },
4381	{ .fni4 = gen_cmtst_i32,
4382	.fniv = gen_cmtst_vec,
4383	.opt_opc = vecop_list_cmtst,
4384	.vece = MO_32 },
4385	{ .fni8 = gen_cmtst_i64,
4386	.fniv = gen_cmtst_vec,
4387	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
4388	.opt_opc = vecop_list_cmtst,
4389	.vece = MO_64 },
4390	};
4391
4392	static void gen_uqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4393	TCGv_vec a, TCGv_vec b)
4394	{
4395	TCGv_vec x = tcg_temp_new_vec_matching(t);
4396	tcg_gen_add_vec(vece, x, a, b);
4397	tcg_gen_usadd_vec(vece, t, a, b);
4398	tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4399	tcg_gen_or_vec(vece, sat, sat, x);
4400	tcg_temp_free_vec(x);
4401	}
4402
4403	static const TCGOpcode vecop_list_uqadd[] = {
4404	INDEX_op_usadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, `0`
4405	};
4406
4407	const GVecGen4 uqadd_op[`4`] = {
4408	{ .fniv = gen_uqadd_vec,
4409	.fno = gen_helper_gvec_uqadd_b,
4410	.write_aofs = true,
4411	.opt_opc = vecop_list_uqadd,
4412	.vece = MO_8 },
4413	{ .fniv = gen_uqadd_vec,
4414	.fno = gen_helper_gvec_uqadd_h,
4415	.write_aofs = true,
4416	.opt_opc = vecop_list_uqadd,
4417	.vece = MO_16 },
4418	{ .fniv = gen_uqadd_vec,
4419	.fno = gen_helper_gvec_uqadd_s,
4420	.write_aofs = true,
4421	.opt_opc = vecop_list_uqadd,
4422	.vece = MO_32 },
4423	{ .fniv = gen_uqadd_vec,
4424	.fno = gen_helper_gvec_uqadd_d,
4425	.write_aofs = true,
4426	.opt_opc = vecop_list_uqadd,
4427	.vece = MO_64 },
4428	};
4429
4430	static void gen_sqadd_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4431	TCGv_vec a, TCGv_vec b)
4432	{
4433	TCGv_vec x = tcg_temp_new_vec_matching(t);
4434	tcg_gen_add_vec(vece, x, a, b);
4435	tcg_gen_ssadd_vec(vece, t, a, b);
4436	tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4437	tcg_gen_or_vec(vece, sat, sat, x);
4438	tcg_temp_free_vec(x);
4439	}
4440
4441	static const TCGOpcode vecop_list_sqadd[] = {
4442	INDEX_op_ssadd_vec, INDEX_op_cmp_vec, INDEX_op_add_vec, `0`
4443	};
4444
4445	const GVecGen4 sqadd_op[`4`] = {
4446	{ .fniv = gen_sqadd_vec,
4447	.fno = gen_helper_gvec_sqadd_b,
4448	.opt_opc = vecop_list_sqadd,
4449	.write_aofs = true,
4450	.vece = MO_8 },
4451	{ .fniv = gen_sqadd_vec,
4452	.fno = gen_helper_gvec_sqadd_h,
4453	.opt_opc = vecop_list_sqadd,
4454	.write_aofs = true,
4455	.vece = MO_16 },
4456	{ .fniv = gen_sqadd_vec,
4457	.fno = gen_helper_gvec_sqadd_s,
4458	.opt_opc = vecop_list_sqadd,
4459	.write_aofs = true,
4460	.vece = MO_32 },
4461	{ .fniv = gen_sqadd_vec,
4462	.fno = gen_helper_gvec_sqadd_d,
4463	.opt_opc = vecop_list_sqadd,
4464	.write_aofs = true,
4465	.vece = MO_64 },
4466	};
4467
4468	static void gen_uqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4469	TCGv_vec a, TCGv_vec b)
4470	{
4471	TCGv_vec x = tcg_temp_new_vec_matching(t);
4472	tcg_gen_sub_vec(vece, x, a, b);
4473	tcg_gen_ussub_vec(vece, t, a, b);
4474	tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4475	tcg_gen_or_vec(vece, sat, sat, x);
4476	tcg_temp_free_vec(x);
4477	}
4478
4479	static const TCGOpcode vecop_list_uqsub[] = {
4480	INDEX_op_ussub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, `0`
4481	};
4482
4483	const GVecGen4 uqsub_op[`4`] = {
4484	{ .fniv = gen_uqsub_vec,
4485	.fno = gen_helper_gvec_uqsub_b,
4486	.opt_opc = vecop_list_uqsub,
4487	.write_aofs = true,
4488	.vece = MO_8 },
4489	{ .fniv = gen_uqsub_vec,
4490	.fno = gen_helper_gvec_uqsub_h,
4491	.opt_opc = vecop_list_uqsub,
4492	.write_aofs = true,
4493	.vece = MO_16 },
4494	{ .fniv = gen_uqsub_vec,
4495	.fno = gen_helper_gvec_uqsub_s,
4496	.opt_opc = vecop_list_uqsub,
4497	.write_aofs = true,
4498	.vece = MO_32 },
4499	{ .fniv = gen_uqsub_vec,
4500	.fno = gen_helper_gvec_uqsub_d,
4501	.opt_opc = vecop_list_uqsub,
4502	.write_aofs = true,
4503	.vece = MO_64 },
4504	};
4505
4506	static void gen_sqsub_vec(unsigned vece, TCGv_vec t, TCGv_vec sat,
4507	TCGv_vec a, TCGv_vec b)
4508	{
4509	TCGv_vec x = tcg_temp_new_vec_matching(t);
4510	tcg_gen_sub_vec(vece, x, a, b);
4511	tcg_gen_sssub_vec(vece, t, a, b);
4512	tcg_gen_cmp_vec(TCG_COND_NE, vece, x, x, t);
4513	tcg_gen_or_vec(vece, sat, sat, x);
4514	tcg_temp_free_vec(x);
4515	}
4516
4517	static const TCGOpcode vecop_list_sqsub[] = {
4518	INDEX_op_sssub_vec, INDEX_op_cmp_vec, INDEX_op_sub_vec, `0`
4519	};
4520
4521	const GVecGen4 sqsub_op[`4`] = {
4522	{ .fniv = gen_sqsub_vec,
4523	.fno = gen_helper_gvec_sqsub_b,
4524	.opt_opc = vecop_list_sqsub,
4525	.write_aofs = true,
4526	.vece = MO_8 },
4527	{ .fniv = gen_sqsub_vec,
4528	.fno = gen_helper_gvec_sqsub_h,
4529	.opt_opc = vecop_list_sqsub,
4530	.write_aofs = true,
4531	.vece = MO_16 },
4532	{ .fniv = gen_sqsub_vec,
4533	.fno = gen_helper_gvec_sqsub_s,
4534	.opt_opc = vecop_list_sqsub,
4535	.write_aofs = true,
4536	.vece = MO_32 },
4537	{ .fniv = gen_sqsub_vec,
4538	.fno = gen_helper_gvec_sqsub_d,
4539	.opt_opc = vecop_list_sqsub,
4540	.write_aofs = true,
4541	.vece = MO_64 },
4542	};
4543
/*
 * Translate a NEON data processing instruction.  Return nonzero if the
 * instruction is invalid.
 * We process data in a mixture of 32-bit and 64-bit chunks.
 * Mostly we use 32-bit chunks so we can use normal scalar instructions.
 */
4548
4549	static int disas_neon_data_insn(DisasContext *s, uint32_t insn)
4550	{
4551	int op;
4552	int q;
4553	int rd, rn, rm, rd_ofs, rn_ofs, rm_ofs;
4554	int size;
4555	int shift;
4556	int pass;
4557	int count;
4558	int pairwise;
4559	int u;
4560	int vec_size;
4561	uint32_t imm;
4562	TCGv_i32 tmp, tmp2, tmp3, tmp4, tmp5;
4563	TCGv_ptr ptr1, ptr2, ptr3;
4564	TCGv_i64 tmp64;
4565
4566	/ FIXME: this access check should not take precedence over UNDEF*
4567	* for invalid encodings; we will generate incorrect syndrome information
4568	* for attempts to execute invalid vfp/neon encodings with FP disabled.
4569	*/
4570	if (s->fp_excp_el) {
4571	gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
4572	syn_simd_access_trap(`1`, `0xe`, false), s->fp_excp_el);
4573	return `0`;
4574	}
4575
4576	if (!s->vfp_enabled)
4577	return `1`;
4578	q = (insn & (`1` << `6`)) != `0`;
4579	u = (insn >> `24`) & `1`;
4580	VFP_DREG_D(rd, insn);
4581	VFP_DREG_N(rn, insn);
4582	VFP_DREG_M(rm, insn);
4583	size = (insn >> `20`) & `3`;
4584	vec_size = q ? `16` : `8`;
4585	rd_ofs = neon_reg_offset(rd, `0`);
4586	rn_ofs = neon_reg_offset(rn, `0`);
4587	rm_ofs = neon_reg_offset(rm, `0`);
4588
4589	if ((insn & (`1` << `23`)) == `0`) {
4590	/ Three register same length. /
4591	op = ((insn >> `7`) & `0x1e`) \| ((insn >> `4`) & `1`);
4592	/ Catch invalid op and bad size combinations: UNDEF /
4593	if ((neon_3r_sizes[op] & (`1` << size)) == `0`) {
4594	return `1`;
4595	}
4596	/ All insns of this form UNDEF for either this condition or the*
4597	* superset of cases "Q==1"; we catch the latter later.
4598	*/
4599	if (q && ((rd \| rn \| rm) & `1`)) {
4600	return `1`;
4601	}
4602	switch (op) {
4603	case NEON_3R_SHA:
4604	/ The SHA-1/SHA-256 3-register instructions require special*
4605	* treatment here, as their size field is overloaded as an
4606	* op type selector, and they all consume their input in a
4607	* single pass.
4608	*/
4609	if (!q) {
4610	return `1`;
4611	}
4612	if (!u) { / SHA-1 /
4613	if (!dc_isar_feature(aa32_sha1, s)) {
4614	return `1`;
4615	}
4616	ptr1 = vfp_reg_ptr(true, rd);
4617	ptr2 = vfp_reg_ptr(true, rn);
4618	ptr3 = vfp_reg_ptr(true, rm);
4619	tmp4 = tcg_const_i32(size);
4620	gen_helper_crypto_sha1_3reg(ptr1, ptr2, ptr3, tmp4);
4621	tcg_temp_free_i32(tmp4);
4622	} else { / SHA-256 /
4623	if (!dc_isar_feature(aa32_sha2, s) \|\| size == `3`) {
4624	return `1`;
4625	}
4626	ptr1 = vfp_reg_ptr(true, rd);
4627	ptr2 = vfp_reg_ptr(true, rn);
4628	ptr3 = vfp_reg_ptr(true, rm);
4629	switch (size) {
4630	case `0`:
4631	gen_helper_crypto_sha256h(ptr1, ptr2, ptr3);
4632	break;
4633	case `1`:
4634	gen_helper_crypto_sha256h2(ptr1, ptr2, ptr3);
4635	break;
4636	case `2`:
4637	gen_helper_crypto_sha256su1(ptr1, ptr2, ptr3);
4638	break;
4639	}
4640	}
4641	tcg_temp_free_ptr(ptr1);
4642	tcg_temp_free_ptr(ptr2);
4643	tcg_temp_free_ptr(ptr3);
4644	return `0`;
4645
4646	case NEON_3R_VPADD_VQRDMLAH:
4647	if (!u) {
4648	break; / VPADD /
4649	}
4650	/ VQRDMLAH /
4651	switch (size) {
4652	case `1`:
4653	return do_v81_helper(s, gen_helper_gvec_qrdmlah_s16,
4654	q, rd, rn, rm);
4655	case `2`:
4656	return do_v81_helper(s, gen_helper_gvec_qrdmlah_s32,
4657	q, rd, rn, rm);
4658	}
4659	return `1`;
4660
4661	case NEON_3R_VFM_VQRDMLSH:
4662	if (!u) {
4663	/ VFM, VFMS /
4664	if (size == `1`) {
4665	return `1`;
4666	}
4667	break;
4668	}
4669	/ VQRDMLSH /
4670	switch (size) {
4671	case `1`:
4672	return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s16,
4673	q, rd, rn, rm);
4674	case `2`:
4675	return do_v81_helper(s, gen_helper_gvec_qrdmlsh_s32,
4676	q, rd, rn, rm);
4677	}
4678	return `1`;
4679
4680	case NEON_3R_LOGIC: / Logic ops. /
4681	switch ((u << `2`) \| size) {
4682	case `0`: / VAND /
4683	tcg_gen_gvec_and(`0`, rd_ofs, rn_ofs, rm_ofs,
4684	vec_size, vec_size);
4685	break;
4686	case `1`: / VBIC /
4687	tcg_gen_gvec_andc(`0`, rd_ofs, rn_ofs, rm_ofs,
4688	vec_size, vec_size);
4689	break;
4690	case `2`: / VORR /
4691	tcg_gen_gvec_or(`0`, rd_ofs, rn_ofs, rm_ofs,
4692	vec_size, vec_size);
4693	break;
4694	case `3`: / VORN /
4695	tcg_gen_gvec_orc(`0`, rd_ofs, rn_ofs, rm_ofs,
4696	vec_size, vec_size);
4697	break;
4698	case `4`: / VEOR /
4699	tcg_gen_gvec_xor(`0`, rd_ofs, rn_ofs, rm_ofs,
4700	vec_size, vec_size);
4701	break;
4702	case `5`: / VBSL /
4703	tcg_gen_gvec_bitsel(MO_8, rd_ofs, rd_ofs, rn_ofs, rm_ofs,
4704	vec_size, vec_size);
4705	break;
4706	case `6`: / VBIT /
4707	tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rn_ofs, rd_ofs,
4708	vec_size, vec_size);
4709	break;
4710	case `7`: / VBIF /
4711	tcg_gen_gvec_bitsel(MO_8, rd_ofs, rm_ofs, rd_ofs, rn_ofs,
4712	vec_size, vec_size);
4713	break;
4714	}
4715	return `0`;
4716
4717	case NEON_3R_VADD_VSUB:
4718	if (u) {
4719	tcg_gen_gvec_sub(size, rd_ofs, rn_ofs, rm_ofs,
4720	vec_size, vec_size);
4721	} else {
4722	tcg_gen_gvec_add(size, rd_ofs, rn_ofs, rm_ofs,
4723	vec_size, vec_size);
4724	}
4725	return `0`;
4726
4727	case NEON_3R_VQADD:
4728	tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4729	rn_ofs, rm_ofs, vec_size, vec_size,
4730	(u ? uqadd_op : sqadd_op) + size);
4731	return `0`;
4732
4733	case NEON_3R_VQSUB:
4734	tcg_gen_gvec_4(rd_ofs, offsetof(CPUARMState, vfp.qc),
4735	rn_ofs, rm_ofs, vec_size, vec_size,
4736	(u ? uqsub_op : sqsub_op) + size);
4737	return `0`;
4738
4739	case NEON_3R_VMUL: / VMUL /
4740	if (u) {
4741	/ Polynomial case allows only P8 and is handled below. /
4742	if (size != `0`) {
4743	return `1`;
4744	}
4745	} else {
4746	tcg_gen_gvec_mul(size, rd_ofs, rn_ofs, rm_ofs,
4747	vec_size, vec_size);
4748	return `0`;
4749	}
4750	break;
4751
4752	case NEON_3R_VML: / VMLA, VMLS /
4753	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size,
4754	u ? &mls_op[size] : &mla_op[size]);
4755	return `0`;
4756
4757	case NEON_3R_VTST_VCEQ:
4758	if (u) { / VCEQ /
4759	tcg_gen_gvec_cmp(TCG_COND_EQ, size, rd_ofs, rn_ofs, rm_ofs,
4760	vec_size, vec_size);
4761	} else { / VTST /
4762	tcg_gen_gvec_3(rd_ofs, rn_ofs, rm_ofs,
4763	vec_size, vec_size, &cmtst_op[size]);
4764	}
4765	return `0`;
4766
4767	case NEON_3R_VCGT:
4768	tcg_gen_gvec_cmp(u ? TCG_COND_GTU : TCG_COND_GT, size,
4769	rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4770	return `0`;
4771
4772	case NEON_3R_VCGE:
4773	tcg_gen_gvec_cmp(u ? TCG_COND_GEU : TCG_COND_GE, size,
4774	rd_ofs, rn_ofs, rm_ofs, vec_size, vec_size);
4775	return `0`;
4776
4777	case NEON_3R_VMAX:
4778	if (u) {
4779	tcg_gen_gvec_umax(size, rd_ofs, rn_ofs, rm_ofs,
4780	vec_size, vec_size);
4781	} else {
4782	tcg_gen_gvec_smax(size, rd_ofs, rn_ofs, rm_ofs,
4783	vec_size, vec_size);
4784	}
4785	return `0`;
4786	case NEON_3R_VMIN:
4787	if (u) {
4788	tcg_gen_gvec_umin(size, rd_ofs, rn_ofs, rm_ofs,
4789	vec_size, vec_size);
4790	} else {
4791	tcg_gen_gvec_smin(size, rd_ofs, rn_ofs, rm_ofs,
4792	vec_size, vec_size);
4793	}
4794	return `0`;
4795	}
4796
4797	if (size == `3`) {
4798	/ 64-bit element instructions. /
4799	for (pass = `0`; pass < (q ? `2` : `1`); pass++) {
4800	neon_load_reg64(cpu_V0, rn + pass);
4801	neon_load_reg64(cpu_V1, rm + pass);
4802	switch (op) {
4803	case NEON_3R_VSHL:
4804	if (u) {
4805	gen_helper_neon_shl_u64(cpu_V0, cpu_V1, cpu_V0);
4806	} else {
4807	gen_helper_neon_shl_s64(cpu_V0, cpu_V1, cpu_V0);
4808	}
4809	break;
4810	case NEON_3R_VQSHL:
4811	if (u) {
4812	gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
4813	cpu_V1, cpu_V0);
4814	} else {
4815	gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
4816	cpu_V1, cpu_V0);
4817	}
4818	break;
4819	case NEON_3R_VRSHL:
4820	if (u) {
4821	gen_helper_neon_rshl_u64(cpu_V0, cpu_V1, cpu_V0);
4822	} else {
4823	gen_helper_neon_rshl_s64(cpu_V0, cpu_V1, cpu_V0);
4824	}
4825	break;
4826	case NEON_3R_VQRSHL:
4827	if (u) {
4828	gen_helper_neon_qrshl_u64(cpu_V0, cpu_env,
4829	cpu_V1, cpu_V0);
4830	} else {
4831	gen_helper_neon_qrshl_s64(cpu_V0, cpu_env,
4832	cpu_V1, cpu_V0);
4833	}
4834	break;
4835	default:
4836	abort();
4837	}
4838	neon_store_reg64(cpu_V0, rd + pass);
4839	}
4840	return `0`;
4841	}
4842	pairwise = `0`;
4843	switch (op) {
4844	case NEON_3R_VSHL:
4845	case NEON_3R_VQSHL:
4846	case NEON_3R_VRSHL:
4847	case NEON_3R_VQRSHL:
4848	{
4849	int rtmp;
4850	/ Shift instruction operands are reversed. /
4851	rtmp = rn;
4852	rn = rm;
4853	rm = rtmp;
4854	}
4855	break;
4856	case NEON_3R_VPADD_VQRDMLAH:
4857	case NEON_3R_VPMAX:
4858	case NEON_3R_VPMIN:
4859	pairwise = `1`;
4860	break;
4861	case NEON_3R_FLOAT_ARITH:
4862	pairwise = (u && size < `2`); / if VPADD (float) /
4863	break;
4864	case NEON_3R_FLOAT_MINMAX:
4865	pairwise = u; / if VPMIN/VPMAX (float) /
4866	break;
4867	case NEON_3R_FLOAT_CMP:
4868	if (!u && size) {
4869	/ no encoding for U=0 C=1x /
4870	return `1`;
4871	}
4872	break;
4873	case NEON_3R_FLOAT_ACMP:
4874	if (!u) {
4875	return `1`;
4876	}
4877	break;
4878	case NEON_3R_FLOAT_MISC:
4879	/ VMAXNM/VMINNM in ARMv8 /
4880	if (u && !arm_dc_feature(s, ARM_FEATURE_V8)) {
4881	return `1`;
4882	}
4883	break;
4884	case NEON_3R_VFM_VQRDMLSH:
4885	if (!arm_dc_feature(s, ARM_FEATURE_VFP4)) {
4886	return `1`;
4887	}
4888	break;
4889	default:
4890	break;
4891	}
4892
4893	if (pairwise && q) {
4894	/ All the pairwise insns UNDEF if Q is set /
4895	return `1`;
4896	}
4897
4898	for (pass = `0`; pass < (q ? `4` : `2`); pass++) {
4899
4900	if (pairwise) {
4901	/ Pairwise. /
4902	if (pass < `1`) {
4903	tmp = neon_load_reg(rn, `0`);
4904	tmp2 = neon_load_reg(rn, `1`);
4905	} else {
4906	tmp = neon_load_reg(rm, `0`);
4907	tmp2 = neon_load_reg(rm, `1`);
4908	}
4909	} else {
4910	/ Elementwise. /
4911	tmp = neon_load_reg(rn, pass);
4912	tmp2 = neon_load_reg(rm, pass);
4913	}
4914	switch (op) {
4915	case NEON_3R_VHADD:
4916	GEN_NEON_INTEGER_OP(hadd);
4917	break;
4918	case NEON_3R_VRHADD:
4919	GEN_NEON_INTEGER_OP(rhadd);
4920	break;
4921	case NEON_3R_VHSUB:
4922	GEN_NEON_INTEGER_OP(hsub);
4923	break;
4924	case NEON_3R_VSHL:
4925	GEN_NEON_INTEGER_OP(shl);
4926	break;
4927	case NEON_3R_VQSHL:
4928	GEN_NEON_INTEGER_OP_ENV(qshl);
4929	break;
4930	case NEON_3R_VRSHL:
4931	GEN_NEON_INTEGER_OP(rshl);
4932	break;
4933	case NEON_3R_VQRSHL:
4934	GEN_NEON_INTEGER_OP_ENV(qrshl);
4935	break;
4936	case NEON_3R_VABD:
4937	GEN_NEON_INTEGER_OP(abd);
4938	break;
4939	case NEON_3R_VABA:
4940	GEN_NEON_INTEGER_OP(abd);
4941	tcg_temp_free_i32(tmp2);
4942	tmp2 = neon_load_reg(rd, pass);
4943	gen_neon_add(size, tmp, tmp2);
4944	break;
4945	case NEON_3R_VMUL:
4946	/ VMUL.P8; other cases already eliminated. /
4947	gen_helper_neon_mul_p8(tmp, tmp, tmp2);
4948	break;
4949	case NEON_3R_VPMAX:
4950	GEN_NEON_INTEGER_OP(pmax);
4951	break;
4952	case NEON_3R_VPMIN:
4953	GEN_NEON_INTEGER_OP(pmin);
4954	break;
4955	case NEON_3R_VQDMULH_VQRDMULH: / Multiply high. /
4956	if (!u) { / VQDMULH /
4957	switch (size) {
4958	case `1`:
4959	gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
4960	break;
4961	case `2`:
4962	gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
4963	break;
4964	default: abort();
4965	}
4966	} else { / VQRDMULH /
4967	switch (size) {
4968	case `1`:
4969	gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
4970	break;
4971	case `2`:
4972	gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
4973	break;
4974	default: abort();
4975	}
4976	}
4977	break;
4978	case NEON_3R_VPADD_VQRDMLAH:
4979	switch (size) {
4980	case `0`: gen_helper_neon_padd_u8(tmp, tmp, tmp2); break;
4981	case `1`: gen_helper_neon_padd_u16(tmp, tmp, tmp2); break;
4982	case `2`: tcg_gen_add_i32(tmp, tmp, tmp2); break;
4983	default: abort();
4984	}
4985	break;
4986	case NEON_3R_FLOAT_ARITH: / Floating point arithmetic. /
4987	{
4988	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
4989	switch ((u << `2`) \| size) {
4990	case `0`: / VADD /
4991	case `4`: / VPADD /
4992	gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
4993	break;
4994	case `2`: / VSUB /
4995	gen_helper_vfp_subs(tmp, tmp, tmp2, fpstatus);
4996	break;
4997	case `6`: / VABD /
4998	gen_helper_neon_abd_f32(tmp, tmp, tmp2, fpstatus);
4999	break;
5000	default:
5001	abort();
5002	}
5003	tcg_temp_free_ptr(fpstatus);
5004	break;
5005	}
5006	case NEON_3R_FLOAT_MULTIPLY:
5007	{
5008	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5009	gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5010	if (!u) {
5011	tcg_temp_free_i32(tmp2);
5012	tmp2 = neon_load_reg(rd, pass);
5013	if (size == `0`) {
5014	gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5015	} else {
5016	gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5017	}
5018	}
5019	tcg_temp_free_ptr(fpstatus);
5020	break;
5021	}
5022	case NEON_3R_FLOAT_CMP:
5023	{
5024	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5025	if (!u) {
5026	gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
5027	} else {
5028	if (size == `0`) {
5029	gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
5030	} else {
5031	gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
5032	}
5033	}
5034	tcg_temp_free_ptr(fpstatus);
5035	break;
5036	}
5037	case NEON_3R_FLOAT_ACMP:
5038	{
5039	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5040	if (size == `0`) {
5041	gen_helper_neon_acge_f32(tmp, tmp, tmp2, fpstatus);
5042	} else {
5043	gen_helper_neon_acgt_f32(tmp, tmp, tmp2, fpstatus);
5044	}
5045	tcg_temp_free_ptr(fpstatus);
5046	break;
5047	}
5048	case NEON_3R_FLOAT_MINMAX:
5049	{
5050	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5051	if (size == `0`) {
5052	gen_helper_vfp_maxs(tmp, tmp, tmp2, fpstatus);
5053	} else {
5054	gen_helper_vfp_mins(tmp, tmp, tmp2, fpstatus);
5055	}
5056	tcg_temp_free_ptr(fpstatus);
5057	break;
5058	}
5059	case NEON_3R_FLOAT_MISC:
5060	if (u) {
5061	/ VMAXNM/VMINNM /
5062	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5063	if (size == `0`) {
5064	gen_helper_vfp_maxnums(tmp, tmp, tmp2, fpstatus);
5065	} else {
5066	gen_helper_vfp_minnums(tmp, tmp, tmp2, fpstatus);
5067	}
5068	tcg_temp_free_ptr(fpstatus);
5069	} else {
5070	if (size == `0`) {
5071	gen_helper_recps_f32(tmp, tmp, tmp2, cpu_env);
5072	} else {
5073	gen_helper_rsqrts_f32(tmp, tmp, tmp2, cpu_env);
5074	}
5075	}
5076	break;
5077	case NEON_3R_VFM_VQRDMLSH:
5078	{
5079	/ VFMA, VFMS: fused multiply-add /
5080	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5081	TCGv_i32 tmp3 = neon_load_reg(rd, pass);
5082	if (size) {
5083	/ VFMS /
5084	gen_helper_vfp_negs(tmp, tmp);
5085	}
5086	gen_helper_vfp_muladds(tmp, tmp, tmp2, tmp3, fpstatus);
5087	tcg_temp_free_i32(tmp3);
5088	tcg_temp_free_ptr(fpstatus);
5089	break;
5090	}
5091	default:
5092	abort();
5093	}
5094	tcg_temp_free_i32(tmp2);
5095
5096	/ Save the result. For elementwise operations we can put it*
5097	straight into the destination register. For pairwise operations
5098	we have to be careful to avoid clobbering the source operands. /*
5099	if (pairwise && rd == rm) {
5100	neon_store_scratch(pass, tmp);
5101	} else {
5102	neon_store_reg(rd, pass, tmp);
5103	}
5104
5105	} / for pass /
5106	if (pairwise && rd == rm) {
5107	for (pass = `0`; pass < (q ? `4` : `2`); pass++) {
5108	tmp = neon_load_scratch(pass);
5109	neon_store_reg(rd, pass, tmp);
5110	}
5111	}
5112	/ End of 3 register same size operations. /
5113	} else if (insn & (`1` << `4`)) {
5114	if ((insn & `0x00380080`) != `0`) {
5115	/ Two registers and shift. /
5116	op = (insn >> `8`) & `0xf`;
5117	if (insn & (`1` << `7`)) {
5118	/ 64-bit shift. /
5119	if (op > `7`) {
5120	return `1`;
5121	}
5122	size = `3`;
5123	} else {
5124	size = `2`;
5125	while ((insn & (`1` << (size + `19`))) == `0`)
5126	size--;
5127	}
5128	shift = (insn >> `16`) & ((`1` << (`3` + size)) - `1`);
5129	if (op < `8`) {
5130	/ Shift by immediate:*
5131	VSHR, VSRA, VRSHR, VRSRA, VSRI, VSHL, VQSHL, VQSHLU. /*
5132	if (q && ((rd \| rm) & `1`)) {
5133	return `1`;
5134	}
5135	if (!u && (op == `4` \|\| op == `6`)) {
5136	return `1`;
5137	}
5138	/ Right shifts are encoded as N - shift, where N is the*
5139	element size in bits. /*
5140	if (op <= `4`) {
5141	shift = shift - (`1` << (size + `3`));
5142	}
5143
5144	switch (op) {
5145	case `0`: / VSHR /
5146	/ Right shift comes here negative. /
5147	shift = -shift;
5148	/ Shifts larger than the element size are architecturally*
5149	* valid. Unsigned results in all zeros; signed results
5150	* in all sign bits.
5151	*/
5152	if (!u) {
5153	tcg_gen_gvec_sari(size, rd_ofs, rm_ofs,
5154	MIN(shift, (`8` << size) - `1`),
5155	vec_size, vec_size);
5156	} else if (shift >= `8` << size) {
5157	tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, `0`);
5158	} else {
5159	tcg_gen_gvec_shri(size, rd_ofs, rm_ofs, shift,
5160	vec_size, vec_size);
5161	}
5162	return `0`;
5163
5164	case `1`: / VSRA /
5165	/ Right shift comes here negative. /
5166	shift = -shift;
5167	/ Shifts larger than the element size are architecturally*
5168	* valid. Unsigned results in all zeros; signed results
5169	* in all sign bits.
5170	*/
5171	if (!u) {
5172	tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5173	MIN(shift, (`8` << size) - `1`),
5174	&ssra_op[size]);
5175	} else if (shift >= `8` << size) {
5176	/ rd += 0 /
5177	} else {
5178	tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5179	shift, &usra_op[size]);
5180	}
5181	return `0`;
5182
5183	case `4`: / VSRI /
5184	if (!u) {
5185	return `1`;
5186	}
5187	/ Right shift comes here negative. /
5188	shift = -shift;
5189	/ Shift out of range leaves destination unchanged. /
5190	if (shift < `8` << size) {
5191	tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size, vec_size,
5192	shift, &sri_op[size]);
5193	}
5194	return `0`;
5195
5196	case `5`: / VSHL, VSLI /
5197	if (u) { / VSLI /
5198	/ Shift out of range leaves destination unchanged. /
5199	if (shift < `8` << size) {
5200	tcg_gen_gvec_2i(rd_ofs, rm_ofs, vec_size,
5201	vec_size, shift, &sli_op[size]);
5202	}
5203	} else { / VSHL /
5204	/ Shifts larger than the element size are*
5205	* architecturally valid and results in zero.
5206	*/
5207	if (shift >= `8` << size) {
5208	tcg_gen_gvec_dup8i(rd_ofs, vec_size, vec_size, `0`);
5209	} else {
5210	tcg_gen_gvec_shli(size, rd_ofs, rm_ofs, shift,
5211	vec_size, vec_size);
5212	}
5213	}
5214	return `0`;
5215	}
5216
5217	if (size == `3`) {
5218	count = q + `1`;
5219	} else {
5220	count = q ? `4`: `2`;
5221	}
5222
5223	/ To avoid excessive duplication of ops we implement shift*
5224	* by immediate using the variable shift operations.
5225	*/
5226	imm = dup_const(size, shift);
5227
5228	for (pass = `0`; pass < count; pass++) {
5229	if (size == `3`) {
5230	neon_load_reg64(cpu_V0, rm + pass);
5231	tcg_gen_movi_i64(cpu_V1, imm);
5232	switch (op) {
5233	case `2`: / VRSHR /
5234	case `3`: / VRSRA /
5235	if (u)
5236	gen_helper_neon_rshl_u64(cpu_V0, cpu_V0, cpu_V1);
5237	else
5238	gen_helper_neon_rshl_s64(cpu_V0, cpu_V0, cpu_V1);
5239	break;
5240	case `6`: / VQSHLU /
5241	gen_helper_neon_qshlu_s64(cpu_V0, cpu_env,
5242	cpu_V0, cpu_V1);
5243	break;
5244	case `7`: / VQSHL /
5245	if (u) {
5246	gen_helper_neon_qshl_u64(cpu_V0, cpu_env,
5247	cpu_V0, cpu_V1);
5248	} else {
5249	gen_helper_neon_qshl_s64(cpu_V0, cpu_env,
5250	cpu_V0, cpu_V1);
5251	}
5252	break;
5253	default:
5254	g_assert_not_reached();
5255	}
5256	if (op == `3`) {
5257	/ Accumulate. /
5258	neon_load_reg64(cpu_V1, rd + pass);
5259	tcg_gen_add_i64(cpu_V0, cpu_V0, cpu_V1);
5260	}
5261	neon_store_reg64(cpu_V0, rd + pass);
5262	} else { / size < 3 /
5263	/ Operands in T0 and T1. /
5264	tmp = neon_load_reg(rm, pass);
5265	tmp2 = tcg_temp_new_i32();
5266	tcg_gen_movi_i32(tmp2, imm);
5267	switch (op) {
5268	case `2`: / VRSHR /
5269	case `3`: / VRSRA /
5270	GEN_NEON_INTEGER_OP(rshl);
5271	break;
5272	case `6`: / VQSHLU /
5273	switch (size) {
5274	case `0`:
5275	gen_helper_neon_qshlu_s8(tmp, cpu_env,
5276	tmp, tmp2);
5277	break;
5278	case `1`:
5279	gen_helper_neon_qshlu_s16(tmp, cpu_env,
5280	tmp, tmp2);
5281	break;
5282	case `2`:
5283	gen_helper_neon_qshlu_s32(tmp, cpu_env,
5284	tmp, tmp2);
5285	break;
5286	default:
5287	abort();
5288	}
5289	break;
5290	case `7`: / VQSHL /
5291	GEN_NEON_INTEGER_OP_ENV(qshl);
5292	break;
5293	default:
5294	g_assert_not_reached();
5295	}
5296	tcg_temp_free_i32(tmp2);
5297
5298	if (op == `3`) {
5299	/ Accumulate. /
5300	tmp2 = neon_load_reg(rd, pass);
5301	gen_neon_add(size, tmp, tmp2);
5302	tcg_temp_free_i32(tmp2);
5303	}
5304	neon_store_reg(rd, pass, tmp);
5305	}
5306	} / for pass /
5307	} else if (op < `10`) {
5308	/ Shift by immediate and narrow:*
5309	VSHRN, VRSHRN, VQSHRN, VQRSHRN. /*
5310	int input_unsigned = (op == `8`) ? !u : u;
5311	if (rm & `1`) {
5312	return `1`;
5313	}
5314	shift = shift - (`1` << (size + `3`));
5315	size++;
5316	if (size == `3`) {
5317	tmp64 = tcg_const_i64(shift);
5318	neon_load_reg64(cpu_V0, rm);
5319	neon_load_reg64(cpu_V1, rm + `1`);
5320	for (pass = `0`; pass < `2`; pass++) {
5321	TCGv_i64 in;
5322	if (pass == `0`) {
5323	in = cpu_V0;
5324	} else {
5325	in = cpu_V1;
5326	}
5327	if (q) {
5328	if (input_unsigned) {
5329	gen_helper_neon_rshl_u64(cpu_V0, in, tmp64);
5330	} else {
5331	gen_helper_neon_rshl_s64(cpu_V0, in, tmp64);
5332	}
5333	} else {
5334	if (input_unsigned) {
5335	gen_helper_neon_shl_u64(cpu_V0, in, tmp64);
5336	} else {
5337	gen_helper_neon_shl_s64(cpu_V0, in, tmp64);
5338	}
5339	}
5340	tmp = tcg_temp_new_i32();
5341	gen_neon_narrow_op(op == `8`, u, size - `1`, tmp, cpu_V0);
5342	neon_store_reg(rd, pass, tmp);
5343	} / for pass /
5344	tcg_temp_free_i64(tmp64);
5345	} else {
5346	if (size == `1`) {
5347	imm = (uint16_t)shift;
5348	imm \|= imm << `16`;
5349	} else {
5350	/ size == 2 /
5351	imm = (uint32_t)shift;
5352	}
5353	tmp2 = tcg_const_i32(imm);
5354	tmp4 = neon_load_reg(rm + `1`, `0`);
5355	tmp5 = neon_load_reg(rm + `1`, `1`);
5356	for (pass = `0`; pass < `2`; pass++) {
5357	if (pass == `0`) {
5358	tmp = neon_load_reg(rm, `0`);
5359	} else {
5360	tmp = tmp4;
5361	}
5362	gen_neon_shift_narrow(size, tmp, tmp2, q,
5363	input_unsigned);
5364	if (pass == `0`) {
5365	tmp3 = neon_load_reg(rm, `1`);
5366	} else {
5367	tmp3 = tmp5;
5368	}
5369	gen_neon_shift_narrow(size, tmp3, tmp2, q,
5370	input_unsigned);
5371	tcg_gen_concat_i32_i64(cpu_V0, tmp, tmp3);
5372	tcg_temp_free_i32(tmp);
5373	tcg_temp_free_i32(tmp3);
5374	tmp = tcg_temp_new_i32();
5375	gen_neon_narrow_op(op == `8`, u, size - `1`, tmp, cpu_V0);
5376	neon_store_reg(rd, pass, tmp);
5377	} / for pass /
5378	tcg_temp_free_i32(tmp2);
5379	}
5380	} else if (op == `10`) {
5381	/ VSHLL, VMOVL /
5382	if (q \|\| (rd & `1`)) {
5383	return `1`;
5384	}
5385	tmp = neon_load_reg(rm, `0`);
5386	tmp2 = neon_load_reg(rm, `1`);
5387	for (pass = `0`; pass < `2`; pass++) {
5388	if (pass == `1`)
5389	tmp = tmp2;
5390
5391	gen_neon_widen(cpu_V0, tmp, size, u);
5392
5393	if (shift != `0`) {
5394	/ The shift is less than the width of the source*
5395	type, so we can just shift the whole register. /*
5396	tcg_gen_shli_i64(cpu_V0, cpu_V0, shift);
5397	/ Widen the result of shift: we need to clear*
5398	* the potential overflow bits resulting from
5399	* left bits of the narrow input appearing as
5400	* right bits of left the neighbour narrow
5401	* input. */
5402	if (size < `2` \|\| !u) {
5403	uint64_t imm64;
5404	if (size == `0`) {
5405	imm = (`0xffu` >> (`8` - shift));
5406	imm \|= imm << `16`;
5407	} else if (size == `1`) {
5408	imm = `0xffff` >> (`16` - shift);
5409	} else {
5410	/ size == 2 /
5411	imm = `0xffffffff` >> (`32` - shift);
5412	}
5413	if (size < `2`) {
5414	imm64 = imm \| (((uint64_t)imm) << `32`);
5415	} else {
5416	imm64 = imm;
5417	}
5418	tcg_gen_andi_i64(cpu_V0, cpu_V0, ~imm64);
5419	}
5420	}
5421	neon_store_reg64(cpu_V0, rd + pass);
5422	}
5423	} else if (op >= `14`) {
5424	/ VCVT fixed-point. /
5425	TCGv_ptr fpst;
5426	TCGv_i32 shiftv;
5427	VFPGenFixPointFn *fn;
5428
5429	if (!(insn & (`1` << `21`)) \|\| (q && ((rd \| rm) & `1`))) {
5430	return `1`;
5431	}
5432
5433	if (!(op & `1`)) {
5434	if (u) {
5435	fn = gen_helper_vfp_ultos;
5436	} else {
5437	fn = gen_helper_vfp_sltos;
5438	}
5439	} else {
5440	if (u) {
5441	fn = gen_helper_vfp_touls_round_to_zero;
5442	} else {
5443	fn = gen_helper_vfp_tosls_round_to_zero;
5444	}
5445	}
5446
5447	/ We have already masked out the must-be-1 top bit of imm6,*
5448	* hence this 32-shift where the ARM ARM has 64-imm6.
5449	*/
5450	shift = `32` - shift;
5451	fpst = get_fpstatus_ptr(`1`);
5452	shiftv = tcg_const_i32(shift);
5453	for (pass = `0`; pass < (q ? `4` : `2`); pass++) {
5454	TCGv_i32 tmpf = neon_load_reg(rm, pass);
5455	fn(tmpf, tmpf, shiftv, fpst);
5456	neon_store_reg(rd, pass, tmpf);
5457	}
5458	tcg_temp_free_ptr(fpst);
5459	tcg_temp_free_i32(shiftv);
5460	} else {
5461	return `1`;
5462	}
5463	} else { / (insn & 0x00380080) == 0 /
5464	int invert, reg_ofs, vec_size;
5465
5466	if (q && (rd & `1`)) {
5467	return `1`;
5468	}
5469
5470	op = (insn >> `8`) & `0xf`;
5471	/ One register and immediate. /
5472	imm = (u << `7`) \| ((insn >> `12`) & `0x70`) \| (insn & `0xf`);
5473	invert = (insn & (`1` << `5`)) != `0`;
5474	/ Note that op = 2,3,4,5,6,7,10,11,12,13 imm=0 is UNPREDICTABLE.*
5475	* We choose to not special-case this and will behave as if a
5476	* valid constant encoding of 0 had been given.
5477	*/
5478	switch (op) {
5479	case `0`: case `1`:
5480	/ no-op /
5481	break;
5482	case `2`: case `3`:
5483	imm <<= `8`;
5484	break;
5485	case `4`: case `5`:
5486	imm <<= `16`;
5487	break;
5488	case `6`: case `7`:
5489	imm <<= `24`;
5490	break;
5491	case `8`: case `9`:
5492	imm \|= imm << `16`;
5493	break;
5494	case `10`: case `11`:
5495	imm = (imm << `8`) \| (imm << `24`);
5496	break;
5497	case `12`:
5498	imm = (imm << `8`) \| `0xff`;
5499	break;
5500	case `13`:
5501	imm = (imm << `16`) \| `0xffff`;
5502	break;
5503	case `14`:
5504	imm \|= (imm << `8`) \| (imm << `16`) \| (imm << `24`);
5505	if (invert) {
5506	imm = ~imm;
5507	}
5508	break;
5509	case `15`:
5510	if (invert) {
5511	return `1`;
5512	}
5513	imm = ((imm & `0x80`) << `24`) \| ((imm & `0x3f`) << `19`)
5514	\| ((imm & `0x40`) ? (`0x1f` << `25`) : (`1` << `30`));
5515	break;
5516	}
5517	if (invert) {
5518	imm = ~imm;
5519	}
5520
5521	reg_ofs = neon_reg_offset(rd, `0`);
5522	vec_size = q ? `16` : `8`;
5523
5524	if (op & `1` && op < `12`) {
5525	if (invert) {
5526	/ The immediate value has already been inverted,*
5527	* so BIC becomes AND.
5528	*/
5529	tcg_gen_gvec_andi(MO_32, reg_ofs, reg_ofs, imm,
5530	vec_size, vec_size);
5531	} else {
5532	tcg_gen_gvec_ori(MO_32, reg_ofs, reg_ofs, imm,
5533	vec_size, vec_size);
5534	}
5535	} else {
5536	/ VMOV, VMVN. /
5537	if (op == `14` && invert) {
5538	TCGv_i64 t64 = tcg_temp_new_i64();
5539
5540	for (pass = `0`; pass <= q; ++pass) {
5541	uint64_t val = `0`;
5542	int n;
5543
5544	for (n = `0`; n < `8`; n++) {
5545	if (imm & (`1` << (n + pass * `8`))) {
5546	val \|= `0xffull` << (n * `8`);
5547	}
5548	}
5549	tcg_gen_movi_i64(t64, val);
5550	neon_store_reg64(t64, rd + pass);
5551	}
5552	tcg_temp_free_i64(t64);
5553	} else {
5554	tcg_gen_gvec_dup32i(reg_ofs, vec_size, vec_size, imm);
5555	}
5556	}
5557	}
5558	} else { / (insn & 0x00800010 == 0x00800000) /
5559	if (size != `3`) {
5560	op = (insn >> `8`) & `0xf`;
5561	if ((insn & (`1` << `6`)) == `0`) {
5562	/ Three registers of different lengths. /
5563	int src1_wide;
5564	int src2_wide;
5565	int prewiden;
5566	/ undefreq: bit 0 : UNDEF if size == 0*
5567	* bit 1 : UNDEF if size == 1
5568	* bit 2 : UNDEF if size == 2
5569	* bit 3 : UNDEF if U == 1
5570	* Note that [2:0] set implies 'always UNDEF'
5571	*/
5572	int undefreq;
5573	/ prewiden, src1_wide, src2_wide, undefreq /
5574	static const int neon_3reg_wide[`16`][`4`] = {
5575	{`1`, `0`, `0`, `0`}, / VADDL /
5576	{`1`, `1`, `0`, `0`}, / VADDW /
5577	{`1`, `0`, `0`, `0`}, / VSUBL /
5578	{`1`, `1`, `0`, `0`}, / VSUBW /
5579	{`0`, `1`, `1`, `0`}, / VADDHN /
5580	{`0`, `0`, `0`, `0`}, / VABAL /
5581	{`0`, `1`, `1`, `0`}, / VSUBHN /
5582	{`0`, `0`, `0`, `0`}, / VABDL /
5583	{`0`, `0`, `0`, `0`}, / VMLAL /
5584	{`0`, `0`, `0`, `9`}, / VQDMLAL /
5585	{`0`, `0`, `0`, `0`}, / VMLSL /
5586	{`0`, `0`, `0`, `9`}, / VQDMLSL /
5587	{`0`, `0`, `0`, `0`}, / Integer VMULL /
5588	{`0`, `0`, `0`, `1`}, / VQDMULL /
5589	{`0`, `0`, `0`, `0xa`}, / Polynomial VMULL /
5590	{`0`, `0`, `0`, `7`}, / Reserved: always UNDEF /
5591	};
5592
5593	prewiden = neon_3reg_wide[op][`0`];
5594	src1_wide = neon_3reg_wide[op][`1`];
5595	src2_wide = neon_3reg_wide[op][`2`];
5596	undefreq = neon_3reg_wide[op][`3`];
5597
5598	if ((undefreq & (`1` << size)) \|\|
5599	((undefreq & `8`) && u)) {
5600	return `1`;
5601	}
5602	if ((src1_wide && (rn & `1`)) \|\|
5603	(src2_wide && (rm & `1`)) \|\|
5604	(!src2_wide && (rd & `1`))) {
5605	return `1`;
5606	}
5607
5608	/ Handle VMULL.P64 (Polynomial 64x64 to 128 bit multiply)*
5609	* outside the loop below as it only performs a single pass.
5610	*/
5611	if (op == `14` && size == `2`) {
5612	TCGv_i64 tcg_rn, tcg_rm, tcg_rd;
5613
5614	if (!dc_isar_feature(aa32_pmull, s)) {
5615	return `1`;
5616	}
5617	tcg_rn = tcg_temp_new_i64();
5618	tcg_rm = tcg_temp_new_i64();
5619	tcg_rd = tcg_temp_new_i64();
5620	neon_load_reg64(tcg_rn, rn);
5621	neon_load_reg64(tcg_rm, rm);
5622	gen_helper_neon_pmull_64_lo(tcg_rd, tcg_rn, tcg_rm);
5623	neon_store_reg64(tcg_rd, rd);
5624	gen_helper_neon_pmull_64_hi(tcg_rd, tcg_rn, tcg_rm);
5625	neon_store_reg64(tcg_rd, rd + `1`);
5626	tcg_temp_free_i64(tcg_rn);
5627	tcg_temp_free_i64(tcg_rm);
5628	tcg_temp_free_i64(tcg_rd);
5629	return `0`;
5630	}
5631
5632	/ Avoid overlapping operands. Wide source operands are*
5633	always aligned so will never overlap with wide
5634	destinations in problematic ways. /*
5635	if (rd == rm && !src2_wide) {
5636	tmp = neon_load_reg(rm, `1`);
5637	neon_store_scratch(`2`, tmp);
5638	} else if (rd == rn && !src1_wide) {
5639	tmp = neon_load_reg(rn, `1`);
5640	neon_store_scratch(`2`, tmp);
5641	}
5642	tmp3 = NULL;
5643	for (pass = `0`; pass < `2`; pass++) {
5644	if (src1_wide) {
5645	neon_load_reg64(cpu_V0, rn + pass);
5646	tmp = NULL;
5647	} else {
5648	if (pass == `1` && rd == rn) {
5649	tmp = neon_load_scratch(`2`);
5650	} else {
5651	tmp = neon_load_reg(rn, pass);
5652	}
5653	if (prewiden) {
5654	gen_neon_widen(cpu_V0, tmp, size, u);
5655	}
5656	}
5657	if (src2_wide) {
5658	neon_load_reg64(cpu_V1, rm + pass);
5659	tmp2 = NULL;
5660	} else {
5661	if (pass == `1` && rd == rm) {
5662	tmp2 = neon_load_scratch(`2`);
5663	} else {
5664	tmp2 = neon_load_reg(rm, pass);
5665	}
5666	if (prewiden) {
5667	gen_neon_widen(cpu_V1, tmp2, size, u);
5668	}
5669	}
5670	switch (op) {
5671	case `0`: case `1`: case `4`: / VADDL, VADDW, VADDHN, VRADDHN /
5672	gen_neon_addl(size);
5673	break;
5674	case `2`: case `3`: case `6`: / VSUBL, VSUBW, VSUBHN, VRSUBHN /
5675	gen_neon_subl(size);
5676	break;
5677	case `5`: case `7`: / VABAL, VABDL /
5678	switch ((size << `1`) \| u) {
5679	case `0`:
5680	gen_helper_neon_abdl_s16(cpu_V0, tmp, tmp2);
5681	break;
5682	case `1`:
5683	gen_helper_neon_abdl_u16(cpu_V0, tmp, tmp2);
5684	break;
5685	case `2`:
5686	gen_helper_neon_abdl_s32(cpu_V0, tmp, tmp2);
5687	break;
5688	case `3`:
5689	gen_helper_neon_abdl_u32(cpu_V0, tmp, tmp2);
5690	break;
5691	case `4`:
5692	gen_helper_neon_abdl_s64(cpu_V0, tmp, tmp2);
5693	break;
5694	case `5`:
5695	gen_helper_neon_abdl_u64(cpu_V0, tmp, tmp2);
5696	break;
5697	default: abort();
5698	}
5699	tcg_temp_free_i32(tmp2);
5700	tcg_temp_free_i32(tmp);
5701	break;
5702	case `8`: case `9`: case `10`: case `11`: case `12`: case `13`:
5703	/ VMLAL, VQDMLAL, VMLSL, VQDMLSL, VMULL, VQDMULL /
5704	gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5705	break;
5706	case `14`: / Polynomial VMULL /
5707	gen_helper_neon_mull_p8(cpu_V0, tmp, tmp2);
5708	tcg_temp_free_i32(tmp2);
5709	tcg_temp_free_i32(tmp);
5710	break;
5711	default: / 15 is RESERVED: caught earlier /
5712	abort();
5713	}
5714	if (op == `13`) {
5715	/ VQDMULL /
5716	gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5717	neon_store_reg64(cpu_V0, rd + pass);
5718	} else if (op == `5` \|\| (op >= `8` && op <= `11`)) {
5719	/ Accumulate. /
5720	neon_load_reg64(cpu_V1, rd + pass);
5721	switch (op) {
5722	case `10`: / VMLSL /
5723	gen_neon_negl(cpu_V0, size);
5724	/ Fall through /
5725	case `5`: case `8`: / VABAL, VMLAL /
5726	gen_neon_addl(size);
5727	break;
5728	case `9`: case `11`: / VQDMLAL, VQDMLSL /
5729	gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5730	if (op == `11`) {
5731	gen_neon_negl(cpu_V0, size);
5732	}
5733	gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5734	break;
5735	default:
5736	abort();
5737	}
5738	neon_store_reg64(cpu_V0, rd + pass);
5739	} else if (op == `4` \|\| op == `6`) {
5740	/ Narrowing operation. /
5741	tmp = tcg_temp_new_i32();
5742	if (!u) {
5743	switch (size) {
5744	case `0`:
5745	gen_helper_neon_narrow_high_u8(tmp, cpu_V0);
5746	break;
5747	case `1`:
5748	gen_helper_neon_narrow_high_u16(tmp, cpu_V0);
5749	break;
5750	case `2`:
5751	tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5752	break;
5753	default: abort();
5754	}
5755	} else {
5756	switch (size) {
5757	case `0`:
5758	gen_helper_neon_narrow_round_high_u8(tmp, cpu_V0);
5759	break;
5760	case `1`:
5761	gen_helper_neon_narrow_round_high_u16(tmp, cpu_V0);
5762	break;
5763	case `2`:
5764	tcg_gen_addi_i64(cpu_V0, cpu_V0, `1u` << `31`);
5765	tcg_gen_extrh_i64_i32(tmp, cpu_V0);
5766	break;
5767	default: abort();
5768	}
5769	}
5770	if (pass == `0`) {
5771	tmp3 = tmp;
5772	} else {
5773	neon_store_reg(rd, `0`, tmp3);
5774	neon_store_reg(rd, `1`, tmp);
5775	}
5776	} else {
5777	/ Write back the result. /
5778	neon_store_reg64(cpu_V0, rd + pass);
5779	}
5780	}
5781	} else {
5782	/ Two registers and a scalar. NB that for ops of this form*
5783	* the ARM ARM labels bit 24 as Q, but it is in our variable
5784	* 'u', not 'q'.
5785	*/
5786	if (size == `0`) {
5787	return `1`;
5788	}
5789	switch (op) {
5790	case `1`: / Float VMLA scalar /
5791	case `5`: / Floating point VMLS scalar /
5792	case `9`: / Floating point VMUL scalar /
5793	if (size == `1`) {
5794	return `1`;
5795	}
5796	/ fall through /
5797	case `0`: / Integer VMLA scalar /
5798	case `4`: / Integer VMLS scalar /
5799	case `8`: / Integer VMUL scalar /
5800	case `12`: / VQDMULH scalar /
5801	case `13`: / VQRDMULH scalar /
5802	if (u && ((rd \| rn) & `1`)) {
5803	return `1`;
5804	}
5805	tmp = neon_get_scalar(size, rm);
5806	neon_store_scratch(`0`, tmp);
5807	for (pass = `0`; pass < (u ? `4` : `2`); pass++) {
5808	tmp = neon_load_scratch(`0`);
5809	tmp2 = neon_load_reg(rn, pass);
5810	if (op == `12`) {
5811	if (size == `1`) {
5812	gen_helper_neon_qdmulh_s16(tmp, cpu_env, tmp, tmp2);
5813	} else {
5814	gen_helper_neon_qdmulh_s32(tmp, cpu_env, tmp, tmp2);
5815	}
5816	} else if (op == `13`) {
5817	if (size == `1`) {
5818	gen_helper_neon_qrdmulh_s16(tmp, cpu_env, tmp, tmp2);
5819	} else {
5820	gen_helper_neon_qrdmulh_s32(tmp, cpu_env, tmp, tmp2);
5821	}
5822	} else if (op & `1`) {
5823	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5824	gen_helper_vfp_muls(tmp, tmp, tmp2, fpstatus);
5825	tcg_temp_free_ptr(fpstatus);
5826	} else {
5827	switch (size) {
5828	case `0`: gen_helper_neon_mul_u8(tmp, tmp, tmp2); break;
5829	case `1`: gen_helper_neon_mul_u16(tmp, tmp, tmp2); break;
5830	case `2`: tcg_gen_mul_i32(tmp, tmp, tmp2); break;
5831	default: abort();
5832	}
5833	}
5834	tcg_temp_free_i32(tmp2);
5835	if (op < `8`) {
5836	/ Accumulate. /
5837	tmp2 = neon_load_reg(rd, pass);
5838	switch (op) {
5839	case `0`:
5840	gen_neon_add(size, tmp, tmp2);
5841	break;
5842	case `1`:
5843	{
5844	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5845	gen_helper_vfp_adds(tmp, tmp, tmp2, fpstatus);
5846	tcg_temp_free_ptr(fpstatus);
5847	break;
5848	}
5849	case `4`:
5850	gen_neon_rsb(size, tmp, tmp2);
5851	break;
5852	case `5`:
5853	{
5854	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
5855	gen_helper_vfp_subs(tmp, tmp2, tmp, fpstatus);
5856	tcg_temp_free_ptr(fpstatus);
5857	break;
5858	}
5859	default:
5860	abort();
5861	}
5862	tcg_temp_free_i32(tmp2);
5863	}
5864	neon_store_reg(rd, pass, tmp);
5865	}
5866	break;
5867	case `3`: / VQDMLAL scalar /
5868	case `7`: / VQDMLSL scalar /
5869	case `11`: / VQDMULL scalar /
5870	if (u == `1`) {
5871	return `1`;
5872	}
5873	/ fall through /
5874	case `2`: / VMLAL sclar /
5875	case `6`: / VMLSL scalar /
5876	case `10`: / VMULL scalar /
5877	if (rd & `1`) {
5878	return `1`;
5879	}
5880	tmp2 = neon_get_scalar(size, rm);
5881	/ We need a copy of tmp2 because gen_neon_mull*
5882	* deletes it during pass 0. */
5883	tmp4 = tcg_temp_new_i32();
5884	tcg_gen_mov_i32(tmp4, tmp2);
5885	tmp3 = neon_load_reg(rn, `1`);
5886
5887	for (pass = `0`; pass < `2`; pass++) {
5888	if (pass == `0`) {
5889	tmp = neon_load_reg(rn, `0`);
5890	} else {
5891	tmp = tmp3;
5892	tmp2 = tmp4;
5893	}
5894	gen_neon_mull(cpu_V0, tmp, tmp2, size, u);
5895	if (op != `11`) {
5896	neon_load_reg64(cpu_V1, rd + pass);
5897	}
5898	switch (op) {
5899	case `6`:
5900	gen_neon_negl(cpu_V0, size);
5901	/ Fall through /
5902	case `2`:
5903	gen_neon_addl(size);
5904	break;
5905	case `3`: case `7`:
5906	gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5907	if (op == `7`) {
5908	gen_neon_negl(cpu_V0, size);
5909	}
5910	gen_neon_addl_saturate(cpu_V0, cpu_V1, size);
5911	break;
5912	case `10`:
5913	/ no-op /
5914	break;
5915	case `11`:
5916	gen_neon_addl_saturate(cpu_V0, cpu_V0, size);
5917	break;
5918	default:
5919	abort();
5920	}
5921	neon_store_reg64(cpu_V0, rd + pass);
5922	}
5923	break;
5924	case `14`: / VQRDMLAH scalar /
5925	case `15`: / VQRDMLSH scalar /
5926	{
5927	NeonGenThreeOpEnvFn *fn;
5928
5929	if (!dc_isar_feature(aa32_rdm, s)) {
5930	return `1`;
5931	}
5932	if (u && ((rd \| rn) & `1`)) {
5933	return `1`;
5934	}
5935	if (op == `14`) {
5936	if (size == `1`) {
5937	fn = gen_helper_neon_qrdmlah_s16;
5938	} else {
5939	fn = gen_helper_neon_qrdmlah_s32;
5940	}
5941	} else {
5942	if (size == `1`) {
5943	fn = gen_helper_neon_qrdmlsh_s16;
5944	} else {
5945	fn = gen_helper_neon_qrdmlsh_s32;
5946	}
5947	}
5948
5949	tmp2 = neon_get_scalar(size, rm);
5950	for (pass = `0`; pass < (u ? `4` : `2`); pass++) {
5951	tmp = neon_load_reg(rn, pass);
5952	tmp3 = neon_load_reg(rd, pass);
5953	fn(tmp, cpu_env, tmp, tmp2, tmp3);
5954	tcg_temp_free_i32(tmp3);
5955	neon_store_reg(rd, pass, tmp);
5956	}
5957	tcg_temp_free_i32(tmp2);
5958	}
5959	break;
5960	default:
5961	g_assert_not_reached();
5962	}
5963	}
5964	} else { / size == 3 /
5965	if (!u) {
5966	/ Extract. /
5967	imm = (insn >> `8`) & `0xf`;
5968
5969	if (imm > `7` && !q)
5970	return `1`;
5971
5972	if (q && ((rd \| rn \| rm) & `1`)) {
5973	return `1`;
5974	}
5975
5976	if (imm == `0`) {
5977	neon_load_reg64(cpu_V0, rn);
5978	if (q) {
5979	neon_load_reg64(cpu_V1, rn + `1`);
5980	}
5981	} else if (imm == `8`) {
5982	neon_load_reg64(cpu_V0, rn + `1`);
5983	if (q) {
5984	neon_load_reg64(cpu_V1, rm);
5985	}
5986	} else if (q) {
5987	tmp64 = tcg_temp_new_i64();
5988	if (imm < `8`) {
5989	neon_load_reg64(cpu_V0, rn);
5990	neon_load_reg64(tmp64, rn + `1`);
5991	} else {
5992	neon_load_reg64(cpu_V0, rn + `1`);
5993	neon_load_reg64(tmp64, rm);
5994	}
5995	tcg_gen_shri_i64(cpu_V0, cpu_V0, (imm & `7`) * `8`);
5996	tcg_gen_shli_i64(cpu_V1, tmp64, `64` - ((imm & `7`) * `8`));
5997	tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
5998	if (imm < `8`) {
5999	neon_load_reg64(cpu_V1, rm);
6000	} else {
6001	neon_load_reg64(cpu_V1, rm + `1`);
6002	imm -= `8`;
6003	}
6004	tcg_gen_shli_i64(cpu_V1, cpu_V1, `64` - (imm * `8`));
6005	tcg_gen_shri_i64(tmp64, tmp64, imm * `8`);
6006	tcg_gen_or_i64(cpu_V1, cpu_V1, tmp64);
6007	tcg_temp_free_i64(tmp64);
6008	} else {
6009	/ BUGFIX /
6010	neon_load_reg64(cpu_V0, rn);
6011	tcg_gen_shri_i64(cpu_V0, cpu_V0, imm * `8`);
6012	neon_load_reg64(cpu_V1, rm);
6013	tcg_gen_shli_i64(cpu_V1, cpu_V1, `64` - (imm * `8`));
6014	tcg_gen_or_i64(cpu_V0, cpu_V0, cpu_V1);
6015	}
6016	neon_store_reg64(cpu_V0, rd);
6017	if (q) {
6018	neon_store_reg64(cpu_V1, rd + `1`);
6019	}
6020	} else if ((insn & (`1` << `11`)) == `0`) {
6021	/ Two register misc. /
6022	op = ((insn >> `12`) & `0x30`) \| ((insn >> `7`) & `0xf`);
6023	size = (insn >> `18`) & `3`;
6024	/ UNDEF for unknown op values and bad op-size combinations /
6025	if ((neon_2rm_sizes[op] & (`1` << size)) == `0`) {
6026	return `1`;
6027	}
6028	if (neon_2rm_is_v8_op(op) &&
6029	!arm_dc_feature(s, ARM_FEATURE_V8)) {
6030	return `1`;
6031	}
6032	if ((op != NEON_2RM_VMOVN && op != NEON_2RM_VQMOVN) &&
6033	q && ((rm \| rd) & `1`)) {
6034	return `1`;
6035	}
6036	switch (op) {
6037	case NEON_2RM_VREV64:
6038	for (pass = `0`; pass < (q ? `2` : `1`); pass++) {
6039	tmp = neon_load_reg(rm, pass * `2`);
6040	tmp2 = neon_load_reg(rm, pass * `2` + `1`);
6041	switch (size) {
6042	case `0`: tcg_gen_bswap32_i32(tmp, tmp); break;
6043	case `1`: gen_swap_half(tmp); break;
6044	case `2`: / no-op / break;
6045	default: abort();
6046	}
6047	neon_store_reg(rd, pass * `2` + `1`, tmp);
6048	if (size == `2`) {
6049	neon_store_reg(rd, pass * `2`, tmp2);
6050	} else {
6051	switch (size) {
6052	case `0`: tcg_gen_bswap32_i32(tmp2, tmp2); break;
6053	case `1`: gen_swap_half(tmp2); break;
6054	default: abort();
6055	}
6056	neon_store_reg(rd, pass * `2`, tmp2);
6057	}
6058	}
6059	break;
6060	case NEON_2RM_VPADDL: case NEON_2RM_VPADDL_U:
6061	case NEON_2RM_VPADAL: case NEON_2RM_VPADAL_U:
6062	for (pass = `0`; pass < q + `1`; pass++) {
6063	tmp = neon_load_reg(rm, pass * `2`);
6064	gen_neon_widen(cpu_V0, tmp, size, op & `1`);
6065	tmp = neon_load_reg(rm, pass * `2` + `1`);
6066	gen_neon_widen(cpu_V1, tmp, size, op & `1`);
6067	switch (size) {
6068	case `0`: gen_helper_neon_paddl_u16(CPU_V001); break;
6069	case `1`: gen_helper_neon_paddl_u32(CPU_V001); break;
6070	case `2`: tcg_gen_add_i64(CPU_V001); break;
6071	default: abort();
6072	}
6073	if (op >= NEON_2RM_VPADAL) {
6074	/ Accumulate. /
6075	neon_load_reg64(cpu_V1, rd + pass);
6076	gen_neon_addl(size);
6077	}
6078	neon_store_reg64(cpu_V0, rd + pass);
6079	}
6080	break;
6081	case NEON_2RM_VTRN:
6082	if (size == `2`) {
6083	int n;
6084	for (n = `0`; n < (q ? `4` : `2`); n += `2`) {
6085	tmp = neon_load_reg(rm, n);
6086	tmp2 = neon_load_reg(rd, n + `1`);
6087	neon_store_reg(rm, n, tmp2);
6088	neon_store_reg(rd, n + `1`, tmp);
6089	}
6090	} else {
6091	goto elementwise;
6092	}
6093	break;
6094	case NEON_2RM_VUZP:
6095	if (gen_neon_unzip(rd, rm, size, q)) {
6096	return `1`;
6097	}
6098	break;
6099	case NEON_2RM_VZIP:
6100	if (gen_neon_zip(rd, rm, size, q)) {
6101	return `1`;
6102	}
6103	break;
6104	case NEON_2RM_VMOVN: case NEON_2RM_VQMOVN:
6105	/ also VQMOVUN; op field and mnemonics don't line up /
6106	if (rm & `1`) {
6107	return `1`;
6108	}
6109	tmp2 = NULL;
6110	for (pass = `0`; pass < `2`; pass++) {
6111	neon_load_reg64(cpu_V0, rm + pass);
6112	tmp = tcg_temp_new_i32();
6113	gen_neon_narrow_op(op == NEON_2RM_VMOVN, q, size,
6114	tmp, cpu_V0);
6115	if (pass == `0`) {
6116	tmp2 = tmp;
6117	} else {
6118	neon_store_reg(rd, `0`, tmp2);
6119	neon_store_reg(rd, `1`, tmp);
6120	}
6121	}
6122	break;
6123	case NEON_2RM_VSHLL:
6124	if (q \|\| (rd & `1`)) {
6125	return `1`;
6126	}
6127	tmp = neon_load_reg(rm, `0`);
6128	tmp2 = neon_load_reg(rm, `1`);
6129	for (pass = `0`; pass < `2`; pass++) {
6130	if (pass == `1`)
6131	tmp = tmp2;
6132	gen_neon_widen(cpu_V0, tmp, size, `1`);
6133	tcg_gen_shli_i64(cpu_V0, cpu_V0, `8` << size);
6134	neon_store_reg64(cpu_V0, rd + pass);
6135	}
6136	break;
6137	case NEON_2RM_VCVT_F16_F32:
6138	{
6139	TCGv_ptr fpst;
6140	TCGv_i32 ahp;
6141
6142	if (!dc_isar_feature(aa32_fp16_spconv, s) \|\|
6143	q \|\| (rm & `1`)) {
6144	return `1`;
6145	}
6146	fpst = get_fpstatus_ptr(true);
6147	ahp = get_ahp_flag();
6148	tmp = neon_load_reg(rm, `0`);
6149	gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6150	tmp2 = neon_load_reg(rm, `1`);
6151	gen_helper_vfp_fcvt_f32_to_f16(tmp2, tmp2, fpst, ahp);
6152	tcg_gen_shli_i32(tmp2, tmp2, `16`);
6153	tcg_gen_or_i32(tmp2, tmp2, tmp);
6154	tcg_temp_free_i32(tmp);
6155	tmp = neon_load_reg(rm, `2`);
6156	gen_helper_vfp_fcvt_f32_to_f16(tmp, tmp, fpst, ahp);
6157	tmp3 = neon_load_reg(rm, `3`);
6158	neon_store_reg(rd, `0`, tmp2);
6159	gen_helper_vfp_fcvt_f32_to_f16(tmp3, tmp3, fpst, ahp);
6160	tcg_gen_shli_i32(tmp3, tmp3, `16`);
6161	tcg_gen_or_i32(tmp3, tmp3, tmp);
6162	neon_store_reg(rd, `1`, tmp3);
6163	tcg_temp_free_i32(tmp);
6164	tcg_temp_free_i32(ahp);
6165	tcg_temp_free_ptr(fpst);
6166	break;
6167	}
6168	case NEON_2RM_VCVT_F32_F16:
6169	{
6170	TCGv_ptr fpst;
6171	TCGv_i32 ahp;
6172	if (!dc_isar_feature(aa32_fp16_spconv, s) \|\|
6173	q \|\| (rd & `1`)) {
6174	return `1`;
6175	}
6176	fpst = get_fpstatus_ptr(true);
6177	ahp = get_ahp_flag();
6178	tmp3 = tcg_temp_new_i32();
6179	tmp = neon_load_reg(rm, `0`);
6180	tmp2 = neon_load_reg(rm, `1`);
6181	tcg_gen_ext16u_i32(tmp3, tmp);
6182	gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6183	neon_store_reg(rd, `0`, tmp3);
6184	tcg_gen_shri_i32(tmp, tmp, `16`);
6185	gen_helper_vfp_fcvt_f16_to_f32(tmp, tmp, fpst, ahp);
6186	neon_store_reg(rd, `1`, tmp);
6187	tmp3 = tcg_temp_new_i32();
6188	tcg_gen_ext16u_i32(tmp3, tmp2);
6189	gen_helper_vfp_fcvt_f16_to_f32(tmp3, tmp3, fpst, ahp);
6190	neon_store_reg(rd, `2`, tmp3);
6191	tcg_gen_shri_i32(tmp2, tmp2, `16`);
6192	gen_helper_vfp_fcvt_f16_to_f32(tmp2, tmp2, fpst, ahp);
6193	neon_store_reg(rd, `3`, tmp2);
6194	tcg_temp_free_i32(ahp);
6195	tcg_temp_free_ptr(fpst);
6196	break;
6197	}
6198	case NEON_2RM_AESE: case NEON_2RM_AESMC:
6199	if (!dc_isar_feature(aa32_aes, s) \|\| ((rm \| rd) & `1`)) {
6200	return `1`;
6201	}
6202	ptr1 = vfp_reg_ptr(true, rd);
6203	ptr2 = vfp_reg_ptr(true, rm);
6204
6205	/ Bit 6 is the lowest opcode bit; it distinguishes between*
6206	* encryption (AESE/AESMC) and decryption (AESD/AESIMC)
6207	*/
6208	tmp3 = tcg_const_i32(extract32(insn, `6`, `1`));
6209
6210	if (op == NEON_2RM_AESE) {
6211	gen_helper_crypto_aese(ptr1, ptr2, tmp3);
6212	} else {
6213	gen_helper_crypto_aesmc(ptr1, ptr2, tmp3);
6214	}
6215	tcg_temp_free_ptr(ptr1);
6216	tcg_temp_free_ptr(ptr2);
6217	tcg_temp_free_i32(tmp3);
6218	break;
6219	case NEON_2RM_SHA1H:
6220	if (!dc_isar_feature(aa32_sha1, s) \|\| ((rm \| rd) & `1`)) {
6221	return `1`;
6222	}
6223	ptr1 = vfp_reg_ptr(true, rd);
6224	ptr2 = vfp_reg_ptr(true, rm);
6225
6226	gen_helper_crypto_sha1h(ptr1, ptr2);
6227
6228	tcg_temp_free_ptr(ptr1);
6229	tcg_temp_free_ptr(ptr2);
6230	break;
6231	case NEON_2RM_SHA1SU1:
6232	if ((rm \| rd) & `1`) {
6233	return `1`;
6234	}
6235	/ bit 6 (q): set -> SHA256SU0, cleared -> SHA1SU1 /
6236	if (q) {
6237	if (!dc_isar_feature(aa32_sha2, s)) {
6238	return `1`;
6239	}
6240	} else if (!dc_isar_feature(aa32_sha1, s)) {
6241	return `1`;
6242	}
6243	ptr1 = vfp_reg_ptr(true, rd);
6244	ptr2 = vfp_reg_ptr(true, rm);
6245	if (q) {
6246	gen_helper_crypto_sha256su0(ptr1, ptr2);
6247	} else {
6248	gen_helper_crypto_sha1su1(ptr1, ptr2);
6249	}
6250	tcg_temp_free_ptr(ptr1);
6251	tcg_temp_free_ptr(ptr2);
6252	break;
6253
6254	case NEON_2RM_VMVN:
6255	tcg_gen_gvec_not(`0`, rd_ofs, rm_ofs, vec_size, vec_size);
6256	break;
6257	case NEON_2RM_VNEG:
6258	tcg_gen_gvec_neg(size, rd_ofs, rm_ofs, vec_size, vec_size);
6259	break;
6260	case NEON_2RM_VABS:
6261	tcg_gen_gvec_abs(size, rd_ofs, rm_ofs, vec_size, vec_size);
6262	break;
6263
6264	default:
6265	elementwise:
6266	for (pass = `0`; pass < (q ? `4` : `2`); pass++) {
6267	tmp = neon_load_reg(rm, pass);
6268	switch (op) {
6269	case NEON_2RM_VREV32:
6270	switch (size) {
6271	case `0`: tcg_gen_bswap32_i32(tmp, tmp); break;
6272	case `1`: gen_swap_half(tmp); break;
6273	default: abort();
6274	}
6275	break;
6276	case NEON_2RM_VREV16:
6277	gen_rev16(tmp, tmp);
6278	break;
6279	case NEON_2RM_VCLS:
6280	switch (size) {
6281	case `0`: gen_helper_neon_cls_s8(tmp, tmp); break;
6282	case `1`: gen_helper_neon_cls_s16(tmp, tmp); break;
6283	case `2`: gen_helper_neon_cls_s32(tmp, tmp); break;
6284	default: abort();
6285	}
6286	break;
6287	case NEON_2RM_VCLZ:
6288	switch (size) {
6289	case `0`: gen_helper_neon_clz_u8(tmp, tmp); break;
6290	case `1`: gen_helper_neon_clz_u16(tmp, tmp); break;
6291	case `2`: tcg_gen_clzi_i32(tmp, tmp, `32`); break;
6292	default: abort();
6293	}
6294	break;
6295	case NEON_2RM_VCNT:
6296	gen_helper_neon_cnt_u8(tmp, tmp);
6297	break;
6298	case NEON_2RM_VQABS:
6299	switch (size) {
6300	case `0`:
6301	gen_helper_neon_qabs_s8(tmp, cpu_env, tmp);
6302	break;
6303	case `1`:
6304	gen_helper_neon_qabs_s16(tmp, cpu_env, tmp);
6305	break;
6306	case `2`:
6307	gen_helper_neon_qabs_s32(tmp, cpu_env, tmp);
6308	break;
6309	default: abort();
6310	}
6311	break;
6312	case NEON_2RM_VQNEG:
6313	switch (size) {
6314	case `0`:
6315	gen_helper_neon_qneg_s8(tmp, cpu_env, tmp);
6316	break;
6317	case `1`:
6318	gen_helper_neon_qneg_s16(tmp, cpu_env, tmp);
6319	break;
6320	case `2`:
6321	gen_helper_neon_qneg_s32(tmp, cpu_env, tmp);
6322	break;
6323	default: abort();
6324	}
6325	break;
6326	case NEON_2RM_VCGT0: case NEON_2RM_VCLE0:
6327	tmp2 = tcg_const_i32(`0`);
6328	switch(size) {
6329	case `0`: gen_helper_neon_cgt_s8(tmp, tmp, tmp2); break;
6330	case `1`: gen_helper_neon_cgt_s16(tmp, tmp, tmp2); break;
6331	case `2`: gen_helper_neon_cgt_s32(tmp, tmp, tmp2); break;
6332	default: abort();
6333	}
6334	tcg_temp_free_i32(tmp2);
6335	if (op == NEON_2RM_VCLE0) {
6336	tcg_gen_not_i32(tmp, tmp);
6337	}
6338	break;
6339	case NEON_2RM_VCGE0: case NEON_2RM_VCLT0:
6340	tmp2 = tcg_const_i32(`0`);
6341	switch(size) {
6342	case `0`: gen_helper_neon_cge_s8(tmp, tmp, tmp2); break;
6343	case `1`: gen_helper_neon_cge_s16(tmp, tmp, tmp2); break;
6344	case `2`: gen_helper_neon_cge_s32(tmp, tmp, tmp2); break;
6345	default: abort();
6346	}
6347	tcg_temp_free_i32(tmp2);
6348	if (op == NEON_2RM_VCLT0) {
6349	tcg_gen_not_i32(tmp, tmp);
6350	}
6351	break;
6352	case NEON_2RM_VCEQ0:
6353	tmp2 = tcg_const_i32(`0`);
6354	switch(size) {
6355	case `0`: gen_helper_neon_ceq_u8(tmp, tmp, tmp2); break;
6356	case `1`: gen_helper_neon_ceq_u16(tmp, tmp, tmp2); break;
6357	case `2`: gen_helper_neon_ceq_u32(tmp, tmp, tmp2); break;
6358	default: abort();
6359	}
6360	tcg_temp_free_i32(tmp2);
6361	break;
6362	case NEON_2RM_VCGT0_F:
6363	{
6364	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6365	tmp2 = tcg_const_i32(`0`);
6366	gen_helper_neon_cgt_f32(tmp, tmp, tmp2, fpstatus);
6367	tcg_temp_free_i32(tmp2);
6368	tcg_temp_free_ptr(fpstatus);
6369	break;
6370	}
6371	case NEON_2RM_VCGE0_F:
6372	{
6373	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6374	tmp2 = tcg_const_i32(`0`);
6375	gen_helper_neon_cge_f32(tmp, tmp, tmp2, fpstatus);
6376	tcg_temp_free_i32(tmp2);
6377	tcg_temp_free_ptr(fpstatus);
6378	break;
6379	}
6380	case NEON_2RM_VCEQ0_F:
6381	{
6382	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6383	tmp2 = tcg_const_i32(`0`);
6384	gen_helper_neon_ceq_f32(tmp, tmp, tmp2, fpstatus);
6385	tcg_temp_free_i32(tmp2);
6386	tcg_temp_free_ptr(fpstatus);
6387	break;
6388	}
6389	case NEON_2RM_VCLE0_F:
6390	{
6391	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6392	tmp2 = tcg_const_i32(`0`);
6393	gen_helper_neon_cge_f32(tmp, tmp2, tmp, fpstatus);
6394	tcg_temp_free_i32(tmp2);
6395	tcg_temp_free_ptr(fpstatus);
6396	break;
6397	}
6398	case NEON_2RM_VCLT0_F:
6399	{
6400	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6401	tmp2 = tcg_const_i32(`0`);
6402	gen_helper_neon_cgt_f32(tmp, tmp2, tmp, fpstatus);
6403	tcg_temp_free_i32(tmp2);
6404	tcg_temp_free_ptr(fpstatus);
6405	break;
6406	}
6407	case NEON_2RM_VABS_F:
6408	gen_helper_vfp_abss(tmp, tmp);
6409	break;
6410	case NEON_2RM_VNEG_F:
6411	gen_helper_vfp_negs(tmp, tmp);
6412	break;
6413	case NEON_2RM_VSWP:
6414	tmp2 = neon_load_reg(rd, pass);
6415	neon_store_reg(rm, pass, tmp2);
6416	break;
6417	case NEON_2RM_VTRN:
6418	tmp2 = neon_load_reg(rd, pass);
6419	switch (size) {
6420	case `0`: gen_neon_trn_u8(tmp, tmp2); break;
6421	case `1`: gen_neon_trn_u16(tmp, tmp2); break;
6422	default: abort();
6423	}
6424	neon_store_reg(rm, pass, tmp2);
6425	break;
6426	case NEON_2RM_VRINTN:
6427	case NEON_2RM_VRINTA:
6428	case NEON_2RM_VRINTM:
6429	case NEON_2RM_VRINTP:
6430	case NEON_2RM_VRINTZ:
6431	{
6432	TCGv_i32 tcg_rmode;
6433	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6434	int rmode;
6435
6436	if (op == NEON_2RM_VRINTZ) {
6437	rmode = FPROUNDING_ZERO;
6438	} else {
6439	rmode = fp_decode_rm[((op & `0x6`) >> `1`) ^ `1`];
6440	}
6441
6442	tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6443	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6444	cpu_env);
6445	gen_helper_rints(tmp, tmp, fpstatus);
6446	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6447	cpu_env);
6448	tcg_temp_free_ptr(fpstatus);
6449	tcg_temp_free_i32(tcg_rmode);
6450	break;
6451	}
6452	case NEON_2RM_VRINTX:
6453	{
6454	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6455	gen_helper_rints_exact(tmp, tmp, fpstatus);
6456	tcg_temp_free_ptr(fpstatus);
6457	break;
6458	}
6459	case NEON_2RM_VCVTAU:
6460	case NEON_2RM_VCVTAS:
6461	case NEON_2RM_VCVTNU:
6462	case NEON_2RM_VCVTNS:
6463	case NEON_2RM_VCVTPU:
6464	case NEON_2RM_VCVTPS:
6465	case NEON_2RM_VCVTMU:
6466	case NEON_2RM_VCVTMS:
6467	{
6468	bool is_signed = !extract32(insn, `7`, `1`);
6469	TCGv_ptr fpst = get_fpstatus_ptr(`1`);
6470	TCGv_i32 tcg_rmode, tcg_shift;
6471	int rmode = fp_decode_rm[extract32(insn, `8`, `2`)];
6472
6473	tcg_shift = tcg_const_i32(`0`);
6474	tcg_rmode = tcg_const_i32(arm_rmode_to_sf(rmode));
6475	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6476	cpu_env);
6477
6478	if (is_signed) {
6479	gen_helper_vfp_tosls(tmp, tmp,
6480	tcg_shift, fpst);
6481	} else {
6482	gen_helper_vfp_touls(tmp, tmp,
6483	tcg_shift, fpst);
6484	}
6485
6486	gen_helper_set_neon_rmode(tcg_rmode, tcg_rmode,
6487	cpu_env);
6488	tcg_temp_free_i32(tcg_rmode);
6489	tcg_temp_free_i32(tcg_shift);
6490	tcg_temp_free_ptr(fpst);
6491	break;
6492	}
6493	case NEON_2RM_VRECPE:
6494	{
6495	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6496	gen_helper_recpe_u32(tmp, tmp, fpstatus);
6497	tcg_temp_free_ptr(fpstatus);
6498	break;
6499	}
6500	case NEON_2RM_VRSQRTE:
6501	{
6502	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6503	gen_helper_rsqrte_u32(tmp, tmp, fpstatus);
6504	tcg_temp_free_ptr(fpstatus);
6505	break;
6506	}
6507	case NEON_2RM_VRECPE_F:
6508	{
6509	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6510	gen_helper_recpe_f32(tmp, tmp, fpstatus);
6511	tcg_temp_free_ptr(fpstatus);
6512	break;
6513	}
6514	case NEON_2RM_VRSQRTE_F:
6515	{
6516	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6517	gen_helper_rsqrte_f32(tmp, tmp, fpstatus);
6518	tcg_temp_free_ptr(fpstatus);
6519	break;
6520	}
6521	case NEON_2RM_VCVT_FS: / VCVT.F32.S32 /
6522	{
6523	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6524	gen_helper_vfp_sitos(tmp, tmp, fpstatus);
6525	tcg_temp_free_ptr(fpstatus);
6526	break;
6527	}
6528	case NEON_2RM_VCVT_FU: / VCVT.F32.U32 /
6529	{
6530	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6531	gen_helper_vfp_uitos(tmp, tmp, fpstatus);
6532	tcg_temp_free_ptr(fpstatus);
6533	break;
6534	}
6535	case NEON_2RM_VCVT_SF: / VCVT.S32.F32 /
6536	{
6537	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6538	gen_helper_vfp_tosizs(tmp, tmp, fpstatus);
6539	tcg_temp_free_ptr(fpstatus);
6540	break;
6541	}
6542	case NEON_2RM_VCVT_UF: / VCVT.U32.F32 /
6543	{
6544	TCGv_ptr fpstatus = get_fpstatus_ptr(`1`);
6545	gen_helper_vfp_touizs(tmp, tmp, fpstatus);
6546	tcg_temp_free_ptr(fpstatus);
6547	break;
6548	}
6549	default:
6550	/ Reserved op values were caught by the*
6551	* neon_2rm_sizes[] check earlier.
6552	*/
6553	abort();
6554	}
6555	neon_store_reg(rd, pass, tmp);
6556	}
6557	break;
6558	}
6559	} else if ((insn & (`1` << `10`)) == `0`) {
6560	/ VTBL, VTBX. /
6561	int n = ((insn >> `8`) & `3`) + `1`;
6562	if ((rn + n) > `32`) {
6563	/ This is UNPREDICTABLE; we choose to UNDEF to avoid the*
6564	* helper function running off the end of the register file.
6565	*/
6566	return `1`;
6567	}
6568	n <<= `3`;
6569	if (insn & (`1` << `6`)) {
6570	tmp = neon_load_reg(rd, `0`);
6571	} else {
6572	tmp = tcg_temp_new_i32();
6573	tcg_gen_movi_i32(tmp, `0`);
6574	}
6575	tmp2 = neon_load_reg(rm, `0`);
6576	ptr1 = vfp_reg_ptr(true, rn);
6577	tmp5 = tcg_const_i32(n);
6578	gen_helper_neon_tbl(tmp2, tmp2, tmp, ptr1, tmp5);
6579	tcg_temp_free_i32(tmp);
6580	if (insn & (`1` << `6`)) {
6581	tmp = neon_load_reg(rd, `1`);
6582	} else {
6583	tmp = tcg_temp_new_i32();
6584	tcg_gen_movi_i32(tmp, `0`);
6585	}
6586	tmp3 = neon_load_reg(rm, `1`);
6587	gen_helper_neon_tbl(tmp3, tmp3, tmp, ptr1, tmp5);
6588	tcg_temp_free_i32(tmp5);
6589	tcg_temp_free_ptr(ptr1);
6590	neon_store_reg(rd, `0`, tmp2);
6591	neon_store_reg(rd, `1`, tmp3);
6592	tcg_temp_free_i32(tmp);
6593	} else if ((insn & `0x380`) == `0`) {
6594	/ VDUP /
6595	int element;
6596	MemOp size;
6597
6598	if ((insn & (`7` << `16`)) == `0` \|\| (q && (rd & `1`))) {
6599	return `1`;
6600	}
6601	if (insn & (`1` << `16`)) {
6602	size = MO_8;
6603	element = (insn >> `17`) & `7`;
6604	} else if (insn & (`1` << `17`)) {
6605	size = MO_16;
6606	element = (insn >> `18`) & `3`;
6607	} else {
6608	size = MO_32;
6609	element = (insn >> `19`) & `1`;
6610	}
6611	tcg_gen_gvec_dup_mem(size, neon_reg_offset(rd, `0`),
6612	neon_element_offset(rm, element, size),
6613	q ? `16` : `8`, q ? `16` : `8`);
6614	} else {
6615	return `1`;
6616	}
6617	}
6618	}
6619	return `0`;
6620	}
6621
6622	/* Advanced SIMD three registers of the same length extension.
6623	 *  31           25    23  22    20   16   12  11  10   9    8             3 0
6624	 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6625	 * | 1 1 1 1 1 1 0 | op1 | D | op2 | Vn | Vd | 1 | o3 | 0 | o4 | N Q M U | Vm |
6626	 * +---------------+-----+---+-----+----+----+---+----+---+----+---------+----+
6627	 */
6628	static int disas_neon_insn_3same_ext(DisasContext *s, uint32_t insn)
6629	{
6630	gen_helper_gvec_3 *fn_gvec = NULL;
6631	gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
6632	int rd, rn, rm, opr_sz;
6633	int data = `0`;
6634	int off_rn, off_rm;
6635	bool is_long = false, q = extract32(insn, `6`, `1`);
6636	bool ptr_is_env = false;
6637
6638	if ((insn & `0xfe200f10`) == `0xfc200800`) {
6639	/ VCMLA -- 1111 110R R.1S .... .... 1000 ...0 .... /
6640	int size = extract32(insn, `20`, `1`);
6641	data = extract32(insn, `23`, `2`); / rot /
6642	if (!dc_isar_feature(aa32_vcma, s)
6643	\|\| (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6644	return `1`;
6645	}
6646	fn_gvec_ptr = size ? gen_helper_gvec_fcmlas : gen_helper_gvec_fcmlah;
6647	} else if ((insn & `0xfea00f10`) == `0xfc800800`) {
6648	/ VCADD -- 1111 110R 1.0S .... .... 1000 ...0 .... /
6649	int size = extract32(insn, `20`, `1`);
6650	data = extract32(insn, `24`, `1`); / rot /
6651	if (!dc_isar_feature(aa32_vcma, s)
6652	\|\| (!size && !dc_isar_feature(aa32_fp16_arith, s))) {
6653	return `1`;
6654	}
6655	fn_gvec_ptr = size ? gen_helper_gvec_fcadds : gen_helper_gvec_fcaddh;
6656	} else if ((insn & `0xfeb00f00`) == `0xfc200d00`) {
6657	/ V[US]DOT -- 1111 1100 0.10 .... .... 1101 .Q.U .... /
6658	bool u = extract32(insn, `4`, `1`);
6659	if (!dc_isar_feature(aa32_dp, s)) {
6660	return `1`;
6661	}
6662	fn_gvec = u ? gen_helper_gvec_udot_b : gen_helper_gvec_sdot_b;
6663	} else if ((insn & `0xff300f10`) == `0xfc200810`) {
6664	/ VFM[AS]L -- 1111 1100 S.10 .... .... 1000 .Q.1 .... /
6665	int is_s = extract32(insn, `23`, `1`);
6666	if (!dc_isar_feature(aa32_fhm, s)) {
6667	return `1`;
6668	}
6669	is_long = true;
6670	data = is_s; / is_2 == 0 /
6671	fn_gvec_ptr = gen_helper_gvec_fmlal_a32;
6672	ptr_is_env = true;
6673	} else {
6674	return `1`;
6675	}
6676
6677	VFP_DREG_D(rd, insn);
6678	if (rd & q) {
6679	return `1`;
6680	}
6681	if (q \|\| !is_long) {
6682	VFP_DREG_N(rn, insn);
6683	VFP_DREG_M(rm, insn);
6684	if ((rn \| rm) & q & !is_long) {
6685	return `1`;
6686	}
6687	off_rn = vfp_reg_offset(`1`, rn);
6688	off_rm = vfp_reg_offset(`1`, rm);
6689	} else {
6690	rn = VFP_SREG_N(insn);
6691	rm = VFP_SREG_M(insn);
6692	off_rn = vfp_reg_offset(`0`, rn);
6693	off_rm = vfp_reg_offset(`0`, rm);
6694	}
6695
6696	if (s->fp_excp_el) {
6697	gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
6698	syn_simd_access_trap(`1`, `0xe`, false), s->fp_excp_el);
6699	return `0`;
6700	}
6701	if (!s->vfp_enabled) {
6702	return `1`;
6703	}
6704
6705	opr_sz = (`1` + q) * `8`;
6706	if (fn_gvec_ptr) {
6707	TCGv_ptr ptr;
6708	if (ptr_is_env) {
6709	ptr = cpu_env;
6710	} else {
6711	ptr = get_fpstatus_ptr(`1`);
6712	}
6713	tcg_gen_gvec_3_ptr(vfp_reg_offset(`1`, rd), off_rn, off_rm, ptr,
6714	opr_sz, opr_sz, data, fn_gvec_ptr);
6715	if (!ptr_is_env) {
6716	tcg_temp_free_ptr(ptr);
6717	}
6718	} else {
6719	tcg_gen_gvec_3_ool(vfp_reg_offset(`1`, rd), off_rn, off_rm,
6720	opr_sz, opr_sz, data, fn_gvec);
6721	}
6722	return `0`;
6723	}
6724
6725	/ Advanced SIMD two registers and a scalar extension.*
6726	* 31 24 23 22 20 16 12 11 10 9 8 3 0
6727	* +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
6728	* \| 1 1 1 1 1 1 1 0 \| o1 \| D \| o2 \| Vn \| Vd \| 1 \| o3 \| 0 \| o4 \| N Q M U \| Vm \|
6729	* +-----------------+----+---+----+----+----+---+----+---+----+---------+----+
6730	*
6731	*/
6732
6733	static int disas_neon_insn_2reg_scalar_ext(DisasContext *s, uint32_t insn)
6734	{
6735	gen_helper_gvec_3 *fn_gvec = NULL;
6736	gen_helper_gvec_3_ptr *fn_gvec_ptr = NULL;
6737	int rd, rn, rm, opr_sz, data;
6738	int off_rn, off_rm;
6739	bool is_long = false, q = extract32(insn, `6`, `1`);
6740	bool ptr_is_env = false;
6741
6742	if ((insn & `0xff000f10`) == `0xfe000800`) {
6743	/ VCMLA (indexed) -- 1111 1110 S.RR .... .... 1000 ...0 .... /
6744	int rot = extract32(insn, `20`, `2`);
6745	int size = extract32(insn, `23`, `1`);
6746	int index;
6747
6748	if (!dc_isar_feature(aa32_vcma, s)) {
6749	return `1`;
6750	}
6751	if (size == `0`) {
6752	if (!dc_isar_feature(aa32_fp16_arith, s)) {
6753	return `1`;
6754	}
6755	/ For fp16, rm is just Vm, and index is M. /
6756	rm = extract32(insn, `0`, `4`);
6757	index = extract32(insn, `5`, `1`);
6758	} else {
6759	/ For fp32, rm is the usual M:Vm, and index is 0. /
6760	VFP_DREG_M(rm, insn);
6761	index = `0`;
6762	}
6763	data = (index << `2`) \| rot;
6764	fn_gvec_ptr = (size ? gen_helper_gvec_fcmlas_idx
6765	: gen_helper_gvec_fcmlah_idx);
6766	} else if ((insn & `0xffb00f00`) == `0xfe200d00`) {
6767	/ V[US]DOT -- 1111 1110 0.10 .... .... 1101 .Q.U .... /
6768	int u = extract32(insn, `4`, `1`);
6769
6770	if (!dc_isar_feature(aa32_dp, s)) {
6771	return `1`;
6772	}
6773	fn_gvec = u ? gen_helper_gvec_udot_idx_b : gen_helper_gvec_sdot_idx_b;
6774	/ rm is just Vm, and index is M. /
6775	data = extract32(insn, `5`, `1`); / index /
6776	rm = extract32(insn, `0`, `4`);
6777	} else if ((insn & `0xffa00f10`) == `0xfe000810`) {
6778	/ VFM[AS]L -- 1111 1110 0.0S .... .... 1000 .Q.1 .... /
6779	int is_s = extract32(insn, `20`, `1`);
6780	int vm20 = extract32(insn, `0`, `3`);
6781	int vm3 = extract32(insn, `3`, `1`);
6782	int m = extract32(insn, `5`, `1`);
6783	int index;
6784
6785	if (!dc_isar_feature(aa32_fhm, s)) {
6786	return `1`;
6787	}
6788	if (q) {
6789	rm = vm20;
6790	index = m * `2` + vm3;
6791	} else {
6792	rm = vm20 * `2` + m;
6793	index = vm3;
6794	}
6795	is_long = true;
6796	data = (index << `2`) \| is_s; / is_2 == 0 /
6797	fn_gvec_ptr = gen_helper_gvec_fmlal_idx_a32;
6798	ptr_is_env = true;
6799	} else {
6800	return `1`;
6801	}
6802
6803	VFP_DREG_D(rd, insn);
6804	if (rd & q) {
6805	return `1`;
6806	}
6807	if (q \|\| !is_long) {
6808	VFP_DREG_N(rn, insn);
6809	if (rn & q & !is_long) {
6810	return `1`;
6811	}
6812	off_rn = vfp_reg_offset(`1`, rn);
6813	off_rm = vfp_reg_offset(`1`, rm);
6814	} else {
6815	rn = VFP_SREG_N(insn);
6816	off_rn = vfp_reg_offset(`0`, rn);
6817	off_rm = vfp_reg_offset(`0`, rm);
6818	}
6819	if (s->fp_excp_el) {
6820	gen_exception_insn(s, s->pc_curr, EXCP_UDEF,
6821	syn_simd_access_trap(`1`, `0xe`, false), s->fp_excp_el);
6822	return `0`;
6823	}
6824	if (!s->vfp_enabled) {
6825	return `1`;
6826	}
6827
6828	opr_sz = (`1` + q) * `8`;
6829	if (fn_gvec_ptr) {
6830	TCGv_ptr ptr;
6831	if (ptr_is_env) {
6832	ptr = cpu_env;
6833	} else {
6834	ptr = get_fpstatus_ptr(`1`);
6835	}
6836	tcg_gen_gvec_3_ptr(vfp_reg_offset(`1`, rd), off_rn, off_rm, ptr,
6837	opr_sz, opr_sz, data, fn_gvec_ptr);
6838	if (!ptr_is_env) {
6839	tcg_temp_free_ptr(ptr);
6840	}
6841	} else {
6842	tcg_gen_gvec_3_ool(vfp_reg_offset(`1`, rd), off_rn, off_rm,
6843	opr_sz, opr_sz, data, fn_gvec);
6844	}
6845	return `0`;
6846	}
6847
6848	static int disas_coproc_insn(DisasContext *s, uint32_t insn)
6849	{
6850	int cpnum, is64, crn, crm, opc1, opc2, isread, rt, rt2;
6851	const ARMCPRegInfo *ri;
6852
6853	cpnum = (insn >> `8`) & `0xf`;
6854
6855	/ First check for coprocessor space used for XScale/iwMMXt insns /
6856	if (arm_dc_feature(s, ARM_FEATURE_XSCALE) && (cpnum < `2`)) {
6857	if (extract32(s->c15_cpar, cpnum, `1`) == `0`) {
6858	return `1`;
6859	}
6860	if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
6861	return disas_iwmmxt_insn(s, insn);
6862	} else if (arm_dc_feature(s, ARM_FEATURE_XSCALE)) {
6863	return disas_dsp_insn(s, insn);
6864	}
6865	return `1`;
6866	}
6867
6868	/ Otherwise treat as a generic register access /
6869	is64 = (insn & (`1` << `25`)) == `0`;
6870	if (!is64 && ((insn & (`1` << `4`)) == `0`)) {
6871	/ cdp /
6872	return `1`;
6873	}
6874
6875	crm = insn & `0xf`;
6876	if (is64) {
6877	crn = `0`;
6878	opc1 = (insn >> `4`) & `0xf`;
6879	opc2 = `0`;
6880	rt2 = (insn >> `16`) & `0xf`;
6881	} else {
6882	crn = (insn >> `16`) & `0xf`;
6883	opc1 = (insn >> `21`) & `7`;
6884	opc2 = (insn >> `5`) & `7`;
6885	rt2 = `0`;
6886	}
6887	isread = (insn >> `20`) & `1`;
6888	rt = (insn >> `12`) & `0xf`;
6889
6890	ri = get_arm_cp_reginfo(s->cp_regs,
6891	ENCODE_CP_REG(cpnum, is64, s->ns, crn, crm, opc1, opc2));
6892	if (ri) {
6893	/ Check access permissions /
6894	if (!cp_access_ok(s->current_el, ri, isread)) {
6895	return `1`;
6896	}
6897
6898	if (ri->accessfn \|\|
6899	(arm_dc_feature(s, ARM_FEATURE_XSCALE) && cpnum < `14`)) {
6900	/ Emit code to perform further access permissions checks at*
6901	* runtime; this may result in an exception.
6902	* Note that on XScale all cp0..c13 registers do an access check
6903	* call in order to handle c15_cpar.
6904	*/
6905	TCGv_ptr tmpptr;
6906	TCGv_i32 tcg_syn, tcg_isread;
6907	uint32_t syndrome;
6908
6909	/ Note that since we are an implementation which takes an*
6910	* exception on a trapped conditional instruction only if the
6911	* instruction passes its condition code check, we can take
6912	* advantage of the clause in the ARM ARM that allows us to set
6913	* the COND field in the instruction to 0xE in all cases.
6914	* We could fish the actual condition out of the insn (ARM)
6915	* or the condexec bits (Thumb) but it isn't necessary.
6916	*/
6917	switch (cpnum) {
6918	case `14`:
6919	if (is64) {
6920	syndrome = syn_cp14_rrt_trap(`1`, `0xe`, opc1, crm, rt, rt2,
6921	isread, false);
6922	} else {
6923	syndrome = syn_cp14_rt_trap(`1`, `0xe`, opc1, opc2, crn, crm,
6924	rt, isread, false);
6925	}
6926	break;
6927	case `15`:
6928	if (is64) {
6929	syndrome = syn_cp15_rrt_trap(`1`, `0xe`, opc1, crm, rt, rt2,
6930	isread, false);
6931	} else {
6932	syndrome = syn_cp15_rt_trap(`1`, `0xe`, opc1, opc2, crn, crm,
6933	rt, isread, false);
6934	}
6935	break;
6936	default:
6937	/ ARMv8 defines that only coprocessors 14 and 15 exist,*
6938	* so this can only happen if this is an ARMv7 or earlier CPU,
6939	* in which case the syndrome information won't actually be
6940	* guest visible.
6941	*/
6942	assert(!arm_dc_feature(s, ARM_FEATURE_V8));
6943	syndrome = syn_uncategorized();
6944	break;
6945	}
6946
6947	gen_set_condexec(s);
6948	gen_set_pc_im(s, s->pc_curr);
6949	tmpptr = tcg_const_ptr(ri);
6950	tcg_syn = tcg_const_i32(syndrome);
6951	tcg_isread = tcg_const_i32(isread);
6952	gen_helper_access_check_cp_reg(cpu_env, tmpptr, tcg_syn,
6953	tcg_isread);
6954	tcg_temp_free_ptr(tmpptr);
6955	tcg_temp_free_i32(tcg_syn);
6956	tcg_temp_free_i32(tcg_isread);
6957	} else if (ri->type & ARM_CP_RAISES_EXC) {
6958	/*
6959	* The readfn or writefn might raise an exception;
6960	* synchronize the CPU state in case it does.
6961	*/
6962	gen_set_condexec(s);
6963	gen_set_pc_im(s, s->pc_curr);
6964	}
6965
6966	/ Handle special cases first /
6967	switch (ri->type & ~(ARM_CP_FLAG_MASK & ~ARM_CP_SPECIAL)) {
6968	case ARM_CP_NOP:
6969	return `0`;
6970	case ARM_CP_WFI:
6971	if (isread) {
6972	return `1`;
6973	}
6974	gen_set_pc_im(s, s->base.pc_next);
6975	s->base.is_jmp = DISAS_WFI;
6976	return `0`;
6977	default:
6978	break;
6979	}
6980
6981	if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
6982	gen_io_start();
6983	}
6984
6985	if (isread) {
6986	/ Read /
6987	if (is64) {
6988	TCGv_i64 tmp64;
6989	TCGv_i32 tmp;
6990	if (ri->type & ARM_CP_CONST) {
6991	tmp64 = tcg_const_i64(ri->resetvalue);
6992	} else if (ri->readfn) {
6993	TCGv_ptr tmpptr;
6994	tmp64 = tcg_temp_new_i64();
6995	tmpptr = tcg_const_ptr(ri);
6996	gen_helper_get_cp_reg64(tmp64, cpu_env, tmpptr);
6997	tcg_temp_free_ptr(tmpptr);
6998	} else {
6999	tmp64 = tcg_temp_new_i64();
7000	tcg_gen_ld_i64(tmp64, cpu_env, ri->fieldoffset);
7001	}
7002	tmp = tcg_temp_new_i32();
7003	tcg_gen_extrl_i64_i32(tmp, tmp64);
7004	store_reg(s, rt, tmp);
7005	tmp = tcg_temp_new_i32();
7006	tcg_gen_extrh_i64_i32(tmp, tmp64);
7007	tcg_temp_free_i64(tmp64);
7008	store_reg(s, rt2, tmp);
7009	} else {
7010	TCGv_i32 tmp;
7011	if (ri->type & ARM_CP_CONST) {
7012	tmp = tcg_const_i32(ri->resetvalue);
7013	} else if (ri->readfn) {
7014	TCGv_ptr tmpptr;
7015	tmp = tcg_temp_new_i32();
7016	tmpptr = tcg_const_ptr(ri);
7017	gen_helper_get_cp_reg(tmp, cpu_env, tmpptr);
7018	tcg_temp_free_ptr(tmpptr);
7019	} else {
7020	tmp = load_cpu_offset(ri->fieldoffset);
7021	}
7022	if (rt == `15`) {
7023	/ Destination register of r15 for 32 bit loads sets*
7024	* the condition codes from the high 4 bits of the value
7025	*/
7026	gen_set_nzcv(tmp);
7027	tcg_temp_free_i32(tmp);
7028	} else {
7029	store_reg(s, rt, tmp);
7030	}
7031	}
7032	} else {
7033	/ Write /
7034	if (ri->type & ARM_CP_CONST) {
7035	/ If not forbidden by access permissions, treat as WI /
7036	return `0`;
7037	}
7038
7039	if (is64) {
7040	TCGv_i32 tmplo, tmphi;
7041	TCGv_i64 tmp64 = tcg_temp_new_i64();
7042	tmplo = load_reg(s, rt);
7043	tmphi = load_reg(s, rt2);
7044	tcg_gen_concat_i32_i64(tmp64, tmplo, tmphi);
7045	tcg_temp_free_i32(tmplo);
7046	tcg_temp_free_i32(tmphi);
7047	if (ri->writefn) {
7048	TCGv_ptr tmpptr = tcg_const_ptr(ri);
7049	gen_helper_set_cp_reg64(cpu_env, tmpptr, tmp64);
7050	tcg_temp_free_ptr(tmpptr);
7051	} else {
7052	tcg_gen_st_i64(tmp64, cpu_env, ri->fieldoffset);
7053	}
7054	tcg_temp_free_i64(tmp64);
7055	} else {
7056	if (ri->writefn) {
7057	TCGv_i32 tmp;
7058	TCGv_ptr tmpptr;
7059	tmp = load_reg(s, rt);
7060	tmpptr = tcg_const_ptr(ri);
7061	gen_helper_set_cp_reg(cpu_env, tmpptr, tmp);
7062	tcg_temp_free_ptr(tmpptr);
7063	tcg_temp_free_i32(tmp);
7064	} else {
7065	TCGv_i32 tmp = load_reg(s, rt);
7066	store_cpu_offset(tmp, ri->fieldoffset);
7067	}
7068	}
7069	}
7070
7071	if ((tb_cflags(s->base.tb) & CF_USE_ICOUNT) && (ri->type & ARM_CP_IO)) {
7072	/ I/O operations must end the TB here (whether read or write) /
7073	gen_lookup_tb(s);
7074	} else if (!isread && !(ri->type & ARM_CP_SUPPRESS_TB_END)) {
7075	/ We default to ending the TB on a coprocessor register write,*
7076	* but allow this to be suppressed by the register definition
7077	* (usually only necessary to work around guest bugs).
7078	*/
7079	gen_lookup_tb(s);
7080	}
7081
7082	return `0`;
7083	}
7084
7085	/ Unknown register; this might be a guest error or a QEMU*
7086	* unimplemented feature.
7087	*/
7088	if (is64) {
7089	qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7090	"64 bit system register cp:%d opc1: %d crm:%d "
7091	"(%s)\n",
7092	isread ? "read" : "write", cpnum, opc1, crm,
7093	s->ns ? "non-secure" : "secure");
7094	} else {
7095	qemu_log_mask(LOG_UNIMP, "%s access to unsupported AArch32 "
7096	"system register cp:%d opc1:%d crn:%d crm:%d opc2:%d "
7097	"(%s)\n",
7098	isread ? "read" : "write", cpnum, opc1, crn, crm, opc2,
7099	s->ns ? "non-secure" : "secure");
7100	}
7101
7102	return `1`;
7103	}
7104
7105
7106	/ Store a 64-bit value to a register pair. Clobbers val. /
7107	static void gen_storeq_reg(DisasContext s, int* rlow, int rhigh, TCGv_i64 val)
7108	{
7109	TCGv_i32 tmp;
7110	tmp = tcg_temp_new_i32();
7111	tcg_gen_extrl_i64_i32(tmp, val);
7112	store_reg(s, rlow, tmp);
7113	tmp = tcg_temp_new_i32();
7114	tcg_gen_extrh_i64_i32(tmp, val);
7115	store_reg(s, rhigh, tmp);
7116	}
7117
7118	/ load and add a 64-bit value from a register pair. /
7119	static void gen_addq(DisasContext s, TCGv_i64 val, int* rlow, int rhigh)
7120	{
7121	TCGv_i64 tmp;
7122	TCGv_i32 tmpl;
7123	TCGv_i32 tmph;
7124
7125	/ Load 64-bit value rd:rn. /
7126	tmpl = load_reg(s, rlow);
7127	tmph = load_reg(s, rhigh);
7128	tmp = tcg_temp_new_i64();
7129	tcg_gen_concat_i32_i64(tmp, tmpl, tmph);
7130	tcg_temp_free_i32(tmpl);
7131	tcg_temp_free_i32(tmph);
7132	tcg_gen_add_i64(val, val, tmp);
7133	tcg_temp_free_i64(tmp);
7134	}
7135
7136	/ Set N and Z flags from hi\|lo. /
7137	static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
7138	{
7139	tcg_gen_mov_i32(cpu_NF, hi);
7140	tcg_gen_or_i32(cpu_ZF, lo, hi);
7141	}
7142
7143	/ Load/Store exclusive instructions are implemented by remembering*
7144	the value/address loaded, and seeing if these are the same
7145	when the store is performed. This should be sufficient to implement
7146	the architecturally mandated semantics, and avoids having to monitor
7147	regular stores. The compare vs the remembered value is done during
7148	the cmpxchg operation, but we must compare the addresses manually. /*
7149	static void gen_load_exclusive(DisasContext s, int* rt, int rt2,
7150	TCGv_i32 addr, int size)
7151	{
7152	TCGv_i32 tmp = tcg_temp_new_i32();
7153	MemOp opc = size \| MO_ALIGN \| s->be_data;
7154
7155	s->is_ldex = true;
7156
7157	if (size == `3`) {
7158	TCGv_i32 tmp2 = tcg_temp_new_i32();
7159	TCGv_i64 t64 = tcg_temp_new_i64();
7160
7161	/ For AArch32, architecturally the 32-bit word at the lowest*
7162	* address is always Rt and the one at addr+4 is Rt2, even if
7163	* the CPU is big-endian. That means we don't want to do a
7164	* gen_aa32_ld_i64(), which invokes gen_aa32_frob64() as if
7165	* for an architecturally 64-bit access, but instead do a
7166	* 64-bit access using MO_BE if appropriate and then split
7167	* the two halves.
7168	* This only makes a difference for BE32 user-mode, where
7169	* frob64() must not flip the two halves of the 64-bit data
7170	* but this code must treat BE32 user-mode like BE32 system.
7171	*/
7172	TCGv taddr = gen_aa32_addr(s, addr, opc);
7173
7174	tcg_gen_qemu_ld_i64(t64, taddr, get_mem_index(s), opc);
7175	tcg_temp_free(taddr);
7176	tcg_gen_mov_i64(cpu_exclusive_val, t64);
7177	if (s->be_data == MO_BE) {
7178	tcg_gen_extr_i64_i32(tmp2, tmp, t64);
7179	} else {
7180	tcg_gen_extr_i64_i32(tmp, tmp2, t64);
7181	}
7182	tcg_temp_free_i64(t64);
7183
7184	store_reg(s, rt2, tmp2);
7185	} else {
7186	gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), opc);
7187	tcg_gen_extu_i32_i64(cpu_exclusive_val, tmp);
7188	}
7189
7190	store_reg(s, rt, tmp);
7191	tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
7192	}
7193
7194	static void gen_clrex(DisasContext *s)
7195	{
7196	tcg_gen_movi_i64(cpu_exclusive_addr, -`1`);
7197	}
7198
7199	static void gen_store_exclusive(DisasContext s, int* rd, int rt, int rt2,
7200	TCGv_i32 addr, int size)
7201	{
7202	TCGv_i32 t0, t1, t2;
7203	TCGv_i64 extaddr;
7204	TCGv taddr;
7205	TCGLabel *done_label;
7206	TCGLabel *fail_label;
7207	MemOp opc = size \| MO_ALIGN \| s->be_data;
7208
7209	/ if (env->exclusive_addr == addr && env->exclusive_val == [addr]) {*
7210	[addr] = {Rt};
7211	{Rd} = 0;
7212	} else {
7213	{Rd} = 1;
7214	} /*
7215	fail_label = gen_new_label();
7216	done_label = gen_new_label();
7217	extaddr = tcg_temp_new_i64();
7218	tcg_gen_extu_i32_i64(extaddr, addr);
7219	tcg_gen_brcond_i64(TCG_COND_NE, extaddr, cpu_exclusive_addr, fail_label);
7220	tcg_temp_free_i64(extaddr);
7221
7222	taddr = gen_aa32_addr(s, addr, opc);
7223	t0 = tcg_temp_new_i32();
7224	t1 = load_reg(s, rt);
7225	if (size == `3`) {
7226	TCGv_i64 o64 = tcg_temp_new_i64();
7227	TCGv_i64 n64 = tcg_temp_new_i64();
7228
7229	t2 = load_reg(s, rt2);
7230	/ For AArch32, architecturally the 32-bit word at the lowest*
7231	* address is always Rt and the one at addr+4 is Rt2, even if
7232	* the CPU is big-endian. Since we're going to treat this as a
7233	* single 64-bit BE store, we need to put the two halves in the
7234	* opposite order for BE to LE, so that they end up in the right
7235	* places.
7236	* We don't want gen_aa32_frob64() because that does the wrong
7237	* thing for BE32 usermode.
7238	*/
7239	if (s->be_data == MO_BE) {
7240	tcg_gen_concat_i32_i64(n64, t2, t1);
7241	} else {
7242	tcg_gen_concat_i32_i64(n64, t1, t2);
7243	}
7244	tcg_temp_free_i32(t2);
7245
7246	tcg_gen_atomic_cmpxchg_i64(o64, taddr, cpu_exclusive_val, n64,
7247	get_mem_index(s), opc);
7248	tcg_temp_free_i64(n64);
7249
7250	tcg_gen_setcond_i64(TCG_COND_NE, o64, o64, cpu_exclusive_val);
7251	tcg_gen_extrl_i64_i32(t0, o64);
7252
7253	tcg_temp_free_i64(o64);
7254	} else {
7255	t2 = tcg_temp_new_i32();
7256	tcg_gen_extrl_i64_i32(t2, cpu_exclusive_val);
7257	tcg_gen_atomic_cmpxchg_i32(t0, taddr, t2, t1, get_mem_index(s), opc);
7258	tcg_gen_setcond_i32(TCG_COND_NE, t0, t0, t2);
7259	tcg_temp_free_i32(t2);
7260	}
7261	tcg_temp_free_i32(t1);
7262	tcg_temp_free(taddr);
7263	tcg_gen_mov_i32(cpu_R[rd], t0);
7264	tcg_temp_free_i32(t0);
7265	tcg_gen_br(done_label);
7266
7267	gen_set_label(fail_label);
7268	tcg_gen_movi_i32(cpu_R[rd], `1`);
7269	gen_set_label(done_label);
7270	tcg_gen_movi_i64(cpu_exclusive_addr, -`1`);
7271	}
7272
7273	/ gen_srs:*
7274	* @env: CPUARMState
7275	* @s: DisasContext
7276	* @mode: mode field from insn (which stack to store to)
7277	* @amode: addressing mode (DA/IA/DB/IB), encoded as per P,U bits in ARM insn
7278	* @writeback: true if writeback bit set
7279	*
7280	* Generate code for the SRS (Store Return State) insn.
7281	*/
7282	static void gen_srs(DisasContext *s,
7283	uint32_t mode, uint32_t amode, bool writeback)
7284	{
7285	int32_t offset;
7286	TCGv_i32 addr, tmp;
7287	bool undef = false;
7288
7289	/ SRS is:*
7290	* - trapped to EL3 if EL3 is AArch64 and we are at Secure EL1
7291	* and specified mode is monitor mode
7292	* - UNDEFINED in Hyp mode
7293	* - UNPREDICTABLE in User or System mode
7294	* - UNPREDICTABLE if the specified mode is:
7295	* -- not implemented
7296	* -- not a valid mode number
7297	* -- a mode that's at a higher exception level
7298	* -- Monitor, if we are Non-secure
7299	* For the UNPREDICTABLE cases we choose to UNDEF.
7300	*/
7301	if (s->current_el == `1` && !s->ns && mode == ARM_CPU_MODE_MON) {
7302	gen_exception_insn(s, s->pc_curr, EXCP_UDEF, syn_uncategorized(), `3`);
7303	return;
7304	}
7305
7306	if (s->current_el == `0` \|\| s->current_el == `2`) {
7307	undef = true;
7308	}
7309
7310	switch (mode) {
7311	case ARM_CPU_MODE_USR:
7312	case ARM_CPU_MODE_FIQ:
7313	case ARM_CPU_MODE_IRQ:
7314	case ARM_CPU_MODE_SVC:
7315	case ARM_CPU_MODE_ABT:
7316	case ARM_CPU_MODE_UND:
7317	case ARM_CPU_MODE_SYS:
7318	break;
7319	case ARM_CPU_MODE_HYP:
7320	if (s->current_el == `1` \|\| !arm_dc_feature(s, ARM_FEATURE_EL2)) {
7321	undef = true;
7322	}
7323	break;
7324	case ARM_CPU_MODE_MON:
7325	/ No need to check specifically for "are we non-secure" because*
7326	* we've already made EL0 UNDEF and handled the trap for S-EL1;
7327	* so if this isn't EL3 then we must be non-secure.
7328	*/
7329	if (s->current_el != `3`) {
7330	undef = true;
7331	}
7332	break;
7333	default:
7334	undef = true;
7335	}
7336
7337	if (undef) {
7338	unallocated_encoding(s);
7339	return;
7340	}
7341
7342	addr = tcg_temp_new_i32();
7343	tmp = tcg_const_i32(mode);
7344	/ get_r13_banked() will raise an exception if called from System mode /
7345	gen_set_condexec(s);
7346	gen_set_pc_im(s, s->pc_curr);
7347	gen_helper_get_r13_banked(addr, cpu_env, tmp);
7348	tcg_temp_free_i32(tmp);
7349	switch (amode) {
7350	case `0`: / DA /
7351	offset = -`4`;
7352	break;
7353	case `1`: / IA /
7354	offset = `0`;
7355	break;
7356	case `2`: / DB /
7357	offset = -`8`;
7358	break;
7359	case `3`: / IB /
7360	offset = `4`;
7361	break;
7362	default:
7363	abort();
7364	}
7365	tcg_gen_addi_i32(addr, addr, offset);
7366	tmp = load_reg(s, `14`);
7367	gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7368	tcg_temp_free_i32(tmp);
7369	tmp = load_cpu_field(spsr);
7370	tcg_gen_addi_i32(addr, addr, `4`);
7371	gen_aa32_st32(s, tmp, addr, get_mem_index(s));
7372	tcg_temp_free_i32(tmp);
7373	if (writeback) {
7374	switch (amode) {
7375	case `0`:
7376	offset = -`8`;
7377	break;
7378	case `1`:
7379	offset = `4`;
7380	break;
7381	case `2`:
7382	offset = -`4`;
7383	break;
7384	case `3`:
7385	offset = `0`;
7386	break;
7387	default:
7388	abort();
7389	}
7390	tcg_gen_addi_i32(addr, addr, offset);
7391	tmp = tcg_const_i32(mode);
7392	gen_helper_set_r13_banked(cpu_env, tmp, addr);
7393	tcg_temp_free_i32(tmp);
7394	}
7395	tcg_temp_free_i32(addr);
7396	s->base.is_jmp = DISAS_UPDATE;
7397	}
7398
7399	/ Generate a label used for skipping this instruction /
7400	static void arm_gen_condlabel(DisasContext *s)
7401	{
7402	if (!s->condjmp) {
7403	s->condlabel = gen_new_label();
7404	s->condjmp = `1`;
7405	}
7406	}
7407
7408	/ Skip this instruction if the ARM condition is false /
7409	static void arm_skip_unless(DisasContext *s, uint32_t cond)
7410	{
7411	arm_gen_condlabel(s);
7412	arm_gen_test_cc(cond ^ `1`, s->condlabel);
7413	}
7414
7415
7416	/*
7417	* Constant expanders for the decoders.
7418	*/
7419
7420	static int negate(DisasContext s, int* x)
7421	{
7422	return -x;
7423	}
7424
7425	static int plus_2(DisasContext s, int* x)
7426	{
7427	return x + `2`;
7428	}
7429
7430	static int times_2(DisasContext s, int* x)
7431	{
7432	return x * `2`;
7433	}
7434
7435	static int times_4(DisasContext s, int* x)
7436	{
7437	return x * `4`;
7438	}
7439
7440	/ Return only the rotation part of T32ExpandImm. /
7441	static int t32_expandimm_rot(DisasContext s, int* x)
7442	{
7443	return x & `0xc00` ? extract32(x, `7`, `5`) : `0`;
7444	}
7445
7446	/ Return the unrotated immediate from T32ExpandImm. /
7447	static int t32_expandimm_imm(DisasContext s, int* x)
7448	{
7449	int imm = extract32(x, `0`, `8`);
7450
7451	switch (extract32(x, `8`, `4`)) {
7452	case `0`: / XY /
7453	/ Nothing to do. /
7454	break;
7455	case `1`: / 00XY00XY /
7456	imm *= `0x00010001`;
7457	break;
7458	case `2`: / XY00XY00 /
7459	imm *= `0x01000100`;
7460	break;
7461	case `3`: / XYXYXYXY /
7462	imm *= `0x01010101`;
7463	break;
7464	default:
7465	/ Rotated constant. /
7466	imm \|= `0x80`;
7467	break;
7468	}
7469	return imm;
7470	}
7471
7472	static int t32_branch24(DisasContext s, int* x)
7473	{
7474	/ Convert J1:J2 at x[22:21] to I2:I1, which involves I=J^~S. /
7475	x ^= !(x < `0`) * (`3` << `21`);
7476	/ Append the final zero. /
7477	return x << `1`;
7478	}
7479
7480	static int t16_setflags(DisasContext *s)
7481	{
7482	return s->condexec_mask == `0`;
7483	}
7484
7485	static int t16_push_list(DisasContext s, int* x)
7486	{
7487	return (x & `0xff`) \| (x & `0x100`) << (`14` - `8`);
7488	}
7489
7490	static int t16_pop_list(DisasContext s, int* x)
7491	{
7492	return (x & `0xff`) \| (x & `0x100`) << (`15` - `8`);
7493	}
7494
7495	/*
7496	* Include the generated decoders.
7497	*/
7498
7499	#include "decode-a32.inc.c"
7500	#include "decode-a32-uncond.inc.c"
7501	#include "decode-t32.inc.c"
7502	#include "decode-t16.inc.c"
7503
7504	/ Helpers to swap operands for reverse-subtract. /
7505	static void gen_rsb(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7506	{
7507	tcg_gen_sub_i32(dst, b, a);
7508	}
7509
7510	static void gen_rsb_CC(TCGv_i32 dst, TCGv_i32 a, TCGv_i32 b)
7511	{
7512	gen_sub_CC(dst, b, a);
7513	}
7514
7515	static void gen_rsc(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7516	{
7517	gen_sub_carry(dest, b, a);
7518	}
7519
7520	static void gen_rsc_CC(TCGv_i32 dest, TCGv_i32 a, TCGv_i32 b)
7521	{
7522	gen_sbc_CC(dest, b, a);
7523	}
7524
/*
 * Helpers for the data processing routines.
 *
 * After the computation store the results back.
 * This may be suppressed altogether (STREG_NONE), require a runtime
 * check against the stack limits (STREG_SP_CHECK), or generate an
 * exception return.  Oh, or store into a register.
 *
 * Always return true, indicating success for a trans_* function.
 */
typedef enum {
   STREG_NONE,      /* discard the result */
   STREG_NORMAL,    /* store into the destination register */
   STREG_SP_CHECK,  /* store via store_sp_checked() */
   STREG_EXC_RET,   /* use the result for gen_exception_return() */
} StoreRegKind;
7541
7542	static bool store_reg_kind(DisasContext s, int* rd,
7543	TCGv_i32 val, StoreRegKind kind)
7544	{
7545	switch (kind) {
7546	case STREG_NONE:
7547	tcg_temp_free_i32(val);
7548	return true;
7549	case STREG_NORMAL:
7550	/ See ALUWritePC: Interworking only from a32 mode. /
7551	if (s->thumb) {
7552	store_reg(s, rd, val);
7553	} else {
7554	store_reg_bx(s, rd, val);
7555	}
7556	return true;
7557	case STREG_SP_CHECK:
7558	store_sp_checked(s, val);
7559	return true;
7560	case STREG_EXC_RET:
7561	gen_exception_return(s, val);
7562	return true;
7563	}
7564	g_assert_not_reached();
7565	}
7566
7567	/*
7568	* Data Processing (register)
7569	*
7570	* Operate, with set flags, one register source,
7571	* one immediate shifted register source, and a destination.
7572	*/
7573	static bool op_s_rrr_shi(DisasContext s, arg_s_rrr_shi a,
7574	void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7575	int logic_cc, StoreRegKind kind)
7576	{
7577	TCGv_i32 tmp1, tmp2;
7578
7579	tmp2 = load_reg(s, a->rm);
7580	gen_arm_shift_im(tmp2, a->shty, a->shim, logic_cc);
7581	tmp1 = load_reg(s, a->rn);
7582
7583	gen(tmp1, tmp1, tmp2);
7584	tcg_temp_free_i32(tmp2);
7585
7586	if (logic_cc) {
7587	gen_logic_CC(tmp1);
7588	}
7589	return store_reg_kind(s, a->rd, tmp1, kind);
7590	}
7591
7592	static bool op_s_rxr_shi(DisasContext s, arg_s_rrr_shi a,
7593	void (*gen)(TCGv_i32, TCGv_i32),
7594	int logic_cc, StoreRegKind kind)
7595	{
7596	TCGv_i32 tmp;
7597
7598	tmp = load_reg(s, a->rm);
7599	gen_arm_shift_im(tmp, a->shty, a->shim, logic_cc);
7600
7601	gen(tmp, tmp);
7602	if (logic_cc) {
7603	gen_logic_CC(tmp);
7604	}
7605	return store_reg_kind(s, a->rd, tmp, kind);
7606	}
7607
7608	/*
7609	* Data-processing (register-shifted register)
7610	*
7611	* Operate, with set flags, one register source,
7612	* one register shifted register source, and a destination.
7613	*/
7614	static bool op_s_rrr_shr(DisasContext s, arg_s_rrr_shr a,
7615	void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7616	int logic_cc, StoreRegKind kind)
7617	{
7618	TCGv_i32 tmp1, tmp2;
7619
7620	tmp1 = load_reg(s, a->rs);
7621	tmp2 = load_reg(s, a->rm);
7622	gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7623	tmp1 = load_reg(s, a->rn);
7624
7625	gen(tmp1, tmp1, tmp2);
7626	tcg_temp_free_i32(tmp2);
7627
7628	if (logic_cc) {
7629	gen_logic_CC(tmp1);
7630	}
7631	return store_reg_kind(s, a->rd, tmp1, kind);
7632	}
7633
7634	static bool op_s_rxr_shr(DisasContext s, arg_s_rrr_shr a,
7635	void (*gen)(TCGv_i32, TCGv_i32),
7636	int logic_cc, StoreRegKind kind)
7637	{
7638	TCGv_i32 tmp1, tmp2;
7639
7640	tmp1 = load_reg(s, a->rs);
7641	tmp2 = load_reg(s, a->rm);
7642	gen_arm_shift_reg(tmp2, a->shty, tmp1, logic_cc);
7643
7644	gen(tmp2, tmp2);
7645	if (logic_cc) {
7646	gen_logic_CC(tmp2);
7647	}
7648	return store_reg_kind(s, a->rd, tmp2, kind);
7649	}
7650
7651	/*
7652	* Data-processing (immediate)
7653	*
7654	* Operate, with set flags, one register source,
7655	* one rotated immediate, and a destination.
7656	*
7657	* Note that logic_cc && a->rot setting CF based on the msb of the
7658	* immediate is the reason why we must pass in the unrotated form
7659	* of the immediate.
7660	*/
7661	static bool op_s_rri_rot(DisasContext s, arg_s_rri_rot a,
7662	void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32),
7663	int logic_cc, StoreRegKind kind)
7664	{
7665	TCGv_i32 tmp1, tmp2;
7666	uint32_t imm;
7667
7668	imm = ror32(a->imm, a->rot);
7669	if (logic_cc && a->rot) {
7670	tcg_gen_movi_i32(cpu_CF, imm >> `31`);
7671	}
7672	tmp2 = tcg_const_i32(imm);
7673	tmp1 = load_reg(s, a->rn);
7674
7675	gen(tmp1, tmp1, tmp2);
7676	tcg_temp_free_i32(tmp2);
7677
7678	if (logic_cc) {
7679	gen_logic_CC(tmp1);
7680	}
7681	return store_reg_kind(s, a->rd, tmp1, kind);
7682	}
7683
7684	static bool op_s_rxi_rot(DisasContext s, arg_s_rri_rot a,
7685	void (*gen)(TCGv_i32, TCGv_i32),
7686	int logic_cc, StoreRegKind kind)
7687	{
7688	TCGv_i32 tmp;
7689	uint32_t imm;
7690
7691	imm = ror32(a->imm, a->rot);
7692	if (logic_cc && a->rot) {
7693	tcg_gen_movi_i32(cpu_CF, imm >> `31`);
7694	}
7695	tmp = tcg_const_i32(imm);
7696
7697	gen(tmp, tmp);
7698	if (logic_cc) {
7699	gen_logic_CC(tmp);
7700	}
7701	return store_reg_kind(s, a->rd, tmp, kind);
7702	}
7703
/*
 * Define trans_NAME_rrri, trans_NAME_rrrr and trans_NAME_rri for a
 * three-operand data processing insn.  OP, L and K are expanded inside
 * each function body, so they may refer to the argument struct "a";
 * K is evaluated into k before OP and L are used.
 */
#define DO_ANY3(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rrri(DisasContext *s, arg_s_rrr_shi *a)  \
    { StoreRegKind k = (K); return op_s_rrr_shi(s, a, OP, L, k); }      \
    static bool trans_##NAME##_rrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    { StoreRegKind k = (K); return op_s_rrr_shr(s, a, OP, L, k); }      \
    static bool trans_##NAME##_rri(DisasContext *s, arg_s_rri_rot *a)   \
    { StoreRegKind k = (K); return op_s_rri_rot(s, a, OP, L, k); }
7711
/*
 * Define trans_NAME_rxri, trans_NAME_rxrr and trans_NAME_rxi for a
 * two-operand data processing insn (no rn source).  OP, L and K are
 * expanded inside each function body, so they may refer to "a";
 * K is evaluated into k before OP and L are used.
 */
#define DO_ANY2(NAME, OP, L, K)                                         \
    static bool trans_##NAME##_rxri(DisasContext *s, arg_s_rrr_shi *a)  \
    { StoreRegKind k = (K); return op_s_rxr_shi(s, a, OP, L, k); }      \
    static bool trans_##NAME##_rxrr(DisasContext *s, arg_s_rrr_shr *a)  \
    { StoreRegKind k = (K); return op_s_rxr_shr(s, a, OP, L, k); }      \
    static bool trans_##NAME##_rxi(DisasContext *s, arg_s_rri_rot *a)   \
    { StoreRegKind k = (K); return op_s_rxi_rot(s, a, OP, L, k); }
7719
/*
 * Define the three trans_<NAME>_* entry points of a compare/test insn
 * (_xrri, _xrrr, _xri).  The result is always passed with STREG_NONE,
 * i.e. these insns do not request a destination register write.
 */
#define DO_CMP2(NAME, OP, L)                                            \
    static bool trans_##NAME##_xrri(DisasContext *s, arg_s_rrr_shi *a)  \
    { return op_s_rrr_shi(s, a, OP, L, STREG_NONE); }                   \
    static bool trans_##NAME##_xrrr(DisasContext *s, arg_s_rrr_shr *a)  \
    { return op_s_rrr_shr(s, a, OP, L, STREG_NONE); }                   \
    static bool trans_##NAME##_xri(DisasContext *s, arg_s_rri_rot *a)   \
    { return op_s_rri_rot(s, a, OP, L, STREG_NONE); }
7727
7728	DO_ANY3(AND, tcg_gen_and_i32, a->s, STREG_NORMAL)
7729	DO_ANY3(EOR, tcg_gen_xor_i32, a->s, STREG_NORMAL)
7730	DO_ANY3(ORR, tcg_gen_or_i32, a->s, STREG_NORMAL)
7731	DO_ANY3(BIC, tcg_gen_andc_i32, a->s, STREG_NORMAL)
7732
7733	DO_ANY3(RSB, a->s ? gen_rsb_CC : gen_rsb, false, STREG_NORMAL)
7734	DO_ANY3(ADC, a->s ? gen_adc_CC : gen_add_carry, false, STREG_NORMAL)
7735	DO_ANY3(SBC, a->s ? gen_sbc_CC : gen_sub_carry, false, STREG_NORMAL)
7736	DO_ANY3(RSC, a->s ? gen_rsc_CC : gen_rsc, false, STREG_NORMAL)
7737
7738	DO_CMP2(TST, tcg_gen_and_i32, true)
7739	DO_CMP2(TEQ, tcg_gen_xor_i32, true)
7740	DO_CMP2(CMN, gen_add_CC, false)
7741	DO_CMP2(CMP, gen_sub_CC, false)
7742
7743	DO_ANY3(ADD, a->s ? gen_add_CC : tcg_gen_add_i32, false,
7744	a->rd == `13` && a->rn == `13` ? STREG_SP_CHECK : STREG_NORMAL)
7745
7746	/*
7747	* Note for the computation of StoreRegKind we return out of the
7748	* middle of the functions that are expanded by DO_ANY3, and that
7749	* we modify a->s via that parameter before it is used by OP.
7750	*/
7751	DO_ANY3(SUB, a->s ? gen_sub_CC : tcg_gen_sub_i32, false,
7752	({
7753	StoreRegKind ret = STREG_NORMAL;
7754	if (a->rd == `15` && a->s) {
7755	/*
7756	* See ALUExceptionReturn:
7757	* In User mode, UNPREDICTABLE; we choose UNDEF.
7758	* In Hyp mode, UNDEFINED.
7759	*/
7760	if (IS_USER(s) \|\| s->current_el == `2`) {
7761	unallocated_encoding(s);
7762	return true;
7763	}
7764	/ There is no writeback of nzcv to PSTATE. /
7765	a->s = `0`;
7766	ret = STREG_EXC_RET;
7767	} else if (a->rd == `13` && a->rn == `13`) {
7768	ret = STREG_SP_CHECK;
7769	}
7770	ret;
7771	}))
7772
7773	DO_ANY2(MOV, tcg_gen_mov_i32, a->s,
7774	({
7775	StoreRegKind ret = STREG_NORMAL;
7776	if (a->rd == `15` && a->s) {
7777	/*
7778	* See ALUExceptionReturn:
7779	* In User mode, UNPREDICTABLE; we choose UNDEF.
7780	* In Hyp mode, UNDEFINED.
7781	*/
7782	if (IS_USER(s) \|\| s->current_el == `2`) {
7783	unallocated_encoding(s);
7784	return true;
7785	}
7786	/ There is no writeback of nzcv to PSTATE. /
7787	a->s = `0`;
7788	ret = STREG_EXC_RET;
7789	} else if (a->rd == `13`) {
7790	ret = STREG_SP_CHECK;
7791	}
7792	ret;
7793	}))
7794
7795	DO_ANY2(MVN, tcg_gen_not_i32, a->s, STREG_NORMAL)
7796
7797	/*
7798	* ORN is only available with T32, so there is no register-shifted-register
7799	* form of the insn. Using the DO_ANY3 macro would create an unused function.
7800	*/
7801	static bool trans_ORN_rrri(DisasContext s, arg_s_rrr_shi a)
7802	{
7803	return op_s_rrr_shi(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7804	}
7805
7806	static bool trans_ORN_rri(DisasContext s, arg_s_rri_rot a)
7807	{
7808	return op_s_rri_rot(s, a, tcg_gen_orc_i32, a->s, STREG_NORMAL);
7809	}
7810
7811	#undef DO_ANY3
7812	#undef DO_ANY2
7813	#undef DO_CMP2
7814
7815	static bool trans_ADR(DisasContext s, arg_ri a)
7816	{
7817	store_reg_bx(s, a->rd, add_reg_for_lit(s, `15`, a->imm));
7818	return true;
7819	}
7820
7821	static bool trans_MOVW(DisasContext s, arg_MOVW a)
7822	{
7823	TCGv_i32 tmp;
7824
7825	if (!ENABLE_ARCH_6T2) {
7826	return false;
7827	}
7828
7829	tmp = tcg_const_i32(a->imm);
7830	store_reg(s, a->rd, tmp);
7831	return true;
7832	}
7833
7834	static bool trans_MOVT(DisasContext s, arg_MOVW a)
7835	{
7836	TCGv_i32 tmp;
7837
7838	if (!ENABLE_ARCH_6T2) {
7839	return false;
7840	}
7841
7842	tmp = load_reg(s, a->rd);
7843	tcg_gen_ext16u_i32(tmp, tmp);
7844	tcg_gen_ori_i32(tmp, tmp, a->imm << `16`);
7845	store_reg(s, a->rd, tmp);
7846	return true;
7847	}
7848
7849	/*
7850	* Multiply and multiply accumulate
7851	*/
7852
7853	static bool op_mla(DisasContext s, arg_s_rrrr a, bool add)
7854	{
7855	TCGv_i32 t1, t2;
7856
7857	t1 = load_reg(s, a->rn);
7858	t2 = load_reg(s, a->rm);
7859	tcg_gen_mul_i32(t1, t1, t2);
7860	tcg_temp_free_i32(t2);
7861	if (add) {
7862	t2 = load_reg(s, a->ra);
7863	tcg_gen_add_i32(t1, t1, t2);
7864	tcg_temp_free_i32(t2);
7865	}
7866	if (a->s) {
7867	gen_logic_CC(t1);
7868	}
7869	store_reg(s, a->rd, t1);
7870	return true;
7871	}
7872
7873	static bool trans_MUL(DisasContext s, arg_MUL a)
7874	{
7875	return op_mla(s, a, false);
7876	}
7877
7878	static bool trans_MLA(DisasContext s, arg_MLA a)
7879	{
7880	return op_mla(s, a, true);
7881	}
7882
7883	static bool trans_MLS(DisasContext s, arg_MLS a)
7884	{
7885	TCGv_i32 t1, t2;
7886
7887	if (!ENABLE_ARCH_6T2) {
7888	return false;
7889	}
7890	t1 = load_reg(s, a->rn);
7891	t2 = load_reg(s, a->rm);
7892	tcg_gen_mul_i32(t1, t1, t2);
7893	tcg_temp_free_i32(t2);
7894	t2 = load_reg(s, a->ra);
7895	tcg_gen_sub_i32(t1, t2, t1);
7896	tcg_temp_free_i32(t2);
7897	store_reg(s, a->rd, t1);
7898	return true;
7899	}
7900
7901	static bool op_mlal(DisasContext s, arg_s_rrrr a, bool uns, bool add)
7902	{
7903	TCGv_i32 t0, t1, t2, t3;
7904
7905	t0 = load_reg(s, a->rm);
7906	t1 = load_reg(s, a->rn);
7907	if (uns) {
7908	tcg_gen_mulu2_i32(t0, t1, t0, t1);
7909	} else {
7910	tcg_gen_muls2_i32(t0, t1, t0, t1);
7911	}
7912	if (add) {
7913	t2 = load_reg(s, a->ra);
7914	t3 = load_reg(s, a->rd);
7915	tcg_gen_add2_i32(t0, t1, t0, t1, t2, t3);
7916	tcg_temp_free_i32(t2);
7917	tcg_temp_free_i32(t3);
7918	}
7919	if (a->s) {
7920	gen_logicq_cc(t0, t1);
7921	}
7922	store_reg(s, a->ra, t0);
7923	store_reg(s, a->rd, t1);
7924	return true;
7925	}
7926
7927	static bool trans_UMULL(DisasContext s, arg_UMULL a)
7928	{
7929	return op_mlal(s, a, true, false);
7930	}
7931
7932	static bool trans_SMULL(DisasContext s, arg_SMULL a)
7933	{
7934	return op_mlal(s, a, false, false);
7935	}
7936
7937	static bool trans_UMLAL(DisasContext s, arg_UMLAL a)
7938	{
7939	return op_mlal(s, a, true, true);
7940	}
7941
7942	static bool trans_SMLAL(DisasContext s, arg_SMLAL a)
7943	{
7944	return op_mlal(s, a, false, true);
7945	}
7946
7947	static bool trans_UMAAL(DisasContext s, arg_UMAAL a)
7948	{
7949	TCGv_i32 t0, t1, t2, zero;
7950
7951	if (s->thumb
7952	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7953	: !ENABLE_ARCH_6) {
7954	return false;
7955	}
7956
7957	t0 = load_reg(s, a->rm);
7958	t1 = load_reg(s, a->rn);
7959	tcg_gen_mulu2_i32(t0, t1, t0, t1);
7960	zero = tcg_const_i32(`0`);
7961	t2 = load_reg(s, a->ra);
7962	tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7963	tcg_temp_free_i32(t2);
7964	t2 = load_reg(s, a->rd);
7965	tcg_gen_add2_i32(t0, t1, t0, t1, t2, zero);
7966	tcg_temp_free_i32(t2);
7967	tcg_temp_free_i32(zero);
7968	store_reg(s, a->ra, t0);
7969	store_reg(s, a->rd, t1);
7970	return true;
7971	}
7972
7973	/*
7974	* Saturating addition and subtraction
7975	*/
7976
7977	static bool op_qaddsub(DisasContext s, arg_rrr a, bool add, bool doub)
7978	{
7979	TCGv_i32 t0, t1;
7980
7981	if (s->thumb
7982	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
7983	: !ENABLE_ARCH_5TE) {
7984	return false;
7985	}
7986
7987	t0 = load_reg(s, a->rm);
7988	t1 = load_reg(s, a->rn);
7989	if (doub) {
7990	gen_helper_add_saturate(t1, cpu_env, t1, t1);
7991	}
7992	if (add) {
7993	gen_helper_add_saturate(t0, cpu_env, t0, t1);
7994	} else {
7995	gen_helper_sub_saturate(t0, cpu_env, t0, t1);
7996	}
7997	tcg_temp_free_i32(t1);
7998	store_reg(s, a->rd, t0);
7999	return true;
8000	}
8001
8002	#define DO_QADDSUB(NAME, ADD, DOUB) \
8003	static bool trans_##NAME(DisasContext s, arg_rrr a) \
8004	{ \
8005	return op_qaddsub(s, a, ADD, DOUB); \
8006	}
8007
8008	DO_QADDSUB(QADD, true, false)
8009	DO_QADDSUB(QSUB, false, false)
8010	DO_QADDSUB(QDADD, true, true)
8011	DO_QADDSUB(QDSUB, false, true)
8012
8013	#undef DO_QADDSUB
8014
8015	/*
8016	* Halfword multiply and multiply accumulate
8017	*/
8018
8019	static bool op_smlaxxx(DisasContext s, arg_rrrr a,
8020	int add_long, bool nt, bool mt)
8021	{
8022	TCGv_i32 t0, t1, tl, th;
8023
8024	if (s->thumb
8025	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
8026	: !ENABLE_ARCH_5TE) {
8027	return false;
8028	}
8029
8030	t0 = load_reg(s, a->rn);
8031	t1 = load_reg(s, a->rm);
8032	gen_mulxy(t0, t1, nt, mt);
8033	tcg_temp_free_i32(t1);
8034
8035	switch (add_long) {
8036	case `0`:
8037	store_reg(s, a->rd, t0);
8038	break;
8039	case `1`:
8040	t1 = load_reg(s, a->ra);
8041	gen_helper_add_setq(t0, cpu_env, t0, t1);
8042	tcg_temp_free_i32(t1);
8043	store_reg(s, a->rd, t0);
8044	break;
8045	case `2`:
8046	tl = load_reg(s, a->ra);
8047	th = load_reg(s, a->rd);
8048	t1 = tcg_const_i32(`0`);
8049	tcg_gen_add2_i32(tl, th, tl, th, t0, t1);
8050	tcg_temp_free_i32(t0);
8051	tcg_temp_free_i32(t1);
8052	store_reg(s, a->ra, tl);
8053	store_reg(s, a->rd, th);
8054	break;
8055	default:
8056	g_assert_not_reached();
8057	}
8058	return true;
8059	}
8060
8061	#define DO_SMLAX(NAME, add, nt, mt) \
8062	static bool trans_##NAME(DisasContext s, arg_rrrr a) \
8063	{ \
8064	return op_smlaxxx(s, a, add, nt, mt); \
8065	}
8066
8067	DO_SMLAX(SMULBB, `0`, `0`, `0`)
8068	DO_SMLAX(SMULBT, `0`, `0`, `1`)
8069	DO_SMLAX(SMULTB, `0`, `1`, `0`)
8070	DO_SMLAX(SMULTT, `0`, `1`, `1`)
8071
8072	DO_SMLAX(SMLABB, `1`, `0`, `0`)
8073	DO_SMLAX(SMLABT, `1`, `0`, `1`)
8074	DO_SMLAX(SMLATB, `1`, `1`, `0`)
8075	DO_SMLAX(SMLATT, `1`, `1`, `1`)
8076
8077	DO_SMLAX(SMLALBB, `2`, `0`, `0`)
8078	DO_SMLAX(SMLALBT, `2`, `0`, `1`)
8079	DO_SMLAX(SMLALTB, `2`, `1`, `0`)
8080	DO_SMLAX(SMLALTT, `2`, `1`, `1`)
8081
8082	#undef DO_SMLAX
8083
8084	static bool op_smlawx(DisasContext s, arg_rrrr a, bool add, bool mt)
8085	{
8086	TCGv_i32 t0, t1;
8087
8088	if (!ENABLE_ARCH_5TE) {
8089	return false;
8090	}
8091
8092	t0 = load_reg(s, a->rn);
8093	t1 = load_reg(s, a->rm);
8094	/*
8095	* Since the nominal result is product<47:16>, shift the 16-bit
8096	* input up by 16 bits, so that the result is at product<63:32>.
8097	*/
8098	if (mt) {
8099	tcg_gen_andi_i32(t1, t1, `0xffff0000`);
8100	} else {
8101	tcg_gen_shli_i32(t1, t1, `16`);
8102	}
8103	tcg_gen_muls2_i32(t0, t1, t0, t1);
8104	tcg_temp_free_i32(t0);
8105	if (add) {
8106	t0 = load_reg(s, a->ra);
8107	gen_helper_add_setq(t1, cpu_env, t1, t0);
8108	tcg_temp_free_i32(t0);
8109	}
8110	store_reg(s, a->rd, t1);
8111	return true;
8112	}
8113
8114	#define DO_SMLAWX(NAME, add, mt) \
8115	static bool trans_##NAME(DisasContext s, arg_rrrr a) \
8116	{ \
8117	return op_smlawx(s, a, add, mt); \
8118	}
8119
8120	DO_SMLAWX(SMULWB, `0`, `0`)
8121	DO_SMLAWX(SMULWT, `0`, `1`)
8122	DO_SMLAWX(SMLAWB, `1`, `0`)
8123	DO_SMLAWX(SMLAWT, `1`, `1`)
8124
8125	#undef DO_SMLAWX
8126
8127	/*
8128	* MSR (immediate) and hints
8129	*/
8130
8131	static bool trans_YIELD(DisasContext s, arg_YIELD a)
8132	{
8133	/*
8134	* When running single-threaded TCG code, use the helper to ensure that
8135	* the next round-robin scheduled vCPU gets a crack. When running in
8136	* MTTCG we don't generate jumps to the helper as it won't affect the
8137	* scheduling of other vCPUs.
8138	*/
8139	if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8140	gen_set_pc_im(s, s->base.pc_next);
8141	s->base.is_jmp = DISAS_YIELD;
8142	}
8143	return true;
8144	}
8145
8146	static bool trans_WFE(DisasContext s, arg_WFE a)
8147	{
8148	/*
8149	* When running single-threaded TCG code, use the helper to ensure that
8150	* the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
8151	* just skip this instruction. Currently the SEV/SEVL instructions,
8152	* which are one of many ways to wake the CPU from WFE, are not
8153	* implemented so we can't sleep like WFI does.
8154	*/
8155	if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
8156	gen_set_pc_im(s, s->base.pc_next);
8157	s->base.is_jmp = DISAS_WFE;
8158	}
8159	return true;
8160	}
8161
8162	static bool trans_WFI(DisasContext s, arg_WFI a)
8163	{
8164	/ For WFI, halt the vCPU until an IRQ. /
8165	gen_set_pc_im(s, s->base.pc_next);
8166	s->base.is_jmp = DISAS_WFI;
8167	return true;
8168	}
8169
8170	static bool trans_NOP(DisasContext s, arg_NOP a)
8171	{
8172	return true;
8173	}
8174
8175	static bool trans_MSR_imm(DisasContext s, arg_MSR_imm a)
8176	{
8177	uint32_t val = ror32(a->imm, a->rot * `2`);
8178	uint32_t mask = msr_mask(s, a->mask, a->r);
8179
8180	if (gen_set_psr_im(s, mask, a->r, val)) {
8181	unallocated_encoding(s);
8182	}
8183	return true;
8184	}
8185
8186	/*
8187	* Cyclic Redundancy Check
8188	*/
8189
8190	static bool op_crc32(DisasContext s, arg_rrr a, bool c, MemOp sz)
8191	{
8192	TCGv_i32 t1, t2, t3;
8193
8194	if (!dc_isar_feature(aa32_crc32, s)) {
8195	return false;
8196	}
8197
8198	t1 = load_reg(s, a->rn);
8199	t2 = load_reg(s, a->rm);
8200	switch (sz) {
8201	case MO_8:
8202	gen_uxtb(t2);
8203	break;
8204	case MO_16:
8205	gen_uxth(t2);
8206	break;
8207	case MO_32:
8208	break;
8209	default:
8210	g_assert_not_reached();
8211	}
8212	t3 = tcg_const_i32(`1` << sz);
8213	if (c) {
8214	gen_helper_crc32c(t1, t1, t2, t3);
8215	} else {
8216	gen_helper_crc32(t1, t1, t2, t3);
8217	}
8218	tcg_temp_free_i32(t2);
8219	tcg_temp_free_i32(t3);
8220	store_reg(s, a->rd, t1);
8221	return true;
8222	}
8223
8224	#define DO_CRC32(NAME, c, sz) \
8225	static bool trans_##NAME(DisasContext s, arg_rrr a) \
8226	{ return op_crc32(s, a, c, sz); }
8227
8228	DO_CRC32(CRC32B, false, MO_8)
8229	DO_CRC32(CRC32H, false, MO_16)
8230	DO_CRC32(CRC32W, false, MO_32)
8231	DO_CRC32(CRC32CB, true, MO_8)
8232	DO_CRC32(CRC32CH, true, MO_16)
8233	DO_CRC32(CRC32CW, true, MO_32)
8234
8235	#undef DO_CRC32
8236
8237	/*
8238	* Miscellaneous instructions
8239	*/
8240
8241	static bool trans_MRS_bank(DisasContext s, arg_MRS_bank a)
8242	{
8243	if (arm_dc_feature(s, ARM_FEATURE_M)) {
8244	return false;
8245	}
8246	gen_mrs_banked(s, a->r, a->sysm, a->rd);
8247	return true;
8248	}
8249
8250	static bool trans_MSR_bank(DisasContext s, arg_MSR_bank a)
8251	{
8252	if (arm_dc_feature(s, ARM_FEATURE_M)) {
8253	return false;
8254	}
8255	gen_msr_banked(s, a->r, a->sysm, a->rn);
8256	return true;
8257	}
8258
8259	static bool trans_MRS_reg(DisasContext s, arg_MRS_reg a)
8260	{
8261	TCGv_i32 tmp;
8262
8263	if (arm_dc_feature(s, ARM_FEATURE_M)) {
8264	return false;
8265	}
8266	if (a->r) {
8267	if (IS_USER(s)) {
8268	unallocated_encoding(s);
8269	return true;
8270	}
8271	tmp = load_cpu_field(spsr);
8272	} else {
8273	tmp = tcg_temp_new_i32();
8274	gen_helper_cpsr_read(tmp, cpu_env);
8275	}
8276	store_reg(s, a->rd, tmp);
8277	return true;
8278	}
8279
8280	static bool trans_MSR_reg(DisasContext s, arg_MSR_reg a)
8281	{
8282	TCGv_i32 tmp;
8283	uint32_t mask = msr_mask(s, a->mask, a->r);
8284
8285	if (arm_dc_feature(s, ARM_FEATURE_M)) {
8286	return false;
8287	}
8288	tmp = load_reg(s, a->rn);
8289	if (gen_set_psr(s, mask, a->r, tmp)) {
8290	unallocated_encoding(s);
8291	}
8292	return true;
8293	}
8294
8295	static bool trans_MRS_v7m(DisasContext s, arg_MRS_v7m a)
8296	{
8297	TCGv_i32 tmp;
8298
8299	if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8300	return false;
8301	}
8302	tmp = tcg_const_i32(a->sysm);
8303	gen_helper_v7m_mrs(tmp, cpu_env, tmp);
8304	store_reg(s, a->rd, tmp);
8305	return true;
8306	}
8307
8308	static bool trans_MSR_v7m(DisasContext s, arg_MSR_v7m a)
8309	{
8310	TCGv_i32 addr, reg;
8311
8312	if (!arm_dc_feature(s, ARM_FEATURE_M)) {
8313	return false;
8314	}
8315	addr = tcg_const_i32((a->mask << `10`) \| a->sysm);
8316	reg = load_reg(s, a->rn);
8317	gen_helper_v7m_msr(cpu_env, addr, reg);
8318	tcg_temp_free_i32(addr);
8319	tcg_temp_free_i32(reg);
8320	gen_lookup_tb(s);
8321	return true;
8322	}
8323
8324	static bool trans_BX(DisasContext s, arg_BX a)
8325	{
8326	if (!ENABLE_ARCH_4T) {
8327	return false;
8328	}
8329	gen_bx_excret(s, load_reg(s, a->rm));
8330	return true;
8331	}
8332
8333	static bool trans_BXJ(DisasContext s, arg_BXJ a)
8334	{
8335	if (!ENABLE_ARCH_5J \|\| arm_dc_feature(s, ARM_FEATURE_M)) {
8336	return false;
8337	}
8338	/ Trivial implementation equivalent to bx. /
8339	gen_bx(s, load_reg(s, a->rm));
8340	return true;
8341	}
8342
8343	static bool trans_BLX_r(DisasContext s, arg_BLX_r a)
8344	{
8345	TCGv_i32 tmp;
8346
8347	if (!ENABLE_ARCH_5) {
8348	return false;
8349	}
8350	tmp = load_reg(s, a->rm);
8351	tcg_gen_movi_i32(cpu_R[`14`], s->base.pc_next \| s->thumb);
8352	gen_bx(s, tmp);
8353	return true;
8354	}
8355
8356	/*
8357	* BXNS/BLXNS: only exist for v8M with the security extensions,
8358	* and always UNDEF if NonSecure. We don't implement these in
8359	* the user-only mode either (in theory you can use them from
8360	* Secure User mode but they are too tied in to system emulation).
8361	*/
8362	static bool trans_BXNS(DisasContext s, arg_BXNS a)
8363	{
8364	if (!s->v8m_secure \|\| IS_USER_ONLY) {
8365	unallocated_encoding(s);
8366	} else {
8367	gen_bxns(s, a->rm);
8368	}
8369	return true;
8370	}
8371
8372	static bool trans_BLXNS(DisasContext s, arg_BLXNS a)
8373	{
8374	if (!s->v8m_secure \|\| IS_USER_ONLY) {
8375	unallocated_encoding(s);
8376	} else {
8377	gen_blxns(s, a->rm);
8378	}
8379	return true;
8380	}
8381
8382	static bool trans_CLZ(DisasContext s, arg_CLZ a)
8383	{
8384	TCGv_i32 tmp;
8385
8386	if (!ENABLE_ARCH_5) {
8387	return false;
8388	}
8389	tmp = load_reg(s, a->rm);
8390	tcg_gen_clzi_i32(tmp, tmp, `32`);
8391	store_reg(s, a->rd, tmp);
8392	return true;
8393	}
8394
8395	static bool trans_ERET(DisasContext s, arg_ERET a)
8396	{
8397	TCGv_i32 tmp;
8398
8399	if (!arm_dc_feature(s, ARM_FEATURE_V7VE)) {
8400	return false;
8401	}
8402	if (IS_USER(s)) {
8403	unallocated_encoding(s);
8404	return true;
8405	}
8406	if (s->current_el == `2`) {
8407	/ ERET from Hyp uses ELR_Hyp, not LR /
8408	tmp = load_cpu_field(elr_el[`2`]);
8409	} else {
8410	tmp = load_reg(s, `14`);
8411	}
8412	gen_exception_return(s, tmp);
8413	return true;
8414	}
8415
8416	static bool trans_HLT(DisasContext s, arg_HLT a)
8417	{
8418	gen_hlt(s, a->imm);
8419	return true;
8420	}
8421
8422	static bool trans_BKPT(DisasContext s, arg_BKPT a)
8423	{
8424	if (!ENABLE_ARCH_5) {
8425	return false;
8426	}
8427	gen_exception_bkpt_insn(s, syn_aa32_bkpt(a->imm, false));
8428	return true;
8429	}
8430
8431	static bool trans_HVC(DisasContext s, arg_HVC a)
8432	{
8433	if (!ENABLE_ARCH_7 \|\| arm_dc_feature(s, ARM_FEATURE_M)) {
8434	return false;
8435	}
8436	if (IS_USER(s)) {
8437	unallocated_encoding(s);
8438	} else {
8439	gen_hvc(s, a->imm);
8440	}
8441	return true;
8442	}
8443
8444	static bool trans_SMC(DisasContext s, arg_SMC a)
8445	{
8446	if (!ENABLE_ARCH_6K \|\| arm_dc_feature(s, ARM_FEATURE_M)) {
8447	return false;
8448	}
8449	if (IS_USER(s)) {
8450	unallocated_encoding(s);
8451	} else {
8452	gen_smc(s);
8453	}
8454	return true;
8455	}
8456
8457	static bool trans_SG(DisasContext s, arg_SG a)
8458	{
8459	if (!arm_dc_feature(s, ARM_FEATURE_M) \|\|
8460	!arm_dc_feature(s, ARM_FEATURE_V8)) {
8461	return false;
8462	}
8463	/*
8464	* SG (v8M only)
8465	* The bulk of the behaviour for this instruction is implemented
8466	* in v7m_handle_execute_nsc(), which deals with the insn when
8467	* it is executed by a CPU in non-secure state from memory
8468	* which is Secure & NonSecure-Callable.
8469	* Here we only need to handle the remaining cases:
8470	* * in NS memory (including the "security extension not
8471	* implemented" case) : NOP
8472	* * in S memory but CPU already secure (clear IT bits)
8473	* We know that the attribute for the memory this insn is
8474	* in must match the current CPU state, because otherwise
8475	* get_phys_addr_pmsav8 would have generated an exception.
8476	*/
8477	if (s->v8m_secure) {
8478	/ Like the IT insn, we don't need to generate any code /
8479	s->condexec_cond = `0`;
8480	s->condexec_mask = `0`;
8481	}
8482	return true;
8483	}
8484
8485	static bool trans_TT(DisasContext s, arg_TT a)
8486	{
8487	TCGv_i32 addr, tmp;
8488
8489	if (!arm_dc_feature(s, ARM_FEATURE_M) \|\|
8490	!arm_dc_feature(s, ARM_FEATURE_V8)) {
8491	return false;
8492	}
8493	if (a->rd == `13` \|\| a->rd == `15` \|\| a->rn == `15`) {
8494	/ We UNDEF for these UNPREDICTABLE cases /
8495	unallocated_encoding(s);
8496	return true;
8497	}
8498	if (a->A && !s->v8m_secure) {
8499	/ This case is UNDEFINED. /
8500	unallocated_encoding(s);
8501	return true;
8502	}
8503
8504	addr = load_reg(s, a->rn);
8505	tmp = tcg_const_i32((a->A << `1`) \| a->T);
8506	gen_helper_v7m_tt(tmp, cpu_env, addr, tmp);
8507	tcg_temp_free_i32(addr);
8508	store_reg(s, a->rd, tmp);
8509	return true;
8510	}
8511
8512	/*
8513	* Load/store register index
8514	*/
8515
8516	static ISSInfo make_issinfo(DisasContext s, int* rd, bool p, bool w)
8517	{
8518	ISSInfo ret;
8519
8520	/ ISS not valid if writeback /
8521	if (p && !w) {
8522	ret = rd;
8523	} else {
8524	ret = ISSInvalid;
8525	}
8526	return ret;
8527	}
8528
8529	static TCGv_i32 op_addr_rr_pre(DisasContext s, arg_ldst_rr a)
8530	{
8531	TCGv_i32 addr = load_reg(s, a->rn);
8532
8533	if (s->v8m_stackcheck && a->rn == `13` && a->w) {
8534	gen_helper_v8m_stackcheck(cpu_env, addr);
8535	}
8536
8537	if (a->p) {
8538	TCGv_i32 ofs = load_reg(s, a->rm);
8539	gen_arm_shift_im(ofs, a->shtype, a->shimm, `0`);
8540	if (a->u) {
8541	tcg_gen_add_i32(addr, addr, ofs);
8542	} else {
8543	tcg_gen_sub_i32(addr, addr, ofs);
8544	}
8545	tcg_temp_free_i32(ofs);
8546	}
8547	return addr;
8548	}
8549
8550	static void op_addr_rr_post(DisasContext s, arg_ldst_rr a,
8551	TCGv_i32 addr, int address_offset)
8552	{
8553	if (!a->p) {
8554	TCGv_i32 ofs = load_reg(s, a->rm);
8555	gen_arm_shift_im(ofs, a->shtype, a->shimm, `0`);
8556	if (a->u) {
8557	tcg_gen_add_i32(addr, addr, ofs);
8558	} else {
8559	tcg_gen_sub_i32(addr, addr, ofs);
8560	}
8561	tcg_temp_free_i32(ofs);
8562	} else if (!a->w) {
8563	tcg_temp_free_i32(addr);
8564	return;
8565	}
8566	tcg_gen_addi_i32(addr, addr, address_offset);
8567	store_reg(s, a->rn, addr);
8568	}
8569
8570	static bool op_load_rr(DisasContext s, arg_ldst_rr a,
8571	MemOp mop, int mem_idx)
8572	{
8573	ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8574	TCGv_i32 addr, tmp;
8575
8576	addr = op_addr_rr_pre(s, a);
8577
8578	tmp = tcg_temp_new_i32();
8579	gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop \| s->be_data);
8580	disas_set_da_iss(s, mop, issinfo);
8581
8582	/*
8583	* Perform base writeback before the loaded value to
8584	* ensure correct behavior with overlapping index registers.
8585	*/
8586	op_addr_rr_post(s, a, addr, `0`);
8587	store_reg_from_load(s, a->rt, tmp);
8588	return true;
8589	}
8590
8591	static bool op_store_rr(DisasContext s, arg_ldst_rr a,
8592	MemOp mop, int mem_idx)
8593	{
8594	ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) \| ISSIsWrite;
8595	TCGv_i32 addr, tmp;
8596
8597	addr = op_addr_rr_pre(s, a);
8598
8599	tmp = load_reg(s, a->rt);
8600	gen_aa32_st_i32(s, tmp, addr, mem_idx, mop \| s->be_data);
8601	disas_set_da_iss(s, mop, issinfo);
8602	tcg_temp_free_i32(tmp);
8603
8604	op_addr_rr_post(s, a, addr, `0`);
8605	return true;
8606	}
8607
8608	static bool trans_LDRD_rr(DisasContext s, arg_ldst_rr a)
8609	{
8610	int mem_idx = get_mem_index(s);
8611	TCGv_i32 addr, tmp;
8612
8613	if (!ENABLE_ARCH_5TE) {
8614	return false;
8615	}
8616	if (a->rt & `1`) {
8617	unallocated_encoding(s);
8618	return true;
8619	}
8620	addr = op_addr_rr_pre(s, a);
8621
8622	tmp = tcg_temp_new_i32();
8623	gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8624	store_reg(s, a->rt, tmp);
8625
8626	tcg_gen_addi_i32(addr, addr, `4`);
8627
8628	tmp = tcg_temp_new_i32();
8629	gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8630	store_reg(s, a->rt + `1`, tmp);
8631
8632	/ LDRD w/ base writeback is undefined if the registers overlap. /
8633	op_addr_rr_post(s, a, addr, -`4`);
8634	return true;
8635	}
8636
8637	static bool trans_STRD_rr(DisasContext s, arg_ldst_rr a)
8638	{
8639	int mem_idx = get_mem_index(s);
8640	TCGv_i32 addr, tmp;
8641
8642	if (!ENABLE_ARCH_5TE) {
8643	return false;
8644	}
8645	if (a->rt & `1`) {
8646	unallocated_encoding(s);
8647	return true;
8648	}
8649	addr = op_addr_rr_pre(s, a);
8650
8651	tmp = load_reg(s, a->rt);
8652	gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8653	tcg_temp_free_i32(tmp);
8654
8655	tcg_gen_addi_i32(addr, addr, `4`);
8656
8657	tmp = load_reg(s, a->rt + `1`);
8658	gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8659	tcg_temp_free_i32(tmp);
8660
8661	op_addr_rr_post(s, a, addr, -`4`);
8662	return true;
8663	}
8664
8665	/*
8666	* Load/store immediate index
8667	*/
8668
8669	static TCGv_i32 op_addr_ri_pre(DisasContext s, arg_ldst_ri a)
8670	{
8671	int ofs = a->imm;
8672
8673	if (!a->u) {
8674	ofs = -ofs;
8675	}
8676
8677	if (s->v8m_stackcheck && a->rn == `13` && a->w) {
8678	/*
8679	* Stackcheck. Here we know 'addr' is the current SP;
8680	* U is set if we're moving SP up, else down. It is
8681	* UNKNOWN whether the limit check triggers when SP starts
8682	* below the limit and ends up above it; we chose to do so.
8683	*/
8684	if (!a->u) {
8685	TCGv_i32 newsp = tcg_temp_new_i32();
8686	tcg_gen_addi_i32(newsp, cpu_R[`13`], ofs);
8687	gen_helper_v8m_stackcheck(cpu_env, newsp);
8688	tcg_temp_free_i32(newsp);
8689	} else {
8690	gen_helper_v8m_stackcheck(cpu_env, cpu_R[`13`]);
8691	}
8692	}
8693
8694	return add_reg_for_lit(s, a->rn, a->p ? ofs : `0`);
8695	}
8696
8697	static void op_addr_ri_post(DisasContext s, arg_ldst_ri a,
8698	TCGv_i32 addr, int address_offset)
8699	{
8700	if (!a->p) {
8701	if (a->u) {
8702	address_offset += a->imm;
8703	} else {
8704	address_offset -= a->imm;
8705	}
8706	} else if (!a->w) {
8707	tcg_temp_free_i32(addr);
8708	return;
8709	}
8710	tcg_gen_addi_i32(addr, addr, address_offset);
8711	store_reg(s, a->rn, addr);
8712	}
8713
8714	static bool op_load_ri(DisasContext s, arg_ldst_ri a,
8715	MemOp mop, int mem_idx)
8716	{
8717	ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w);
8718	TCGv_i32 addr, tmp;
8719
8720	addr = op_addr_ri_pre(s, a);
8721
8722	tmp = tcg_temp_new_i32();
8723	gen_aa32_ld_i32(s, tmp, addr, mem_idx, mop \| s->be_data);
8724	disas_set_da_iss(s, mop, issinfo);
8725
8726	/*
8727	* Perform base writeback before the loaded value to
8728	* ensure correct behavior with overlapping index registers.
8729	*/
8730	op_addr_ri_post(s, a, addr, `0`);
8731	store_reg_from_load(s, a->rt, tmp);
8732	return true;
8733	}
8734
8735	static bool op_store_ri(DisasContext s, arg_ldst_ri a,
8736	MemOp mop, int mem_idx)
8737	{
8738	ISSInfo issinfo = make_issinfo(s, a->rt, a->p, a->w) \| ISSIsWrite;
8739	TCGv_i32 addr, tmp;
8740
8741	addr = op_addr_ri_pre(s, a);
8742
8743	tmp = load_reg(s, a->rt);
8744	gen_aa32_st_i32(s, tmp, addr, mem_idx, mop \| s->be_data);
8745	disas_set_da_iss(s, mop, issinfo);
8746	tcg_temp_free_i32(tmp);
8747
8748	op_addr_ri_post(s, a, addr, `0`);
8749	return true;
8750	}
8751
8752	static bool op_ldrd_ri(DisasContext s, arg_ldst_ri a, int rt2)
8753	{
8754	int mem_idx = get_mem_index(s);
8755	TCGv_i32 addr, tmp;
8756
8757	addr = op_addr_ri_pre(s, a);
8758
8759	tmp = tcg_temp_new_i32();
8760	gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8761	store_reg(s, a->rt, tmp);
8762
8763	tcg_gen_addi_i32(addr, addr, `4`);
8764
8765	tmp = tcg_temp_new_i32();
8766	gen_aa32_ld_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8767	store_reg(s, rt2, tmp);
8768
8769	/ LDRD w/ base writeback is undefined if the registers overlap. /
8770	op_addr_ri_post(s, a, addr, -`4`);
8771	return true;
8772	}
8773
8774	static bool trans_LDRD_ri_a32(DisasContext s, arg_ldst_ri a)
8775	{
8776	if (!ENABLE_ARCH_5TE \|\| (a->rt & `1`)) {
8777	return false;
8778	}
8779	return op_ldrd_ri(s, a, a->rt + `1`);
8780	}
8781
8782	static bool trans_LDRD_ri_t32(DisasContext s, arg_ldst_ri2 a)
8783	{
8784	arg_ldst_ri b = {
8785	.u = a->u, .w = a->w, .p = a->p,
8786	.rn = a->rn, .rt = a->rt, .imm = a->imm
8787	};
8788	return op_ldrd_ri(s, &b, a->rt2);
8789	}
8790
8791	static bool op_strd_ri(DisasContext s, arg_ldst_ri a, int rt2)
8792	{
8793	int mem_idx = get_mem_index(s);
8794	TCGv_i32 addr, tmp;
8795
8796	addr = op_addr_ri_pre(s, a);
8797
8798	tmp = load_reg(s, a->rt);
8799	gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8800	tcg_temp_free_i32(tmp);
8801
8802	tcg_gen_addi_i32(addr, addr, `4`);
8803
8804	tmp = load_reg(s, rt2);
8805	gen_aa32_st_i32(s, tmp, addr, mem_idx, MO_UL \| s->be_data);
8806	tcg_temp_free_i32(tmp);
8807
8808	op_addr_ri_post(s, a, addr, -`4`);
8809	return true;
8810	}
8811
8812	static bool trans_STRD_ri_a32(DisasContext s, arg_ldst_ri a)
8813	{
8814	if (!ENABLE_ARCH_5TE \|\| (a->rt & `1`)) {
8815	return false;
8816	}
8817	return op_strd_ri(s, a, a->rt + `1`);
8818	}
8819
8820	static bool trans_STRD_ri_t32(DisasContext s, arg_ldst_ri2 a)
8821	{
8822	arg_ldst_ri b = {
8823	.u = a->u, .w = a->w, .p = a->p,
8824	.rn = a->rn, .rt = a->rt, .imm = a->imm
8825	};
8826	return op_strd_ri(s, &b, a->rt2);
8827	}
8828
8829	#define DO_LDST(NAME, WHICH, MEMOP) \
8830	static bool trans_##NAME##_ri(DisasContext s, arg_ldst_ri a) \
8831	{ \
8832	return op_##WHICH##_ri(s, a, MEMOP, get_mem_index(s)); \
8833	} \
8834	static bool trans_##NAME##T_ri(DisasContext s, arg_ldst_ri a) \
8835	{ \
8836	return op_##WHICH##_ri(s, a, MEMOP, get_a32_user_mem_index(s)); \
8837	} \
8838	static bool trans_##NAME##_rr(DisasContext s, arg_ldst_rr a) \
8839	{ \
8840	return op_##WHICH##_rr(s, a, MEMOP, get_mem_index(s)); \
8841	} \
8842	static bool trans_##NAME##T_rr(DisasContext s, arg_ldst_rr a) \
8843	{ \
8844	return op_##WHICH##_rr(s, a, MEMOP, get_a32_user_mem_index(s)); \
8845	}
8846
8847	DO_LDST(LDR, load, MO_UL)
8848	DO_LDST(LDRB, load, MO_UB)
8849	DO_LDST(LDRH, load, MO_UW)
8850	DO_LDST(LDRSB, load, MO_SB)
8851	DO_LDST(LDRSH, load, MO_SW)
8852
8853	DO_LDST(STR, store, MO_UL)
8854	DO_LDST(STRB, store, MO_UB)
8855	DO_LDST(STRH, store, MO_UW)
8856
8857	#undef DO_LDST
8858
8859	/*
8860	* Synchronization primitives
8861	*/
8862
8863	static bool op_swp(DisasContext s, arg_SWP a, MemOp opc)
8864	{
8865	TCGv_i32 addr, tmp;
8866	TCGv taddr;
8867
8868	opc \|= s->be_data;
8869	addr = load_reg(s, a->rn);
8870	taddr = gen_aa32_addr(s, addr, opc);
8871	tcg_temp_free_i32(addr);
8872
8873	tmp = load_reg(s, a->rt2);
8874	tcg_gen_atomic_xchg_i32(tmp, taddr, tmp, get_mem_index(s), opc);
8875	tcg_temp_free(taddr);
8876
8877	store_reg(s, a->rt, tmp);
8878	return true;
8879	}
8880
8881	static bool trans_SWP(DisasContext s, arg_SWP a)
8882	{
8883	return op_swp(s, a, MO_UL \| MO_ALIGN);
8884	}
8885
8886	static bool trans_SWPB(DisasContext s, arg_SWP a)
8887	{
8888	return op_swp(s, a, MO_UB);
8889	}
8890
8891	/*
8892	* Load/Store Exclusive and Load-Acquire/Store-Release
8893	*/
8894
8895	static bool op_strex(DisasContext s, arg_STREX a, MemOp mop, bool rel)
8896	{
8897	TCGv_i32 addr;
8898
8899	/ We UNDEF for these UNPREDICTABLE cases. /
8900	if (a->rd == `15` \|\| a->rn == `15` \|\| a->rt == `15`
8901	\|\| a->rd == a->rn \|\| a->rd == a->rt
8902	\|\| (s->thumb && (a->rd == `13` \|\| a->rt == `13`))
8903	\|\| (mop == MO_64
8904	&& (a->rt2 == `15`
8905	\|\| a->rd == a->rt2 \|\| a->rt == a->rt2
8906	\|\| (s->thumb && a->rt2 == `13`)))) {
8907	unallocated_encoding(s);
8908	return true;
8909	}
8910
8911	if (rel) {
8912	tcg_gen_mb(TCG_MO_ALL \| TCG_BAR_STRL);
8913	}
8914
8915	addr = tcg_temp_local_new_i32();
8916	load_reg_var(s, addr, a->rn);
8917	tcg_gen_addi_i32(addr, addr, a->imm);
8918
8919	gen_store_exclusive(s, a->rd, a->rt, a->rt2, addr, mop);
8920	tcg_temp_free_i32(addr);
8921	return true;
8922	}
8923
8924	static bool trans_STREX(DisasContext s, arg_STREX a)
8925	{
8926	if (!ENABLE_ARCH_6) {
8927	return false;
8928	}
8929	return op_strex(s, a, MO_32, false);
8930	}
8931
8932	static bool trans_STREXD_a32(DisasContext s, arg_STREX a)
8933	{
8934	if (!ENABLE_ARCH_6K) {
8935	return false;
8936	}
8937	/ We UNDEF for these UNPREDICTABLE cases. /
8938	if (a->rt & `1`) {
8939	unallocated_encoding(s);
8940	return true;
8941	}
8942	a->rt2 = a->rt + `1`;
8943	return op_strex(s, a, MO_64, false);
8944	}
8945
8946	static bool trans_STREXD_t32(DisasContext s, arg_STREX a)
8947	{
8948	return op_strex(s, a, MO_64, false);
8949	}
8950
8951	static bool trans_STREXB(DisasContext s, arg_STREX a)
8952	{
8953	if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8954	return false;
8955	}
8956	return op_strex(s, a, MO_8, false);
8957	}
8958
8959	static bool trans_STREXH(DisasContext s, arg_STREX a)
8960	{
8961	if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
8962	return false;
8963	}
8964	return op_strex(s, a, MO_16, false);
8965	}
8966
8967	static bool trans_STLEX(DisasContext s, arg_STREX a)
8968	{
8969	if (!ENABLE_ARCH_8) {
8970	return false;
8971	}
8972	return op_strex(s, a, MO_32, true);
8973	}
8974
8975	static bool trans_STLEXD_a32(DisasContext s, arg_STREX a)
8976	{
8977	if (!ENABLE_ARCH_8) {
8978	return false;
8979	}
8980	/ We UNDEF for these UNPREDICTABLE cases. /
8981	if (a->rt & `1`) {
8982	unallocated_encoding(s);
8983	return true;
8984	}
8985	a->rt2 = a->rt + `1`;
8986	return op_strex(s, a, MO_64, true);
8987	}
8988
8989	static bool trans_STLEXD_t32(DisasContext s, arg_STREX a)
8990	{
8991	if (!ENABLE_ARCH_8) {
8992	return false;
8993	}
8994	return op_strex(s, a, MO_64, true);
8995	}
8996
8997	static bool trans_STLEXB(DisasContext s, arg_STREX a)
8998	{
8999	if (!ENABLE_ARCH_8) {
9000	return false;
9001	}
9002	return op_strex(s, a, MO_8, true);
9003	}
9004
9005	static bool trans_STLEXH(DisasContext s, arg_STREX a)
9006	{
9007	if (!ENABLE_ARCH_8) {
9008	return false;
9009	}
9010	return op_strex(s, a, MO_16, true);
9011	}
9012
9013	static bool op_stl(DisasContext s, arg_STL a, MemOp mop)
9014	{
9015	TCGv_i32 addr, tmp;
9016
9017	if (!ENABLE_ARCH_8) {
9018	return false;
9019	}
9020	/ We UNDEF for these UNPREDICTABLE cases. /
9021	if (a->rn == `15` \|\| a->rt == `15`) {
9022	unallocated_encoding(s);
9023	return true;
9024	}
9025
9026	addr = load_reg(s, a->rn);
9027	tmp = load_reg(s, a->rt);
9028	tcg_gen_mb(TCG_MO_ALL \| TCG_BAR_STRL);
9029	gen_aa32_st_i32(s, tmp, addr, get_mem_index(s), mop \| s->be_data);
9030	disas_set_da_iss(s, mop, a->rt \| ISSIsAcqRel \| ISSIsWrite);
9031
9032	tcg_temp_free_i32(tmp);
9033	tcg_temp_free_i32(addr);
9034	return true;
9035	}
9036
9037	static bool trans_STL(DisasContext s, arg_STL a)
9038	{
9039	return op_stl(s, a, MO_UL);
9040	}
9041
9042	static bool trans_STLB(DisasContext s, arg_STL a)
9043	{
9044	return op_stl(s, a, MO_UB);
9045	}
9046
9047	static bool trans_STLH(DisasContext s, arg_STL a)
9048	{
9049	return op_stl(s, a, MO_UW);
9050	}
9051
9052	static bool op_ldrex(DisasContext s, arg_LDREX a, MemOp mop, bool acq)
9053	{
9054	TCGv_i32 addr;
9055
9056	/ We UNDEF for these UNPREDICTABLE cases. /
9057	if (a->rn == `15` \|\| a->rt == `15`
9058	\|\| (s->thumb && a->rt == `13`)
9059	\|\| (mop == MO_64
9060	&& (a->rt2 == `15` \|\| a->rt == a->rt2
9061	\|\| (s->thumb && a->rt2 == `13`)))) {
9062	unallocated_encoding(s);
9063	return true;
9064	}
9065
9066	addr = tcg_temp_local_new_i32();
9067	load_reg_var(s, addr, a->rn);
9068	tcg_gen_addi_i32(addr, addr, a->imm);
9069
9070	gen_load_exclusive(s, a->rt, a->rt2, addr, mop);
9071	tcg_temp_free_i32(addr);
9072
9073	if (acq) {
9074	tcg_gen_mb(TCG_MO_ALL \| TCG_BAR_LDAQ);
9075	}
9076	return true;
9077	}
9078
9079	static bool trans_LDREX(DisasContext s, arg_LDREX a)
9080	{
9081	if (!ENABLE_ARCH_6) {
9082	return false;
9083	}
9084	return op_ldrex(s, a, MO_32, false);
9085	}
9086
9087	static bool trans_LDREXD_a32(DisasContext s, arg_LDREX a)
9088	{
9089	if (!ENABLE_ARCH_6K) {
9090	return false;
9091	}
9092	/ We UNDEF for these UNPREDICTABLE cases. /
9093	if (a->rt & `1`) {
9094	unallocated_encoding(s);
9095	return true;
9096	}
9097	a->rt2 = a->rt + `1`;
9098	return op_ldrex(s, a, MO_64, false);
9099	}
9100
9101	static bool trans_LDREXD_t32(DisasContext s, arg_LDREX a)
9102	{
9103	return op_ldrex(s, a, MO_64, false);
9104	}
9105
9106	static bool trans_LDREXB(DisasContext s, arg_LDREX a)
9107	{
9108	if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9109	return false;
9110	}
9111	return op_ldrex(s, a, MO_8, false);
9112	}
9113
9114	static bool trans_LDREXH(DisasContext s, arg_LDREX a)
9115	{
9116	if (s->thumb ? !ENABLE_ARCH_7 : !ENABLE_ARCH_6K) {
9117	return false;
9118	}
9119	return op_ldrex(s, a, MO_16, false);
9120	}
9121
9122	static bool trans_LDAEX(DisasContext s, arg_LDREX a)
9123	{
9124	if (!ENABLE_ARCH_8) {
9125	return false;
9126	}
9127	return op_ldrex(s, a, MO_32, true);
9128	}
9129
9130	static bool trans_LDAEXD_a32(DisasContext s, arg_LDREX a)
9131	{
9132	if (!ENABLE_ARCH_8) {
9133	return false;
9134	}
9135	/ We UNDEF for these UNPREDICTABLE cases. /
9136	if (a->rt & `1`) {
9137	unallocated_encoding(s);
9138	return true;
9139	}
9140	a->rt2 = a->rt + `1`;
9141	return op_ldrex(s, a, MO_64, true);
9142	}
9143
9144	static bool trans_LDAEXD_t32(DisasContext s, arg_LDREX a)
9145	{
9146	if (!ENABLE_ARCH_8) {
9147	return false;
9148	}
9149	return op_ldrex(s, a, MO_64, true);
9150	}
9151
9152	static bool trans_LDAEXB(DisasContext s, arg_LDREX a)
9153	{
9154	if (!ENABLE_ARCH_8) {
9155	return false;
9156	}
9157	return op_ldrex(s, a, MO_8, true);
9158	}
9159
9160	static bool trans_LDAEXH(DisasContext s, arg_LDREX a)
9161	{
9162	if (!ENABLE_ARCH_8) {
9163	return false;
9164	}
9165	return op_ldrex(s, a, MO_16, true);
9166	}
9167
9168	static bool op_lda(DisasContext s, arg_LDA a, MemOp mop)
9169	{
9170	TCGv_i32 addr, tmp;
9171
9172	if (!ENABLE_ARCH_8) {
9173	return false;
9174	}
9175	/ We UNDEF for these UNPREDICTABLE cases. /
9176	if (a->rn == `15` \|\| a->rt == `15`) {
9177	unallocated_encoding(s);
9178	return true;
9179	}
9180
9181	addr = load_reg(s, a->rn);
9182	tmp = tcg_temp_new_i32();
9183	gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s), mop \| s->be_data);
9184	disas_set_da_iss(s, mop, a->rt \| ISSIsAcqRel);
9185	tcg_temp_free_i32(addr);
9186
9187	store_reg(s, a->rt, tmp);
9188	tcg_gen_mb(TCG_MO_ALL \| TCG_BAR_STRL);
9189	return true;
9190	}
9191
9192	static bool trans_LDA(DisasContext s, arg_LDA a)
9193	{
9194	return op_lda(s, a, MO_UL);
9195	}
9196
9197	static bool trans_LDAB(DisasContext s, arg_LDA a)
9198	{
9199	return op_lda(s, a, MO_UB);
9200	}
9201
9202	static bool trans_LDAH(DisasContext s, arg_LDA a)
9203	{
9204	return op_lda(s, a, MO_UW);
9205	}
9206
9207	/*
9208	* Media instructions
9209	*/
9210
9211	static bool trans_USADA8(DisasContext s, arg_USADA8 a)
9212	{
9213	TCGv_i32 t1, t2;
9214
9215	if (!ENABLE_ARCH_6) {
9216	return false;
9217	}
9218
9219	t1 = load_reg(s, a->rn);
9220	t2 = load_reg(s, a->rm);
9221	gen_helper_usad8(t1, t1, t2);
9222	tcg_temp_free_i32(t2);
9223	if (a->ra != `15`) {
9224	t2 = load_reg(s, a->ra);
9225	tcg_gen_add_i32(t1, t1, t2);
9226	tcg_temp_free_i32(t2);
9227	}
9228	store_reg(s, a->rd, t1);
9229	return true;
9230	}
9231
9232	static bool op_bfx(DisasContext s, arg_UBFX a, bool u)
9233	{
9234	TCGv_i32 tmp;
9235	int width = a->widthm1 + `1`;
9236	int shift = a->lsb;
9237
9238	if (!ENABLE_ARCH_6T2) {
9239	return false;
9240	}
9241	if (shift + width > `32`) {
9242	/ UNPREDICTABLE; we choose to UNDEF /
9243	unallocated_encoding(s);
9244	return true;
9245	}
9246
9247	tmp = load_reg(s, a->rn);
9248	if (u) {
9249	tcg_gen_extract_i32(tmp, tmp, shift, width);
9250	} else {
9251	tcg_gen_sextract_i32(tmp, tmp, shift, width);
9252	}
9253	store_reg(s, a->rd, tmp);
9254	return true;
9255	}
9256
9257	static bool trans_SBFX(DisasContext s, arg_SBFX a)
9258	{
9259	return op_bfx(s, a, false);
9260	}
9261
9262	static bool trans_UBFX(DisasContext s, arg_UBFX a)
9263	{
9264	return op_bfx(s, a, true);
9265	}
9266
9267	static bool trans_BFCI(DisasContext s, arg_BFCI a)
9268	{
9269	TCGv_i32 tmp;
9270	int msb = a->msb, lsb = a->lsb;
9271	int width;
9272
9273	if (!ENABLE_ARCH_6T2) {
9274	return false;
9275	}
9276	if (msb < lsb) {
9277	/ UNPREDICTABLE; we choose to UNDEF /
9278	unallocated_encoding(s);
9279	return true;
9280	}
9281
9282	width = msb + `1` - lsb;
9283	if (a->rn == `15`) {
9284	/ BFC /
9285	tmp = tcg_const_i32(`0`);
9286	} else {
9287	/ BFI /
9288	tmp = load_reg(s, a->rn);
9289	}
9290	if (width != `32`) {
9291	TCGv_i32 tmp2 = load_reg(s, a->rd);
9292	tcg_gen_deposit_i32(tmp, tmp2, tmp, lsb, width);
9293	tcg_temp_free_i32(tmp2);
9294	}
9295	store_reg(s, a->rd, tmp);
9296	return true;
9297	}
9298
9299	static bool trans_UDF(DisasContext s, arg_UDF a)
9300	{
9301	unallocated_encoding(s);
9302	return true;
9303	}
9304
9305	/*
9306	* Parallel addition and subtraction
9307	*/
9308
9309	static bool op_par_addsub(DisasContext s, arg_rrr a,
9310	void (*gen)(TCGv_i32, TCGv_i32, TCGv_i32))
9311	{
9312	TCGv_i32 t0, t1;
9313
9314	if (s->thumb
9315	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9316	: !ENABLE_ARCH_6) {
9317	return false;
9318	}
9319
9320	t0 = load_reg(s, a->rn);
9321	t1 = load_reg(s, a->rm);
9322
9323	gen(t0, t0, t1);
9324
9325	tcg_temp_free_i32(t1);
9326	store_reg(s, a->rd, t0);
9327	return true;
9328	}
9329
9330	static bool op_par_addsub_ge(DisasContext s, arg_rrr a,
9331	void (*gen)(TCGv_i32, TCGv_i32,
9332	TCGv_i32, TCGv_ptr))
9333	{
9334	TCGv_i32 t0, t1;
9335	TCGv_ptr ge;
9336
9337	if (s->thumb
9338	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9339	: !ENABLE_ARCH_6) {
9340	return false;
9341	}
9342
9343	t0 = load_reg(s, a->rn);
9344	t1 = load_reg(s, a->rm);
9345
9346	ge = tcg_temp_new_ptr();
9347	tcg_gen_addi_ptr(ge, cpu_env, offsetof(CPUARMState, GE));
9348	gen(t0, t0, t1, ge);
9349
9350	tcg_temp_free_ptr(ge);
9351	tcg_temp_free_i32(t1);
9352	store_reg(s, a->rd, t0);
9353	return true;
9354	}
9355
9356	#define DO_PAR_ADDSUB(NAME, helper) \
9357	static bool trans_##NAME(DisasContext s, arg_rrr a) \
9358	{ \
9359	return op_par_addsub(s, a, helper); \
9360	}
9361
9362	#define DO_PAR_ADDSUB_GE(NAME, helper) \
9363	static bool trans_##NAME(DisasContext s, arg_rrr a) \
9364	{ \
9365	return op_par_addsub_ge(s, a, helper); \
9366	}
9367
9368	DO_PAR_ADDSUB_GE(SADD16, gen_helper_sadd16)
9369	DO_PAR_ADDSUB_GE(SASX, gen_helper_saddsubx)
9370	DO_PAR_ADDSUB_GE(SSAX, gen_helper_ssubaddx)
9371	DO_PAR_ADDSUB_GE(SSUB16, gen_helper_ssub16)
9372	DO_PAR_ADDSUB_GE(SADD8, gen_helper_sadd8)
9373	DO_PAR_ADDSUB_GE(SSUB8, gen_helper_ssub8)
9374
9375	DO_PAR_ADDSUB_GE(UADD16, gen_helper_uadd16)
9376	DO_PAR_ADDSUB_GE(UASX, gen_helper_uaddsubx)
9377	DO_PAR_ADDSUB_GE(USAX, gen_helper_usubaddx)
9378	DO_PAR_ADDSUB_GE(USUB16, gen_helper_usub16)
9379	DO_PAR_ADDSUB_GE(UADD8, gen_helper_uadd8)
9380	DO_PAR_ADDSUB_GE(USUB8, gen_helper_usub8)
9381
9382	DO_PAR_ADDSUB(QADD16, gen_helper_qadd16)
9383	DO_PAR_ADDSUB(QASX, gen_helper_qaddsubx)
9384	DO_PAR_ADDSUB(QSAX, gen_helper_qsubaddx)
9385	DO_PAR_ADDSUB(QSUB16, gen_helper_qsub16)
9386	DO_PAR_ADDSUB(QADD8, gen_helper_qadd8)
9387	DO_PAR_ADDSUB(QSUB8, gen_helper_qsub8)
9388
9389	DO_PAR_ADDSUB(UQADD16, gen_helper_uqadd16)
9390	DO_PAR_ADDSUB(UQASX, gen_helper_uqaddsubx)
9391	DO_PAR_ADDSUB(UQSAX, gen_helper_uqsubaddx)
9392	DO_PAR_ADDSUB(UQSUB16, gen_helper_uqsub16)
9393	DO_PAR_ADDSUB(UQADD8, gen_helper_uqadd8)
9394	DO_PAR_ADDSUB(UQSUB8, gen_helper_uqsub8)
9395
9396	DO_PAR_ADDSUB(SHADD16, gen_helper_shadd16)
9397	DO_PAR_ADDSUB(SHASX, gen_helper_shaddsubx)
9398	DO_PAR_ADDSUB(SHSAX, gen_helper_shsubaddx)
9399	DO_PAR_ADDSUB(SHSUB16, gen_helper_shsub16)
9400	DO_PAR_ADDSUB(SHADD8, gen_helper_shadd8)
9401	DO_PAR_ADDSUB(SHSUB8, gen_helper_shsub8)
9402
9403	DO_PAR_ADDSUB(UHADD16, gen_helper_uhadd16)
9404	DO_PAR_ADDSUB(UHASX, gen_helper_uhaddsubx)
9405	DO_PAR_ADDSUB(UHSAX, gen_helper_uhsubaddx)
9406	DO_PAR_ADDSUB(UHSUB16, gen_helper_uhsub16)
9407	DO_PAR_ADDSUB(UHADD8, gen_helper_uhadd8)
9408	DO_PAR_ADDSUB(UHSUB8, gen_helper_uhsub8)
9409
9410	#undef DO_PAR_ADDSUB
9411	#undef DO_PAR_ADDSUB_GE
9412
9413	/*
9414	* Packing, unpacking, saturation, and reversal
9415	*/
9416
9417	static bool trans_PKH(DisasContext s, arg_PKH a)
9418	{
9419	TCGv_i32 tn, tm;
9420	int shift = a->imm;
9421
9422	if (s->thumb
9423	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9424	: !ENABLE_ARCH_6) {
9425	return false;
9426	}
9427
9428	tn = load_reg(s, a->rn);
9429	tm = load_reg(s, a->rm);
9430	if (a->tb) {
9431	/ PKHTB /
9432	if (shift == `0`) {
9433	shift = `31`;
9434	}
9435	tcg_gen_sari_i32(tm, tm, shift);
9436	tcg_gen_deposit_i32(tn, tn, tm, `0`, `16`);
9437	} else {
9438	/ PKHBT /
9439	tcg_gen_shli_i32(tm, tm, shift);
9440	tcg_gen_deposit_i32(tn, tm, tn, `0`, `16`);
9441	}
9442	tcg_temp_free_i32(tm);
9443	store_reg(s, a->rd, tn);
9444	return true;
9445	}
9446
9447	static bool op_sat(DisasContext s, arg_sat a,
9448	void (*gen)(TCGv_i32, TCGv_env, TCGv_i32, TCGv_i32))
9449	{
9450	TCGv_i32 tmp, satimm;
9451	int shift = a->imm;
9452
9453	if (!ENABLE_ARCH_6) {
9454	return false;
9455	}
9456
9457	tmp = load_reg(s, a->rn);
9458	if (a->sh) {
9459	tcg_gen_sari_i32(tmp, tmp, shift ? shift : `31`);
9460	} else {
9461	tcg_gen_shli_i32(tmp, tmp, shift);
9462	}
9463
9464	satimm = tcg_const_i32(a->satimm);
9465	gen(tmp, cpu_env, tmp, satimm);
9466	tcg_temp_free_i32(satimm);
9467
9468	store_reg(s, a->rd, tmp);
9469	return true;
9470	}
9471
9472	static bool trans_SSAT(DisasContext s, arg_sat a)
9473	{
9474	return op_sat(s, a, gen_helper_ssat);
9475	}
9476
9477	static bool trans_USAT(DisasContext s, arg_sat a)
9478	{
9479	return op_sat(s, a, gen_helper_usat);
9480	}
9481
9482	static bool trans_SSAT16(DisasContext s, arg_sat a)
9483	{
9484	if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9485	return false;
9486	}
9487	return op_sat(s, a, gen_helper_ssat16);
9488	}
9489
9490	static bool trans_USAT16(DisasContext s, arg_sat a)
9491	{
9492	if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9493	return false;
9494	}
9495	return op_sat(s, a, gen_helper_usat16);
9496	}
9497
9498	static bool op_xta(DisasContext s, arg_rrr_rot a,
9499	void (*gen_extract)(TCGv_i32, TCGv_i32),
9500	void (*gen_add)(TCGv_i32, TCGv_i32, TCGv_i32))
9501	{
9502	TCGv_i32 tmp;
9503
9504	if (!ENABLE_ARCH_6) {
9505	return false;
9506	}
9507
9508	tmp = load_reg(s, a->rm);
9509	/*
9510	* TODO: In many cases we could do a shift instead of a rotate.
9511	* Combined with a simple extend, that becomes an extract.
9512	*/
9513	tcg_gen_rotri_i32(tmp, tmp, a->rot * `8`);
9514	gen_extract(tmp, tmp);
9515
9516	if (a->rn != `15`) {
9517	TCGv_i32 tmp2 = load_reg(s, a->rn);
9518	gen_add(tmp, tmp, tmp2);
9519	tcg_temp_free_i32(tmp2);
9520	}
9521	store_reg(s, a->rd, tmp);
9522	return true;
9523	}
9524
9525	static bool trans_SXTAB(DisasContext s, arg_rrr_rot a)
9526	{
9527	return op_xta(s, a, tcg_gen_ext8s_i32, tcg_gen_add_i32);
9528	}
9529
9530	static bool trans_SXTAH(DisasContext s, arg_rrr_rot a)
9531	{
9532	return op_xta(s, a, tcg_gen_ext16s_i32, tcg_gen_add_i32);
9533	}
9534
9535	static bool trans_SXTAB16(DisasContext s, arg_rrr_rot a)
9536	{
9537	if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9538	return false;
9539	}
9540	return op_xta(s, a, gen_helper_sxtb16, gen_add16);
9541	}
9542
9543	static bool trans_UXTAB(DisasContext s, arg_rrr_rot a)
9544	{
9545	return op_xta(s, a, tcg_gen_ext8u_i32, tcg_gen_add_i32);
9546	}
9547
9548	static bool trans_UXTAH(DisasContext s, arg_rrr_rot a)
9549	{
9550	return op_xta(s, a, tcg_gen_ext16u_i32, tcg_gen_add_i32);
9551	}
9552
9553	static bool trans_UXTAB16(DisasContext s, arg_rrr_rot a)
9554	{
9555	if (s->thumb && !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)) {
9556	return false;
9557	}
9558	return op_xta(s, a, gen_helper_uxtb16, gen_add16);
9559	}
9560
9561	static bool trans_SEL(DisasContext s, arg_rrr a)
9562	{
9563	TCGv_i32 t1, t2, t3;
9564
9565	if (s->thumb
9566	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9567	: !ENABLE_ARCH_6) {
9568	return false;
9569	}
9570
9571	t1 = load_reg(s, a->rn);
9572	t2 = load_reg(s, a->rm);
9573	t3 = tcg_temp_new_i32();
9574	tcg_gen_ld_i32(t3, cpu_env, offsetof(CPUARMState, GE));
9575	gen_helper_sel_flags(t1, t3, t1, t2);
9576	tcg_temp_free_i32(t3);
9577	tcg_temp_free_i32(t2);
9578	store_reg(s, a->rd, t1);
9579	return true;
9580	}
9581
9582	static bool op_rr(DisasContext s, arg_rr a,
9583	void (*gen)(TCGv_i32, TCGv_i32))
9584	{
9585	TCGv_i32 tmp;
9586
9587	tmp = load_reg(s, a->rm);
9588	gen(tmp, tmp);
9589	store_reg(s, a->rd, tmp);
9590	return true;
9591	}
9592
9593	static bool trans_REV(DisasContext s, arg_rr a)
9594	{
9595	if (!ENABLE_ARCH_6) {
9596	return false;
9597	}
9598	return op_rr(s, a, tcg_gen_bswap32_i32);
9599	}
9600
9601	static bool trans_REV16(DisasContext s, arg_rr a)
9602	{
9603	if (!ENABLE_ARCH_6) {
9604	return false;
9605	}
9606	return op_rr(s, a, gen_rev16);
9607	}
9608
9609	static bool trans_REVSH(DisasContext s, arg_rr a)
9610	{
9611	if (!ENABLE_ARCH_6) {
9612	return false;
9613	}
9614	return op_rr(s, a, gen_revsh);
9615	}
9616
9617	static bool trans_RBIT(DisasContext s, arg_rr a)
9618	{
9619	if (!ENABLE_ARCH_6T2) {
9620	return false;
9621	}
9622	return op_rr(s, a, gen_helper_rbit);
9623	}
9624
9625	/*
9626	* Signed multiply, signed and unsigned divide
9627	*/
9628
9629	static bool op_smlad(DisasContext s, arg_rrrr a, bool m_swap, bool sub)
9630	{
9631	TCGv_i32 t1, t2;
9632
9633	if (!ENABLE_ARCH_6) {
9634	return false;
9635	}
9636
9637	t1 = load_reg(s, a->rn);
9638	t2 = load_reg(s, a->rm);
9639	if (m_swap) {
9640	gen_swap_half(t2);
9641	}
9642	gen_smul_dual(t1, t2);
9643
9644	if (sub) {
9645	/ This subtraction cannot overflow. /
9646	tcg_gen_sub_i32(t1, t1, t2);
9647	} else {
9648	/*
9649	* This addition cannot overflow 32 bits; however it may
9650	* overflow considered as a signed operation, in which case
9651	* we must set the Q flag.
9652	*/
9653	gen_helper_add_setq(t1, cpu_env, t1, t2);
9654	}
9655	tcg_temp_free_i32(t2);
9656
9657	if (a->ra != `15`) {
9658	t2 = load_reg(s, a->ra);
9659	gen_helper_add_setq(t1, cpu_env, t1, t2);
9660	tcg_temp_free_i32(t2);
9661	}
9662	store_reg(s, a->rd, t1);
9663	return true;
9664	}
9665
9666	static bool trans_SMLAD(DisasContext s, arg_rrrr a)
9667	{
9668	return op_smlad(s, a, false, false);
9669	}
9670
9671	static bool trans_SMLADX(DisasContext s, arg_rrrr a)
9672	{
9673	return op_smlad(s, a, true, false);
9674	}
9675
9676	static bool trans_SMLSD(DisasContext s, arg_rrrr a)
9677	{
9678	return op_smlad(s, a, false, true);
9679	}
9680
9681	static bool trans_SMLSDX(DisasContext s, arg_rrrr a)
9682	{
9683	return op_smlad(s, a, true, true);
9684	}
9685
9686	static bool op_smlald(DisasContext s, arg_rrrr a, bool m_swap, bool sub)
9687	{
9688	TCGv_i32 t1, t2;
9689	TCGv_i64 l1, l2;
9690
9691	if (!ENABLE_ARCH_6) {
9692	return false;
9693	}
9694
9695	t1 = load_reg(s, a->rn);
9696	t2 = load_reg(s, a->rm);
9697	if (m_swap) {
9698	gen_swap_half(t2);
9699	}
9700	gen_smul_dual(t1, t2);
9701
9702	l1 = tcg_temp_new_i64();
9703	l2 = tcg_temp_new_i64();
9704	tcg_gen_ext_i32_i64(l1, t1);
9705	tcg_gen_ext_i32_i64(l2, t2);
9706	tcg_temp_free_i32(t1);
9707	tcg_temp_free_i32(t2);
9708
9709	if (sub) {
9710	tcg_gen_sub_i64(l1, l1, l2);
9711	} else {
9712	tcg_gen_add_i64(l1, l1, l2);
9713	}
9714	tcg_temp_free_i64(l2);
9715
9716	gen_addq(s, l1, a->ra, a->rd);
9717	gen_storeq_reg(s, a->ra, a->rd, l1);
9718	tcg_temp_free_i64(l1);
9719	return true;
9720	}
9721
9722	static bool trans_SMLALD(DisasContext s, arg_rrrr a)
9723	{
9724	return op_smlald(s, a, false, false);
9725	}
9726
9727	static bool trans_SMLALDX(DisasContext s, arg_rrrr a)
9728	{
9729	return op_smlald(s, a, true, false);
9730	}
9731
9732	static bool trans_SMLSLD(DisasContext s, arg_rrrr a)
9733	{
9734	return op_smlald(s, a, false, true);
9735	}
9736
9737	static bool trans_SMLSLDX(DisasContext s, arg_rrrr a)
9738	{
9739	return op_smlald(s, a, true, true);
9740	}
9741
9742	static bool op_smmla(DisasContext s, arg_rrrr a, bool round, bool sub)
9743	{
9744	TCGv_i32 t1, t2;
9745
9746	if (s->thumb
9747	? !arm_dc_feature(s, ARM_FEATURE_THUMB_DSP)
9748	: !ENABLE_ARCH_6) {
9749	return false;
9750	}
9751
9752	t1 = load_reg(s, a->rn);
9753	t2 = load_reg(s, a->rm);
9754	tcg_gen_muls2_i32(t2, t1, t1, t2);
9755
9756	if (a->ra != `15`) {
9757	TCGv_i32 t3 = load_reg(s, a->ra);
9758	if (sub) {
9759	/*
9760	* For SMMLS, we need a 64-bit subtract. Borrow caused by
9761	* a non-zero multiplicand lowpart, and the correct result
9762	* lowpart for rounding.
9763	*/
9764	TCGv_i32 zero = tcg_const_i32(`0`);
9765	tcg_gen_sub2_i32(t2, t1, zero, t3, t2, t1);
9766	tcg_temp_free_i32(zero);
9767	} else {
9768	tcg_gen_add_i32(t1, t1, t3);
9769	}
9770	tcg_temp_free_i32(t3);
9771	}
9772	if (round) {
9773	/*
9774	* Adding 0x80000000 to the 64-bit quantity means that we have
9775	* carry in to the high word when the low word has the msb set.
9776	*/
9777	tcg_gen_shri_i32(t2, t2, `31`);
9778	tcg_gen_add_i32(t1, t1, t2);
9779	}
9780	tcg_temp_free_i32(t2);
9781	store_reg(s, a->rd, t1);
9782	return true;
9783	}
9784
9785	static bool trans_SMMLA(DisasContext s, arg_rrrr a)
9786	{
9787	return op_smmla(s, a, false, false);
9788	}
9789
9790	static bool trans_SMMLAR(DisasContext s, arg_rrrr a)
9791	{
9792	return op_smmla(s, a, true, false);
9793	}
9794
9795	static bool trans_SMMLS(DisasContext s, arg_rrrr a)
9796	{
9797	return op_smmla(s, a, false, true);
9798	}
9799
9800	static bool trans_SMMLSR(DisasContext s, arg_rrrr a)
9801	{
9802	return op_smmla(s, a, true, true);
9803	}
9804
9805	static bool op_div(DisasContext s, arg_rrr a, bool u)
9806	{
9807	TCGv_i32 t1, t2;
9808
9809	if (s->thumb
9810	? !dc_isar_feature(thumb_div, s)
9811	: !dc_isar_feature(arm_div, s)) {
9812	return false;
9813	}
9814
9815	t1 = load_reg(s, a->rn);
9816	t2 = load_reg(s, a->rm);
9817	if (u) {
9818	gen_helper_udiv(t1, t1, t2);
9819	} else {
9820	gen_helper_sdiv(t1, t1, t2);
9821	}
9822	tcg_temp_free_i32(t2);
9823	store_reg(s, a->rd, t1);
9824	return true;
9825	}
9826
9827	static bool trans_SDIV(DisasContext s, arg_rrr a)
9828	{
9829	return op_div(s, a, false);
9830	}
9831
9832	static bool trans_UDIV(DisasContext s, arg_rrr a)
9833	{
9834	return op_div(s, a, true);
9835	}
9836
9837	/*
9838	* Block data transfer
9839	*/
9840
9841	static TCGv_i32 op_addr_block_pre(DisasContext s, arg_ldst_block a, int n)
9842	{
9843	TCGv_i32 addr = load_reg(s, a->rn);
9844
9845	if (a->b) {
9846	if (a->i) {
9847	/ pre increment /
9848	tcg_gen_addi_i32(addr, addr, `4`);
9849	} else {
9850	/ pre decrement /
9851	tcg_gen_addi_i32(addr, addr, -(n * `4`));
9852	}
9853	} else if (!a->i && n != `1`) {
9854	/ post decrement /
9855	tcg_gen_addi_i32(addr, addr, -((n - `1`) * `4`));
9856	}
9857
9858	if (s->v8m_stackcheck && a->rn == `13` && a->w) {
9859	/*
9860	* If the writeback is incrementing SP rather than
9861	* decrementing it, and the initial SP is below the
9862	* stack limit but the final written-back SP would
9863	* be above, then then we must not perform any memory
9864	* accesses, but it is IMPDEF whether we generate
9865	* an exception. We choose to do so in this case.
9866	* At this point 'addr' is the lowest address, so
9867	* either the original SP (if incrementing) or our
9868	* final SP (if decrementing), so that's what we check.
9869	*/
9870	gen_helper_v8m_stackcheck(cpu_env, addr);
9871	}
9872
9873	return addr;
9874	}
9875
9876	static void op_addr_block_post(DisasContext s, arg_ldst_block a,
9877	TCGv_i32 addr, int n)
9878	{
9879	if (a->w) {
9880	/ write back /
9881	if (!a->b) {
9882	if (a->i) {
9883	/ post increment /
9884	tcg_gen_addi_i32(addr, addr, `4`);
9885	} else {
9886	/ post decrement /
9887	tcg_gen_addi_i32(addr, addr, -(n * `4`));
9888	}
9889	} else if (!a->i && n != `1`) {
9890	/ pre decrement /
9891	tcg_gen_addi_i32(addr, addr, -((n - `1`) * `4`));
9892	}
9893	store_reg(s, a->rn, addr);
9894	} else {
9895	tcg_temp_free_i32(addr);
9896	}
9897	}
9898
9899	static bool op_stm(DisasContext s, arg_ldst_block a, int min_n)
9900	{
9901	int i, j, n, list, mem_idx;
9902	bool user = a->u;
9903	TCGv_i32 addr, tmp, tmp2;
9904
9905	if (user) {
9906	/ STM (user) /
9907	if (IS_USER(s)) {
9908	/ Only usable in supervisor mode. /
9909	unallocated_encoding(s);
9910	return true;
9911	}
9912	}
9913
9914	list = a->list;
9915	n = ctpop16(list);
9916	if (n < min_n \|\| a->rn == `15`) {
9917	unallocated_encoding(s);
9918	return true;
9919	}
9920
9921	addr = op_addr_block_pre(s, a, n);
9922	mem_idx = get_mem_index(s);
9923
9924	for (i = j = `0`; i < `16`; i++) {
9925	if (!(list & (`1` << i))) {
9926	continue;
9927	}
9928
9929	if (user && i != `15`) {
9930	tmp = tcg_temp_new_i32();
9931	tmp2 = tcg_const_i32(i);
9932	gen_helper_get_user_reg(tmp, cpu_env, tmp2);
9933	tcg_temp_free_i32(tmp2);
9934	} else {
9935	tmp = load_reg(s, i);
9936	}
9937	gen_aa32_st32(s, tmp, addr, mem_idx);
9938	tcg_temp_free_i32(tmp);
9939
9940	/ No need to add after the last transfer. /
9941	if (++j != n) {
9942	tcg_gen_addi_i32(addr, addr, `4`);
9943	}
9944	}
9945
9946	op_addr_block_post(s, a, addr, n);
9947	return true;
9948	}
9949
9950	static bool trans_STM(DisasContext s, arg_ldst_block a)
9951	{
9952	/ BitCount(list) < 1 is UNPREDICTABLE /
9953	return op_stm(s, a, `1`);
9954	}
9955
9956	static bool trans_STM_t32(DisasContext s, arg_ldst_block a)
9957	{
9958	/ Writeback register in register list is UNPREDICTABLE for T32. /
9959	if (a->w && (a->list & (`1` << a->rn))) {
9960	unallocated_encoding(s);
9961	return true;
9962	}
9963	/ BitCount(list) < 2 is UNPREDICTABLE /
9964	return op_stm(s, a, `2`);
9965	}
9966
9967	static bool do_ldm(DisasContext s, arg_ldst_block a, int min_n)
9968	{
9969	int i, j, n, list, mem_idx;
9970	bool loaded_base;
9971	bool user = a->u;
9972	bool exc_return = false;
9973	TCGv_i32 addr, tmp, tmp2, loaded_var;
9974
9975	if (user) {
9976	/ LDM (user), LDM (exception return) /
9977	if (IS_USER(s)) {
9978	/ Only usable in supervisor mode. /
9979	unallocated_encoding(s);
9980	return true;
9981	}
9982	if (extract32(a->list, `15`, `1`)) {
9983	exc_return = true;
9984	user = false;
9985	} else {
9986	/ LDM (user) does not allow writeback. /
9987	if (a->w) {
9988	unallocated_encoding(s);
9989	return true;
9990	}
9991	}
9992	}
9993
9994	list = a->list;
9995	n = ctpop16(list);
9996	if (n < min_n \|\| a->rn == `15`) {
9997	unallocated_encoding(s);
9998	return true;
9999	}
10000
10001	addr = op_addr_block_pre(s, a, n);
10002	mem_idx = get_mem_index(s);
10003	loaded_base = false;
10004	loaded_var = NULL;
10005
10006	for (i = j = `0`; i < `16`; i++) {
10007	if (!(list & (`1` << i))) {
10008	continue;
10009	}
10010
10011	tmp = tcg_temp_new_i32();
10012	gen_aa32_ld32u(s, tmp, addr, mem_idx);
10013	if (user) {
10014	tmp2 = tcg_const_i32(i);
10015	gen_helper_set_user_reg(cpu_env, tmp2, tmp);
10016	tcg_temp_free_i32(tmp2);
10017	tcg_temp_free_i32(tmp);
10018	} else if (i == a->rn) {
10019	loaded_var = tmp;
10020	loaded_base = true;
10021	} else if (i == `15` && exc_return) {
10022	store_pc_exc_ret(s, tmp);
10023	} else {
10024	store_reg_from_load(s, i, tmp);
10025	}
10026
10027	/ No need to add after the last transfer. /
10028	if (++j != n) {
10029	tcg_gen_addi_i32(addr, addr, `4`);
10030	}
10031	}
10032
10033	op_addr_block_post(s, a, addr, n);
10034
10035	if (loaded_base) {
10036	/ Note that we reject base == pc above. /
10037	store_reg(s, a->rn, loaded_var);
10038	}
10039
10040	if (exc_return) {
10041	/ Restore CPSR from SPSR. /
10042	tmp = load_cpu_field(spsr);
10043	if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10044	gen_io_start();
10045	}
10046	gen_helper_cpsr_write_eret(cpu_env, tmp);
10047	if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) {
10048	gen_io_end();
10049	}
10050	tcg_temp_free_i32(tmp);
10051	/ Must exit loop to check un-masked IRQs /
10052	s->base.is_jmp = DISAS_EXIT;
10053	}
10054	return true;
10055	}
10056
10057	static bool trans_LDM_a32(DisasContext s, arg_ldst_block a)
10058	{
10059	/*
10060	* Writeback register in register list is UNPREDICTABLE
10061	* for ArchVersion() >= 7. Prior to v7, A32 would write
10062	* an UNKNOWN value to the base register.
10063	*/
10064	if (ENABLE_ARCH_7 && a->w && (a->list & (`1` << a->rn))) {
10065	unallocated_encoding(s);
10066	return true;
10067	}
10068	/ BitCount(list) < 1 is UNPREDICTABLE /
10069	return do_ldm(s, a, `1`);
10070	}
10071
10072	static bool trans_LDM_t32(DisasContext s, arg_ldst_block a)
10073	{
10074	/ Writeback register in register list is UNPREDICTABLE for T32. /
10075	if (a->w && (a->list & (`1` << a->rn))) {
10076	unallocated_encoding(s);
10077	return true;
10078	}
10079	/ BitCount(list) < 2 is UNPREDICTABLE /
10080	return do_ldm(s, a, `2`);
10081	}
10082
10083	static bool trans_LDM_t16(DisasContext s, arg_ldst_block a)
10084	{
10085	/ Writeback is conditional on the base register not being loaded. /
10086	a->w = !(a->list & (`1` << a->rn));
10087	/ BitCount(list) < 1 is UNPREDICTABLE /
10088	return do_ldm(s, a, `1`);
10089	}
10090
10091	/*
10092	* Branch, branch with link
10093	*/
10094
10095	static bool trans_B(DisasContext s, arg_i a)
10096	{
10097	gen_jmp(s, read_pc(s) + a->imm);
10098	return true;
10099	}
10100
10101	static bool trans_B_cond_thumb(DisasContext s, arg_ci a)
10102	{
10103	/ This has cond from encoding, required to be outside IT block. /
10104	if (a->cond >= `0xe`) {
10105	return false;
10106	}
10107	if (s->condexec_mask) {
10108	unallocated_encoding(s);
10109	return true;
10110	}
10111	arm_skip_unless(s, a->cond);
10112	gen_jmp(s, read_pc(s) + a->imm);
10113	return true;
10114	}
10115
10116	static bool trans_BL(DisasContext s, arg_i a)
10117	{
10118	tcg_gen_movi_i32(cpu_R[`14`], s->base.pc_next \| s->thumb);
10119	gen_jmp(s, read_pc(s) + a->imm);
10120	return true;
10121	}
10122
10123	static bool trans_BLX_i(DisasContext s, arg_BLX_i a)
10124	{
10125	TCGv_i32 tmp;
10126
10127	/ For A32, ARCH(5) is checked near the start of the uncond block. /
10128	if (s->thumb && (a->imm & `2`)) {
10129	return false;
10130	}
10131	tcg_gen_movi_i32(cpu_R[`14`], s->base.pc_next \| s->thumb);
10132	tmp = tcg_const_i32(!s->thumb);
10133	store_cpu_field(tmp, thumb);
10134	gen_jmp(s, (read_pc(s) & ~`3`) + a->imm);
10135	return true;
10136	}
10137
10138	static bool trans_BL_BLX_prefix(DisasContext s, arg_BL_BLX_prefix a)
10139	{
10140	assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10141	tcg_gen_movi_i32(cpu_R[`14`], read_pc(s) + (a->imm << `12`));
10142	return true;
10143	}
10144
10145	static bool trans_BL_suffix(DisasContext s, arg_BL_suffix a)
10146	{
10147	TCGv_i32 tmp = tcg_temp_new_i32();
10148
10149	assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10150	tcg_gen_addi_i32(tmp, cpu_R[`14`], (a->imm << `1`) \| `1`);
10151	tcg_gen_movi_i32(cpu_R[`14`], s->base.pc_next \| `1`);
10152	gen_bx(s, tmp);
10153	return true;
10154	}
10155
10156	static bool trans_BLX_suffix(DisasContext s, arg_BLX_suffix a)
10157	{
10158	TCGv_i32 tmp;
10159
10160	assert(!arm_dc_feature(s, ARM_FEATURE_THUMB2));
10161	if (!ENABLE_ARCH_5) {
10162	return false;
10163	}
10164	tmp = tcg_temp_new_i32();
10165	tcg_gen_addi_i32(tmp, cpu_R[`14`], a->imm << `1`);
10166	tcg_gen_andi_i32(tmp, tmp, `0xfffffffc`);
10167	tcg_gen_movi_i32(cpu_R[`14`], s->base.pc_next \| `1`);
10168	gen_bx(s, tmp);
10169	return true;
10170	}
10171
10172	static bool op_tbranch(DisasContext s, arg_tbranch a, bool half)
10173	{
10174	TCGv_i32 addr, tmp;
10175
10176	tmp = load_reg(s, a->rm);
10177	if (half) {
10178	tcg_gen_add_i32(tmp, tmp, tmp);
10179	}
10180	addr = load_reg(s, a->rn);
10181	tcg_gen_add_i32(addr, addr, tmp);
10182
10183	gen_aa32_ld_i32(s, tmp, addr, get_mem_index(s),
10184	half ? MO_UW \| s->be_data : MO_UB);
10185	tcg_temp_free_i32(addr);
10186
10187	tcg_gen_add_i32(tmp, tmp, tmp);
10188	tcg_gen_addi_i32(tmp, tmp, read_pc(s));
10189	store_reg(s, `15`, tmp);
10190	return true;
10191	}
10192
10193	static bool trans_TBB(DisasContext s, arg_tbranch a)
10194	{
10195	return op_tbranch(s, a, false);
10196	}
10197
10198	static bool trans_TBH(DisasContext s, arg_tbranch a)
10199	{
10200	return op_tbranch(s, a, true);
10201	}
10202
10203	static bool trans_CBZ(DisasContext s, arg_CBZ a)
10204	{
10205	TCGv_i32 tmp = load_reg(s, a->rn);
10206
10207	arm_gen_condlabel(s);
10208	tcg_gen_brcondi_i32(a->nz ? TCG_COND_EQ : TCG_COND_NE,
10209	tmp, `0`, s->condlabel);
10210	tcg_temp_free_i32(tmp);
10211	gen_jmp(s, read_pc(s) + a->imm);
10212	return true;
10213	}
10214
10215	/*
10216	* Supervisor call
10217	*/
10218
10219	static bool trans_SVC(DisasContext s, arg_SVC a)
10220	{
10221	gen_set_pc_im(s, s->base.pc_next);
10222	s->svc_imm = a->imm;
10223	s->base.is_jmp = DISAS_SWI;
10224	return true;
10225	}
10226
10227	/*
10228	* Unconditional system instructions
10229	*/
10230
10231	static bool trans_RFE(DisasContext s, arg_RFE a)
10232	{
10233	static const int8_t pre_offset[`4`] = {
10234	/ DA / -`4`, / IA / `0`, / DB / -`8`, / IB / `4`
10235	};
10236	static const int8_t post_offset[`4`] = {
10237	/ DA / -`8`, / IA / `4`, / DB / -`4`, / IB / `0`
10238	};
10239	TCGv_i32 addr, t1, t2;
10240
10241	if (!ENABLE_ARCH_6 \|\| arm_dc_feature(s, ARM_FEATURE_M)) {
10242	return false;
10243	}
10244	if (IS_USER(s)) {
10245	unallocated_encoding(s);
10246	return true;
10247	}
10248
10249	addr = load_reg(s, a->rn);
10250	tcg_gen_addi_i32(addr, addr, pre_offset[a->pu]);
10251
10252	/ Load PC into tmp and CPSR into tmp2. /
10253	t1 = tcg_temp_new_i32();
10254	gen_aa32_ld32u(s, t1, addr, get_mem_index(s));
10255	tcg_gen_addi_i32(addr, addr, `4`);
10256	t2 = tcg_temp_new_i32();
10257	gen_aa32_ld32u(s, t2, addr, get_mem_index(s));
10258
10259	if (a->w) {
10260	/ Base writeback. /
10261	tcg_gen_addi_i32(addr, addr, post_offset[a->pu]);
10262	store_reg(s, a->rn, addr);
10263	} else {
10264	tcg_temp_free_i32(addr);
10265	}
10266	gen_rfe(s, t1, t2);
10267	return true;
10268	}
10269
10270	static bool trans_SRS(DisasContext s, arg_SRS a)
10271	{
10272	if (!ENABLE_ARCH_6 \|\| arm_dc_feature(s, ARM_FEATURE_M)) {
10273	return false;
10274	}
10275	gen_srs(s, a->mode, a->pu, a->w);
10276	return true;
10277	}
10278
10279	static bool trans_CPS(DisasContext s, arg_CPS a)
10280	{
10281	uint32_t mask, val;
10282
10283	if (!ENABLE_ARCH_6 \|\| arm_dc_feature(s, ARM_FEATURE_M)) {
10284	return false;
10285	}
10286	if (IS_USER(s)) {
10287	/ Implemented as NOP in user mode. /
10288	return true;
10289	}
10290	/ TODO: There are quite a lot of UNPREDICTABLE argument combinations. /
10291
10292	mask = val = `0`;
10293	if (a->imod & `2`) {
10294	if (a->A) {
10295	mask \|= CPSR_A;
10296	}
10297	if (a->I) {
10298	mask \|= CPSR_I;
10299	}
10300	if (a->F) {
10301	mask \|= CPSR_F;
10302	}
10303	if (a->imod & `1`) {
10304	val \|= mask;
10305	}
10306	}
10307	if (a->M) {
10308	mask \|= CPSR_M;
10309	val \|= a->mode;
10310	}
10311	if (mask) {
10312	gen_set_psr_im(s, mask, `0`, val);
10313	}
10314	return true;
10315	}
10316
10317	static bool trans_CPS_v7m(DisasContext s, arg_CPS_v7m a)
10318	{
10319	TCGv_i32 tmp, addr;
10320
10321	if (!arm_dc_feature(s, ARM_FEATURE_M)) {
10322	return false;
10323	}
10324	if (IS_USER(s)) {
10325	/ Implemented as NOP in user mode. /
10326	return true;
10327	}
10328
10329	tmp = tcg_const_i32(a->im);
10330	/ FAULTMASK /
10331	if (a->F) {
10332	addr = tcg_const_i32(`19`);
10333	gen_helper_v7m_msr(cpu_env, addr, tmp);
10334	tcg_temp_free_i32(addr);
10335	}
10336	/ PRIMASK /
10337	if (a->I) {
10338	addr = tcg_const_i32(`16`);
10339	gen_helper_v7m_msr(cpu_env, addr, tmp);
10340	tcg_temp_free_i32(addr);
10341	}
10342	tcg_temp_free_i32(tmp);
10343	gen_lookup_tb(s);
10344	return true;
10345	}
10346
10347	/*
10348	* Clear-Exclusive, Barriers
10349	*/
10350
10351	static bool trans_CLREX(DisasContext s, arg_CLREX a)
10352	{
10353	if (s->thumb
10354	? !ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)
10355	: !ENABLE_ARCH_6K) {
10356	return false;
10357	}
10358	gen_clrex(s);
10359	return true;
10360	}
10361
10362	static bool trans_DSB(DisasContext s, arg_DSB a)
10363	{
10364	if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10365	return false;
10366	}
10367	tcg_gen_mb(TCG_MO_ALL \| TCG_BAR_SC);
10368	return true;
10369	}
10370
10371	static bool trans_DMB(DisasContext s, arg_DMB a)
10372	{
10373	return trans_DSB(s, NULL);
10374	}
10375
10376	static bool trans_ISB(DisasContext s, arg_ISB a)
10377	{
10378	if (!ENABLE_ARCH_7 && !arm_dc_feature(s, ARM_FEATURE_M)) {
10379	return false;
10380	}
10381	/*
10382	* We need to break the TB after this insn to execute
10383	* self-modifying code correctly and also to take
10384	* any pending interrupts immediately.
10385	*/
10386	gen_goto_tb(s, `0`, s->base.pc_next);
10387	return true;
10388	}
10389
10390	static bool trans_SB(DisasContext s, arg_SB a)
10391	{
10392	if (!dc_isar_feature(aa32_sb, s)) {
10393	return false;
10394	}
10395	/*
10396	* TODO: There is no speculation barrier opcode
10397	* for TCG; MB and end the TB instead.
10398	*/
10399	tcg_gen_mb(TCG_MO_ALL \| TCG_BAR_SC);
10400	gen_goto_tb(s, `0`, s->base.pc_next);
10401	return true;
10402	}
10403
10404	static bool trans_SETEND(DisasContext s, arg_SETEND a)
10405	{
10406	if (!ENABLE_ARCH_6) {
10407	return false;
10408	}
10409	if (a->E != (s->be_data == MO_BE)) {
10410	gen_helper_setend(cpu_env);
10411	s->base.is_jmp = DISAS_UPDATE;
10412	}
10413	return true;
10414	}
10415
10416	/*
10417	* Preload instructions
10418	* All are nops, contingent on the appropriate arch level.
10419	*/
10420
10421	static bool trans_PLD(DisasContext s, arg_PLD a)
10422	{
10423	return ENABLE_ARCH_5TE;
10424	}
10425
10426	static bool trans_PLDW(DisasContext s, arg_PLD a)
10427	{
10428	return arm_dc_feature(s, ARM_FEATURE_V7MP);
10429	}
10430
10431	static bool trans_PLI(DisasContext s, arg_PLD a)
10432	{
10433	return ENABLE_ARCH_7;
10434	}
10435
10436	/*
10437	* If-then
10438	*/
10439
10440	static bool trans_IT(DisasContext s, arg_IT a)
10441	{
10442	int cond_mask = a->cond_mask;
10443
10444	/*
10445	* No actual code generated for this insn, just setup state.
10446	*
10447	* Combinations of firstcond and mask which set up an 0b1111
10448	* condition are UNPREDICTABLE; we take the CONSTRAINED
10449	* UNPREDICTABLE choice to treat 0b1111 the same as 0b1110,
10450	* i.e. both meaning "execute always".
10451	*/
10452	s->condexec_cond = (cond_mask >> `4`) & `0xe`;
10453	s->condexec_mask = cond_mask & `0x1f`;
10454	return true;
10455	}
10456
10457	/*
10458	* Legacy decoder.
10459	*/
10460
10461	static void disas_arm_insn(DisasContext s, unsigned* int insn)
10462	{
10463	unsigned int cond = insn >> `28`;
10464
10465	/ M variants do not implement ARM mode; this must raise the INVSTATE*
10466	* UsageFault exception.
10467	*/
10468	if (arm_dc_feature(s, ARM_FEATURE_M)) {
10469	gen_exception_insn(s, s->pc_curr, EXCP_INVSTATE, syn_uncategorized(),
10470	default_exception_el(s));
10471	return;
10472	}
10473
10474	if (cond == `0xf`) {
10475	/ In ARMv3 and v4 the NV condition is UNPREDICTABLE; we*
10476	* choose to UNDEF. In ARMv5 and above the space is used
10477	* for miscellaneous unconditional instructions.
10478	*/
10479	ARCH(`5`);
10480
10481	/ Unconditional instructions. /
10482	if (disas_a32_uncond(s, insn)) {
10483	return;
10484	}
10485	/ fall back to legacy decoder /
10486
10487	if (((insn >> `25`) & `7`) == `1`) {
10488	/ NEON Data processing. /
10489	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
10490	goto illegal_op;
10491	}
10492
10493	if (disas_neon_data_insn(s, insn)) {
10494	goto illegal_op;
10495	}
10496	return;
10497	}
10498	if ((insn & `0x0f100000`) == `0x04000000`) {
10499	/ NEON load/store. /
10500	if (!arm_dc_feature(s, ARM_FEATURE_NEON)) {
10501	goto illegal_op;
10502	}
10503
10504	if (disas_neon_ls_insn(s, insn)) {
10505	goto illegal_op;
10506	}
10507	return;
10508	}
10509	if ((insn & `0x0f000e10`) == `0x0e000a00`) {
10510	/ VFP. /
10511	if (disas_vfp_insn(s, insn)) {
10512	goto illegal_op;
10513	}
10514	return;
10515	}
10516	if ((insn & `0x0e000f00`) == `0x0c000100`) {
10517	if (arm_dc_feature(s, ARM_FEATURE_IWMMXT)) {
10518	/ iWMMXt register transfer. /
10519	if (extract32(s->c15_cpar, `1`, `1`)) {
10520	if (!disas_iwmmxt_insn(s, insn)) {
10521	return;
10522	}
10523	}
10524	}
10525	} else if ((insn & `0x0e000a00`) == `0x0c000800`
10526	&& arm_dc_feature(s, ARM_FEATURE_V8)) {
10527	if (disas_neon_insn_3same_ext(s, insn)) {
10528	goto illegal_op;
10529	}
10530	return;
10531	} else if ((insn & `0x0f000a00`) == `0x0e000800`
10532	&& arm_dc_feature(s, ARM_FEATURE_V8)) {
10533	if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10534	goto illegal_op;
10535	}
10536	return;
10537	}
10538	goto illegal_op;
10539	}
10540	if (cond != `0xe`) {
10541	/ if not always execute, we generate a conditional jump to*
10542	next instruction /*
10543	arm_skip_unless(s, cond);
10544	}
10545
10546	if (disas_a32(s, insn)) {
10547	return;
10548	}
10549	/ fall back to legacy decoder /
10550
10551	switch ((insn >> `24`) & `0xf`) {
10552	case `0xc`:
10553	case `0xd`:
10554	case `0xe`:
10555	if (((insn >> `8`) & `0xe`) == `10`) {
10556	/ VFP. /
10557	if (disas_vfp_insn(s, insn)) {
10558	goto illegal_op;
10559	}
10560	} else if (disas_coproc_insn(s, insn)) {
10561	/ Coprocessor. /
10562	goto illegal_op;
10563	}
10564	break;
10565	default:
10566	illegal_op:
10567	unallocated_encoding(s);
10568	break;
10569	}
10570	}
10571
10572	static bool thumb_insn_is_16bit(DisasContext *s, uint32_t pc, uint32_t insn)
10573	{
10574	/*
10575	* Return true if this is a 16 bit instruction. We must be precise
10576	* about this (matching the decode).
10577	*/
10578	if ((insn >> `11`) < `0x1d`) {
10579	/ Definitely a 16-bit instruction /
10580	return true;
10581	}
10582
10583	/ Top five bits 0b11101 / 0b11110 / 0b11111 : this is the*
10584	* first half of a 32-bit Thumb insn. Thumb-1 cores might
10585	* end up actually treating this as two 16-bit insns, though,
10586	* if it's half of a bl/blx pair that might span a page boundary.
10587	*/
10588	if (arm_dc_feature(s, ARM_FEATURE_THUMB2) \|\|
10589	arm_dc_feature(s, ARM_FEATURE_M)) {
10590	/ Thumb2 cores (including all M profile ones) always treat*
10591	* 32-bit insns as 32-bit.
10592	*/
10593	return false;
10594	}
10595
10596	if ((insn >> `11`) == `0x1e` && pc - s->page_start < TARGET_PAGE_SIZE - `3`) {
10597	/ 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix, and the suffix*
10598	* is not on the next page; we merge this into a 32-bit
10599	* insn.
10600	*/
10601	return false;
10602	}
10603	/ 0b1110_1xxx_xxxx_xxxx : BLX suffix (or UNDEF);*
10604	* 0b1111_1xxx_xxxx_xxxx : BL suffix;
10605	* 0b1111_0xxx_xxxx_xxxx : BL/BLX prefix on the end of a page
10606	* -- handle as single 16 bit insn
10607	*/
10608	return true;
10609	}
10610
10611	/ Translate a 32-bit thumb instruction. /
10612	static void disas_thumb2_insn(DisasContext *s, uint32_t insn)
10613	{
10614	/*
10615	* ARMv6-M supports a limited subset of Thumb2 instructions.
10616	* Other Thumb1 architectures allow only 32-bit
10617	* combined BL/BLX prefix and suffix.
10618	*/
10619	if (arm_dc_feature(s, ARM_FEATURE_M) &&
10620	!arm_dc_feature(s, ARM_FEATURE_V7)) {
10621	int i;
10622	bool found = false;
10623	static const uint32_t armv6m_insn[] = {`0xf3808000` / msr /,
10624	`0xf3b08040` / dsb /,
10625	`0xf3b08050` / dmb /,
10626	`0xf3b08060` / isb /,
10627	`0xf3e08000` / mrs /,
10628	`0xf000d000` / bl /};
10629	static const uint32_t armv6m_mask[] = {`0xffe0d000`,
10630	`0xfff0d0f0`,
10631	`0xfff0d0f0`,
10632	`0xfff0d0f0`,
10633	`0xffe0d000`,
10634	`0xf800d000`};
10635
10636	for (i = `0`; i < ARRAY_SIZE(armv6m_insn); i++) {
10637	if ((insn & armv6m_mask[i]) == armv6m_insn[i]) {
10638	found = true;
10639	break;
10640	}
10641	}
10642	if (!found) {
10643	goto illegal_op;
10644	}
10645	} else if ((insn & `0xf800e800`) != `0xf000e800`) {
10646	ARCH(`6T2`);
10647	}
10648
10649	if (disas_t32(s, insn)) {
10650	return;
10651	}
10652	/ fall back to legacy decoder /
10653
10654	switch ((insn >> `25`) & `0xf`) {
10655	case `0`: case `1`: case `2`: case `3`:
10656	/ 16-bit instructions. Should never happen. /
10657	abort();
10658	case `6`: case `7`: case `14`: case `15`:
10659	/ Coprocessor. /
10660	if (arm_dc_feature(s, ARM_FEATURE_M)) {
10661	/ 0b111x_11xx_xxxx_xxxx_xxxx_xxxx_xxxx_xxxx /
10662	if (extract32(insn, `24`, `2`) == `3`) {
10663	goto illegal_op; / op0 = 0b11 : unallocated /
10664	}
10665
10666	/*
10667	* Decode VLLDM and VLSTM first: these are nonstandard because:
10668	* * if there is no FPU then these insns must NOP in
10669	* Secure state and UNDEF in Nonsecure state
10670	* * if there is an FPU then these insns do not have
10671	* the usual behaviour that disas_vfp_insn() provides of
10672	* being controlled by CPACR/NSACR enable bits or the
10673	* lazy-stacking logic.
10674	*/
10675	if (arm_dc_feature(s, ARM_FEATURE_V8) &&
10676	(insn & `0xffa00f00`) == `0xec200a00`) {
10677	/ 0b1110_1100_0x1x_xxxx_xxxx_1010_xxxx_xxxx*
10678	* - VLLDM, VLSTM
10679	* We choose to UNDEF if the RAZ bits are non-zero.
10680	*/
10681	if (!s->v8m_secure \|\| (insn & `0x0040f0ff`)) {
10682	goto illegal_op;
10683	}
10684
10685	if (arm_dc_feature(s, ARM_FEATURE_VFP)) {
10686	uint32_t rn = (insn >> `16`) & `0xf`;
10687	TCGv_i32 fptr = load_reg(s, rn);
10688
10689	if (extract32(insn, `20`, `1`)) {
10690	gen_helper_v7m_vlldm(cpu_env, fptr);
10691	} else {
10692	gen_helper_v7m_vlstm(cpu_env, fptr);
10693	}
10694	tcg_temp_free_i32(fptr);
10695
10696	/ End the TB, because we have updated FP control bits /
10697	s->base.is_jmp = DISAS_UPDATE;
10698	}
10699	break;
10700	}
10701	if (arm_dc_feature(s, ARM_FEATURE_VFP) &&
10702	((insn >> `8`) & `0xe`) == `10`) {
10703	/ FP, and the CPU supports it /
10704	if (disas_vfp_insn(s, insn)) {
10705	goto illegal_op;
10706	}
10707	break;
10708	}
10709
10710	/ All other insns: NOCP /
10711	gen_exception_insn(s, s->pc_curr, EXCP_NOCP, syn_uncategorized(),
10712	default_exception_el(s));
10713	break;
10714	}
10715	if ((insn & `0xfe000a00`) == `0xfc000800`
10716	&& arm_dc_feature(s, ARM_FEATURE_V8)) {
10717	/ The Thumb2 and ARM encodings are identical. /
10718	if (disas_neon_insn_3same_ext(s, insn)) {
10719	goto illegal_op;
10720	}
10721	} else if ((insn & `0xff000a00`) == `0xfe000800`
10722	&& arm_dc_feature(s, ARM_FEATURE_V8)) {
10723	/ The Thumb2 and ARM encodings are identical. /
10724	if (disas_neon_insn_2reg_scalar_ext(s, insn)) {
10725	goto illegal_op;
10726	}
10727	} else if (((insn >> `24`) & `3`) == `3`) {
10728	/ Translate into the equivalent ARM encoding. /
10729	insn = (insn & `0xe2ffffff`) \| ((insn & (`1` << `28`)) >> `4`) \| (`1` << `28`);
10730	if (disas_neon_data_insn(s, insn)) {
10731	goto illegal_op;
10732	}
10733	} else if (((insn >> `8`) & `0xe`) == `10`) {
10734	if (disas_vfp_insn(s, insn)) {
10735	goto illegal_op;
10736	}
10737	} else {
10738	if (insn & (`1` << `28`))
10739	goto illegal_op;
10740	if (disas_coproc_insn(s, insn)) {
10741	goto illegal_op;
10742	}
10743	}
10744	break;
10745	case `12`:
10746	if ((insn & `0x01100000`) == `0x01000000`) {
10747	if (disas_neon_ls_insn(s, insn)) {
10748	goto illegal_op;
10749	}
10750	break;
10751	}
10752	goto illegal_op;
10753	default:
10754	illegal_op:
10755	unallocated_encoding(s);
10756	}
10757	}
10758
10759	static void disas_thumb_insn(DisasContext *s, uint32_t insn)
10760	{
10761	if (!disas_t16(s, insn)) {
10762	unallocated_encoding(s);
10763	}
10764	}
10765
10766	static bool insn_crosses_page(CPUARMState env, DisasContext s)
10767	{
10768	/ Return true if the insn at dc->base.pc_next might cross a page boundary.*
10769	* (False positives are OK, false negatives are not.)
10770	* We know this is a Thumb insn, and our caller ensures we are
10771	* only called if dc->base.pc_next is less than 4 bytes from the page
10772	* boundary, so we cross the page if the first 16 bits indicate
10773	* that this is a 32 bit insn.
10774	*/
10775	uint16_t insn = arm_lduw_code(env, s->base.pc_next, s->sctlr_b);
10776
10777	return !thumb_insn_is_16bit(s, s->base.pc_next, insn);
10778	}
10779
10780	static void arm_tr_init_disas_context(DisasContextBase dcbase, CPUState cs)
10781	{
10782	DisasContext *dc = container_of(dcbase, DisasContext, base);
10783	CPUARMState *env = cs->env_ptr;
10784	ARMCPU *cpu = env_archcpu(env);
10785	uint32_t tb_flags = dc->base.tb->flags;
10786	uint32_t condexec, core_mmu_idx;
10787
10788	dc->isar = &cpu->isar;
10789	dc->condjmp = `0`;
10790
10791	dc->aarch64 = `0`;
10792	/ If we are coming from secure EL0 in a system with a 32-bit EL3, then*
10793	* there is no secure EL1, so we route exceptions to EL3.
10794	*/
10795	dc->secure_routed_to_el3 = arm_feature(env, ARM_FEATURE_EL3) &&
10796	!arm_el_is_aa64(env, `3`);
10797	dc->thumb = FIELD_EX32(tb_flags, TBFLAG_A32, THUMB);
10798	dc->sctlr_b = FIELD_EX32(tb_flags, TBFLAG_A32, SCTLR_B);
10799	dc->be_data = FIELD_EX32(tb_flags, TBFLAG_ANY, BE_DATA) ? MO_BE : MO_LE;
10800	condexec = FIELD_EX32(tb_flags, TBFLAG_A32, CONDEXEC);
10801	dc->condexec_mask = (condexec & `0xf`) << `1`;
10802	dc->condexec_cond = condexec >> `4`;
10803	core_mmu_idx = FIELD_EX32(tb_flags, TBFLAG_ANY, MMUIDX);
10804	dc->mmu_idx = core_to_arm_mmu_idx(env, core_mmu_idx);
10805	dc->current_el = arm_mmu_idx_to_el(dc->mmu_idx);
10806	#if !defined(CONFIG_USER_ONLY)
10807	dc->user = (dc->current_el == `0`);
10808	#endif
10809	dc->ns = FIELD_EX32(tb_flags, TBFLAG_A32, NS);
10810	dc->fp_excp_el = FIELD_EX32(tb_flags, TBFLAG_ANY, FPEXC_EL);
10811	dc->vfp_enabled = FIELD_EX32(tb_flags, TBFLAG_A32, VFPEN);
10812	dc->vec_len = FIELD_EX32(tb_flags, TBFLAG_A32, VECLEN);
10813	if (arm_feature(env, ARM_FEATURE_XSCALE)) {
10814	dc->c15_cpar = FIELD_EX32(tb_flags, TBFLAG_A32, XSCALE_CPAR);
10815	dc->vec_stride = `0`;
10816	} else {
10817	dc->vec_stride = FIELD_EX32(tb_flags, TBFLAG_A32, VECSTRIDE);
10818	dc->c15_cpar = `0`;
10819	}
10820	dc->v7m_handler_mode = FIELD_EX32(tb_flags, TBFLAG_A32, HANDLER);
10821	dc->v8m_secure = arm_feature(env, ARM_FEATURE_M_SECURITY) &&
10822	regime_is_secure(env, dc->mmu_idx);
10823	dc->v8m_stackcheck = FIELD_EX32(tb_flags, TBFLAG_A32, STACKCHECK);
10824	dc->v8m_fpccr_s_wrong = FIELD_EX32(tb_flags, TBFLAG_A32, FPCCR_S_WRONG);
10825	dc->v7m_new_fp_ctxt_needed =
10826	FIELD_EX32(tb_flags, TBFLAG_A32, NEW_FP_CTXT_NEEDED);
10827	dc->v7m_lspact = FIELD_EX32(tb_flags, TBFLAG_A32, LSPACT);
10828	dc->cp_regs = cpu->cp_regs;
10829	dc->features = env->features;
10830
10831	/ Single step state. The code-generation logic here is:*
10832	* SS_ACTIVE == 0:
10833	* generate code with no special handling for single-stepping (except
10834	* that anything that can make us go to SS_ACTIVE == 1 must end the TB;
10835	* this happens anyway because those changes are all system register or
10836	* PSTATE writes).
10837	* SS_ACTIVE == 1, PSTATE.SS == 1: (active-not-pending)
10838	* emit code for one insn
10839	* emit code to clear PSTATE.SS
10840	* emit code to generate software step exception for completed step
10841	* end TB (as usual for having generated an exception)
10842	* SS_ACTIVE == 1, PSTATE.SS == 0: (active-pending)
10843	* emit code to generate a software step exception
10844	* end the TB
10845	*/
10846	dc->ss_active = FIELD_EX32(tb_flags, TBFLAG_ANY, SS_ACTIVE);
10847	dc->pstate_ss = FIELD_EX32(tb_flags, TBFLAG_ANY, PSTATE_SS);
10848	dc->is_ldex = false;
10849	if (!arm_feature(env, ARM_FEATURE_M)) {
10850	dc->debug_target_el = FIELD_EX32(tb_flags, TBFLAG_ANY, DEBUG_TARGET_EL);
10851	}
10852
10853	dc->page_start = dc->base.pc_first & TARGET_PAGE_MASK;
10854
10855	/ If architectural single step active, limit to 1. /
10856	if (is_singlestepping(dc)) {
10857	dc->base.max_insns = `1`;
10858	}
10859
10860	/ ARM is a fixed-length ISA. Bound the number of insns to execute*
10861	to those left on the page. /*
10862	if (!dc->thumb) {
10863	int bound = -(dc->base.pc_first \| TARGET_PAGE_MASK) / `4`;
10864	dc->base.max_insns = MIN(dc->base.max_insns, bound);
10865	}
10866
10867	cpu_V0 = tcg_temp_new_i64();
10868	cpu_V1 = tcg_temp_new_i64();
10869	/ FIXME: cpu_M0 can probably be the same as cpu_V0. /
10870	cpu_M0 = tcg_temp_new_i64();
10871	}
10872
10873	static void arm_tr_tb_start(DisasContextBase dcbase, CPUState cpu)
10874	{
10875	DisasContext *dc = container_of(dcbase, DisasContext, base);
10876
10877	/ A note on handling of the condexec (IT) bits:*
10878	*
10879	* We want to avoid the overhead of having to write the updated condexec
10880	* bits back to the CPUARMState for every instruction in an IT block. So:
10881	* (1) if the condexec bits are not already zero then we write
10882	* zero back into the CPUARMState now. This avoids complications trying
10883	* to do it at the end of the block. (For example if we don't do this
10884	* it's hard to identify whether we can safely skip writing condexec
10885	* at the end of the TB, which we definitely want to do for the case
10886	* where a TB doesn't do anything with the IT state at all.)
10887	* (2) if we are going to leave the TB then we call gen_set_condexec()
10888	* which will write the correct value into CPUARMState if zero is wrong.
10889	* This is done both for leaving the TB at the end, and for leaving
10890	* it because of an exception we know will happen, which is done in
10891	* gen_exception_insn(). The latter is necessary because we need to
10892	* leave the TB with the PC/IT state just prior to execution of the
10893	* instruction which caused the exception.
10894	* (3) if we leave the TB unexpectedly (eg a data abort on a load)
10895	* then the CPUARMState will be wrong and we need to reset it.
10896	* This is handled in the same way as restoration of the
10897	* PC in these situations; we save the value of the condexec bits
10898	* for each PC via tcg_gen_insn_start(), and restore_state_to_opc()
10899	* then uses this to restore them after an exception.
10900	*
10901	* Note that there are no instructions which can read the condexec
10902	* bits, and none which can write non-static values to them, so
10903	* we don't need to care about whether CPUARMState is correct in the
10904	* middle of a TB.
10905	*/
10906
10907	/ Reset the conditional execution bits immediately. This avoids*
10908	complications trying to do it at the end of the block. /*
10909	if (dc->condexec_mask \|\| dc->condexec_cond) {
10910	TCGv_i32 tmp = tcg_temp_new_i32();
10911	tcg_gen_movi_i32(tmp, `0`);
10912	store_cpu_field(tmp, condexec_bits);
10913	}
10914	}
10915
10916	static void arm_tr_insn_start(DisasContextBase dcbase, CPUState cpu)
10917	{
10918	DisasContext *dc = container_of(dcbase, DisasContext, base);
10919
10920	tcg_gen_insn_start(dc->base.pc_next,
10921	(dc->condexec_cond << `4`) \| (dc->condexec_mask >> `1`),
10922	`0`);
10923	dc->insn_start = tcg_last_op();
10924	}
10925
10926	static bool arm_tr_breakpoint_check(DisasContextBase dcbase, CPUState cpu,
10927	const CPUBreakpoint *bp)
10928	{
10929	DisasContext *dc = container_of(dcbase, DisasContext, base);
10930
10931	if (bp->flags & BP_CPU) {
10932	gen_set_condexec(dc);
10933	gen_set_pc_im(dc, dc->base.pc_next);
10934	gen_helper_check_breakpoints(cpu_env);
10935	/ End the TB early; it's likely not going to be executed /
10936	dc->base.is_jmp = DISAS_TOO_MANY;
10937	} else {
10938	gen_exception_internal_insn(dc, dc->base.pc_next, EXCP_DEBUG);
10939	/ The address covered by the breakpoint must be*
10940	included in [tb->pc, tb->pc + tb->size) in order
10941	to for it to be properly cleared -- thus we
10942	increment the PC here so that the logic setting
10943	tb->size below does the right thing. /*
10944	/ TODO: Advance PC by correct instruction length to*
10945	* avoid disassembler error messages */
10946	dc->base.pc_next += `2`;
10947	dc->base.is_jmp = DISAS_NORETURN;
10948	}
10949
10950	return true;
10951	}
10952
10953	static bool arm_pre_translate_insn(DisasContext *dc)
10954	{
10955	#ifdef CONFIG_USER_ONLY
10956	/ Intercept jump to the magic kernel page. /
10957	if (dc->base.pc_next >= `0xffff0000`) {
10958	/ We always get here via a jump, so know we are not in a*
10959	conditional execution block. /*
10960	gen_exception_internal(EXCP_KERNEL_TRAP);
10961	dc->base.is_jmp = DISAS_NORETURN;
10962	return true;
10963	}
10964	#endif
10965
10966	if (dc->ss_active && !dc->pstate_ss) {
10967	/ Singlestep state is Active-pending.*
10968	* If we're in this state at the start of a TB then either
10969	* a) we just took an exception to an EL which is being debugged
10970	* and this is the first insn in the exception handler
10971	* b) debug exceptions were masked and we just unmasked them
10972	* without changing EL (eg by clearing PSTATE.D)
10973	* In either case we're going to take a swstep exception in the
10974	* "did not step an insn" case, and so the syndrome ISV and EX
10975	* bits should be zero.
10976	*/
10977	assert(dc->base.num_insns == `1`);
10978	gen_swstep_exception(dc, `0`, `0`);
10979	dc->base.is_jmp = DISAS_NORETURN;
10980	return true;
10981	}
10982
10983	return false;
10984	}
10985
10986	static void arm_post_translate_insn(DisasContext *dc)
10987	{
10988	if (dc->condjmp && !dc->base.is_jmp) {
10989	gen_set_label(dc->condlabel);
10990	dc->condjmp = `0`;
10991	}
10992	translator_loop_temp_check(&dc->base);
10993	}
10994
10995	static void arm_tr_translate_insn(DisasContextBase dcbase, CPUState cpu)
10996	{
10997	DisasContext *dc = container_of(dcbase, DisasContext, base);
10998	CPUARMState *env = cpu->env_ptr;
10999	unsigned int insn;
11000
11001	if (arm_pre_translate_insn(dc)) {
11002	return;
11003	}
11004
11005	dc->pc_curr = dc->base.pc_next;
11006	insn = arm_ldl_code(env, dc->base.pc_next, dc->sctlr_b);
11007	dc->insn = insn;
11008	dc->base.pc_next += `4`;
11009	disas_arm_insn(dc, insn);
11010
11011	arm_post_translate_insn(dc);
11012
11013	/ ARM is a fixed-length ISA. We performed the cross-page check*
11014	in init_disas_context by adjusting max_insns. /*
11015	}
11016
11017	static bool thumb_insn_is_unconditional(DisasContext *s, uint32_t insn)
11018	{
11019	/ Return true if this Thumb insn is always unconditional,*
11020	* even inside an IT block. This is true of only a very few
11021	* instructions: BKPT, HLT, and SG.
11022	*
11023	* A larger class of instructions are UNPREDICTABLE if used
11024	* inside an IT block; we do not need to detect those here, because
11025	* what we do by default (perform the cc check and update the IT
11026	* bits state machine) is a permitted CONSTRAINED UNPREDICTABLE
11027	* choice for those situations.
11028	*
11029	* insn is either a 16-bit or a 32-bit instruction; the two are
11030	* distinguishable because for the 16-bit case the top 16 bits
11031	* are zeroes, and that isn't a valid 32-bit encoding.
11032	*/
11033	if ((insn & `0xffffff00`) == `0xbe00`) {
11034	/ BKPT /
11035	return true;
11036	}
11037
11038	if ((insn & `0xffffffc0`) == `0xba80` && arm_dc_feature(s, ARM_FEATURE_V8) &&
11039	!arm_dc_feature(s, ARM_FEATURE_M)) {
11040	/ HLT: v8A only. This is unconditional even when it is going to*
11041	* UNDEF; see the v8A ARM ARM DDI0487B.a H3.3.
11042	* For v7 cores this was a plain old undefined encoding and so
11043	* honours its cc check. (We might be using the encoding as
11044	* a semihosting trap, but we don't change the cc check behaviour
11045	* on that account, because a debugger connected to a real v7A
11046	* core and emulating semihosting traps by catching the UNDEF
11047	* exception would also only see cases where the cc check passed.
11048	* No guest code should be trying to do a HLT semihosting trap
11049	* in an IT block anyway.
11050	*/
11051	return true;
11052	}
11053
11054	if (insn == `0xe97fe97f` && arm_dc_feature(s, ARM_FEATURE_V8) &&
11055	arm_dc_feature(s, ARM_FEATURE_M)) {
11056	/ SG: v8M only /
11057	return true;
11058	}
11059
11060	return false;
11061	}
11062
11063	static void thumb_tr_translate_insn(DisasContextBase dcbase, CPUState cpu)
11064	{
11065	DisasContext *dc = container_of(dcbase, DisasContext, base);
11066	CPUARMState *env = cpu->env_ptr;
11067	uint32_t insn;
11068	bool is_16bit;
11069
11070	if (arm_pre_translate_insn(dc)) {
11071	return;
11072	}
11073
11074	dc->pc_curr = dc->base.pc_next;
11075	insn = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11076	is_16bit = thumb_insn_is_16bit(dc, dc->base.pc_next, insn);
11077	dc->base.pc_next += `2`;
11078	if (!is_16bit) {
11079	uint32_t insn2 = arm_lduw_code(env, dc->base.pc_next, dc->sctlr_b);
11080
11081	insn = insn << `16` \| insn2;
11082	dc->base.pc_next += `2`;
11083	}
11084	dc->insn = insn;
11085
11086	if (dc->condexec_mask && !thumb_insn_is_unconditional(dc, insn)) {
11087	uint32_t cond = dc->condexec_cond;
11088
11089	/*
11090	* Conditionally skip the insn. Note that both 0xe and 0xf mean
11091	* "always"; 0xf is not "never".
11092	*/
11093	if (cond < `0x0e`) {
11094	arm_skip_unless(dc, cond);
11095	}
11096	}
11097
11098	if (is_16bit) {
11099	disas_thumb_insn(dc, insn);
11100	} else {
11101	disas_thumb2_insn(dc, insn);
11102	}
11103
11104	/ Advance the Thumb condexec condition. /
11105	if (dc->condexec_mask) {
11106	dc->condexec_cond = ((dc->condexec_cond & `0xe`) \|
11107	((dc->condexec_mask >> `4`) & `1`));
11108	dc->condexec_mask = (dc->condexec_mask << `1`) & `0x1f`;
11109	if (dc->condexec_mask == `0`) {
11110	dc->condexec_cond = `0`;
11111	}
11112	}
11113
11114	arm_post_translate_insn(dc);
11115
11116	/ Thumb is a variable-length ISA. Stop translation when the next insn*
11117	* will touch a new page. This ensures that prefetch aborts occur at
11118	* the right place.
11119	*
11120	* We want to stop the TB if the next insn starts in a new page,
11121	* or if it spans between this page and the next. This means that
11122	* if we're looking at the last halfword in the page we need to
11123	* see if it's a 16-bit Thumb insn (which will fit in this TB)
11124	* or a 32-bit Thumb insn (which won't).
11125	* This is to avoid generating a silly TB with a single 16-bit insn
11126	* in it at the end of this page (which would execute correctly
11127	* but isn't very efficient).
11128	*/
11129	if (dc->base.is_jmp == DISAS_NEXT
11130	&& (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE
11131	\|\| (dc->base.pc_next - dc->page_start >= TARGET_PAGE_SIZE - `3`
11132	&& insn_crosses_page(env, dc)))) {
11133	dc->base.is_jmp = DISAS_TOO_MANY;
11134	}
11135	}
11136
11137	static void arm_tr_tb_stop(DisasContextBase dcbase, CPUState cpu)
11138	{
11139	DisasContext *dc = container_of(dcbase, DisasContext, base);
11140
11141	if (tb_cflags(dc->base.tb) & CF_LAST_IO && dc->condjmp) {
11142	/ FIXME: This can theoretically happen with self-modifying code. /
11143	cpu_abort(cpu, "IO on conditional branch instruction");
11144	}
11145
11146	/ At this stage dc->condjmp will only be set when the skipped*
11147	instruction was a conditional branch or trap, and the PC has
11148	already been written. /*
11149	gen_set_condexec(dc);
11150	if (dc->base.is_jmp == DISAS_BX_EXCRET) {
11151	/ Exception return branches need some special case code at the*
11152	* end of the TB, which is complex enough that it has to
11153	* handle the single-step vs not and the condition-failed
11154	* insn codepath itself.
11155	*/
11156	gen_bx_excret_final_code(dc);
11157	} else if (unlikely(is_singlestepping(dc))) {
11158	/ Unconditional and "condition passed" instruction codepath. /
11159	switch (dc->base.is_jmp) {
11160	case DISAS_SWI:
11161	gen_ss_advance(dc);
11162	gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11163	default_exception_el(dc));
11164	break;
11165	case DISAS_HVC:
11166	gen_ss_advance(dc);
11167	gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), `2`);
11168	break;
11169	case DISAS_SMC:
11170	gen_ss_advance(dc);
11171	gen_exception(EXCP_SMC, syn_aa32_smc(), `3`);
11172	break;
11173	case DISAS_NEXT:
11174	case DISAS_TOO_MANY:
11175	case DISAS_UPDATE:
11176	gen_set_pc_im(dc, dc->base.pc_next);
11177	/ fall through /
11178	default:
11179	/ FIXME: Single stepping a WFI insn will not halt the CPU. /
11180	gen_singlestep_exception(dc);
11181	break;
11182	case DISAS_NORETURN:
11183	break;
11184	}
11185	} else {
11186	/ While branches must always occur at the end of an IT block,*
11187	there are a few other things that can cause us to terminate
11188	the TB in the middle of an IT block:
11189	- Exception generating instructions (bkpt, swi, undefined).
11190	- Page boundaries.
11191	- Hardware watchpoints.
11192	Hardware breakpoints have already been handled and skip this code.
11193	*/
11194	switch(dc->base.is_jmp) {
11195	case DISAS_NEXT:
11196	case DISAS_TOO_MANY:
11197	gen_goto_tb(dc, `1`, dc->base.pc_next);
11198	break;
11199	case DISAS_JUMP:
11200	gen_goto_ptr();
11201	break;
11202	case DISAS_UPDATE:
11203	gen_set_pc_im(dc, dc->base.pc_next);
11204	/ fall through /
11205	default:
11206	/ indicate that the hash table must be used to find the next TB /
11207	tcg_gen_exit_tb(NULL, `0`);
11208	break;
11209	case DISAS_NORETURN:
11210	/ nothing more to generate /
11211	break;
11212	case DISAS_WFI:
11213	{
11214	TCGv_i32 tmp = tcg_const_i32((dc->thumb &&
11215	!(dc->insn & (`1U` << `31`))) ? `2` : `4`);
11216
11217	gen_helper_wfi(cpu_env, tmp);
11218	tcg_temp_free_i32(tmp);
11219	/ The helper doesn't necessarily throw an exception, but we*
11220	* must go back to the main loop to check for interrupts anyway.
11221	*/
11222	tcg_gen_exit_tb(NULL, `0`);
11223	break;
11224	}
11225	case DISAS_WFE:
11226	gen_helper_wfe(cpu_env);
11227	break;
11228	case DISAS_YIELD:
11229	gen_helper_yield(cpu_env);
11230	break;
11231	case DISAS_SWI:
11232	gen_exception(EXCP_SWI, syn_aa32_svc(dc->svc_imm, dc->thumb),
11233	default_exception_el(dc));
11234	break;
11235	case DISAS_HVC:
11236	gen_exception(EXCP_HVC, syn_aa32_hvc(dc->svc_imm), `2`);
11237	break;
11238	case DISAS_SMC:
11239	gen_exception(EXCP_SMC, syn_aa32_smc(), `3`);
11240	break;
11241	}
11242	}
11243
11244	if (dc->condjmp) {
11245	/ "Condition failed" instruction codepath for the branch/trap insn /
11246	gen_set_label(dc->condlabel);
11247	gen_set_condexec(dc);
11248	if (unlikely(is_singlestepping(dc))) {
11249	gen_set_pc_im(dc, dc->base.pc_next);
11250	gen_singlestep_exception(dc);
11251	} else {
11252	gen_goto_tb(dc, `1`, dc->base.pc_next);
11253	}
11254	}
11255	}
11256
11257	static void arm_tr_disas_log(const DisasContextBase dcbase, CPUState cpu)
11258	{
11259	DisasContext *dc = container_of(dcbase, DisasContext, base);
11260
11261	qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first));
11262	log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size);
11263	}
11264
11265	static const TranslatorOps arm_translator_ops = {
11266	.init_disas_context = arm_tr_init_disas_context,
11267	.tb_start = arm_tr_tb_start,
11268	.insn_start = arm_tr_insn_start,
11269	.breakpoint_check = arm_tr_breakpoint_check,
11270	.translate_insn = arm_tr_translate_insn,
11271	.tb_stop = arm_tr_tb_stop,
11272	.disas_log = arm_tr_disas_log,
11273	};
11274
11275	static const TranslatorOps thumb_translator_ops = {
11276	.init_disas_context = arm_tr_init_disas_context,
11277	.tb_start = arm_tr_tb_start,
11278	.insn_start = arm_tr_insn_start,
11279	.breakpoint_check = arm_tr_breakpoint_check,
11280	.translate_insn = thumb_tr_translate_insn,
11281	.tb_stop = arm_tr_tb_stop,
11282	.disas_log = arm_tr_disas_log,
11283	};
11284
11285	/ generate intermediate code for basic block 'tb'. /
11286	void gen_intermediate_code(CPUState cpu, TranslationBlock tb, int max_insns)
11287	{
11288	DisasContext dc;
11289	const TranslatorOps *ops = &arm_translator_ops;
11290
11291	if (FIELD_EX32(tb->flags, TBFLAG_A32, THUMB)) {
11292	ops = &thumb_translator_ops;
11293	}
11294	#ifdef TARGET_AARCH64
11295	if (FIELD_EX32(tb->flags, TBFLAG_ANY, AARCH64_STATE)) {
11296	ops = &aarch64_translator_ops;
11297	}
11298	#endif
11299
11300	translator_loop(ops, &dc.base, cpu, tb, max_insns);
11301	}
11302
11303	void restore_state_to_opc(CPUARMState env, TranslationBlock tb,
11304	target_ulong *data)
11305	{
11306	if (is_a64(env)) {
11307	env->pc = data[`0`];
11308	env->condexec_bits = `0`;
11309	env->exception.syndrome = data[`2`] << ARM_INSN_START_WORD2_SHIFT;
11310	} else {
11311	env->regs[`15`] = data[`0`];
11312	env->condexec_bits = data[`1`];
11313	env->exception.syndrome = data[`2`] << ARM_INSN_START_WORD2_SHIFT;
11314	}
11315	}
11316

Browse the source code of qemu/target/arm/translate.c