translate-sve.c source code [qemu/target/arm/translate-sve.c]

1	/*
2	* AArch64 SVE translation
3	*
4	* Copyright (c) 2018 Linaro, Ltd
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Lesser General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Lesser General Public License for more details.
15	*
16	* You should have received a copy of the GNU Lesser General Public
17	* License along with this library; if not, see <http://www.gnu.org/licenses/>.
18	*/
19
20	#include "qemu/osdep.h"
21	#include "cpu.h"
22	#include "exec/exec-all.h"
23	#include "tcg-op.h"
24	#include "tcg-op-gvec.h"
25	#include "tcg-gvec-desc.h"
26	#include "qemu/log.h"
27	#include "arm_ldst.h"
28	#include "translate.h"
29	#include "internals.h"
30	#include "exec/helper-proto.h"
31	#include "exec/helper-gen.h"
32	#include "exec/log.h"
33	#include "trace-tcg.h"
34	#include "translate-a64.h"
35	#include "fpu/softfloat.h"
36
37
38	typedef void GVecGen2sFn(unsigned, uint32_t, uint32_t,
39	TCGv_i64, uint32_t, uint32_t);
40
41	typedef void gen_helper_gvec_flags_3(TCGv_i32, TCGv_ptr, TCGv_ptr,
42	TCGv_ptr, TCGv_i32);
43	typedef void gen_helper_gvec_flags_4(TCGv_i32, TCGv_ptr, TCGv_ptr,
44	TCGv_ptr, TCGv_ptr, TCGv_i32);
45
46	typedef void gen_helper_gvec_mem(TCGv_env, TCGv_ptr, TCGv_i64, TCGv_i32);
47	typedef void gen_helper_gvec_mem_scatter(TCGv_env, TCGv_ptr, TCGv_ptr,
48	TCGv_ptr, TCGv_i64, TCGv_i32);
49
50	/*
51	* Helpers for extracting complex instruction fields.
52	*/
53
54	/ See e.g. ASR (immediate, predicated).*
55	* Returns -1 for unallocated encoding; diagnose later.
56	*/
57	static int tszimm_esz(DisasContext s, int* x)
58	{
59	x >>= `3`; / discard imm3 /
60	return `31` - clz32(x);
61	}
62
63	static int tszimm_shr(DisasContext s, int* x)
64	{
65	return (`16` << tszimm_esz(s, x)) - x;
66	}
67
68	/ See e.g. LSL (immediate, predicated). /
69	static int tszimm_shl(DisasContext s, int* x)
70	{
71	return x - (`8` << tszimm_esz(s, x));
72	}
73
74	static inline int plus1(DisasContext s, int* x)
75	{
76	return x + `1`;
77	}
78
79	/ The SH bit is in bit 8. Extract the low 8 and shift. /
80	static inline int expand_imm_sh8s(DisasContext s, int* x)
81	{
82	return (int8_t)x << (x & `0x100` ? `8` : `0`);
83	}
84
85	static inline int expand_imm_sh8u(DisasContext s, int* x)
86	{
87	return (uint8_t)x << (x & `0x100` ? `8` : `0`);
88	}
89
90	/ Convert a 2-bit memory size (msz) to a 4-bit data type (dtype)*
91	* with unsigned data. C.f. SVE Memory Contiguous Load Group.
92	*/
93	static inline int msz_dtype(DisasContext s, int* msz)
94	{
95	static const uint8_t dtype[`4`] = { `0`, `5`, `10`, `15` };
96	return dtype[msz];
97	}
98
99	/*
100	* Include the generated decoder.
101	*/
102
103	#include "decode-sve.inc.c"
104
105	/*
106	* Implement all of the translator functions referenced by the decoder.
107	*/
108
109	/ Return the offset info CPUARMState of the predicate vector register Pn.*
110	* Note for this purpose, FFR is P16.
111	*/
112	static inline int pred_full_reg_offset(DisasContext s, int* regno)
113	{
114	return offsetof(CPUARMState, vfp.pregs[regno]);
115	}
116
117	/ Return the byte size of the whole predicate register, VL / 64. /
118	static inline int pred_full_reg_size(DisasContext *s)
119	{
120	return s->sve_len >> `3`;
121	}
122
/*
 * Round up the size of a register to a size allowed by
 * the tcg vector infrastructure.  Any operation which uses this
 * size may assume that the bits above pred_full_reg_size are zero,
 * and must leave them the same way.
 *
 * Note that this is not needed for the vector registers as they
 * are always properly sized for tcg vectors.
 *
 * Sizes up to 8 bytes become exactly 8; anything larger is rounded up
 * to a multiple of 16.
 */
static int size_for_gvec(int size)
{
    if (size <= 8) {
        return 8;
    } else {
        return QEMU_ALIGN_UP(size, 16);
    }
}
139
140	static int pred_gvec_reg_size(DisasContext *s)
141	{
142	return size_for_gvec(pred_full_reg_size(s));
143	}
144
145	/ Invoke a vector expander on two Zregs. /
146	static bool do_vector2_z(DisasContext s, GVecGen2Fn gvec_fn,
147	int esz, int rd, int rn)
148	{
149	if (sve_access_check(s)) {
150	unsigned vsz = vec_full_reg_size(s);
151	gvec_fn(esz, vec_full_reg_offset(s, rd),
152	vec_full_reg_offset(s, rn), vsz, vsz);
153	}
154	return true;
155	}
156
157	/ Invoke a vector expander on three Zregs. /
158	static bool do_vector3_z(DisasContext s, GVecGen3Fn gvec_fn,
159	int esz, int rd, int rn, int rm)
160	{
161	if (sve_access_check(s)) {
162	unsigned vsz = vec_full_reg_size(s);
163	gvec_fn(esz, vec_full_reg_offset(s, rd),
164	vec_full_reg_offset(s, rn),
165	vec_full_reg_offset(s, rm), vsz, vsz);
166	}
167	return true;
168	}
169
170	/ Invoke a vector move on two Zregs. /
171	static bool do_mov_z(DisasContext s, int* rd, int rn)
172	{
173	return do_vector2_z(s, tcg_gen_gvec_mov, `0`, rd, rn);
174	}
175
176	/ Initialize a Zreg with replications of a 64-bit immediate. /
177	static void do_dupi_z(DisasContext s, int* rd, uint64_t word)
178	{
179	unsigned vsz = vec_full_reg_size(s);
180	tcg_gen_gvec_dup64i(vec_full_reg_offset(s, rd), vsz, vsz, word);
181	}
182
183	/ Invoke a vector expander on two Pregs. /
184	static bool do_vector2_p(DisasContext s, GVecGen2Fn gvec_fn,
185	int esz, int rd, int rn)
186	{
187	if (sve_access_check(s)) {
188	unsigned psz = pred_gvec_reg_size(s);
189	gvec_fn(esz, pred_full_reg_offset(s, rd),
190	pred_full_reg_offset(s, rn), psz, psz);
191	}
192	return true;
193	}
194
195	/ Invoke a vector expander on three Pregs. /
196	static bool do_vector3_p(DisasContext s, GVecGen3Fn gvec_fn,
197	int esz, int rd, int rn, int rm)
198	{
199	if (sve_access_check(s)) {
200	unsigned psz = pred_gvec_reg_size(s);
201	gvec_fn(esz, pred_full_reg_offset(s, rd),
202	pred_full_reg_offset(s, rn),
203	pred_full_reg_offset(s, rm), psz, psz);
204	}
205	return true;
206	}
207
208	/ Invoke a vector operation on four Pregs. /
209	static bool do_vecop4_p(DisasContext s, const* GVecGen4 *gvec_op,
210	int rd, int rn, int rm, int rg)
211	{
212	if (sve_access_check(s)) {
213	unsigned psz = pred_gvec_reg_size(s);
214	tcg_gen_gvec_4(pred_full_reg_offset(s, rd),
215	pred_full_reg_offset(s, rn),
216	pred_full_reg_offset(s, rm),
217	pred_full_reg_offset(s, rg),
218	psz, psz, gvec_op);
219	}
220	return true;
221	}
222
223	/ Invoke a vector move on two Pregs. /
224	static bool do_mov_p(DisasContext s, int* rd, int rn)
225	{
226	return do_vector2_p(s, tcg_gen_gvec_mov, `0`, rd, rn);
227	}
228
229	/ Set the cpu flags as per a return from an SVE helper. /
230	static void do_pred_flags(TCGv_i32 t)
231	{
232	tcg_gen_mov_i32(cpu_NF, t);
233	tcg_gen_andi_i32(cpu_ZF, t, `2`);
234	tcg_gen_andi_i32(cpu_CF, t, `1`);
235	tcg_gen_movi_i32(cpu_VF, `0`);
236	}
237
238	/ Subroutines computing the ARM PredTest psuedofunction. /
239	static void do_predtest1(TCGv_i64 d, TCGv_i64 g)
240	{
241	TCGv_i32 t = tcg_temp_new_i32();
242
243	gen_helper_sve_predtest1(t, d, g);
244	do_pred_flags(t);
245	tcg_temp_free_i32(t);
246	}
247
248	static void do_predtest(DisasContext s, int* dofs, int gofs, int words)
249	{
250	TCGv_ptr dptr = tcg_temp_new_ptr();
251	TCGv_ptr gptr = tcg_temp_new_ptr();
252	TCGv_i32 t;
253
254	tcg_gen_addi_ptr(dptr, cpu_env, dofs);
255	tcg_gen_addi_ptr(gptr, cpu_env, gofs);
256	t = tcg_const_i32(words);
257
258	gen_helper_sve_predtest(t, dptr, gptr, t);
259	tcg_temp_free_ptr(dptr);
260	tcg_temp_free_ptr(gptr);
261
262	do_pred_flags(t);
263	tcg_temp_free_i32(t);
264	}
265
/*
 * For each element size, the bits within a predicate word that are active.
 * Entry i has every (1 << i)-th bit set, starting from bit 0.
 */
const uint64_t pred_esz_masks[4] = {
    0xffffffffffffffffull, 0x5555555555555555ull,
    0x1111111111111111ull, 0x0101010101010101ull
};
271
272	/*
273	*** SVE Logical - Unpredicated Group
274	*/
275
276	static bool trans_AND_zzz(DisasContext s, arg_rrr_esz a)
277	{
278	return do_vector3_z(s, tcg_gen_gvec_and, `0`, a->rd, a->rn, a->rm);
279	}
280
281	static bool trans_ORR_zzz(DisasContext s, arg_rrr_esz a)
282	{
283	return do_vector3_z(s, tcg_gen_gvec_or, `0`, a->rd, a->rn, a->rm);
284	}
285
286	static bool trans_EOR_zzz(DisasContext s, arg_rrr_esz a)
287	{
288	return do_vector3_z(s, tcg_gen_gvec_xor, `0`, a->rd, a->rn, a->rm);
289	}
290
291	static bool trans_BIC_zzz(DisasContext s, arg_rrr_esz a)
292	{
293	return do_vector3_z(s, tcg_gen_gvec_andc, `0`, a->rd, a->rn, a->rm);
294	}
295
296	/*
297	*** SVE Integer Arithmetic - Unpredicated Group
298	*/
299
300	static bool trans_ADD_zzz(DisasContext s, arg_rrr_esz a)
301	{
302	return do_vector3_z(s, tcg_gen_gvec_add, a->esz, a->rd, a->rn, a->rm);
303	}
304
305	static bool trans_SUB_zzz(DisasContext s, arg_rrr_esz a)
306	{
307	return do_vector3_z(s, tcg_gen_gvec_sub, a->esz, a->rd, a->rn, a->rm);
308	}
309
310	static bool trans_SQADD_zzz(DisasContext s, arg_rrr_esz a)
311	{
312	return do_vector3_z(s, tcg_gen_gvec_ssadd, a->esz, a->rd, a->rn, a->rm);
313	}
314
315	static bool trans_SQSUB_zzz(DisasContext s, arg_rrr_esz a)
316	{
317	return do_vector3_z(s, tcg_gen_gvec_sssub, a->esz, a->rd, a->rn, a->rm);
318	}
319
320	static bool trans_UQADD_zzz(DisasContext s, arg_rrr_esz a)
321	{
322	return do_vector3_z(s, tcg_gen_gvec_usadd, a->esz, a->rd, a->rn, a->rm);
323	}
324
325	static bool trans_UQSUB_zzz(DisasContext s, arg_rrr_esz a)
326	{
327	return do_vector3_z(s, tcg_gen_gvec_ussub, a->esz, a->rd, a->rn, a->rm);
328	}
329
330	/*
331	*** SVE Integer Arithmetic - Binary Predicated Group
332	*/
333
334	static bool do_zpzz_ool(DisasContext s, arg_rprr_esz a, gen_helper_gvec_4 *fn)
335	{
336	unsigned vsz = vec_full_reg_size(s);
337	if (fn == NULL) {
338	return false;
339	}
340	if (sve_access_check(s)) {
341	tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
342	vec_full_reg_offset(s, a->rn),
343	vec_full_reg_offset(s, a->rm),
344	pred_full_reg_offset(s, a->pg),
345	vsz, vsz, `0`, fn);
346	}
347	return true;
348	}
349
350	/ Select active elememnts from Zn and inactive elements from Zm,*
351	* storing the result in Zd.
352	*/
353	static void do_sel_z(DisasContext s, int* rd, int rn, int rm, int pg, int esz)
354	{
355	static gen_helper_gvec_4 * const fns[`4`] = {
356	gen_helper_sve_sel_zpzz_b, gen_helper_sve_sel_zpzz_h,
357	gen_helper_sve_sel_zpzz_s, gen_helper_sve_sel_zpzz_d
358	};
359	unsigned vsz = vec_full_reg_size(s);
360	tcg_gen_gvec_4_ool(vec_full_reg_offset(s, rd),
361	vec_full_reg_offset(s, rn),
362	vec_full_reg_offset(s, rm),
363	pred_full_reg_offset(s, pg),
364	vsz, vsz, `0`, fns[esz]);
365	}
366
367	#define DO_ZPZZ(NAME, name) \
368	static bool trans_##NAME##_zpzz(DisasContext s, arg_rprr_esz a) \
369	{ \
370	static gen_helper_gvec_4 * const fns[4] = { \
371	gen_helper_sve_##name##_zpzz_b, gen_helper_sve_##name##_zpzz_h, \
372	gen_helper_sve_##name##_zpzz_s, gen_helper_sve_##name##_zpzz_d, \
373	}; \
374	return do_zpzz_ool(s, a, fns[a->esz]); \
375	}
376
377	DO_ZPZZ(AND, and)
378	DO_ZPZZ(EOR, eor)
379	DO_ZPZZ(ORR, orr)
380	DO_ZPZZ(BIC, bic)
381
382	DO_ZPZZ(ADD, add)
383	DO_ZPZZ(SUB, sub)
384
385	DO_ZPZZ(SMAX, smax)
386	DO_ZPZZ(UMAX, umax)
387	DO_ZPZZ(SMIN, smin)
388	DO_ZPZZ(UMIN, umin)
389	DO_ZPZZ(SABD, sabd)
390	DO_ZPZZ(UABD, uabd)
391
392	DO_ZPZZ(MUL, mul)
393	DO_ZPZZ(SMULH, smulh)
394	DO_ZPZZ(UMULH, umulh)
395
396	DO_ZPZZ(ASR, asr)
397	DO_ZPZZ(LSR, lsr)
398	DO_ZPZZ(LSL, lsl)
399
400	static bool trans_SDIV_zpzz(DisasContext s, arg_rprr_esz a)
401	{
402	static gen_helper_gvec_4 * const fns[`4`] = {
403	NULL, NULL, gen_helper_sve_sdiv_zpzz_s, gen_helper_sve_sdiv_zpzz_d
404	};
405	return do_zpzz_ool(s, a, fns[a->esz]);
406	}
407
408	static bool trans_UDIV_zpzz(DisasContext s, arg_rprr_esz a)
409	{
410	static gen_helper_gvec_4 * const fns[`4`] = {
411	NULL, NULL, gen_helper_sve_udiv_zpzz_s, gen_helper_sve_udiv_zpzz_d
412	};
413	return do_zpzz_ool(s, a, fns[a->esz]);
414	}
415
416	static bool trans_SEL_zpzz(DisasContext s, arg_rprr_esz a)
417	{
418	if (sve_access_check(s)) {
419	do_sel_z(s, a->rd, a->rn, a->rm, a->pg, a->esz);
420	}
421	return true;
422	}
423
424	#undef DO_ZPZZ
425
426	/*
427	*** SVE Integer Arithmetic - Unary Predicated Group
428	*/
429
430	static bool do_zpz_ool(DisasContext s, arg_rpr_esz a, gen_helper_gvec_3 *fn)
431	{
432	if (fn == NULL) {
433	return false;
434	}
435	if (sve_access_check(s)) {
436	unsigned vsz = vec_full_reg_size(s);
437	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
438	vec_full_reg_offset(s, a->rn),
439	pred_full_reg_offset(s, a->pg),
440	vsz, vsz, `0`, fn);
441	}
442	return true;
443	}
444
445	#define DO_ZPZ(NAME, name) \
446	static bool trans_##NAME(DisasContext s, arg_rpr_esz a) \
447	{ \
448	static gen_helper_gvec_3 * const fns[4] = { \
449	gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
450	gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
451	}; \
452	return do_zpz_ool(s, a, fns[a->esz]); \
453	}
454
455	DO_ZPZ(CLS, cls)
456	DO_ZPZ(CLZ, clz)
457	DO_ZPZ(CNT_zpz, cnt_zpz)
458	DO_ZPZ(CNOT, cnot)
459	DO_ZPZ(NOT_zpz, not_zpz)
460	DO_ZPZ(ABS, abs)
461	DO_ZPZ(NEG, neg)
462
463	static bool trans_FABS(DisasContext s, arg_rpr_esz a)
464	{
465	static gen_helper_gvec_3 * const fns[`4`] = {
466	NULL,
467	gen_helper_sve_fabs_h,
468	gen_helper_sve_fabs_s,
469	gen_helper_sve_fabs_d
470	};
471	return do_zpz_ool(s, a, fns[a->esz]);
472	}
473
474	static bool trans_FNEG(DisasContext s, arg_rpr_esz a)
475	{
476	static gen_helper_gvec_3 * const fns[`4`] = {
477	NULL,
478	gen_helper_sve_fneg_h,
479	gen_helper_sve_fneg_s,
480	gen_helper_sve_fneg_d
481	};
482	return do_zpz_ool(s, a, fns[a->esz]);
483	}
484
485	static bool trans_SXTB(DisasContext s, arg_rpr_esz a)
486	{
487	static gen_helper_gvec_3 * const fns[`4`] = {
488	NULL,
489	gen_helper_sve_sxtb_h,
490	gen_helper_sve_sxtb_s,
491	gen_helper_sve_sxtb_d
492	};
493	return do_zpz_ool(s, a, fns[a->esz]);
494	}
495
496	static bool trans_UXTB(DisasContext s, arg_rpr_esz a)
497	{
498	static gen_helper_gvec_3 * const fns[`4`] = {
499	NULL,
500	gen_helper_sve_uxtb_h,
501	gen_helper_sve_uxtb_s,
502	gen_helper_sve_uxtb_d
503	};
504	return do_zpz_ool(s, a, fns[a->esz]);
505	}
506
507	static bool trans_SXTH(DisasContext s, arg_rpr_esz a)
508	{
509	static gen_helper_gvec_3 * const fns[`4`] = {
510	NULL, NULL,
511	gen_helper_sve_sxth_s,
512	gen_helper_sve_sxth_d
513	};
514	return do_zpz_ool(s, a, fns[a->esz]);
515	}
516
517	static bool trans_UXTH(DisasContext s, arg_rpr_esz a)
518	{
519	static gen_helper_gvec_3 * const fns[`4`] = {
520	NULL, NULL,
521	gen_helper_sve_uxth_s,
522	gen_helper_sve_uxth_d
523	};
524	return do_zpz_ool(s, a, fns[a->esz]);
525	}
526
527	static bool trans_SXTW(DisasContext s, arg_rpr_esz a)
528	{
529	return do_zpz_ool(s, a, a->esz == `3` ? gen_helper_sve_sxtw_d : NULL);
530	}
531
532	static bool trans_UXTW(DisasContext s, arg_rpr_esz a)
533	{
534	return do_zpz_ool(s, a, a->esz == `3` ? gen_helper_sve_uxtw_d : NULL);
535	}
536
537	#undef DO_ZPZ
538
539	/*
540	*** SVE Integer Reduction Group
541	*/
542
543	typedef void gen_helper_gvec_reduc(TCGv_i64, TCGv_ptr, TCGv_ptr, TCGv_i32);
544	static bool do_vpz_ool(DisasContext s, arg_rpr_esz a,
545	gen_helper_gvec_reduc *fn)
546	{
547	unsigned vsz = vec_full_reg_size(s);
548	TCGv_ptr t_zn, t_pg;
549	TCGv_i32 desc;
550	TCGv_i64 temp;
551
552	if (fn == NULL) {
553	return false;
554	}
555	if (!sve_access_check(s)) {
556	return true;
557	}
558
559	desc = tcg_const_i32(simd_desc(vsz, vsz, `0`));
560	temp = tcg_temp_new_i64();
561	t_zn = tcg_temp_new_ptr();
562	t_pg = tcg_temp_new_ptr();
563
564	tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
565	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
566	fn(temp, t_zn, t_pg, desc);
567	tcg_temp_free_ptr(t_zn);
568	tcg_temp_free_ptr(t_pg);
569	tcg_temp_free_i32(desc);
570
571	write_fp_dreg(s, a->rd, temp);
572	tcg_temp_free_i64(temp);
573	return true;
574	}
575
576	#define DO_VPZ(NAME, name) \
577	static bool trans_##NAME(DisasContext s, arg_rpr_esz a) \
578	{ \
579	static gen_helper_gvec_reduc * const fns[4] = { \
580	gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
581	gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
582	}; \
583	return do_vpz_ool(s, a, fns[a->esz]); \
584	}
585
586	DO_VPZ(ORV, orv)
587	DO_VPZ(ANDV, andv)
588	DO_VPZ(EORV, eorv)
589
590	DO_VPZ(UADDV, uaddv)
591	DO_VPZ(SMAXV, smaxv)
592	DO_VPZ(UMAXV, umaxv)
593	DO_VPZ(SMINV, sminv)
594	DO_VPZ(UMINV, uminv)
595
596	static bool trans_SADDV(DisasContext s, arg_rpr_esz a)
597	{
598	static gen_helper_gvec_reduc * const fns[`4`] = {
599	gen_helper_sve_saddv_b, gen_helper_sve_saddv_h,
600	gen_helper_sve_saddv_s, NULL
601	};
602	return do_vpz_ool(s, a, fns[a->esz]);
603	}
604
605	#undef DO_VPZ
606
607	/*
608	*** SVE Shift by Immediate - Predicated Group
609	*/
610
611	/ Store zero into every active element of Zd. We will use this for two*
612	* and three-operand predicated instructions for which logic dictates a
613	* zero result.
614	*/
615	static bool do_clr_zp(DisasContext s, int* rd, int pg, int esz)
616	{
617	static gen_helper_gvec_2 * const fns[`4`] = {
618	gen_helper_sve_clr_b, gen_helper_sve_clr_h,
619	gen_helper_sve_clr_s, gen_helper_sve_clr_d,
620	};
621	if (sve_access_check(s)) {
622	unsigned vsz = vec_full_reg_size(s);
623	tcg_gen_gvec_2_ool(vec_full_reg_offset(s, rd),
624	pred_full_reg_offset(s, pg),
625	vsz, vsz, `0`, fns[esz]);
626	}
627	return true;
628	}
629
630	/ Copy Zn into Zd, storing zeros into inactive elements. /
631	static void do_movz_zpz(DisasContext s, int* rd, int rn, int pg, int esz)
632	{
633	static gen_helper_gvec_3 * const fns[`4`] = {
634	gen_helper_sve_movz_b, gen_helper_sve_movz_h,
635	gen_helper_sve_movz_s, gen_helper_sve_movz_d,
636	};
637	unsigned vsz = vec_full_reg_size(s);
638	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, rd),
639	vec_full_reg_offset(s, rn),
640	pred_full_reg_offset(s, pg),
641	vsz, vsz, `0`, fns[esz]);
642	}
643
644	static bool do_zpzi_ool(DisasContext s, arg_rpri_esz a,
645	gen_helper_gvec_3 *fn)
646	{
647	if (sve_access_check(s)) {
648	unsigned vsz = vec_full_reg_size(s);
649	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
650	vec_full_reg_offset(s, a->rn),
651	pred_full_reg_offset(s, a->pg),
652	vsz, vsz, a->imm, fn);
653	}
654	return true;
655	}
656
657	static bool trans_ASR_zpzi(DisasContext s, arg_rpri_esz a)
658	{
659	static gen_helper_gvec_3 * const fns[`4`] = {
660	gen_helper_sve_asr_zpzi_b, gen_helper_sve_asr_zpzi_h,
661	gen_helper_sve_asr_zpzi_s, gen_helper_sve_asr_zpzi_d,
662	};
663	if (a->esz < `0`) {
664	/ Invalid tsz encoding -- see tszimm_esz. /
665	return false;
666	}
667	/ Shift by element size is architecturally valid. For*
668	arithmetic right-shift, it's the same as by one less. /*
669	a->imm = MIN(a->imm, (`8` << a->esz) - `1`);
670	return do_zpzi_ool(s, a, fns[a->esz]);
671	}
672
673	static bool trans_LSR_zpzi(DisasContext s, arg_rpri_esz a)
674	{
675	static gen_helper_gvec_3 * const fns[`4`] = {
676	gen_helper_sve_lsr_zpzi_b, gen_helper_sve_lsr_zpzi_h,
677	gen_helper_sve_lsr_zpzi_s, gen_helper_sve_lsr_zpzi_d,
678	};
679	if (a->esz < `0`) {
680	return false;
681	}
682	/ Shift by element size is architecturally valid.*
683	For logical shifts, it is a zeroing operation. /*
684	if (a->imm >= (`8` << a->esz)) {
685	return do_clr_zp(s, a->rd, a->pg, a->esz);
686	} else {
687	return do_zpzi_ool(s, a, fns[a->esz]);
688	}
689	}
690
691	static bool trans_LSL_zpzi(DisasContext s, arg_rpri_esz a)
692	{
693	static gen_helper_gvec_3 * const fns[`4`] = {
694	gen_helper_sve_lsl_zpzi_b, gen_helper_sve_lsl_zpzi_h,
695	gen_helper_sve_lsl_zpzi_s, gen_helper_sve_lsl_zpzi_d,
696	};
697	if (a->esz < `0`) {
698	return false;
699	}
700	/ Shift by element size is architecturally valid.*
701	For logical shifts, it is a zeroing operation. /*
702	if (a->imm >= (`8` << a->esz)) {
703	return do_clr_zp(s, a->rd, a->pg, a->esz);
704	} else {
705	return do_zpzi_ool(s, a, fns[a->esz]);
706	}
707	}
708
709	static bool trans_ASRD(DisasContext s, arg_rpri_esz a)
710	{
711	static gen_helper_gvec_3 * const fns[`4`] = {
712	gen_helper_sve_asrd_b, gen_helper_sve_asrd_h,
713	gen_helper_sve_asrd_s, gen_helper_sve_asrd_d,
714	};
715	if (a->esz < `0`) {
716	return false;
717	}
718	/ Shift by element size is architecturally valid. For arithmetic*
719	right shift for division, it is a zeroing operation. /*
720	if (a->imm >= (`8` << a->esz)) {
721	return do_clr_zp(s, a->rd, a->pg, a->esz);
722	} else {
723	return do_zpzi_ool(s, a, fns[a->esz]);
724	}
725	}
726
727	/*
728	*** SVE Bitwise Shift - Predicated Group
729	*/
730
731	#define DO_ZPZW(NAME, name) \
732	static bool trans_##NAME##_zpzw(DisasContext s, arg_rprr_esz a) \
733	{ \
734	static gen_helper_gvec_4 * const fns[3] = { \
735	gen_helper_sve_##name##_zpzw_b, gen_helper_sve_##name##_zpzw_h, \
736	gen_helper_sve_##name##_zpzw_s, \
737	}; \
738	if (a->esz < 0 \|\| a->esz >= 3) { \
739	return false; \
740	} \
741	return do_zpzz_ool(s, a, fns[a->esz]); \
742	}
743
744	DO_ZPZW(ASR, asr)
745	DO_ZPZW(LSR, lsr)
746	DO_ZPZW(LSL, lsl)
747
748	#undef DO_ZPZW
749
750	/*
751	*** SVE Bitwise Shift - Unpredicated Group
752	*/
753
754	static bool do_shift_imm(DisasContext s, arg_rri_esz a, bool asr,
755	void (gvec_fn)(unsigned*, uint32_t, uint32_t,
756	int64_t, uint32_t, uint32_t))
757	{
758	if (a->esz < `0`) {
759	/ Invalid tsz encoding -- see tszimm_esz. /
760	return false;
761	}
762	if (sve_access_check(s)) {
763	unsigned vsz = vec_full_reg_size(s);
764	/ Shift by element size is architecturally valid. For*
765	arithmetic right-shift, it's the same as by one less.
766	Otherwise it is a zeroing operation. /*
767	if (a->imm >= `8` << a->esz) {
768	if (asr) {
769	a->imm = (`8` << a->esz) - `1`;
770	} else {
771	do_dupi_z(s, a->rd, `0`);
772	return true;
773	}
774	}
775	gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
776	vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
777	}
778	return true;
779	}
780
781	static bool trans_ASR_zzi(DisasContext s, arg_rri_esz a)
782	{
783	return do_shift_imm(s, a, true, tcg_gen_gvec_sari);
784	}
785
786	static bool trans_LSR_zzi(DisasContext s, arg_rri_esz a)
787	{
788	return do_shift_imm(s, a, false, tcg_gen_gvec_shri);
789	}
790
791	static bool trans_LSL_zzi(DisasContext s, arg_rri_esz a)
792	{
793	return do_shift_imm(s, a, false, tcg_gen_gvec_shli);
794	}
795
796	static bool do_zzw_ool(DisasContext s, arg_rrr_esz a, gen_helper_gvec_3 *fn)
797	{
798	if (fn == NULL) {
799	return false;
800	}
801	if (sve_access_check(s)) {
802	unsigned vsz = vec_full_reg_size(s);
803	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
804	vec_full_reg_offset(s, a->rn),
805	vec_full_reg_offset(s, a->rm),
806	vsz, vsz, `0`, fn);
807	}
808	return true;
809	}
810
811	#define DO_ZZW(NAME, name) \
812	static bool trans_##NAME##_zzw(DisasContext s, arg_rrr_esz a) \
813	{ \
814	static gen_helper_gvec_3 * const fns[4] = { \
815	gen_helper_sve_##name##_zzw_b, gen_helper_sve_##name##_zzw_h, \
816	gen_helper_sve_##name##_zzw_s, NULL \
817	}; \
818	return do_zzw_ool(s, a, fns[a->esz]); \
819	}
820
821	DO_ZZW(ASR, asr)
822	DO_ZZW(LSR, lsr)
823	DO_ZZW(LSL, lsl)
824
825	#undef DO_ZZW
826
827	/*
828	*** SVE Integer Multiply-Add Group
829	*/
830
831	static bool do_zpzzz_ool(DisasContext s, arg_rprrr_esz a,
832	gen_helper_gvec_5 *fn)
833	{
834	if (sve_access_check(s)) {
835	unsigned vsz = vec_full_reg_size(s);
836	tcg_gen_gvec_5_ool(vec_full_reg_offset(s, a->rd),
837	vec_full_reg_offset(s, a->ra),
838	vec_full_reg_offset(s, a->rn),
839	vec_full_reg_offset(s, a->rm),
840	pred_full_reg_offset(s, a->pg),
841	vsz, vsz, `0`, fn);
842	}
843	return true;
844	}
845
846	#define DO_ZPZZZ(NAME, name) \
847	static bool trans_##NAME(DisasContext s, arg_rprrr_esz a) \
848	{ \
849	static gen_helper_gvec_5 * const fns[4] = { \
850	gen_helper_sve_##name##_b, gen_helper_sve_##name##_h, \
851	gen_helper_sve_##name##_s, gen_helper_sve_##name##_d, \
852	}; \
853	return do_zpzzz_ool(s, a, fns[a->esz]); \
854	}
855
856	DO_ZPZZZ(MLA, mla)
857	DO_ZPZZZ(MLS, mls)
858
859	#undef DO_ZPZZZ
860
861	/*
862	*** SVE Index Generation Group
863	*/
864
865	static void do_index(DisasContext s, int* esz, int rd,
866	TCGv_i64 start, TCGv_i64 incr)
867	{
868	unsigned vsz = vec_full_reg_size(s);
869	TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, `0`));
870	TCGv_ptr t_zd = tcg_temp_new_ptr();
871
872	tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
873	if (esz == `3`) {
874	gen_helper_sve_index_d(t_zd, start, incr, desc);
875	} else {
876	typedef void index_fn(TCGv_ptr, TCGv_i32, TCGv_i32, TCGv_i32);
877	static index_fn * const fns[`3`] = {
878	gen_helper_sve_index_b,
879	gen_helper_sve_index_h,
880	gen_helper_sve_index_s,
881	};
882	TCGv_i32 s32 = tcg_temp_new_i32();
883	TCGv_i32 i32 = tcg_temp_new_i32();
884
885	tcg_gen_extrl_i64_i32(s32, start);
886	tcg_gen_extrl_i64_i32(i32, incr);
887	fns[esz](t_zd, s32, i32, desc);
888
889	tcg_temp_free_i32(s32);
890	tcg_temp_free_i32(i32);
891	}
892	tcg_temp_free_ptr(t_zd);
893	tcg_temp_free_i32(desc);
894	}
895
896	static bool trans_INDEX_ii(DisasContext s, arg_INDEX_ii a)
897	{
898	if (sve_access_check(s)) {
899	TCGv_i64 start = tcg_const_i64(a->imm1);
900	TCGv_i64 incr = tcg_const_i64(a->imm2);
901	do_index(s, a->esz, a->rd, start, incr);
902	tcg_temp_free_i64(start);
903	tcg_temp_free_i64(incr);
904	}
905	return true;
906	}
907
908	static bool trans_INDEX_ir(DisasContext s, arg_INDEX_ir a)
909	{
910	if (sve_access_check(s)) {
911	TCGv_i64 start = tcg_const_i64(a->imm);
912	TCGv_i64 incr = cpu_reg(s, a->rm);
913	do_index(s, a->esz, a->rd, start, incr);
914	tcg_temp_free_i64(start);
915	}
916	return true;
917	}
918
919	static bool trans_INDEX_ri(DisasContext s, arg_INDEX_ri a)
920	{
921	if (sve_access_check(s)) {
922	TCGv_i64 start = cpu_reg(s, a->rn);
923	TCGv_i64 incr = tcg_const_i64(a->imm);
924	do_index(s, a->esz, a->rd, start, incr);
925	tcg_temp_free_i64(incr);
926	}
927	return true;
928	}
929
930	static bool trans_INDEX_rr(DisasContext s, arg_INDEX_rr a)
931	{
932	if (sve_access_check(s)) {
933	TCGv_i64 start = cpu_reg(s, a->rn);
934	TCGv_i64 incr = cpu_reg(s, a->rm);
935	do_index(s, a->esz, a->rd, start, incr);
936	}
937	return true;
938	}
939
940	/*
941	*** SVE Stack Allocation Group
942	*/
943
944	static bool trans_ADDVL(DisasContext s, arg_ADDVL a)
945	{
946	if (sve_access_check(s)) {
947	TCGv_i64 rd = cpu_reg_sp(s, a->rd);
948	TCGv_i64 rn = cpu_reg_sp(s, a->rn);
949	tcg_gen_addi_i64(rd, rn, a->imm * vec_full_reg_size(s));
950	}
951	return true;
952	}
953
954	static bool trans_ADDPL(DisasContext s, arg_ADDPL a)
955	{
956	if (sve_access_check(s)) {
957	TCGv_i64 rd = cpu_reg_sp(s, a->rd);
958	TCGv_i64 rn = cpu_reg_sp(s, a->rn);
959	tcg_gen_addi_i64(rd, rn, a->imm * pred_full_reg_size(s));
960	}
961	return true;
962	}
963
964	static bool trans_RDVL(DisasContext s, arg_RDVL a)
965	{
966	if (sve_access_check(s)) {
967	TCGv_i64 reg = cpu_reg(s, a->rd);
968	tcg_gen_movi_i64(reg, a->imm * vec_full_reg_size(s));
969	}
970	return true;
971	}
972
973	/*
974	*** SVE Compute Vector Address Group
975	*/
976
977	static bool do_adr(DisasContext s, arg_rrri a, gen_helper_gvec_3 *fn)
978	{
979	if (sve_access_check(s)) {
980	unsigned vsz = vec_full_reg_size(s);
981	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
982	vec_full_reg_offset(s, a->rn),
983	vec_full_reg_offset(s, a->rm),
984	vsz, vsz, a->imm, fn);
985	}
986	return true;
987	}
988
989	static bool trans_ADR_p32(DisasContext s, arg_rrri a)
990	{
991	return do_adr(s, a, gen_helper_sve_adr_p32);
992	}
993
994	static bool trans_ADR_p64(DisasContext s, arg_rrri a)
995	{
996	return do_adr(s, a, gen_helper_sve_adr_p64);
997	}
998
999	static bool trans_ADR_s32(DisasContext s, arg_rrri a)
1000	{
1001	return do_adr(s, a, gen_helper_sve_adr_s32);
1002	}
1003
1004	static bool trans_ADR_u32(DisasContext s, arg_rrri a)
1005	{
1006	return do_adr(s, a, gen_helper_sve_adr_u32);
1007	}
1008
1009	/*
1010	*** SVE Integer Misc - Unpredicated Group
1011	*/
1012
1013	static bool trans_FEXPA(DisasContext s, arg_rr_esz a)
1014	{
1015	static gen_helper_gvec_2 * const fns[`4`] = {
1016	NULL,
1017	gen_helper_sve_fexpa_h,
1018	gen_helper_sve_fexpa_s,
1019	gen_helper_sve_fexpa_d,
1020	};
1021	if (a->esz == `0`) {
1022	return false;
1023	}
1024	if (sve_access_check(s)) {
1025	unsigned vsz = vec_full_reg_size(s);
1026	tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
1027	vec_full_reg_offset(s, a->rn),
1028	vsz, vsz, `0`, fns[a->esz]);
1029	}
1030	return true;
1031	}
1032
1033	static bool trans_FTSSEL(DisasContext s, arg_rrr_esz a)
1034	{
1035	static gen_helper_gvec_3 * const fns[`4`] = {
1036	NULL,
1037	gen_helper_sve_ftssel_h,
1038	gen_helper_sve_ftssel_s,
1039	gen_helper_sve_ftssel_d,
1040	};
1041	if (a->esz == `0`) {
1042	return false;
1043	}
1044	if (sve_access_check(s)) {
1045	unsigned vsz = vec_full_reg_size(s);
1046	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
1047	vec_full_reg_offset(s, a->rn),
1048	vec_full_reg_offset(s, a->rm),
1049	vsz, vsz, `0`, fns[a->esz]);
1050	}
1051	return true;
1052	}
1053
1054	/*
1055	*** SVE Predicate Logical Operations Group
1056	*/
1057
1058	static bool do_pppp_flags(DisasContext s, arg_rprr_s a,
1059	const GVecGen4 *gvec_op)
1060	{
1061	if (!sve_access_check(s)) {
1062	return true;
1063	}
1064
1065	unsigned psz = pred_gvec_reg_size(s);
1066	int dofs = pred_full_reg_offset(s, a->rd);
1067	int nofs = pred_full_reg_offset(s, a->rn);
1068	int mofs = pred_full_reg_offset(s, a->rm);
1069	int gofs = pred_full_reg_offset(s, a->pg);
1070
1071	if (psz == `8`) {
1072	/ Do the operation and the flags generation in temps. /
1073	TCGv_i64 pd = tcg_temp_new_i64();
1074	TCGv_i64 pn = tcg_temp_new_i64();
1075	TCGv_i64 pm = tcg_temp_new_i64();
1076	TCGv_i64 pg = tcg_temp_new_i64();
1077
1078	tcg_gen_ld_i64(pn, cpu_env, nofs);
1079	tcg_gen_ld_i64(pm, cpu_env, mofs);
1080	tcg_gen_ld_i64(pg, cpu_env, gofs);
1081
1082	gvec_op->fni8(pd, pn, pm, pg);
1083	tcg_gen_st_i64(pd, cpu_env, dofs);
1084
1085	do_predtest1(pd, pg);
1086
1087	tcg_temp_free_i64(pd);
1088	tcg_temp_free_i64(pn);
1089	tcg_temp_free_i64(pm);
1090	tcg_temp_free_i64(pg);
1091	} else {
1092	/ The operation and flags generation is large. The computation*
1093	* of the flags depends on the original contents of the guarding
1094	* predicate. If the destination overwrites the guarding predicate,
1095	* then the easiest way to get this right is to save a copy.
1096	*/
1097	int tofs = gofs;
1098	if (a->rd == a->pg) {
1099	tofs = offsetof(CPUARMState, vfp.preg_tmp);
1100	tcg_gen_gvec_mov(`0`, tofs, gofs, psz, psz);
1101	}
1102
1103	tcg_gen_gvec_4(dofs, nofs, mofs, gofs, psz, psz, gvec_op);
1104	do_predtest(s, dofs, tofs, psz / `8`);
1105	}
1106	return true;
1107	}
1108
1109	static void gen_and_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1110	{
1111	tcg_gen_and_i64(pd, pn, pm);
1112	tcg_gen_and_i64(pd, pd, pg);
1113	}
1114
1115	static void gen_and_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1116	TCGv_vec pm, TCGv_vec pg)
1117	{
1118	tcg_gen_and_vec(vece, pd, pn, pm);
1119	tcg_gen_and_vec(vece, pd, pd, pg);
1120	}
1121
1122	static bool trans_AND_pppp(DisasContext s, arg_rprr_s a)
1123	{
1124	static const GVecGen4 op = {
1125	.fni8 = gen_and_pg_i64,
1126	.fniv = gen_and_pg_vec,
1127	.fno = gen_helper_sve_and_pppp,
1128	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1129	};
1130	if (a->s) {
1131	return do_pppp_flags(s, a, &op);
1132	} else if (a->rn == a->rm) {
1133	if (a->pg == a->rn) {
1134	return do_mov_p(s, a->rd, a->rn);
1135	} else {
1136	return do_vector3_p(s, tcg_gen_gvec_and, `0`, a->rd, a->rn, a->pg);
1137	}
1138	} else if (a->pg == a->rn \|\| a->pg == a->rm) {
1139	return do_vector3_p(s, tcg_gen_gvec_and, `0`, a->rd, a->rn, a->rm);
1140	} else {
1141	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1142	}
1143	}
1144
1145	static void gen_bic_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1146	{
1147	tcg_gen_andc_i64(pd, pn, pm);
1148	tcg_gen_and_i64(pd, pd, pg);
1149	}
1150
1151	static void gen_bic_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1152	TCGv_vec pm, TCGv_vec pg)
1153	{
1154	tcg_gen_andc_vec(vece, pd, pn, pm);
1155	tcg_gen_and_vec(vece, pd, pd, pg);
1156	}
1157
1158	static bool trans_BIC_pppp(DisasContext s, arg_rprr_s a)
1159	{
1160	static const GVecGen4 op = {
1161	.fni8 = gen_bic_pg_i64,
1162	.fniv = gen_bic_pg_vec,
1163	.fno = gen_helper_sve_bic_pppp,
1164	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1165	};
1166	if (a->s) {
1167	return do_pppp_flags(s, a, &op);
1168	} else if (a->pg == a->rn) {
1169	return do_vector3_p(s, tcg_gen_gvec_andc, `0`, a->rd, a->rn, a->rm);
1170	} else {
1171	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1172	}
1173	}
1174
1175	static void gen_eor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1176	{
1177	tcg_gen_xor_i64(pd, pn, pm);
1178	tcg_gen_and_i64(pd, pd, pg);
1179	}
1180
1181	static void gen_eor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1182	TCGv_vec pm, TCGv_vec pg)
1183	{
1184	tcg_gen_xor_vec(vece, pd, pn, pm);
1185	tcg_gen_and_vec(vece, pd, pd, pg);
1186	}
1187
1188	static bool trans_EOR_pppp(DisasContext s, arg_rprr_s a)
1189	{
1190	static const GVecGen4 op = {
1191	.fni8 = gen_eor_pg_i64,
1192	.fniv = gen_eor_pg_vec,
1193	.fno = gen_helper_sve_eor_pppp,
1194	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1195	};
1196	if (a->s) {
1197	return do_pppp_flags(s, a, &op);
1198	} else {
1199	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1200	}
1201	}
1202
1203	static void gen_sel_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1204	{
1205	tcg_gen_and_i64(pn, pn, pg);
1206	tcg_gen_andc_i64(pm, pm, pg);
1207	tcg_gen_or_i64(pd, pn, pm);
1208	}
1209
1210	static void gen_sel_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1211	TCGv_vec pm, TCGv_vec pg)
1212	{
1213	tcg_gen_and_vec(vece, pn, pn, pg);
1214	tcg_gen_andc_vec(vece, pm, pm, pg);
1215	tcg_gen_or_vec(vece, pd, pn, pm);
1216	}
1217
1218	static bool trans_SEL_pppp(DisasContext s, arg_rprr_s a)
1219	{
1220	static const GVecGen4 op = {
1221	.fni8 = gen_sel_pg_i64,
1222	.fniv = gen_sel_pg_vec,
1223	.fno = gen_helper_sve_sel_pppp,
1224	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1225	};
1226	if (a->s) {
1227	return false;
1228	} else {
1229	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1230	}
1231	}
1232
1233	static void gen_orr_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1234	{
1235	tcg_gen_or_i64(pd, pn, pm);
1236	tcg_gen_and_i64(pd, pd, pg);
1237	}
1238
1239	static void gen_orr_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1240	TCGv_vec pm, TCGv_vec pg)
1241	{
1242	tcg_gen_or_vec(vece, pd, pn, pm);
1243	tcg_gen_and_vec(vece, pd, pd, pg);
1244	}
1245
1246	static bool trans_ORR_pppp(DisasContext s, arg_rprr_s a)
1247	{
1248	static const GVecGen4 op = {
1249	.fni8 = gen_orr_pg_i64,
1250	.fniv = gen_orr_pg_vec,
1251	.fno = gen_helper_sve_orr_pppp,
1252	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1253	};
1254	if (a->s) {
1255	return do_pppp_flags(s, a, &op);
1256	} else if (a->pg == a->rn && a->rn == a->rm) {
1257	return do_mov_p(s, a->rd, a->rn);
1258	} else {
1259	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1260	}
1261	}
1262
1263	static void gen_orn_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1264	{
1265	tcg_gen_orc_i64(pd, pn, pm);
1266	tcg_gen_and_i64(pd, pd, pg);
1267	}
1268
1269	static void gen_orn_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1270	TCGv_vec pm, TCGv_vec pg)
1271	{
1272	tcg_gen_orc_vec(vece, pd, pn, pm);
1273	tcg_gen_and_vec(vece, pd, pd, pg);
1274	}
1275
1276	static bool trans_ORN_pppp(DisasContext s, arg_rprr_s a)
1277	{
1278	static const GVecGen4 op = {
1279	.fni8 = gen_orn_pg_i64,
1280	.fniv = gen_orn_pg_vec,
1281	.fno = gen_helper_sve_orn_pppp,
1282	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1283	};
1284	if (a->s) {
1285	return do_pppp_flags(s, a, &op);
1286	} else {
1287	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1288	}
1289	}
1290
1291	static void gen_nor_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1292	{
1293	tcg_gen_or_i64(pd, pn, pm);
1294	tcg_gen_andc_i64(pd, pg, pd);
1295	}
1296
1297	static void gen_nor_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1298	TCGv_vec pm, TCGv_vec pg)
1299	{
1300	tcg_gen_or_vec(vece, pd, pn, pm);
1301	tcg_gen_andc_vec(vece, pd, pg, pd);
1302	}
1303
1304	static bool trans_NOR_pppp(DisasContext s, arg_rprr_s a)
1305	{
1306	static const GVecGen4 op = {
1307	.fni8 = gen_nor_pg_i64,
1308	.fniv = gen_nor_pg_vec,
1309	.fno = gen_helper_sve_nor_pppp,
1310	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1311	};
1312	if (a->s) {
1313	return do_pppp_flags(s, a, &op);
1314	} else {
1315	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1316	}
1317	}
1318
1319	static void gen_nand_pg_i64(TCGv_i64 pd, TCGv_i64 pn, TCGv_i64 pm, TCGv_i64 pg)
1320	{
1321	tcg_gen_and_i64(pd, pn, pm);
1322	tcg_gen_andc_i64(pd, pg, pd);
1323	}
1324
1325	static void gen_nand_pg_vec(unsigned vece, TCGv_vec pd, TCGv_vec pn,
1326	TCGv_vec pm, TCGv_vec pg)
1327	{
1328	tcg_gen_and_vec(vece, pd, pn, pm);
1329	tcg_gen_andc_vec(vece, pd, pg, pd);
1330	}
1331
1332	static bool trans_NAND_pppp(DisasContext s, arg_rprr_s a)
1333	{
1334	static const GVecGen4 op = {
1335	.fni8 = gen_nand_pg_i64,
1336	.fniv = gen_nand_pg_vec,
1337	.fno = gen_helper_sve_nand_pppp,
1338	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
1339	};
1340	if (a->s) {
1341	return do_pppp_flags(s, a, &op);
1342	} else {
1343	return do_vecop4_p(s, &op, a->rd, a->rn, a->rm, a->pg);
1344	}
1345	}
1346
1347	/*
1348	*** SVE Predicate Misc Group
1349	*/
1350
1351	static bool trans_PTEST(DisasContext s, arg_PTEST a)
1352	{
1353	if (sve_access_check(s)) {
1354	int nofs = pred_full_reg_offset(s, a->rn);
1355	int gofs = pred_full_reg_offset(s, a->pg);
1356	int words = DIV_ROUND_UP(pred_full_reg_size(s), `8`);
1357
1358	if (words == `1`) {
1359	TCGv_i64 pn = tcg_temp_new_i64();
1360	TCGv_i64 pg = tcg_temp_new_i64();
1361
1362	tcg_gen_ld_i64(pn, cpu_env, nofs);
1363	tcg_gen_ld_i64(pg, cpu_env, gofs);
1364	do_predtest1(pn, pg);
1365
1366	tcg_temp_free_i64(pn);
1367	tcg_temp_free_i64(pg);
1368	} else {
1369	do_predtest(s, nofs, gofs, words);
1370	}
1371	}
1372	return true;
1373	}
1374
/*
 * See the ARM pseudocode DecodePredCount.
 *
 * @fullsz:  size value that is shifted right by @esz to give the number
 *           of elements
 * @pattern: 5-bit predicate constraint pattern
 * @esz:     log2 of the element size
 *
 * Returns the number of elements selected by @pattern, or 0 when the
 * pattern is not one of the handled values or asks for more elements
 * than are available.
 */
static unsigned decode_pred_count(unsigned fullsz, int pattern, int esz)
{
    unsigned elements = fullsz >> esz;
    unsigned bound;

    if (pattern >= 0x1 && pattern <= 0x8) {
        /* VL1 .. VL8: the pattern value is the element count itself. */
        bound = pattern;
    } else if (pattern >= 0x9 && pattern <= 0xd) {
        /* VL16, VL32, VL64, VL128, VL256 */
        bound = 16 << (pattern - 9);
    } else {
        switch (pattern) {
        case 0x0:  /* POW2 */
            return pow2floor(elements);
        case 0x1d: /* MUL4 */
            return elements - elements % 4;
        case 0x1e: /* MUL3 */
            return elements - elements % 3;
        case 0x1f: /* ALL */
            return elements;
        default:   /* #uimm5 */
            return 0;
        }
    }

    /* A fixed count applies only if that many elements exist. */
    return elements >= bound ? bound : 0;
}
1412
1413	/ This handles all of the predicate initialization instructions,*
1414	* PTRUE, PFALSE, SETFFR. For PFALSE, we will have set PAT == 32
1415	* so that decode_pred_count returns 0. For SETFFR, we will have
1416	* set RD == 16 == FFR.
1417	*/
1418	static bool do_predset(DisasContext s, int* esz, int rd, int pat, bool setflag)
1419	{
1420	if (!sve_access_check(s)) {
1421	return true;
1422	}
1423
1424	unsigned fullsz = vec_full_reg_size(s);
1425	unsigned ofs = pred_full_reg_offset(s, rd);
1426	unsigned numelem, setsz, i;
1427	uint64_t word, lastword;
1428	TCGv_i64 t;
1429
1430	numelem = decode_pred_count(fullsz, pat, esz);
1431
1432	/ Determine what we must store into each bit, and how many. /
1433	if (numelem == `0`) {
1434	lastword = word = `0`;
1435	setsz = fullsz;
1436	} else {
1437	setsz = numelem << esz;
1438	lastword = word = pred_esz_masks[esz];
1439	if (setsz % `64`) {
1440	lastword &= MAKE_64BIT_MASK(`0`, setsz % `64`);
1441	}
1442	}
1443
1444	t = tcg_temp_new_i64();
1445	if (fullsz <= `64`) {
1446	tcg_gen_movi_i64(t, lastword);
1447	tcg_gen_st_i64(t, cpu_env, ofs);
1448	goto done;
1449	}
1450
1451	if (word == lastword) {
1452	unsigned maxsz = size_for_gvec(fullsz / `8`);
1453	unsigned oprsz = size_for_gvec(setsz / `8`);
1454
1455	if (oprsz * `8` == setsz) {
1456	tcg_gen_gvec_dup64i(ofs, oprsz, maxsz, word);
1457	goto done;
1458	}
1459	}
1460
1461	setsz /= `8`;
1462	fullsz /= `8`;
1463
1464	tcg_gen_movi_i64(t, word);
1465	for (i = `0`; i < QEMU_ALIGN_DOWN(setsz, `8`); i += `8`) {
1466	tcg_gen_st_i64(t, cpu_env, ofs + i);
1467	}
1468	if (lastword != word) {
1469	tcg_gen_movi_i64(t, lastword);
1470	tcg_gen_st_i64(t, cpu_env, ofs + i);
1471	i += `8`;
1472	}
1473	if (i < fullsz) {
1474	tcg_gen_movi_i64(t, `0`);
1475	for (; i < fullsz; i += `8`) {
1476	tcg_gen_st_i64(t, cpu_env, ofs + i);
1477	}
1478	}
1479
1480	done:
1481	tcg_temp_free_i64(t);
1482
1483	/ PTRUES /
1484	if (setflag) {
1485	tcg_gen_movi_i32(cpu_NF, -(word != `0`));
1486	tcg_gen_movi_i32(cpu_CF, word == `0`);
1487	tcg_gen_movi_i32(cpu_VF, `0`);
1488	tcg_gen_mov_i32(cpu_ZF, cpu_NF);
1489	}
1490	return true;
1491	}
1492
1493	static bool trans_PTRUE(DisasContext s, arg_PTRUE a)
1494	{
1495	return do_predset(s, a->esz, a->rd, a->pat, a->s);
1496	}
1497
1498	static bool trans_SETFFR(DisasContext s, arg_SETFFR a)
1499	{
1500	/ Note pat == 31 is #all, to set all elements. /
1501	return do_predset(s, `0`, FFR_PRED_NUM, `31`, false);
1502	}
1503
1504	static bool trans_PFALSE(DisasContext s, arg_PFALSE a)
1505	{
1506	/ Note pat == 32 is #unimp, to set no elements. /
1507	return do_predset(s, `0`, a->rd, `32`, false);
1508	}
1509
1510	static bool trans_RDFFR_p(DisasContext s, arg_RDFFR_p a)
1511	{
1512	/ The path through do_pppp_flags is complicated enough to want to avoid*
1513	* duplication. Frob the arguments into the form of a predicated AND.
1514	*/
1515	arg_rprr_s alt_a = {
1516	.rd = a->rd, .pg = a->pg, .s = a->s,
1517	.rn = FFR_PRED_NUM, .rm = FFR_PRED_NUM,
1518	};
1519	return trans_AND_pppp(s, &alt_a);
1520	}
1521
1522	static bool trans_RDFFR(DisasContext s, arg_RDFFR a)
1523	{
1524	return do_mov_p(s, a->rd, FFR_PRED_NUM);
1525	}
1526
1527	static bool trans_WRFFR(DisasContext s, arg_WRFFR a)
1528	{
1529	return do_mov_p(s, FFR_PRED_NUM, a->rn);
1530	}
1531
1532	static bool do_pfirst_pnext(DisasContext s, arg_rr_esz a,
1533	void (*gen_fn)(TCGv_i32, TCGv_ptr,
1534	TCGv_ptr, TCGv_i32))
1535	{
1536	if (!sve_access_check(s)) {
1537	return true;
1538	}
1539
1540	TCGv_ptr t_pd = tcg_temp_new_ptr();
1541	TCGv_ptr t_pg = tcg_temp_new_ptr();
1542	TCGv_i32 t;
1543	unsigned desc;
1544
1545	desc = DIV_ROUND_UP(pred_full_reg_size(s), `8`);
1546	desc = deposit32(desc, SIMD_DATA_SHIFT, `2`, a->esz);
1547
1548	tcg_gen_addi_ptr(t_pd, cpu_env, pred_full_reg_offset(s, a->rd));
1549	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->rn));
1550	t = tcg_const_i32(desc);
1551
1552	gen_fn(t, t_pd, t_pg, t);
1553	tcg_temp_free_ptr(t_pd);
1554	tcg_temp_free_ptr(t_pg);
1555
1556	do_pred_flags(t);
1557	tcg_temp_free_i32(t);
1558	return true;
1559	}
1560
1561	static bool trans_PFIRST(DisasContext s, arg_rr_esz a)
1562	{
1563	return do_pfirst_pnext(s, a, gen_helper_sve_pfirst);
1564	}
1565
1566	static bool trans_PNEXT(DisasContext s, arg_rr_esz a)
1567	{
1568	return do_pfirst_pnext(s, a, gen_helper_sve_pnext);
1569	}
1570
1571	/*
1572	*** SVE Element Count Group
1573	*/
1574
1575	/ Perform an inline saturating addition of a 32-bit value within*
1576	* a 64-bit register. The second operand is known to be positive,
1577	* which halves the comparisions we must perform to bound the result.
1578	*/
1579	static void do_sat_addsub_32(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1580	{
1581	int64_t ibound;
1582	TCGv_i64 bound;
1583	TCGCond cond;
1584
1585	/ Use normal 64-bit arithmetic to detect 32-bit overflow. /
1586	if (u) {
1587	tcg_gen_ext32u_i64(reg, reg);
1588	} else {
1589	tcg_gen_ext32s_i64(reg, reg);
1590	}
1591	if (d) {
1592	tcg_gen_sub_i64(reg, reg, val);
1593	ibound = (u ? `0` : INT32_MIN);
1594	cond = TCG_COND_LT;
1595	} else {
1596	tcg_gen_add_i64(reg, reg, val);
1597	ibound = (u ? UINT32_MAX : INT32_MAX);
1598	cond = TCG_COND_GT;
1599	}
1600	bound = tcg_const_i64(ibound);
1601	tcg_gen_movcond_i64(cond, reg, reg, bound, bound, reg);
1602	tcg_temp_free_i64(bound);
1603	}
1604
1605	/ Similarly with 64-bit values. /
1606	static void do_sat_addsub_64(TCGv_i64 reg, TCGv_i64 val, bool u, bool d)
1607	{
1608	TCGv_i64 t0 = tcg_temp_new_i64();
1609	TCGv_i64 t1 = tcg_temp_new_i64();
1610	TCGv_i64 t2;
1611
1612	if (u) {
1613	if (d) {
1614	tcg_gen_sub_i64(t0, reg, val);
1615	tcg_gen_movi_i64(t1, `0`);
1616	tcg_gen_movcond_i64(TCG_COND_LTU, reg, reg, val, t1, t0);
1617	} else {
1618	tcg_gen_add_i64(t0, reg, val);
1619	tcg_gen_movi_i64(t1, -`1`);
1620	tcg_gen_movcond_i64(TCG_COND_LTU, reg, t0, reg, t1, t0);
1621	}
1622	} else {
1623	if (d) {
1624	/ Detect signed overflow for subtraction. /
1625	tcg_gen_xor_i64(t0, reg, val);
1626	tcg_gen_sub_i64(t1, reg, val);
1627	tcg_gen_xor_i64(reg, reg, t1);
1628	tcg_gen_and_i64(t0, t0, reg);
1629
1630	/ Bound the result. /
1631	tcg_gen_movi_i64(reg, INT64_MIN);
1632	t2 = tcg_const_i64(`0`);
1633	tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, reg, t1);
1634	} else {
1635	/ Detect signed overflow for addition. /
1636	tcg_gen_xor_i64(t0, reg, val);
1637	tcg_gen_add_i64(reg, reg, val);
1638	tcg_gen_xor_i64(t1, reg, val);
1639	tcg_gen_andc_i64(t0, t1, t0);
1640
1641	/ Bound the result. /
1642	tcg_gen_movi_i64(t1, INT64_MAX);
1643	t2 = tcg_const_i64(`0`);
1644	tcg_gen_movcond_i64(TCG_COND_LT, reg, t0, t2, t1, reg);
1645	}
1646	tcg_temp_free_i64(t2);
1647	}
1648	tcg_temp_free_i64(t0);
1649	tcg_temp_free_i64(t1);
1650	}
1651
1652	/ Similarly with a vector and a scalar operand. /
1653	static void do_sat_addsub_vec(DisasContext s, int* esz, int rd, int rn,
1654	TCGv_i64 val, bool u, bool d)
1655	{
1656	unsigned vsz = vec_full_reg_size(s);
1657	TCGv_ptr dptr, nptr;
1658	TCGv_i32 t32, desc;
1659	TCGv_i64 t64;
1660
1661	dptr = tcg_temp_new_ptr();
1662	nptr = tcg_temp_new_ptr();
1663	tcg_gen_addi_ptr(dptr, cpu_env, vec_full_reg_offset(s, rd));
1664	tcg_gen_addi_ptr(nptr, cpu_env, vec_full_reg_offset(s, rn));
1665	desc = tcg_const_i32(simd_desc(vsz, vsz, `0`));
1666
1667	switch (esz) {
1668	case MO_8:
1669	t32 = tcg_temp_new_i32();
1670	tcg_gen_extrl_i64_i32(t32, val);
1671	if (d) {
1672	tcg_gen_neg_i32(t32, t32);
1673	}
1674	if (u) {
1675	gen_helper_sve_uqaddi_b(dptr, nptr, t32, desc);
1676	} else {
1677	gen_helper_sve_sqaddi_b(dptr, nptr, t32, desc);
1678	}
1679	tcg_temp_free_i32(t32);
1680	break;
1681
1682	case MO_16:
1683	t32 = tcg_temp_new_i32();
1684	tcg_gen_extrl_i64_i32(t32, val);
1685	if (d) {
1686	tcg_gen_neg_i32(t32, t32);
1687	}
1688	if (u) {
1689	gen_helper_sve_uqaddi_h(dptr, nptr, t32, desc);
1690	} else {
1691	gen_helper_sve_sqaddi_h(dptr, nptr, t32, desc);
1692	}
1693	tcg_temp_free_i32(t32);
1694	break;
1695
1696	case MO_32:
1697	t64 = tcg_temp_new_i64();
1698	if (d) {
1699	tcg_gen_neg_i64(t64, val);
1700	} else {
1701	tcg_gen_mov_i64(t64, val);
1702	}
1703	if (u) {
1704	gen_helper_sve_uqaddi_s(dptr, nptr, t64, desc);
1705	} else {
1706	gen_helper_sve_sqaddi_s(dptr, nptr, t64, desc);
1707	}
1708	tcg_temp_free_i64(t64);
1709	break;
1710
1711	case MO_64:
1712	if (u) {
1713	if (d) {
1714	gen_helper_sve_uqsubi_d(dptr, nptr, val, desc);
1715	} else {
1716	gen_helper_sve_uqaddi_d(dptr, nptr, val, desc);
1717	}
1718	} else if (d) {
1719	t64 = tcg_temp_new_i64();
1720	tcg_gen_neg_i64(t64, val);
1721	gen_helper_sve_sqaddi_d(dptr, nptr, t64, desc);
1722	tcg_temp_free_i64(t64);
1723	} else {
1724	gen_helper_sve_sqaddi_d(dptr, nptr, val, desc);
1725	}
1726	break;
1727
1728	default:
1729	g_assert_not_reached();
1730	}
1731
1732	tcg_temp_free_ptr(dptr);
1733	tcg_temp_free_ptr(nptr);
1734	tcg_temp_free_i32(desc);
1735	}
1736
1737	static bool trans_CNT_r(DisasContext s, arg_CNT_r a)
1738	{
1739	if (sve_access_check(s)) {
1740	unsigned fullsz = vec_full_reg_size(s);
1741	unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1742	tcg_gen_movi_i64(cpu_reg(s, a->rd), numelem * a->imm);
1743	}
1744	return true;
1745	}
1746
1747	static bool trans_INCDEC_r(DisasContext s, arg_incdec_cnt a)
1748	{
1749	if (sve_access_check(s)) {
1750	unsigned fullsz = vec_full_reg_size(s);
1751	unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1752	int inc = numelem * a->imm * (a->d ? -`1` : `1`);
1753	TCGv_i64 reg = cpu_reg(s, a->rd);
1754
1755	tcg_gen_addi_i64(reg, reg, inc);
1756	}
1757	return true;
1758	}
1759
1760	static bool trans_SINCDEC_r_32(DisasContext s, arg_incdec_cnt a)
1761	{
1762	if (!sve_access_check(s)) {
1763	return true;
1764	}
1765
1766	unsigned fullsz = vec_full_reg_size(s);
1767	unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1768	int inc = numelem * a->imm;
1769	TCGv_i64 reg = cpu_reg(s, a->rd);
1770
1771	/ Use normal 64-bit arithmetic to detect 32-bit overflow. /
1772	if (inc == `0`) {
1773	if (a->u) {
1774	tcg_gen_ext32u_i64(reg, reg);
1775	} else {
1776	tcg_gen_ext32s_i64(reg, reg);
1777	}
1778	} else {
1779	TCGv_i64 t = tcg_const_i64(inc);
1780	do_sat_addsub_32(reg, t, a->u, a->d);
1781	tcg_temp_free_i64(t);
1782	}
1783	return true;
1784	}
1785
1786	static bool trans_SINCDEC_r_64(DisasContext s, arg_incdec_cnt a)
1787	{
1788	if (!sve_access_check(s)) {
1789	return true;
1790	}
1791
1792	unsigned fullsz = vec_full_reg_size(s);
1793	unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1794	int inc = numelem * a->imm;
1795	TCGv_i64 reg = cpu_reg(s, a->rd);
1796
1797	if (inc != `0`) {
1798	TCGv_i64 t = tcg_const_i64(inc);
1799	do_sat_addsub_64(reg, t, a->u, a->d);
1800	tcg_temp_free_i64(t);
1801	}
1802	return true;
1803	}
1804
1805	static bool trans_INCDEC_v(DisasContext s, arg_incdec2_cnt a)
1806	{
1807	if (a->esz == `0`) {
1808	return false;
1809	}
1810
1811	unsigned fullsz = vec_full_reg_size(s);
1812	unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1813	int inc = numelem * a->imm;
1814
1815	if (inc != `0`) {
1816	if (sve_access_check(s)) {
1817	TCGv_i64 t = tcg_const_i64(a->d ? -inc : inc);
1818	tcg_gen_gvec_adds(a->esz, vec_full_reg_offset(s, a->rd),
1819	vec_full_reg_offset(s, a->rn),
1820	t, fullsz, fullsz);
1821	tcg_temp_free_i64(t);
1822	}
1823	} else {
1824	do_mov_z(s, a->rd, a->rn);
1825	}
1826	return true;
1827	}
1828
1829	static bool trans_SINCDEC_v(DisasContext s, arg_incdec2_cnt a)
1830	{
1831	if (a->esz == `0`) {
1832	return false;
1833	}
1834
1835	unsigned fullsz = vec_full_reg_size(s);
1836	unsigned numelem = decode_pred_count(fullsz, a->pat, a->esz);
1837	int inc = numelem * a->imm;
1838
1839	if (inc != `0`) {
1840	if (sve_access_check(s)) {
1841	TCGv_i64 t = tcg_const_i64(inc);
1842	do_sat_addsub_vec(s, a->esz, a->rd, a->rn, t, a->u, a->d);
1843	tcg_temp_free_i64(t);
1844	}
1845	} else {
1846	do_mov_z(s, a->rd, a->rn);
1847	}
1848	return true;
1849	}
1850
1851	/*
1852	*** SVE Bitwise Immediate Group
1853	*/
1854
1855	static bool do_zz_dbm(DisasContext s, arg_rr_dbm a, GVecGen2iFn *gvec_fn)
1856	{
1857	uint64_t imm;
1858	if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, `12`, `1`),
1859	extract32(a->dbm, `0`, `6`),
1860	extract32(a->dbm, `6`, `6`))) {
1861	return false;
1862	}
1863	if (sve_access_check(s)) {
1864	unsigned vsz = vec_full_reg_size(s);
1865	gvec_fn(MO_64, vec_full_reg_offset(s, a->rd),
1866	vec_full_reg_offset(s, a->rn), imm, vsz, vsz);
1867	}
1868	return true;
1869	}
1870
1871	static bool trans_AND_zzi(DisasContext s, arg_rr_dbm a)
1872	{
1873	return do_zz_dbm(s, a, tcg_gen_gvec_andi);
1874	}
1875
1876	static bool trans_ORR_zzi(DisasContext s, arg_rr_dbm a)
1877	{
1878	return do_zz_dbm(s, a, tcg_gen_gvec_ori);
1879	}
1880
1881	static bool trans_EOR_zzi(DisasContext s, arg_rr_dbm a)
1882	{
1883	return do_zz_dbm(s, a, tcg_gen_gvec_xori);
1884	}
1885
1886	static bool trans_DUPM(DisasContext s, arg_DUPM a)
1887	{
1888	uint64_t imm;
1889	if (!logic_imm_decode_wmask(&imm, extract32(a->dbm, `12`, `1`),
1890	extract32(a->dbm, `0`, `6`),
1891	extract32(a->dbm, `6`, `6`))) {
1892	return false;
1893	}
1894	if (sve_access_check(s)) {
1895	do_dupi_z(s, a->rd, imm);
1896	}
1897	return true;
1898	}
1899
1900	/*
1901	*** SVE Integer Wide Immediate - Predicated Group
1902	*/
1903
1904	/ Implement all merging copies. This is used for CPY (immediate),*
1905	* FCPY, CPY (scalar), CPY (SIMD&FP scalar).
1906	*/
1907	static void do_cpy_m(DisasContext s, int* esz, int rd, int rn, int pg,
1908	TCGv_i64 val)
1909	{
1910	typedef void gen_cpy(TCGv_ptr, TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
1911	static gen_cpy * const fns[`4`] = {
1912	gen_helper_sve_cpy_m_b, gen_helper_sve_cpy_m_h,
1913	gen_helper_sve_cpy_m_s, gen_helper_sve_cpy_m_d,
1914	};
1915	unsigned vsz = vec_full_reg_size(s);
1916	TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, `0`));
1917	TCGv_ptr t_zd = tcg_temp_new_ptr();
1918	TCGv_ptr t_zn = tcg_temp_new_ptr();
1919	TCGv_ptr t_pg = tcg_temp_new_ptr();
1920
1921	tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, rd));
1922	tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, rn));
1923	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
1924
1925	fns[esz](t_zd, t_zn, t_pg, val, desc);
1926
1927	tcg_temp_free_ptr(t_zd);
1928	tcg_temp_free_ptr(t_zn);
1929	tcg_temp_free_ptr(t_pg);
1930	tcg_temp_free_i32(desc);
1931	}
1932
1933	static bool trans_FCPY(DisasContext s, arg_FCPY a)
1934	{
1935	if (a->esz == `0`) {
1936	return false;
1937	}
1938	if (sve_access_check(s)) {
1939	/ Decode the VFP immediate. /
1940	uint64_t imm = vfp_expand_imm(a->esz, a->imm);
1941	TCGv_i64 t_imm = tcg_const_i64(imm);
1942	do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1943	tcg_temp_free_i64(t_imm);
1944	}
1945	return true;
1946	}
1947
1948	static bool trans_CPY_m_i(DisasContext s, arg_rpri_esz a)
1949	{
1950	if (a->esz == `0` && extract32(s->insn, `13`, `1`)) {
1951	return false;
1952	}
1953	if (sve_access_check(s)) {
1954	TCGv_i64 t_imm = tcg_const_i64(a->imm);
1955	do_cpy_m(s, a->esz, a->rd, a->rn, a->pg, t_imm);
1956	tcg_temp_free_i64(t_imm);
1957	}
1958	return true;
1959	}
1960
1961	static bool trans_CPY_z_i(DisasContext s, arg_CPY_z_i a)
1962	{
1963	static gen_helper_gvec_2i * const fns[`4`] = {
1964	gen_helper_sve_cpy_z_b, gen_helper_sve_cpy_z_h,
1965	gen_helper_sve_cpy_z_s, gen_helper_sve_cpy_z_d,
1966	};
1967
1968	if (a->esz == `0` && extract32(s->insn, `13`, `1`)) {
1969	return false;
1970	}
1971	if (sve_access_check(s)) {
1972	unsigned vsz = vec_full_reg_size(s);
1973	TCGv_i64 t_imm = tcg_const_i64(a->imm);
1974	tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
1975	pred_full_reg_offset(s, a->pg),
1976	t_imm, vsz, vsz, `0`, fns[a->esz]);
1977	tcg_temp_free_i64(t_imm);
1978	}
1979	return true;
1980	}
1981
1982	/*
1983	*** SVE Permute Extract Group
1984	*/
1985
1986	static bool trans_EXT(DisasContext s, arg_EXT a)
1987	{
1988	if (!sve_access_check(s)) {
1989	return true;
1990	}
1991
1992	unsigned vsz = vec_full_reg_size(s);
1993	unsigned n_ofs = a->imm >= vsz ? `0` : a->imm;
1994	unsigned n_siz = vsz - n_ofs;
1995	unsigned d = vec_full_reg_offset(s, a->rd);
1996	unsigned n = vec_full_reg_offset(s, a->rn);
1997	unsigned m = vec_full_reg_offset(s, a->rm);
1998
1999	/ Use host vector move insns if we have appropriate sizes*
2000	* and no unfortunate overlap.
2001	*/
2002	if (m != d
2003	&& n_ofs == size_for_gvec(n_ofs)
2004	&& n_siz == size_for_gvec(n_siz)
2005	&& (d != n \|\| n_siz <= n_ofs)) {
2006	tcg_gen_gvec_mov(`0`, d, n + n_ofs, n_siz, n_siz);
2007	if (n_ofs != `0`) {
2008	tcg_gen_gvec_mov(`0`, d + n_siz, m, n_ofs, n_ofs);
2009	}
2010	} else {
2011	tcg_gen_gvec_3_ool(d, n, m, vsz, vsz, n_ofs, gen_helper_sve_ext);
2012	}
2013	return true;
2014	}
2015
2016	/*
2017	*** SVE Permute - Unpredicated Group
2018	*/
2019
2020	static bool trans_DUP_s(DisasContext s, arg_DUP_s a)
2021	{
2022	if (sve_access_check(s)) {
2023	unsigned vsz = vec_full_reg_size(s);
2024	tcg_gen_gvec_dup_i64(a->esz, vec_full_reg_offset(s, a->rd),
2025	vsz, vsz, cpu_reg_sp(s, a->rn));
2026	}
2027	return true;
2028	}
2029
2030	static bool trans_DUP_x(DisasContext s, arg_DUP_x a)
2031	{
2032	if ((a->imm & `0x1f`) == `0`) {
2033	return false;
2034	}
2035	if (sve_access_check(s)) {
2036	unsigned vsz = vec_full_reg_size(s);
2037	unsigned dofs = vec_full_reg_offset(s, a->rd);
2038	unsigned esz, index;
2039
2040	esz = ctz32(a->imm);
2041	index = a->imm >> (esz + `1`);
2042
2043	if ((index << esz) < vsz) {
2044	unsigned nofs = vec_reg_offset(s, a->rn, index, esz);
2045	tcg_gen_gvec_dup_mem(esz, dofs, nofs, vsz, vsz);
2046	} else {
2047	tcg_gen_gvec_dup64i(dofs, vsz, vsz, `0`);
2048	}
2049	}
2050	return true;
2051	}
2052
2053	static void do_insr_i64(DisasContext s, arg_rrr_esz a, TCGv_i64 val)
2054	{
2055	typedef void gen_insr(TCGv_ptr, TCGv_ptr, TCGv_i64, TCGv_i32);
2056	static gen_insr * const fns[`4`] = {
2057	gen_helper_sve_insr_b, gen_helper_sve_insr_h,
2058	gen_helper_sve_insr_s, gen_helper_sve_insr_d,
2059	};
2060	unsigned vsz = vec_full_reg_size(s);
2061	TCGv_i32 desc = tcg_const_i32(simd_desc(vsz, vsz, `0`));
2062	TCGv_ptr t_zd = tcg_temp_new_ptr();
2063	TCGv_ptr t_zn = tcg_temp_new_ptr();
2064
2065	tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, a->rd));
2066	tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
2067
2068	fns[a->esz](t_zd, t_zn, val, desc);
2069
2070	tcg_temp_free_ptr(t_zd);
2071	tcg_temp_free_ptr(t_zn);
2072	tcg_temp_free_i32(desc);
2073	}
2074
2075	static bool trans_INSR_f(DisasContext s, arg_rrr_esz a)
2076	{
2077	if (sve_access_check(s)) {
2078	TCGv_i64 t = tcg_temp_new_i64();
2079	tcg_gen_ld_i64(t, cpu_env, vec_reg_offset(s, a->rm, `0`, MO_64));
2080	do_insr_i64(s, a, t);
2081	tcg_temp_free_i64(t);
2082	}
2083	return true;
2084	}
2085
2086	static bool trans_INSR_r(DisasContext s, arg_rrr_esz a)
2087	{
2088	if (sve_access_check(s)) {
2089	do_insr_i64(s, a, cpu_reg(s, a->rm));
2090	}
2091	return true;
2092	}
2093
2094	static bool trans_REV_v(DisasContext s, arg_rr_esz a)
2095	{
2096	static gen_helper_gvec_2 * const fns[`4`] = {
2097	gen_helper_sve_rev_b, gen_helper_sve_rev_h,
2098	gen_helper_sve_rev_s, gen_helper_sve_rev_d
2099	};
2100
2101	if (sve_access_check(s)) {
2102	unsigned vsz = vec_full_reg_size(s);
2103	tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2104	vec_full_reg_offset(s, a->rn),
2105	vsz, vsz, `0`, fns[a->esz]);
2106	}
2107	return true;
2108	}
2109
2110	static bool trans_TBL(DisasContext s, arg_rrr_esz a)
2111	{
2112	static gen_helper_gvec_3 * const fns[`4`] = {
2113	gen_helper_sve_tbl_b, gen_helper_sve_tbl_h,
2114	gen_helper_sve_tbl_s, gen_helper_sve_tbl_d
2115	};
2116
2117	if (sve_access_check(s)) {
2118	unsigned vsz = vec_full_reg_size(s);
2119	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2120	vec_full_reg_offset(s, a->rn),
2121	vec_full_reg_offset(s, a->rm),
2122	vsz, vsz, `0`, fns[a->esz]);
2123	}
2124	return true;
2125	}
2126
2127	static bool trans_UNPK(DisasContext s, arg_UNPK a)
2128	{
2129	static gen_helper_gvec_2 * const fns[`4`][`2`] = {
2130	{ NULL, NULL },
2131	{ gen_helper_sve_sunpk_h, gen_helper_sve_uunpk_h },
2132	{ gen_helper_sve_sunpk_s, gen_helper_sve_uunpk_s },
2133	{ gen_helper_sve_sunpk_d, gen_helper_sve_uunpk_d },
2134	};
2135
2136	if (a->esz == `0`) {
2137	return false;
2138	}
2139	if (sve_access_check(s)) {
2140	unsigned vsz = vec_full_reg_size(s);
2141	tcg_gen_gvec_2_ool(vec_full_reg_offset(s, a->rd),
2142	vec_full_reg_offset(s, a->rn)
2143	+ (a->h ? vsz / `2` : `0`),
2144	vsz, vsz, `0`, fns[a->esz][a->u]);
2145	}
2146	return true;
2147	}
2148
2149	/*
2150	*** SVE Permute - Predicates Group
2151	*/
2152
2153	static bool do_perm_pred3(DisasContext s, arg_rrr_esz a, bool high_odd,
2154	gen_helper_gvec_3 *fn)
2155	{
2156	if (!sve_access_check(s)) {
2157	return true;
2158	}
2159
2160	unsigned vsz = pred_full_reg_size(s);
2161
2162	/ Predicate sizes may be smaller and cannot use simd_desc.*
2163	We cannot round up, as we do elsewhere, because we need
2164	the exact size for ZIP2 and REV. We retain the style for
2165	the other helpers for consistency. /*
2166	TCGv_ptr t_d = tcg_temp_new_ptr();
2167	TCGv_ptr t_n = tcg_temp_new_ptr();
2168	TCGv_ptr t_m = tcg_temp_new_ptr();
2169	TCGv_i32 t_desc;
2170	int desc;
2171
2172	desc = vsz - `2`;
2173	desc = deposit32(desc, SIMD_DATA_SHIFT, `2`, a->esz);
2174	desc = deposit32(desc, SIMD_DATA_SHIFT + `2`, `2`, high_odd);
2175
2176	tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2177	tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2178	tcg_gen_addi_ptr(t_m, cpu_env, pred_full_reg_offset(s, a->rm));
2179	t_desc = tcg_const_i32(desc);
2180
2181	fn(t_d, t_n, t_m, t_desc);
2182
2183	tcg_temp_free_ptr(t_d);
2184	tcg_temp_free_ptr(t_n);
2185	tcg_temp_free_ptr(t_m);
2186	tcg_temp_free_i32(t_desc);
2187	return true;
2188	}
2189
2190	static bool do_perm_pred2(DisasContext s, arg_rr_esz a, bool high_odd,
2191	gen_helper_gvec_2 *fn)
2192	{
2193	if (!sve_access_check(s)) {
2194	return true;
2195	}
2196
2197	unsigned vsz = pred_full_reg_size(s);
2198	TCGv_ptr t_d = tcg_temp_new_ptr();
2199	TCGv_ptr t_n = tcg_temp_new_ptr();
2200	TCGv_i32 t_desc;
2201	int desc;
2202
2203	tcg_gen_addi_ptr(t_d, cpu_env, pred_full_reg_offset(s, a->rd));
2204	tcg_gen_addi_ptr(t_n, cpu_env, pred_full_reg_offset(s, a->rn));
2205
2206	/ Predicate sizes may be smaller and cannot use simd_desc.*
2207	We cannot round up, as we do elsewhere, because we need
2208	the exact size for ZIP2 and REV. We retain the style for
2209	the other helpers for consistency. /*
2210
2211	desc = vsz - `2`;
2212	desc = deposit32(desc, SIMD_DATA_SHIFT, `2`, a->esz);
2213	desc = deposit32(desc, SIMD_DATA_SHIFT + `2`, `2`, high_odd);
2214	t_desc = tcg_const_i32(desc);
2215
2216	fn(t_d, t_n, t_desc);
2217
2218	tcg_temp_free_i32(t_desc);
2219	tcg_temp_free_ptr(t_d);
2220	tcg_temp_free_ptr(t_n);
2221	return true;
2222	}
2223
2224	static bool trans_ZIP1_p(DisasContext s, arg_rrr_esz a)
2225	{
2226	return do_perm_pred3(s, a, `0`, gen_helper_sve_zip_p);
2227	}
2228
2229	static bool trans_ZIP2_p(DisasContext s, arg_rrr_esz a)
2230	{
2231	return do_perm_pred3(s, a, `1`, gen_helper_sve_zip_p);
2232	}
2233
2234	static bool trans_UZP1_p(DisasContext s, arg_rrr_esz a)
2235	{
2236	return do_perm_pred3(s, a, `0`, gen_helper_sve_uzp_p);
2237	}
2238
2239	static bool trans_UZP2_p(DisasContext s, arg_rrr_esz a)
2240	{
2241	return do_perm_pred3(s, a, `1`, gen_helper_sve_uzp_p);
2242	}
2243
2244	static bool trans_TRN1_p(DisasContext s, arg_rrr_esz a)
2245	{
2246	return do_perm_pred3(s, a, `0`, gen_helper_sve_trn_p);
2247	}
2248
2249	static bool trans_TRN2_p(DisasContext s, arg_rrr_esz a)
2250	{
2251	return do_perm_pred3(s, a, `1`, gen_helper_sve_trn_p);
2252	}
2253
2254	static bool trans_REV_p(DisasContext s, arg_rr_esz a)
2255	{
2256	return do_perm_pred2(s, a, `0`, gen_helper_sve_rev_p);
2257	}
2258
2259	static bool trans_PUNPKLO(DisasContext s, arg_PUNPKLO a)
2260	{
2261	return do_perm_pred2(s, a, `0`, gen_helper_sve_punpk_p);
2262	}
2263
2264	static bool trans_PUNPKHI(DisasContext s, arg_PUNPKHI a)
2265	{
2266	return do_perm_pred2(s, a, `1`, gen_helper_sve_punpk_p);
2267	}
2268
2269	/*
2270	*** SVE Permute - Interleaving Group
2271	*/
2272
2273	static bool do_zip(DisasContext s, arg_rrr_esz a, bool high)
2274	{
2275	static gen_helper_gvec_3 * const fns[`4`] = {
2276	gen_helper_sve_zip_b, gen_helper_sve_zip_h,
2277	gen_helper_sve_zip_s, gen_helper_sve_zip_d,
2278	};
2279
2280	if (sve_access_check(s)) {
2281	unsigned vsz = vec_full_reg_size(s);
2282	unsigned high_ofs = high ? vsz / `2` : `0`;
2283	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2284	vec_full_reg_offset(s, a->rn) + high_ofs,
2285	vec_full_reg_offset(s, a->rm) + high_ofs,
2286	vsz, vsz, `0`, fns[a->esz]);
2287	}
2288	return true;
2289	}
2290
2291	static bool do_zzz_data_ool(DisasContext s, arg_rrr_esz a, int data,
2292	gen_helper_gvec_3 *fn)
2293	{
2294	if (sve_access_check(s)) {
2295	unsigned vsz = vec_full_reg_size(s);
2296	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
2297	vec_full_reg_offset(s, a->rn),
2298	vec_full_reg_offset(s, a->rm),
2299	vsz, vsz, data, fn);
2300	}
2301	return true;
2302	}
2303
2304	static bool trans_ZIP1_z(DisasContext s, arg_rrr_esz a)
2305	{
2306	return do_zip(s, a, false);
2307	}
2308
2309	static bool trans_ZIP2_z(DisasContext s, arg_rrr_esz a)
2310	{
2311	return do_zip(s, a, true);
2312	}
2313
2314	static gen_helper_gvec_3 * const uzp_fns[`4`] = {
2315	gen_helper_sve_uzp_b, gen_helper_sve_uzp_h,
2316	gen_helper_sve_uzp_s, gen_helper_sve_uzp_d,
2317	};
2318
2319	static bool trans_UZP1_z(DisasContext s, arg_rrr_esz a)
2320	{
2321	return do_zzz_data_ool(s, a, `0`, uzp_fns[a->esz]);
2322	}
2323
2324	static bool trans_UZP2_z(DisasContext s, arg_rrr_esz a)
2325	{
2326	return do_zzz_data_ool(s, a, `1` << a->esz, uzp_fns[a->esz]);
2327	}
2328
2329	static gen_helper_gvec_3 * const trn_fns[`4`] = {
2330	gen_helper_sve_trn_b, gen_helper_sve_trn_h,
2331	gen_helper_sve_trn_s, gen_helper_sve_trn_d,
2332	};
2333
2334	static bool trans_TRN1_z(DisasContext s, arg_rrr_esz a)
2335	{
2336	return do_zzz_data_ool(s, a, `0`, trn_fns[a->esz]);
2337	}
2338
2339	static bool trans_TRN2_z(DisasContext s, arg_rrr_esz a)
2340	{
2341	return do_zzz_data_ool(s, a, `1` << a->esz, trn_fns[a->esz]);
2342	}
2343
/*
 *** SVE Permute Vector - Predicated Group
 */
2347
2348	static bool trans_COMPACT(DisasContext s, arg_rpr_esz a)
2349	{
2350	static gen_helper_gvec_3 * const fns[`4`] = {
2351	NULL, NULL, gen_helper_sve_compact_s, gen_helper_sve_compact_d
2352	};
2353	return do_zpz_ool(s, a, fns[a->esz]);
2354	}
2355
2356	/ Call the helper that computes the ARM LastActiveElement pseudocode*
2357	* function, scaled by the element size. This includes the not found
2358	* indication; e.g. not found for esz=3 is -8.
2359	*/
2360	static void find_last_active(DisasContext s, TCGv_i32 ret, int* esz, int pg)
2361	{
2362	/ Predicate sizes may be smaller and cannot use simd_desc. We cannot*
2363	* round up, as we do elsewhere, because we need the exact size.
2364	*/
2365	TCGv_ptr t_p = tcg_temp_new_ptr();
2366	TCGv_i32 t_desc;
2367	unsigned vsz = pred_full_reg_size(s);
2368	unsigned desc;
2369
2370	desc = vsz - `2`;
2371	desc = deposit32(desc, SIMD_DATA_SHIFT, `2`, esz);
2372
2373	tcg_gen_addi_ptr(t_p, cpu_env, pred_full_reg_offset(s, pg));
2374	t_desc = tcg_const_i32(desc);
2375
2376	gen_helper_sve_last_active_element(ret, t_p, t_desc);
2377
2378	tcg_temp_free_i32(t_desc);
2379	tcg_temp_free_ptr(t_p);
2380	}
2381
2382	/ Increment LAST to the offset of the next element in the vector,*
2383	* wrapping around to 0.
2384	*/
2385	static void incr_last_active(DisasContext s, TCGv_i32 last, int* esz)
2386	{
2387	unsigned vsz = vec_full_reg_size(s);
2388
2389	tcg_gen_addi_i32(last, last, `1` << esz);
2390	if (is_power_of_2(vsz)) {
2391	tcg_gen_andi_i32(last, last, vsz - `1`);
2392	} else {
2393	TCGv_i32 max = tcg_const_i32(vsz);
2394	TCGv_i32 zero = tcg_const_i32(`0`);
2395	tcg_gen_movcond_i32(TCG_COND_GEU, last, last, max, zero, last);
2396	tcg_temp_free_i32(max);
2397	tcg_temp_free_i32(zero);
2398	}
2399	}
2400
2401	/ If LAST < 0, set LAST to the offset of the last element in the vector. /
2402	static void wrap_last_active(DisasContext s, TCGv_i32 last, int* esz)
2403	{
2404	unsigned vsz = vec_full_reg_size(s);
2405
2406	if (is_power_of_2(vsz)) {
2407	tcg_gen_andi_i32(last, last, vsz - `1`);
2408	} else {
2409	TCGv_i32 max = tcg_const_i32(vsz - (`1` << esz));
2410	TCGv_i32 zero = tcg_const_i32(`0`);
2411	tcg_gen_movcond_i32(TCG_COND_LT, last, last, zero, max, last);
2412	tcg_temp_free_i32(max);
2413	tcg_temp_free_i32(zero);
2414	}
2415	}
2416
2417	/ Load an unsigned element of ESZ from BASE+OFS. /
2418	static TCGv_i64 load_esz(TCGv_ptr base, int ofs, int esz)
2419	{
2420	TCGv_i64 r = tcg_temp_new_i64();
2421
2422	switch (esz) {
2423	case `0`:
2424	tcg_gen_ld8u_i64(r, base, ofs);
2425	break;
2426	case `1`:
2427	tcg_gen_ld16u_i64(r, base, ofs);
2428	break;
2429	case `2`:
2430	tcg_gen_ld32u_i64(r, base, ofs);
2431	break;
2432	case `3`:
2433	tcg_gen_ld_i64(r, base, ofs);
2434	break;
2435	default:
2436	g_assert_not_reached();
2437	}
2438	return r;
2439	}
2440
2441	/ Load an unsigned element of ESZ from RM[LAST]. /
2442	static TCGv_i64 load_last_active(DisasContext *s, TCGv_i32 last,
2443	int rm, int esz)
2444	{
2445	TCGv_ptr p = tcg_temp_new_ptr();
2446	TCGv_i64 r;
2447
2448	/ Convert offset into vector into offset into ENV.*
2449	* The final adjustment for the vector register base
2450	* is added via constant offset to the load.
2451	*/
2452	#ifdef HOST_WORDS_BIGENDIAN
2453	/ Adjust for element ordering. See vec_reg_offset. /
2454	if (esz < `3`) {
2455	tcg_gen_xori_i32(last, last, `8` - (`1` << esz));
2456	}
2457	#endif
2458	tcg_gen_ext_i32_ptr(p, last);
2459	tcg_gen_add_ptr(p, p, cpu_env);
2460
2461	r = load_esz(p, vec_full_reg_offset(s, rm), esz);
2462	tcg_temp_free_ptr(p);
2463
2464	return r;
2465	}
2466
2467	/ Compute CLAST for a Zreg. /
2468	static bool do_clast_vector(DisasContext s, arg_rprr_esz a, bool before)
2469	{
2470	TCGv_i32 last;
2471	TCGLabel *over;
2472	TCGv_i64 ele;
2473	unsigned vsz, esz = a->esz;
2474
2475	if (!sve_access_check(s)) {
2476	return true;
2477	}
2478
2479	last = tcg_temp_local_new_i32();
2480	over = gen_new_label();
2481
2482	find_last_active(s, last, esz, a->pg);
2483
2484	/ There is of course no movcond for a 2048-bit vector,*
2485	* so we must branch over the actual store.
2486	*/
2487	tcg_gen_brcondi_i32(TCG_COND_LT, last, `0`, over);
2488
2489	if (!before) {
2490	incr_last_active(s, last, esz);
2491	}
2492
2493	ele = load_last_active(s, last, a->rm, esz);
2494	tcg_temp_free_i32(last);
2495
2496	vsz = vec_full_reg_size(s);
2497	tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd), vsz, vsz, ele);
2498	tcg_temp_free_i64(ele);
2499
2500	/ If this insn used MOVPRFX, we may need a second move. /
2501	if (a->rd != a->rn) {
2502	TCGLabel *done = gen_new_label();
2503	tcg_gen_br(done);
2504
2505	gen_set_label(over);
2506	do_mov_z(s, a->rd, a->rn);
2507
2508	gen_set_label(done);
2509	} else {
2510	gen_set_label(over);
2511	}
2512	return true;
2513	}
2514
2515	static bool trans_CLASTA_z(DisasContext s, arg_rprr_esz a)
2516	{
2517	return do_clast_vector(s, a, false);
2518	}
2519
2520	static bool trans_CLASTB_z(DisasContext s, arg_rprr_esz a)
2521	{
2522	return do_clast_vector(s, a, true);
2523	}
2524
2525	/ Compute CLAST for a scalar. /
2526	static void do_clast_scalar(DisasContext s, int* esz, int pg, int rm,
2527	bool before, TCGv_i64 reg_val)
2528	{
2529	TCGv_i32 last = tcg_temp_new_i32();
2530	TCGv_i64 ele, cmp, zero;
2531
2532	find_last_active(s, last, esz, pg);
2533
2534	/ Extend the original value of last prior to incrementing. /
2535	cmp = tcg_temp_new_i64();
2536	tcg_gen_ext_i32_i64(cmp, last);
2537
2538	if (!before) {
2539	incr_last_active(s, last, esz);
2540	}
2541
2542	/ The conceit here is that while last < 0 indicates not found, after*
2543	* adjusting for cpu_env->vfp.zregs[rm], it is still a valid address
2544	* from which we can load garbage. We then discard the garbage with
2545	* a conditional move.
2546	*/
2547	ele = load_last_active(s, last, rm, esz);
2548	tcg_temp_free_i32(last);
2549
2550	zero = tcg_const_i64(`0`);
2551	tcg_gen_movcond_i64(TCG_COND_GE, reg_val, cmp, zero, ele, reg_val);
2552
2553	tcg_temp_free_i64(zero);
2554	tcg_temp_free_i64(cmp);
2555	tcg_temp_free_i64(ele);
2556	}
2557
2558	/ Compute CLAST for a Vreg. /
2559	static bool do_clast_fp(DisasContext s, arg_rpr_esz a, bool before)
2560	{
2561	if (sve_access_check(s)) {
2562	int esz = a->esz;
2563	int ofs = vec_reg_offset(s, a->rd, `0`, esz);
2564	TCGv_i64 reg = load_esz(cpu_env, ofs, esz);
2565
2566	do_clast_scalar(s, esz, a->pg, a->rn, before, reg);
2567	write_fp_dreg(s, a->rd, reg);
2568	tcg_temp_free_i64(reg);
2569	}
2570	return true;
2571	}
2572
2573	static bool trans_CLASTA_v(DisasContext s, arg_rpr_esz a)
2574	{
2575	return do_clast_fp(s, a, false);
2576	}
2577
2578	static bool trans_CLASTB_v(DisasContext s, arg_rpr_esz a)
2579	{
2580	return do_clast_fp(s, a, true);
2581	}
2582
2583	/ Compute CLAST for a Xreg. /
2584	static bool do_clast_general(DisasContext s, arg_rpr_esz a, bool before)
2585	{
2586	TCGv_i64 reg;
2587
2588	if (!sve_access_check(s)) {
2589	return true;
2590	}
2591
2592	reg = cpu_reg(s, a->rd);
2593	switch (a->esz) {
2594	case `0`:
2595	tcg_gen_ext8u_i64(reg, reg);
2596	break;
2597	case `1`:
2598	tcg_gen_ext16u_i64(reg, reg);
2599	break;
2600	case `2`:
2601	tcg_gen_ext32u_i64(reg, reg);
2602	break;
2603	case `3`:
2604	break;
2605	default:
2606	g_assert_not_reached();
2607	}
2608
2609	do_clast_scalar(s, a->esz, a->pg, a->rn, before, reg);
2610	return true;
2611	}
2612
2613	static bool trans_CLASTA_r(DisasContext s, arg_rpr_esz a)
2614	{
2615	return do_clast_general(s, a, false);
2616	}
2617
2618	static bool trans_CLASTB_r(DisasContext s, arg_rpr_esz a)
2619	{
2620	return do_clast_general(s, a, true);
2621	}
2622
2623	/ Compute LAST for a scalar. /
2624	static TCGv_i64 do_last_scalar(DisasContext s, int* esz,
2625	int pg, int rm, bool before)
2626	{
2627	TCGv_i32 last = tcg_temp_new_i32();
2628	TCGv_i64 ret;
2629
2630	find_last_active(s, last, esz, pg);
2631	if (before) {
2632	wrap_last_active(s, last, esz);
2633	} else {
2634	incr_last_active(s, last, esz);
2635	}
2636
2637	ret = load_last_active(s, last, rm, esz);
2638	tcg_temp_free_i32(last);
2639	return ret;
2640	}
2641
2642	/ Compute LAST for a Vreg. /
2643	static bool do_last_fp(DisasContext s, arg_rpr_esz a, bool before)
2644	{
2645	if (sve_access_check(s)) {
2646	TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2647	write_fp_dreg(s, a->rd, val);
2648	tcg_temp_free_i64(val);
2649	}
2650	return true;
2651	}
2652
2653	static bool trans_LASTA_v(DisasContext s, arg_rpr_esz a)
2654	{
2655	return do_last_fp(s, a, false);
2656	}
2657
2658	static bool trans_LASTB_v(DisasContext s, arg_rpr_esz a)
2659	{
2660	return do_last_fp(s, a, true);
2661	}
2662
2663	/ Compute LAST for a Xreg. /
2664	static bool do_last_general(DisasContext s, arg_rpr_esz a, bool before)
2665	{
2666	if (sve_access_check(s)) {
2667	TCGv_i64 val = do_last_scalar(s, a->esz, a->pg, a->rn, before);
2668	tcg_gen_mov_i64(cpu_reg(s, a->rd), val);
2669	tcg_temp_free_i64(val);
2670	}
2671	return true;
2672	}
2673
2674	static bool trans_LASTA_r(DisasContext s, arg_rpr_esz a)
2675	{
2676	return do_last_general(s, a, false);
2677	}
2678
2679	static bool trans_LASTB_r(DisasContext s, arg_rpr_esz a)
2680	{
2681	return do_last_general(s, a, true);
2682	}
2683
2684	static bool trans_CPY_m_r(DisasContext s, arg_rpr_esz a)
2685	{
2686	if (sve_access_check(s)) {
2687	do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, cpu_reg_sp(s, a->rn));
2688	}
2689	return true;
2690	}
2691
2692	static bool trans_CPY_m_v(DisasContext s, arg_rpr_esz a)
2693	{
2694	if (sve_access_check(s)) {
2695	int ofs = vec_reg_offset(s, a->rn, `0`, a->esz);
2696	TCGv_i64 t = load_esz(cpu_env, ofs, a->esz);
2697	do_cpy_m(s, a->esz, a->rd, a->rd, a->pg, t);
2698	tcg_temp_free_i64(t);
2699	}
2700	return true;
2701	}
2702
2703	static bool trans_REVB(DisasContext s, arg_rpr_esz a)
2704	{
2705	static gen_helper_gvec_3 * const fns[`4`] = {
2706	NULL,
2707	gen_helper_sve_revb_h,
2708	gen_helper_sve_revb_s,
2709	gen_helper_sve_revb_d,
2710	};
2711	return do_zpz_ool(s, a, fns[a->esz]);
2712	}
2713
2714	static bool trans_REVH(DisasContext s, arg_rpr_esz a)
2715	{
2716	static gen_helper_gvec_3 * const fns[`4`] = {
2717	NULL,
2718	NULL,
2719	gen_helper_sve_revh_s,
2720	gen_helper_sve_revh_d,
2721	};
2722	return do_zpz_ool(s, a, fns[a->esz]);
2723	}
2724
2725	static bool trans_REVW(DisasContext s, arg_rpr_esz a)
2726	{
2727	return do_zpz_ool(s, a, a->esz == `3` ? gen_helper_sve_revw_d : NULL);
2728	}
2729
2730	static bool trans_RBIT(DisasContext s, arg_rpr_esz a)
2731	{
2732	static gen_helper_gvec_3 * const fns[`4`] = {
2733	gen_helper_sve_rbit_b,
2734	gen_helper_sve_rbit_h,
2735	gen_helper_sve_rbit_s,
2736	gen_helper_sve_rbit_d,
2737	};
2738	return do_zpz_ool(s, a, fns[a->esz]);
2739	}
2740
2741	static bool trans_SPLICE(DisasContext s, arg_rprr_esz a)
2742	{
2743	if (sve_access_check(s)) {
2744	unsigned vsz = vec_full_reg_size(s);
2745	tcg_gen_gvec_4_ool(vec_full_reg_offset(s, a->rd),
2746	vec_full_reg_offset(s, a->rn),
2747	vec_full_reg_offset(s, a->rm),
2748	pred_full_reg_offset(s, a->pg),
2749	vsz, vsz, a->esz, gen_helper_sve_splice);
2750	}
2751	return true;
2752	}
2753
/*
 *** SVE Integer Compare - Vectors Group
 */
2757
2758	static bool do_ppzz_flags(DisasContext s, arg_rprr_esz a,
2759	gen_helper_gvec_flags_4 *gen_fn)
2760	{
2761	TCGv_ptr pd, zn, zm, pg;
2762	unsigned vsz;
2763	TCGv_i32 t;
2764
2765	if (gen_fn == NULL) {
2766	return false;
2767	}
2768	if (!sve_access_check(s)) {
2769	return true;
2770	}
2771
2772	vsz = vec_full_reg_size(s);
2773	t = tcg_const_i32(simd_desc(vsz, vsz, `0`));
2774	pd = tcg_temp_new_ptr();
2775	zn = tcg_temp_new_ptr();
2776	zm = tcg_temp_new_ptr();
2777	pg = tcg_temp_new_ptr();
2778
2779	tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2780	tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2781	tcg_gen_addi_ptr(zm, cpu_env, vec_full_reg_offset(s, a->rm));
2782	tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2783
2784	gen_fn(t, pd, zn, zm, pg, t);
2785
2786	tcg_temp_free_ptr(pd);
2787	tcg_temp_free_ptr(zn);
2788	tcg_temp_free_ptr(zm);
2789	tcg_temp_free_ptr(pg);
2790
2791	do_pred_flags(t);
2792
2793	tcg_temp_free_i32(t);
2794	return true;
2795	}
2796
2797	#define DO_PPZZ(NAME, name) \
2798	static bool trans_##NAME##_ppzz(DisasContext s, arg_rprr_esz a) \
2799	{ \
2800	static gen_helper_gvec_flags_4 * const fns[4] = { \
2801	gen_helper_sve_##name##_ppzz_b, gen_helper_sve_##name##_ppzz_h, \
2802	gen_helper_sve_##name##_ppzz_s, gen_helper_sve_##name##_ppzz_d, \
2803	}; \
2804	return do_ppzz_flags(s, a, fns[a->esz]); \
2805	}
2806
2807	DO_PPZZ(CMPEQ, cmpeq)
2808	DO_PPZZ(CMPNE, cmpne)
2809	DO_PPZZ(CMPGT, cmpgt)
2810	DO_PPZZ(CMPGE, cmpge)
2811	DO_PPZZ(CMPHI, cmphi)
2812	DO_PPZZ(CMPHS, cmphs)
2813
2814	#undef DO_PPZZ
2815
2816	#define DO_PPZW(NAME, name) \
2817	static bool trans_##NAME##_ppzw(DisasContext s, arg_rprr_esz a) \
2818	{ \
2819	static gen_helper_gvec_flags_4 * const fns[4] = { \
2820	gen_helper_sve_##name##_ppzw_b, gen_helper_sve_##name##_ppzw_h, \
2821	gen_helper_sve_##name##_ppzw_s, NULL \
2822	}; \
2823	return do_ppzz_flags(s, a, fns[a->esz]); \
2824	}
2825
2826	DO_PPZW(CMPEQ, cmpeq)
2827	DO_PPZW(CMPNE, cmpne)
2828	DO_PPZW(CMPGT, cmpgt)
2829	DO_PPZW(CMPGE, cmpge)
2830	DO_PPZW(CMPHI, cmphi)
2831	DO_PPZW(CMPHS, cmphs)
2832	DO_PPZW(CMPLT, cmplt)
2833	DO_PPZW(CMPLE, cmple)
2834	DO_PPZW(CMPLO, cmplo)
2835	DO_PPZW(CMPLS, cmpls)
2836
2837	#undef DO_PPZW
2838
/*
 *** SVE Integer Compare - Immediate Groups
 */
2842
2843	static bool do_ppzi_flags(DisasContext s, arg_rpri_esz a,
2844	gen_helper_gvec_flags_3 *gen_fn)
2845	{
2846	TCGv_ptr pd, zn, pg;
2847	unsigned vsz;
2848	TCGv_i32 t;
2849
2850	if (gen_fn == NULL) {
2851	return false;
2852	}
2853	if (!sve_access_check(s)) {
2854	return true;
2855	}
2856
2857	vsz = vec_full_reg_size(s);
2858	t = tcg_const_i32(simd_desc(vsz, vsz, a->imm));
2859	pd = tcg_temp_new_ptr();
2860	zn = tcg_temp_new_ptr();
2861	pg = tcg_temp_new_ptr();
2862
2863	tcg_gen_addi_ptr(pd, cpu_env, pred_full_reg_offset(s, a->rd));
2864	tcg_gen_addi_ptr(zn, cpu_env, vec_full_reg_offset(s, a->rn));
2865	tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
2866
2867	gen_fn(t, pd, zn, pg, t);
2868
2869	tcg_temp_free_ptr(pd);
2870	tcg_temp_free_ptr(zn);
2871	tcg_temp_free_ptr(pg);
2872
2873	do_pred_flags(t);
2874
2875	tcg_temp_free_i32(t);
2876	return true;
2877	}
2878
2879	#define DO_PPZI(NAME, name) \
2880	static bool trans_##NAME##_ppzi(DisasContext s, arg_rpri_esz a) \
2881	{ \
2882	static gen_helper_gvec_flags_3 * const fns[4] = { \
2883	gen_helper_sve_##name##_ppzi_b, gen_helper_sve_##name##_ppzi_h, \
2884	gen_helper_sve_##name##_ppzi_s, gen_helper_sve_##name##_ppzi_d, \
2885	}; \
2886	return do_ppzi_flags(s, a, fns[a->esz]); \
2887	}
2888
2889	DO_PPZI(CMPEQ, cmpeq)
2890	DO_PPZI(CMPNE, cmpne)
2891	DO_PPZI(CMPGT, cmpgt)
2892	DO_PPZI(CMPGE, cmpge)
2893	DO_PPZI(CMPHI, cmphi)
2894	DO_PPZI(CMPHS, cmphs)
2895	DO_PPZI(CMPLT, cmplt)
2896	DO_PPZI(CMPLE, cmple)
2897	DO_PPZI(CMPLO, cmplo)
2898	DO_PPZI(CMPLS, cmpls)
2899
2900	#undef DO_PPZI
2901
/*
 *** SVE Partition Break Group
 */
2905
2906	static bool do_brk3(DisasContext s, arg_rprr_s a,
2907	gen_helper_gvec_4 fn, gen_helper_gvec_flags_4 fn_s)
2908	{
2909	if (!sve_access_check(s)) {
2910	return true;
2911	}
2912
2913	unsigned vsz = pred_full_reg_size(s);
2914
2915	/ Predicate sizes may be smaller and cannot use simd_desc. /
2916	TCGv_ptr d = tcg_temp_new_ptr();
2917	TCGv_ptr n = tcg_temp_new_ptr();
2918	TCGv_ptr m = tcg_temp_new_ptr();
2919	TCGv_ptr g = tcg_temp_new_ptr();
2920	TCGv_i32 t = tcg_const_i32(vsz - `2`);
2921
2922	tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2923	tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2924	tcg_gen_addi_ptr(m, cpu_env, pred_full_reg_offset(s, a->rm));
2925	tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2926
2927	if (a->s) {
2928	fn_s(t, d, n, m, g, t);
2929	do_pred_flags(t);
2930	} else {
2931	fn(d, n, m, g, t);
2932	}
2933	tcg_temp_free_ptr(d);
2934	tcg_temp_free_ptr(n);
2935	tcg_temp_free_ptr(m);
2936	tcg_temp_free_ptr(g);
2937	tcg_temp_free_i32(t);
2938	return true;
2939	}
2940
2941	static bool do_brk2(DisasContext s, arg_rpr_s a,
2942	gen_helper_gvec_3 fn, gen_helper_gvec_flags_3 fn_s)
2943	{
2944	if (!sve_access_check(s)) {
2945	return true;
2946	}
2947
2948	unsigned vsz = pred_full_reg_size(s);
2949
2950	/ Predicate sizes may be smaller and cannot use simd_desc. /
2951	TCGv_ptr d = tcg_temp_new_ptr();
2952	TCGv_ptr n = tcg_temp_new_ptr();
2953	TCGv_ptr g = tcg_temp_new_ptr();
2954	TCGv_i32 t = tcg_const_i32(vsz - `2`);
2955
2956	tcg_gen_addi_ptr(d, cpu_env, pred_full_reg_offset(s, a->rd));
2957	tcg_gen_addi_ptr(n, cpu_env, pred_full_reg_offset(s, a->rn));
2958	tcg_gen_addi_ptr(g, cpu_env, pred_full_reg_offset(s, a->pg));
2959
2960	if (a->s) {
2961	fn_s(t, d, n, g, t);
2962	do_pred_flags(t);
2963	} else {
2964	fn(d, n, g, t);
2965	}
2966	tcg_temp_free_ptr(d);
2967	tcg_temp_free_ptr(n);
2968	tcg_temp_free_ptr(g);
2969	tcg_temp_free_i32(t);
2970	return true;
2971	}
2972
2973	static bool trans_BRKPA(DisasContext s, arg_rprr_s a)
2974	{
2975	return do_brk3(s, a, gen_helper_sve_brkpa, gen_helper_sve_brkpas);
2976	}
2977
2978	static bool trans_BRKPB(DisasContext s, arg_rprr_s a)
2979	{
2980	return do_brk3(s, a, gen_helper_sve_brkpb, gen_helper_sve_brkpbs);
2981	}
2982
2983	static bool trans_BRKA_m(DisasContext s, arg_rpr_s a)
2984	{
2985	return do_brk2(s, a, gen_helper_sve_brka_m, gen_helper_sve_brkas_m);
2986	}
2987
2988	static bool trans_BRKB_m(DisasContext s, arg_rpr_s a)
2989	{
2990	return do_brk2(s, a, gen_helper_sve_brkb_m, gen_helper_sve_brkbs_m);
2991	}
2992
2993	static bool trans_BRKA_z(DisasContext s, arg_rpr_s a)
2994	{
2995	return do_brk2(s, a, gen_helper_sve_brka_z, gen_helper_sve_brkas_z);
2996	}
2997
2998	static bool trans_BRKB_z(DisasContext s, arg_rpr_s a)
2999	{
3000	return do_brk2(s, a, gen_helper_sve_brkb_z, gen_helper_sve_brkbs_z);
3001	}
3002
3003	static bool trans_BRKN(DisasContext s, arg_rpr_s a)
3004	{
3005	return do_brk2(s, a, gen_helper_sve_brkn, gen_helper_sve_brkns);
3006	}
3007
/*
 *** SVE Predicate Count Group
 */
3011
3012	static void do_cntp(DisasContext s, TCGv_i64 val, int* esz, int pn, int pg)
3013	{
3014	unsigned psz = pred_full_reg_size(s);
3015
3016	if (psz <= `8`) {
3017	uint64_t psz_mask;
3018
3019	tcg_gen_ld_i64(val, cpu_env, pred_full_reg_offset(s, pn));
3020	if (pn != pg) {
3021	TCGv_i64 g = tcg_temp_new_i64();
3022	tcg_gen_ld_i64(g, cpu_env, pred_full_reg_offset(s, pg));
3023	tcg_gen_and_i64(val, val, g);
3024	tcg_temp_free_i64(g);
3025	}
3026
3027	/ Reduce the pred_esz_masks value simply to reduce the*
3028	* size of the code generated here.
3029	*/
3030	psz_mask = MAKE_64BIT_MASK(`0`, psz * `8`);
3031	tcg_gen_andi_i64(val, val, pred_esz_masks[esz] & psz_mask);
3032
3033	tcg_gen_ctpop_i64(val, val);
3034	} else {
3035	TCGv_ptr t_pn = tcg_temp_new_ptr();
3036	TCGv_ptr t_pg = tcg_temp_new_ptr();
3037	unsigned desc;
3038	TCGv_i32 t_desc;
3039
3040	desc = psz - `2`;
3041	desc = deposit32(desc, SIMD_DATA_SHIFT, `2`, esz);
3042
3043	tcg_gen_addi_ptr(t_pn, cpu_env, pred_full_reg_offset(s, pn));
3044	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3045	t_desc = tcg_const_i32(desc);
3046
3047	gen_helper_sve_cntp(val, t_pn, t_pg, t_desc);
3048	tcg_temp_free_ptr(t_pn);
3049	tcg_temp_free_ptr(t_pg);
3050	tcg_temp_free_i32(t_desc);
3051	}
3052	}
3053
3054	static bool trans_CNTP(DisasContext s, arg_CNTP a)
3055	{
3056	if (sve_access_check(s)) {
3057	do_cntp(s, cpu_reg(s, a->rd), a->esz, a->rn, a->pg);
3058	}
3059	return true;
3060	}
3061
3062	static bool trans_INCDECP_r(DisasContext s, arg_incdec_pred a)
3063	{
3064	if (sve_access_check(s)) {
3065	TCGv_i64 reg = cpu_reg(s, a->rd);
3066	TCGv_i64 val = tcg_temp_new_i64();
3067
3068	do_cntp(s, val, a->esz, a->pg, a->pg);
3069	if (a->d) {
3070	tcg_gen_sub_i64(reg, reg, val);
3071	} else {
3072	tcg_gen_add_i64(reg, reg, val);
3073	}
3074	tcg_temp_free_i64(val);
3075	}
3076	return true;
3077	}
3078
3079	static bool trans_INCDECP_z(DisasContext s, arg_incdec2_pred a)
3080	{
3081	if (a->esz == `0`) {
3082	return false;
3083	}
3084	if (sve_access_check(s)) {
3085	unsigned vsz = vec_full_reg_size(s);
3086	TCGv_i64 val = tcg_temp_new_i64();
3087	GVecGen2sFn *gvec_fn = a->d ? tcg_gen_gvec_subs : tcg_gen_gvec_adds;
3088
3089	do_cntp(s, val, a->esz, a->pg, a->pg);
3090	gvec_fn(a->esz, vec_full_reg_offset(s, a->rd),
3091	vec_full_reg_offset(s, a->rn), val, vsz, vsz);
3092	}
3093	return true;
3094	}
3095
3096	static bool trans_SINCDECP_r_32(DisasContext s, arg_incdec_pred a)
3097	{
3098	if (sve_access_check(s)) {
3099	TCGv_i64 reg = cpu_reg(s, a->rd);
3100	TCGv_i64 val = tcg_temp_new_i64();
3101
3102	do_cntp(s, val, a->esz, a->pg, a->pg);
3103	do_sat_addsub_32(reg, val, a->u, a->d);
3104	}
3105	return true;
3106	}
3107
3108	static bool trans_SINCDECP_r_64(DisasContext s, arg_incdec_pred a)
3109	{
3110	if (sve_access_check(s)) {
3111	TCGv_i64 reg = cpu_reg(s, a->rd);
3112	TCGv_i64 val = tcg_temp_new_i64();
3113
3114	do_cntp(s, val, a->esz, a->pg, a->pg);
3115	do_sat_addsub_64(reg, val, a->u, a->d);
3116	}
3117	return true;
3118	}
3119
3120	static bool trans_SINCDECP_z(DisasContext s, arg_incdec2_pred a)
3121	{
3122	if (a->esz == `0`) {
3123	return false;
3124	}
3125	if (sve_access_check(s)) {
3126	TCGv_i64 val = tcg_temp_new_i64();
3127	do_cntp(s, val, a->esz, a->pg, a->pg);
3128	do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, a->u, a->d);
3129	}
3130	return true;
3131	}
3132
/*
 *** SVE Integer Compare Scalars Group
 */
3136
3137	static bool trans_CTERM(DisasContext s, arg_CTERM a)
3138	{
3139	if (!sve_access_check(s)) {
3140	return true;
3141	}
3142
3143	TCGCond cond = (a->ne ? TCG_COND_NE : TCG_COND_EQ);
3144	TCGv_i64 rn = read_cpu_reg(s, a->rn, a->sf);
3145	TCGv_i64 rm = read_cpu_reg(s, a->rm, a->sf);
3146	TCGv_i64 cmp = tcg_temp_new_i64();
3147
3148	tcg_gen_setcond_i64(cond, cmp, rn, rm);
3149	tcg_gen_extrl_i64_i32(cpu_NF, cmp);
3150	tcg_temp_free_i64(cmp);
3151
3152	/ VF = !NF & !CF. /
3153	tcg_gen_xori_i32(cpu_VF, cpu_NF, `1`);
3154	tcg_gen_andc_i32(cpu_VF, cpu_VF, cpu_CF);
3155
3156	/ Both NF and VF actually look at bit 31. /
3157	tcg_gen_neg_i32(cpu_NF, cpu_NF);
3158	tcg_gen_neg_i32(cpu_VF, cpu_VF);
3159	return true;
3160	}
3161
3162	static bool trans_WHILE(DisasContext s, arg_WHILE a)
3163	{
3164	TCGv_i64 op0, op1, t0, t1, tmax;
3165	TCGv_i32 t2, t3;
3166	TCGv_ptr ptr;
3167	unsigned desc, vsz = vec_full_reg_size(s);
3168	TCGCond cond;
3169
3170	if (!sve_access_check(s)) {
3171	return true;
3172	}
3173
3174	op0 = read_cpu_reg(s, a->rn, `1`);
3175	op1 = read_cpu_reg(s, a->rm, `1`);
3176
3177	if (!a->sf) {
3178	if (a->u) {
3179	tcg_gen_ext32u_i64(op0, op0);
3180	tcg_gen_ext32u_i64(op1, op1);
3181	} else {
3182	tcg_gen_ext32s_i64(op0, op0);
3183	tcg_gen_ext32s_i64(op1, op1);
3184	}
3185	}
3186
3187	/ For the helper, compress the different conditions into a computation*
3188	* of how many iterations for which the condition is true.
3189	*/
3190	t0 = tcg_temp_new_i64();
3191	t1 = tcg_temp_new_i64();
3192	tcg_gen_sub_i64(t0, op1, op0);
3193
3194	tmax = tcg_const_i64(vsz >> a->esz);
3195	if (a->eq) {
3196	/ Equality means one more iteration. /
3197	tcg_gen_addi_i64(t0, t0, `1`);
3198
3199	/ If op1 is max (un)signed integer (and the only time the addition*
3200	* above could overflow), then we produce an all-true predicate by
3201	* setting the count to the vector length. This is because the
3202	* pseudocode is described as an increment + compare loop, and the
3203	* max integer would always compare true.
3204	*/
3205	tcg_gen_movi_i64(t1, (a->sf
3206	? (a->u ? UINT64_MAX : INT64_MAX)
3207	: (a->u ? UINT32_MAX : INT32_MAX)));
3208	tcg_gen_movcond_i64(TCG_COND_EQ, t0, op1, t1, tmax, t0);
3209	}
3210
3211	/ Bound to the maximum. /
3212	tcg_gen_umin_i64(t0, t0, tmax);
3213	tcg_temp_free_i64(tmax);
3214
3215	/ Set the count to zero if the condition is false. /
3216	cond = (a->u
3217	? (a->eq ? TCG_COND_LEU : TCG_COND_LTU)
3218	: (a->eq ? TCG_COND_LE : TCG_COND_LT));
3219	tcg_gen_movi_i64(t1, `0`);
3220	tcg_gen_movcond_i64(cond, t0, op0, op1, t0, t1);
3221	tcg_temp_free_i64(t1);
3222
3223	/ Since we're bounded, pass as a 32-bit type. /
3224	t2 = tcg_temp_new_i32();
3225	tcg_gen_extrl_i64_i32(t2, t0);
3226	tcg_temp_free_i64(t0);
3227
3228	/ Scale elements to bits. /
3229	tcg_gen_shli_i32(t2, t2, a->esz);
3230
3231	desc = (vsz / `8`) - `2`;
3232	desc = deposit32(desc, SIMD_DATA_SHIFT, `2`, a->esz);
3233	t3 = tcg_const_i32(desc);
3234
3235	ptr = tcg_temp_new_ptr();
3236	tcg_gen_addi_ptr(ptr, cpu_env, pred_full_reg_offset(s, a->rd));
3237
3238	gen_helper_sve_while(t2, ptr, t2, t3);
3239	do_pred_flags(t2);
3240
3241	tcg_temp_free_ptr(ptr);
3242	tcg_temp_free_i32(t2);
3243	tcg_temp_free_i32(t3);
3244	return true;
3245	}
3246
/*
 *** SVE Integer Wide Immediate - Unpredicated Group
 */
3250
3251	static bool trans_FDUP(DisasContext s, arg_FDUP a)
3252	{
3253	if (a->esz == `0`) {
3254	return false;
3255	}
3256	if (sve_access_check(s)) {
3257	unsigned vsz = vec_full_reg_size(s);
3258	int dofs = vec_full_reg_offset(s, a->rd);
3259	uint64_t imm;
3260
3261	/ Decode the VFP immediate. /
3262	imm = vfp_expand_imm(a->esz, a->imm);
3263	imm = dup_const(a->esz, imm);
3264
3265	tcg_gen_gvec_dup64i(dofs, vsz, vsz, imm);
3266	}
3267	return true;
3268	}
3269
3270	static bool trans_DUP_i(DisasContext s, arg_DUP_i a)
3271	{
3272	if (a->esz == `0` && extract32(s->insn, `13`, `1`)) {
3273	return false;
3274	}
3275	if (sve_access_check(s)) {
3276	unsigned vsz = vec_full_reg_size(s);
3277	int dofs = vec_full_reg_offset(s, a->rd);
3278
3279	tcg_gen_gvec_dup64i(dofs, vsz, vsz, dup_const(a->esz, a->imm));
3280	}
3281	return true;
3282	}
3283
3284	static bool trans_ADD_zzi(DisasContext s, arg_rri_esz a)
3285	{
3286	if (a->esz == `0` && extract32(s->insn, `13`, `1`)) {
3287	return false;
3288	}
3289	if (sve_access_check(s)) {
3290	unsigned vsz = vec_full_reg_size(s);
3291	tcg_gen_gvec_addi(a->esz, vec_full_reg_offset(s, a->rd),
3292	vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3293	}
3294	return true;
3295	}
3296
3297	static bool trans_SUB_zzi(DisasContext s, arg_rri_esz a)
3298	{
3299	a->imm = -a->imm;
3300	return trans_ADD_zzi(s, a);
3301	}
3302
3303	static bool trans_SUBR_zzi(DisasContext s, arg_rri_esz a)
3304	{
3305	static const TCGOpcode vecop_list[] = { INDEX_op_sub_vec, `0` };
3306	static const GVecGen2s op[`4`] = {
3307	{ .fni8 = tcg_gen_vec_sub8_i64,
3308	.fniv = tcg_gen_sub_vec,
3309	.fno = gen_helper_sve_subri_b,
3310	.opt_opc = vecop_list,
3311	.vece = MO_8,
3312	.scalar_first = true },
3313	{ .fni8 = tcg_gen_vec_sub16_i64,
3314	.fniv = tcg_gen_sub_vec,
3315	.fno = gen_helper_sve_subri_h,
3316	.opt_opc = vecop_list,
3317	.vece = MO_16,
3318	.scalar_first = true },
3319	{ .fni4 = tcg_gen_sub_i32,
3320	.fniv = tcg_gen_sub_vec,
3321	.fno = gen_helper_sve_subri_s,
3322	.opt_opc = vecop_list,
3323	.vece = MO_32,
3324	.scalar_first = true },
3325	{ .fni8 = tcg_gen_sub_i64,
3326	.fniv = tcg_gen_sub_vec,
3327	.fno = gen_helper_sve_subri_d,
3328	.opt_opc = vecop_list,
3329	.prefer_i64 = TCG_TARGET_REG_BITS == `64`,
3330	.vece = MO_64,
3331	.scalar_first = true }
3332	};
3333
3334	if (a->esz == `0` && extract32(s->insn, `13`, `1`)) {
3335	return false;
3336	}
3337	if (sve_access_check(s)) {
3338	unsigned vsz = vec_full_reg_size(s);
3339	TCGv_i64 c = tcg_const_i64(a->imm);
3340	tcg_gen_gvec_2s(vec_full_reg_offset(s, a->rd),
3341	vec_full_reg_offset(s, a->rn),
3342	vsz, vsz, c, &op[a->esz]);
3343	tcg_temp_free_i64(c);
3344	}
3345	return true;
3346	}
3347
3348	static bool trans_MUL_zzi(DisasContext s, arg_rri_esz a)
3349	{
3350	if (sve_access_check(s)) {
3351	unsigned vsz = vec_full_reg_size(s);
3352	tcg_gen_gvec_muli(a->esz, vec_full_reg_offset(s, a->rd),
3353	vec_full_reg_offset(s, a->rn), a->imm, vsz, vsz);
3354	}
3355	return true;
3356	}
3357
3358	static bool do_zzi_sat(DisasContext s, arg_rri_esz a, bool u, bool d)
3359	{
3360	if (a->esz == `0` && extract32(s->insn, `13`, `1`)) {
3361	return false;
3362	}
3363	if (sve_access_check(s)) {
3364	TCGv_i64 val = tcg_const_i64(a->imm);
3365	do_sat_addsub_vec(s, a->esz, a->rd, a->rn, val, u, d);
3366	tcg_temp_free_i64(val);
3367	}
3368	return true;
3369	}
3370
3371	static bool trans_SQADD_zzi(DisasContext s, arg_rri_esz a)
3372	{
3373	return do_zzi_sat(s, a, false, false);
3374	}
3375
3376	static bool trans_UQADD_zzi(DisasContext s, arg_rri_esz a)
3377	{
3378	return do_zzi_sat(s, a, true, false);
3379	}
3380
3381	static bool trans_SQSUB_zzi(DisasContext s, arg_rri_esz a)
3382	{
3383	return do_zzi_sat(s, a, false, true);
3384	}
3385
3386	static bool trans_UQSUB_zzi(DisasContext s, arg_rri_esz a)
3387	{
3388	return do_zzi_sat(s, a, true, true);
3389	}
3390
3391	static bool do_zzi_ool(DisasContext s, arg_rri_esz a, gen_helper_gvec_2i *fn)
3392	{
3393	if (sve_access_check(s)) {
3394	unsigned vsz = vec_full_reg_size(s);
3395	TCGv_i64 c = tcg_const_i64(a->imm);
3396
3397	tcg_gen_gvec_2i_ool(vec_full_reg_offset(s, a->rd),
3398	vec_full_reg_offset(s, a->rn),
3399	c, vsz, vsz, `0`, fn);
3400	tcg_temp_free_i64(c);
3401	}
3402	return true;
3403	}
3404
3405	#define DO_ZZI(NAME, name) \
3406	static bool trans_##NAME##_zzi(DisasContext s, arg_rri_esz a) \
3407	{ \
3408	static gen_helper_gvec_2i * const fns[4] = { \
3409	gen_helper_sve_##name##i_b, gen_helper_sve_##name##i_h, \
3410	gen_helper_sve_##name##i_s, gen_helper_sve_##name##i_d, \
3411	}; \
3412	return do_zzi_ool(s, a, fns[a->esz]); \
3413	}
3414
3415	DO_ZZI(SMAX, smax)
3416	DO_ZZI(UMAX, umax)
3417	DO_ZZI(SMIN, smin)
3418	DO_ZZI(UMIN, umin)
3419
3420	#undef DO_ZZI
3421
3422	static bool trans_DOT_zzz(DisasContext s, arg_DOT_zzz a)
3423	{
3424	static gen_helper_gvec_3 * const fns[`2`][`2`] = {
3425	{ gen_helper_gvec_sdot_b, gen_helper_gvec_sdot_h },
3426	{ gen_helper_gvec_udot_b, gen_helper_gvec_udot_h }
3427	};
3428
3429	if (sve_access_check(s)) {
3430	unsigned vsz = vec_full_reg_size(s);
3431	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3432	vec_full_reg_offset(s, a->rn),
3433	vec_full_reg_offset(s, a->rm),
3434	vsz, vsz, `0`, fns[a->u][a->sz]);
3435	}
3436	return true;
3437	}
3438
3439	static bool trans_DOT_zzx(DisasContext s, arg_DOT_zzx a)
3440	{
3441	static gen_helper_gvec_3 * const fns[`2`][`2`] = {
3442	{ gen_helper_gvec_sdot_idx_b, gen_helper_gvec_sdot_idx_h },
3443	{ gen_helper_gvec_udot_idx_b, gen_helper_gvec_udot_idx_h }
3444	};
3445
3446	if (sve_access_check(s)) {
3447	unsigned vsz = vec_full_reg_size(s);
3448	tcg_gen_gvec_3_ool(vec_full_reg_offset(s, a->rd),
3449	vec_full_reg_offset(s, a->rn),
3450	vec_full_reg_offset(s, a->rm),
3451	vsz, vsz, a->index, fns[a->u][a->sz]);
3452	}
3453	return true;
3454	}
3455
3456
3457	/*
3458	*** SVE Floating Point Multiply-Add Indexed Group
3459	*/
3460
3461	static bool trans_FMLA_zzxz(DisasContext s, arg_FMLA_zzxz a)
3462	{
3463	static gen_helper_gvec_4_ptr * const fns[`3`] = {
3464	gen_helper_gvec_fmla_idx_h,
3465	gen_helper_gvec_fmla_idx_s,
3466	gen_helper_gvec_fmla_idx_d,
3467	};
3468
3469	if (sve_access_check(s)) {
3470	unsigned vsz = vec_full_reg_size(s);
3471	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3472	tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3473	vec_full_reg_offset(s, a->rn),
3474	vec_full_reg_offset(s, a->rm),
3475	vec_full_reg_offset(s, a->ra),
3476	status, vsz, vsz, (a->index << `1`) \| a->sub,
3477	fns[a->esz - `1`]);
3478	tcg_temp_free_ptr(status);
3479	}
3480	return true;
3481	}
3482
3483	/*
3484	*** SVE Floating Point Multiply Indexed Group
3485	*/
3486
3487	static bool trans_FMUL_zzx(DisasContext s, arg_FMUL_zzx a)
3488	{
3489	static gen_helper_gvec_3_ptr * const fns[`3`] = {
3490	gen_helper_gvec_fmul_idx_h,
3491	gen_helper_gvec_fmul_idx_s,
3492	gen_helper_gvec_fmul_idx_d,
3493	};
3494
3495	if (sve_access_check(s)) {
3496	unsigned vsz = vec_full_reg_size(s);
3497	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3498	tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3499	vec_full_reg_offset(s, a->rn),
3500	vec_full_reg_offset(s, a->rm),
3501	status, vsz, vsz, a->index, fns[a->esz - `1`]);
3502	tcg_temp_free_ptr(status);
3503	}
3504	return true;
3505	}
3506
3507	/*
3508	*** SVE Floating Point Fast Reduction Group
3509	*/
3510
3511	typedef void gen_helper_fp_reduce(TCGv_i64, TCGv_ptr, TCGv_ptr,
3512	TCGv_ptr, TCGv_i32);
3513
3514	static void do_reduce(DisasContext s, arg_rpr_esz a,
3515	gen_helper_fp_reduce *fn)
3516	{
3517	unsigned vsz = vec_full_reg_size(s);
3518	unsigned p2vsz = pow2ceil(vsz);
3519	TCGv_i32 t_desc = tcg_const_i32(simd_desc(vsz, p2vsz, `0`));
3520	TCGv_ptr t_zn, t_pg, status;
3521	TCGv_i64 temp;
3522
3523	temp = tcg_temp_new_i64();
3524	t_zn = tcg_temp_new_ptr();
3525	t_pg = tcg_temp_new_ptr();
3526
3527	tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, a->rn));
3528	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3529	status = get_fpstatus_ptr(a->esz == MO_16);
3530
3531	fn(temp, t_zn, t_pg, status, t_desc);
3532	tcg_temp_free_ptr(t_zn);
3533	tcg_temp_free_ptr(t_pg);
3534	tcg_temp_free_ptr(status);
3535	tcg_temp_free_i32(t_desc);
3536
3537	write_fp_dreg(s, a->rd, temp);
3538	tcg_temp_free_i64(temp);
3539	}
3540
3541	#define DO_VPZ(NAME, name) \
3542	static bool trans_##NAME(DisasContext s, arg_rpr_esz a) \
3543	{ \
3544	static gen_helper_fp_reduce * const fns[3] = { \
3545	gen_helper_sve_##name##_h, \
3546	gen_helper_sve_##name##_s, \
3547	gen_helper_sve_##name##_d, \
3548	}; \
3549	if (a->esz == 0) { \
3550	return false; \
3551	} \
3552	if (sve_access_check(s)) { \
3553	do_reduce(s, a, fns[a->esz - 1]); \
3554	} \
3555	return true; \
3556	}
3557
3558	DO_VPZ(FADDV, faddv)
3559	DO_VPZ(FMINNMV, fminnmv)
3560	DO_VPZ(FMAXNMV, fmaxnmv)
3561	DO_VPZ(FMINV, fminv)
3562	DO_VPZ(FMAXV, fmaxv)
3563
3564	/*
3565	*** SVE Floating Point Unary Operations - Unpredicated Group
3566	*/
3567
3568	static void do_zz_fp(DisasContext s, arg_rr_esz a, gen_helper_gvec_2_ptr *fn)
3569	{
3570	unsigned vsz = vec_full_reg_size(s);
3571	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3572
3573	tcg_gen_gvec_2_ptr(vec_full_reg_offset(s, a->rd),
3574	vec_full_reg_offset(s, a->rn),
3575	status, vsz, vsz, `0`, fn);
3576	tcg_temp_free_ptr(status);
3577	}
3578
3579	static bool trans_FRECPE(DisasContext s, arg_rr_esz a)
3580	{
3581	static gen_helper_gvec_2_ptr * const fns[`3`] = {
3582	gen_helper_gvec_frecpe_h,
3583	gen_helper_gvec_frecpe_s,
3584	gen_helper_gvec_frecpe_d,
3585	};
3586	if (a->esz == `0`) {
3587	return false;
3588	}
3589	if (sve_access_check(s)) {
3590	do_zz_fp(s, a, fns[a->esz - `1`]);
3591	}
3592	return true;
3593	}
3594
3595	static bool trans_FRSQRTE(DisasContext s, arg_rr_esz a)
3596	{
3597	static gen_helper_gvec_2_ptr * const fns[`3`] = {
3598	gen_helper_gvec_frsqrte_h,
3599	gen_helper_gvec_frsqrte_s,
3600	gen_helper_gvec_frsqrte_d,
3601	};
3602	if (a->esz == `0`) {
3603	return false;
3604	}
3605	if (sve_access_check(s)) {
3606	do_zz_fp(s, a, fns[a->esz - `1`]);
3607	}
3608	return true;
3609	}
3610
3611	/*
3612	*** SVE Floating Point Compare with Zero Group
3613	*/
3614
3615	static void do_ppz_fp(DisasContext s, arg_rpr_esz a,
3616	gen_helper_gvec_3_ptr *fn)
3617	{
3618	unsigned vsz = vec_full_reg_size(s);
3619	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3620
3621	tcg_gen_gvec_3_ptr(pred_full_reg_offset(s, a->rd),
3622	vec_full_reg_offset(s, a->rn),
3623	pred_full_reg_offset(s, a->pg),
3624	status, vsz, vsz, `0`, fn);
3625	tcg_temp_free_ptr(status);
3626	}
3627
3628	#define DO_PPZ(NAME, name) \
3629	static bool trans_##NAME(DisasContext s, arg_rpr_esz a) \
3630	{ \
3631	static gen_helper_gvec_3_ptr * const fns[3] = { \
3632	gen_helper_sve_##name##_h, \
3633	gen_helper_sve_##name##_s, \
3634	gen_helper_sve_##name##_d, \
3635	}; \
3636	if (a->esz == 0) { \
3637	return false; \
3638	} \
3639	if (sve_access_check(s)) { \
3640	do_ppz_fp(s, a, fns[a->esz - 1]); \
3641	} \
3642	return true; \
3643	}
3644
3645	DO_PPZ(FCMGE_ppz0, fcmge0)
3646	DO_PPZ(FCMGT_ppz0, fcmgt0)
3647	DO_PPZ(FCMLE_ppz0, fcmle0)
3648	DO_PPZ(FCMLT_ppz0, fcmlt0)
3649	DO_PPZ(FCMEQ_ppz0, fcmeq0)
3650	DO_PPZ(FCMNE_ppz0, fcmne0)
3651
3652	#undef DO_PPZ
3653
3654	/*
3655	*** SVE floating-point trig multiply-add coefficient
3656	*/
3657
3658	static bool trans_FTMAD(DisasContext s, arg_FTMAD a)
3659	{
3660	static gen_helper_gvec_3_ptr * const fns[`3`] = {
3661	gen_helper_sve_ftmad_h,
3662	gen_helper_sve_ftmad_s,
3663	gen_helper_sve_ftmad_d,
3664	};
3665
3666	if (a->esz == `0`) {
3667	return false;
3668	}
3669	if (sve_access_check(s)) {
3670	unsigned vsz = vec_full_reg_size(s);
3671	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3672	tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3673	vec_full_reg_offset(s, a->rn),
3674	vec_full_reg_offset(s, a->rm),
3675	status, vsz, vsz, a->imm, fns[a->esz - `1`]);
3676	tcg_temp_free_ptr(status);
3677	}
3678	return true;
3679	}
3680
3681	/*
3682	*** SVE Floating Point Accumulating Reduction Group
3683	*/
3684
3685	static bool trans_FADDA(DisasContext s, arg_rprr_esz a)
3686	{
3687	typedef void fadda_fn(TCGv_i64, TCGv_i64, TCGv_ptr,
3688	TCGv_ptr, TCGv_ptr, TCGv_i32);
3689	static fadda_fn * const fns[`3`] = {
3690	gen_helper_sve_fadda_h,
3691	gen_helper_sve_fadda_s,
3692	gen_helper_sve_fadda_d,
3693	};
3694	unsigned vsz = vec_full_reg_size(s);
3695	TCGv_ptr t_rm, t_pg, t_fpst;
3696	TCGv_i64 t_val;
3697	TCGv_i32 t_desc;
3698
3699	if (a->esz == `0`) {
3700	return false;
3701	}
3702	if (!sve_access_check(s)) {
3703	return true;
3704	}
3705
3706	t_val = load_esz(cpu_env, vec_reg_offset(s, a->rn, `0`, a->esz), a->esz);
3707	t_rm = tcg_temp_new_ptr();
3708	t_pg = tcg_temp_new_ptr();
3709	tcg_gen_addi_ptr(t_rm, cpu_env, vec_full_reg_offset(s, a->rm));
3710	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, a->pg));
3711	t_fpst = get_fpstatus_ptr(a->esz == MO_16);
3712	t_desc = tcg_const_i32(simd_desc(vsz, vsz, `0`));
3713
3714	fns[a->esz - `1`](t_val, t_val, t_rm, t_pg, t_fpst, t_desc);
3715
3716	tcg_temp_free_i32(t_desc);
3717	tcg_temp_free_ptr(t_fpst);
3718	tcg_temp_free_ptr(t_pg);
3719	tcg_temp_free_ptr(t_rm);
3720
3721	write_fp_dreg(s, a->rd, t_val);
3722	tcg_temp_free_i64(t_val);
3723	return true;
3724	}
3725
3726	/*
3727	*** SVE Floating Point Arithmetic - Unpredicated Group
3728	*/
3729
3730	static bool do_zzz_fp(DisasContext s, arg_rrr_esz a,
3731	gen_helper_gvec_3_ptr *fn)
3732	{
3733	if (fn == NULL) {
3734	return false;
3735	}
3736	if (sve_access_check(s)) {
3737	unsigned vsz = vec_full_reg_size(s);
3738	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3739	tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
3740	vec_full_reg_offset(s, a->rn),
3741	vec_full_reg_offset(s, a->rm),
3742	status, vsz, vsz, `0`, fn);
3743	tcg_temp_free_ptr(status);
3744	}
3745	return true;
3746	}
3747
3748
3749	#define DO_FP3(NAME, name) \
3750	static bool trans_##NAME(DisasContext s, arg_rrr_esz a) \
3751	{ \
3752	static gen_helper_gvec_3_ptr * const fns[4] = { \
3753	NULL, gen_helper_gvec_##name##_h, \
3754	gen_helper_gvec_##name##_s, gen_helper_gvec_##name##_d \
3755	}; \
3756	return do_zzz_fp(s, a, fns[a->esz]); \
3757	}
3758
3759	DO_FP3(FADD_zzz, fadd)
3760	DO_FP3(FSUB_zzz, fsub)
3761	DO_FP3(FMUL_zzz, fmul)
3762	DO_FP3(FTSMUL, ftsmul)
3763	DO_FP3(FRECPS, recps)
3764	DO_FP3(FRSQRTS, rsqrts)
3765
3766	#undef DO_FP3
3767
3768	/*
3769	*** SVE Floating Point Arithmetic - Predicated Group
3770	*/
3771
3772	static bool do_zpzz_fp(DisasContext s, arg_rprr_esz a,
3773	gen_helper_gvec_4_ptr *fn)
3774	{
3775	if (fn == NULL) {
3776	return false;
3777	}
3778	if (sve_access_check(s)) {
3779	unsigned vsz = vec_full_reg_size(s);
3780	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3781	tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3782	vec_full_reg_offset(s, a->rn),
3783	vec_full_reg_offset(s, a->rm),
3784	pred_full_reg_offset(s, a->pg),
3785	status, vsz, vsz, `0`, fn);
3786	tcg_temp_free_ptr(status);
3787	}
3788	return true;
3789	}
3790
3791	#define DO_FP3(NAME, name) \
3792	static bool trans_##NAME(DisasContext s, arg_rprr_esz a) \
3793	{ \
3794	static gen_helper_gvec_4_ptr * const fns[4] = { \
3795	NULL, gen_helper_sve_##name##_h, \
3796	gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3797	}; \
3798	return do_zpzz_fp(s, a, fns[a->esz]); \
3799	}
3800
3801	DO_FP3(FADD_zpzz, fadd)
3802	DO_FP3(FSUB_zpzz, fsub)
3803	DO_FP3(FMUL_zpzz, fmul)
3804	DO_FP3(FMIN_zpzz, fmin)
3805	DO_FP3(FMAX_zpzz, fmax)
3806	DO_FP3(FMINNM_zpzz, fminnum)
3807	DO_FP3(FMAXNM_zpzz, fmaxnum)
3808	DO_FP3(FABD, fabd)
3809	DO_FP3(FSCALE, fscalbn)
3810	DO_FP3(FDIV, fdiv)
3811	DO_FP3(FMULX, fmulx)
3812
3813	#undef DO_FP3
3814
3815	typedef void gen_helper_sve_fp2scalar(TCGv_ptr, TCGv_ptr, TCGv_ptr,
3816	TCGv_i64, TCGv_ptr, TCGv_i32);
3817
3818	static void do_fp_scalar(DisasContext s, int* zd, int zn, int pg, bool is_fp16,
3819	TCGv_i64 scalar, gen_helper_sve_fp2scalar *fn)
3820	{
3821	unsigned vsz = vec_full_reg_size(s);
3822	TCGv_ptr t_zd, t_zn, t_pg, status;
3823	TCGv_i32 desc;
3824
3825	t_zd = tcg_temp_new_ptr();
3826	t_zn = tcg_temp_new_ptr();
3827	t_pg = tcg_temp_new_ptr();
3828	tcg_gen_addi_ptr(t_zd, cpu_env, vec_full_reg_offset(s, zd));
3829	tcg_gen_addi_ptr(t_zn, cpu_env, vec_full_reg_offset(s, zn));
3830	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
3831
3832	status = get_fpstatus_ptr(is_fp16);
3833	desc = tcg_const_i32(simd_desc(vsz, vsz, `0`));
3834	fn(t_zd, t_zn, t_pg, scalar, status, desc);
3835
3836	tcg_temp_free_i32(desc);
3837	tcg_temp_free_ptr(status);
3838	tcg_temp_free_ptr(t_pg);
3839	tcg_temp_free_ptr(t_zn);
3840	tcg_temp_free_ptr(t_zd);
3841	}
3842
3843	static void do_fp_imm(DisasContext s, arg_rpri_esz a, uint64_t imm,
3844	gen_helper_sve_fp2scalar *fn)
3845	{
3846	TCGv_i64 temp = tcg_const_i64(imm);
3847	do_fp_scalar(s, a->rd, a->rn, a->pg, a->esz == MO_16, temp, fn);
3848	tcg_temp_free_i64(temp);
3849	}
3850
3851	#define DO_FP_IMM(NAME, name, const0, const1) \
3852	static bool trans_##NAME##_zpzi(DisasContext s, arg_rpri_esz a) \
3853	{ \
3854	static gen_helper_sve_fp2scalar * const fns[3] = { \
3855	gen_helper_sve_##name##_h, \
3856	gen_helper_sve_##name##_s, \
3857	gen_helper_sve_##name##_d \
3858	}; \
3859	static uint64_t const val[3][2] = { \
3860	{ float16_##const0, float16_##const1 }, \
3861	{ float32_##const0, float32_##const1 }, \
3862	{ float64_##const0, float64_##const1 }, \
3863	}; \
3864	if (a->esz == 0) { \
3865	return false; \
3866	} \
3867	if (sve_access_check(s)) { \
3868	do_fp_imm(s, a, val[a->esz - 1][a->imm], fns[a->esz - 1]); \
3869	} \
3870	return true; \
3871	}
3872
3873	#define float16_two make_float16(0x4000)
3874	#define float32_two make_float32(0x40000000)
3875	#define float64_two make_float64(0x4000000000000000ULL)
3876
3877	DO_FP_IMM(FADD, fadds, half, one)
3878	DO_FP_IMM(FSUB, fsubs, half, one)
3879	DO_FP_IMM(FMUL, fmuls, half, two)
3880	DO_FP_IMM(FSUBR, fsubrs, half, one)
3881	DO_FP_IMM(FMAXNM, fmaxnms, zero, one)
3882	DO_FP_IMM(FMINNM, fminnms, zero, one)
3883	DO_FP_IMM(FMAX, fmaxs, zero, one)
3884	DO_FP_IMM(FMIN, fmins, zero, one)
3885
3886	#undef DO_FP_IMM
3887
3888	static bool do_fp_cmp(DisasContext s, arg_rprr_esz a,
3889	gen_helper_gvec_4_ptr *fn)
3890	{
3891	if (fn == NULL) {
3892	return false;
3893	}
3894	if (sve_access_check(s)) {
3895	unsigned vsz = vec_full_reg_size(s);
3896	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3897	tcg_gen_gvec_4_ptr(pred_full_reg_offset(s, a->rd),
3898	vec_full_reg_offset(s, a->rn),
3899	vec_full_reg_offset(s, a->rm),
3900	pred_full_reg_offset(s, a->pg),
3901	status, vsz, vsz, `0`, fn);
3902	tcg_temp_free_ptr(status);
3903	}
3904	return true;
3905	}
3906
3907	#define DO_FPCMP(NAME, name) \
3908	static bool trans_##NAME##_ppzz(DisasContext s, arg_rprr_esz a) \
3909	{ \
3910	static gen_helper_gvec_4_ptr * const fns[4] = { \
3911	NULL, gen_helper_sve_##name##_h, \
3912	gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3913	}; \
3914	return do_fp_cmp(s, a, fns[a->esz]); \
3915	}
3916
3917	DO_FPCMP(FCMGE, fcmge)
3918	DO_FPCMP(FCMGT, fcmgt)
3919	DO_FPCMP(FCMEQ, fcmeq)
3920	DO_FPCMP(FCMNE, fcmne)
3921	DO_FPCMP(FCMUO, fcmuo)
3922	DO_FPCMP(FACGE, facge)
3923	DO_FPCMP(FACGT, facgt)
3924
3925	#undef DO_FPCMP
3926
3927	static bool trans_FCADD(DisasContext s, arg_FCADD a)
3928	{
3929	static gen_helper_gvec_4_ptr * const fns[`3`] = {
3930	gen_helper_sve_fcadd_h,
3931	gen_helper_sve_fcadd_s,
3932	gen_helper_sve_fcadd_d
3933	};
3934
3935	if (a->esz == `0`) {
3936	return false;
3937	}
3938	if (sve_access_check(s)) {
3939	unsigned vsz = vec_full_reg_size(s);
3940	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
3941	tcg_gen_gvec_4_ptr(vec_full_reg_offset(s, a->rd),
3942	vec_full_reg_offset(s, a->rn),
3943	vec_full_reg_offset(s, a->rm),
3944	pred_full_reg_offset(s, a->pg),
3945	status, vsz, vsz, a->rot, fns[a->esz - `1`]);
3946	tcg_temp_free_ptr(status);
3947	}
3948	return true;
3949	}
3950
3951	typedef void gen_helper_sve_fmla(TCGv_env, TCGv_ptr, TCGv_i32);
3952
3953	static bool do_fmla(DisasContext s, arg_rprrr_esz a, gen_helper_sve_fmla *fn)
3954	{
3955	if (fn == NULL) {
3956	return false;
3957	}
3958	if (!sve_access_check(s)) {
3959	return true;
3960	}
3961
3962	unsigned vsz = vec_full_reg_size(s);
3963	unsigned desc;
3964	TCGv_i32 t_desc;
3965	TCGv_ptr pg = tcg_temp_new_ptr();
3966
3967	/ We would need 7 operands to pass these arguments "properly".*
3968	* So we encode all the register numbers into the descriptor.
3969	*/
3970	desc = deposit32(a->rd, `5`, `5`, a->rn);
3971	desc = deposit32(desc, `10`, `5`, a->rm);
3972	desc = deposit32(desc, `15`, `5`, a->ra);
3973	desc = simd_desc(vsz, vsz, desc);
3974
3975	t_desc = tcg_const_i32(desc);
3976	tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
3977	fn(cpu_env, pg, t_desc);
3978	tcg_temp_free_i32(t_desc);
3979	tcg_temp_free_ptr(pg);
3980	return true;
3981	}
3982
3983	#define DO_FMLA(NAME, name) \
3984	static bool trans_##NAME(DisasContext s, arg_rprrr_esz a) \
3985	{ \
3986	static gen_helper_sve_fmla * const fns[4] = { \
3987	NULL, gen_helper_sve_##name##_h, \
3988	gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
3989	}; \
3990	return do_fmla(s, a, fns[a->esz]); \
3991	}
3992
3993	DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
3994	DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
3995	DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
3996	DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
3997
3998	#undef DO_FMLA
3999
4000	static bool trans_FCMLA_zpzzz(DisasContext s, arg_FCMLA_zpzzz a)
4001	{
4002	static gen_helper_sve_fmla * const fns[`3`] = {
4003	gen_helper_sve_fcmla_zpzzz_h,
4004	gen_helper_sve_fcmla_zpzzz_s,
4005	gen_helper_sve_fcmla_zpzzz_d,
4006	};
4007
4008	if (a->esz == `0`) {
4009	return false;
4010	}
4011	if (sve_access_check(s)) {
4012	unsigned vsz = vec_full_reg_size(s);
4013	unsigned desc;
4014	TCGv_i32 t_desc;
4015	TCGv_ptr pg = tcg_temp_new_ptr();
4016
4017	/ We would need 7 operands to pass these arguments "properly".*
4018	* So we encode all the register numbers into the descriptor.
4019	*/
4020	desc = deposit32(a->rd, `5`, `5`, a->rn);
4021	desc = deposit32(desc, `10`, `5`, a->rm);
4022	desc = deposit32(desc, `15`, `5`, a->ra);
4023	desc = deposit32(desc, `20`, `2`, a->rot);
4024	desc = sextract32(desc, `0`, `22`);
4025	desc = simd_desc(vsz, vsz, desc);
4026
4027	t_desc = tcg_const_i32(desc);
4028	tcg_gen_addi_ptr(pg, cpu_env, pred_full_reg_offset(s, a->pg));
4029	fns[a->esz - `1`](cpu_env, pg, t_desc);
4030	tcg_temp_free_i32(t_desc);
4031	tcg_temp_free_ptr(pg);
4032	}
4033	return true;
4034	}
4035
4036	static bool trans_FCMLA_zzxz(DisasContext s, arg_FCMLA_zzxz a)
4037	{
4038	static gen_helper_gvec_3_ptr * const fns[`2`] = {
4039	gen_helper_gvec_fcmlah_idx,
4040	gen_helper_gvec_fcmlas_idx,
4041	};
4042
4043	tcg_debug_assert(a->esz == `1` \|\| a->esz == `2`);
4044	tcg_debug_assert(a->rd == a->ra);
4045	if (sve_access_check(s)) {
4046	unsigned vsz = vec_full_reg_size(s);
4047	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4048	tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4049	vec_full_reg_offset(s, a->rn),
4050	vec_full_reg_offset(s, a->rm),
4051	status, vsz, vsz,
4052	a->index * `4` + a->rot,
4053	fns[a->esz - `1`]);
4054	tcg_temp_free_ptr(status);
4055	}
4056	return true;
4057	}
4058
4059	/*
4060	*** SVE Floating Point Unary Operations Predicated Group
4061	*/
4062
4063	static bool do_zpz_ptr(DisasContext s, int* rd, int rn, int pg,
4064	bool is_fp16, gen_helper_gvec_3_ptr *fn)
4065	{
4066	if (sve_access_check(s)) {
4067	unsigned vsz = vec_full_reg_size(s);
4068	TCGv_ptr status = get_fpstatus_ptr(is_fp16);
4069	tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, rd),
4070	vec_full_reg_offset(s, rn),
4071	pred_full_reg_offset(s, pg),
4072	status, vsz, vsz, `0`, fn);
4073	tcg_temp_free_ptr(status);
4074	}
4075	return true;
4076	}
4077
4078	static bool trans_FCVT_sh(DisasContext s, arg_rpr_esz a)
4079	{
4080	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sh);
4081	}
4082
4083	static bool trans_FCVT_hs(DisasContext s, arg_rpr_esz a)
4084	{
4085	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hs);
4086	}
4087
4088	static bool trans_FCVT_dh(DisasContext s, arg_rpr_esz a)
4089	{
4090	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_dh);
4091	}
4092
4093	static bool trans_FCVT_hd(DisasContext s, arg_rpr_esz a)
4094	{
4095	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_hd);
4096	}
4097
4098	static bool trans_FCVT_ds(DisasContext s, arg_rpr_esz a)
4099	{
4100	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_ds);
4101	}
4102
4103	static bool trans_FCVT_sd(DisasContext s, arg_rpr_esz a)
4104	{
4105	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvt_sd);
4106	}
4107
4108	static bool trans_FCVTZS_hh(DisasContext s, arg_rpr_esz a)
4109	{
4110	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hh);
4111	}
4112
4113	static bool trans_FCVTZU_hh(DisasContext s, arg_rpr_esz a)
4114	{
4115	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hh);
4116	}
4117
4118	static bool trans_FCVTZS_hs(DisasContext s, arg_rpr_esz a)
4119	{
4120	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hs);
4121	}
4122
4123	static bool trans_FCVTZU_hs(DisasContext s, arg_rpr_esz a)
4124	{
4125	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hs);
4126	}
4127
4128	static bool trans_FCVTZS_hd(DisasContext s, arg_rpr_esz a)
4129	{
4130	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzs_hd);
4131	}
4132
4133	static bool trans_FCVTZU_hd(DisasContext s, arg_rpr_esz a)
4134	{
4135	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_fcvtzu_hd);
4136	}
4137
4138	static bool trans_FCVTZS_ss(DisasContext s, arg_rpr_esz a)
4139	{
4140	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ss);
4141	}
4142
4143	static bool trans_FCVTZU_ss(DisasContext s, arg_rpr_esz a)
4144	{
4145	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ss);
4146	}
4147
4148	static bool trans_FCVTZS_sd(DisasContext s, arg_rpr_esz a)
4149	{
4150	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_sd);
4151	}
4152
4153	static bool trans_FCVTZU_sd(DisasContext s, arg_rpr_esz a)
4154	{
4155	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_sd);
4156	}
4157
4158	static bool trans_FCVTZS_ds(DisasContext s, arg_rpr_esz a)
4159	{
4160	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_ds);
4161	}
4162
4163	static bool trans_FCVTZU_ds(DisasContext s, arg_rpr_esz a)
4164	{
4165	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_ds);
4166	}
4167
4168	static bool trans_FCVTZS_dd(DisasContext s, arg_rpr_esz a)
4169	{
4170	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzs_dd);
4171	}
4172
4173	static bool trans_FCVTZU_dd(DisasContext s, arg_rpr_esz a)
4174	{
4175	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_fcvtzu_dd);
4176	}
4177
4178	static gen_helper_gvec_3_ptr * const frint_fns[`3`] = {
4179	gen_helper_sve_frint_h,
4180	gen_helper_sve_frint_s,
4181	gen_helper_sve_frint_d
4182	};
4183
4184	static bool trans_FRINTI(DisasContext s, arg_rpr_esz a)
4185	{
4186	if (a->esz == `0`) {
4187	return false;
4188	}
4189	return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16,
4190	frint_fns[a->esz - `1`]);
4191	}
4192
4193	static bool trans_FRINTX(DisasContext s, arg_rpr_esz a)
4194	{
4195	static gen_helper_gvec_3_ptr * const fns[`3`] = {
4196	gen_helper_sve_frintx_h,
4197	gen_helper_sve_frintx_s,
4198	gen_helper_sve_frintx_d
4199	};
4200	if (a->esz == `0`) {
4201	return false;
4202	}
4203	return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - `1`]);
4204	}
4205
4206	static bool do_frint_mode(DisasContext s, arg_rpr_esz a, int mode)
4207	{
4208	if (a->esz == `0`) {
4209	return false;
4210	}
4211	if (sve_access_check(s)) {
4212	unsigned vsz = vec_full_reg_size(s);
4213	TCGv_i32 tmode = tcg_const_i32(mode);
4214	TCGv_ptr status = get_fpstatus_ptr(a->esz == MO_16);
4215
4216	gen_helper_set_rmode(tmode, tmode, status);
4217
4218	tcg_gen_gvec_3_ptr(vec_full_reg_offset(s, a->rd),
4219	vec_full_reg_offset(s, a->rn),
4220	pred_full_reg_offset(s, a->pg),
4221	status, vsz, vsz, `0`, frint_fns[a->esz - `1`]);
4222
4223	gen_helper_set_rmode(tmode, tmode, status);
4224	tcg_temp_free_i32(tmode);
4225	tcg_temp_free_ptr(status);
4226	}
4227	return true;
4228	}
4229
4230	static bool trans_FRINTN(DisasContext s, arg_rpr_esz a)
4231	{
4232	return do_frint_mode(s, a, float_round_nearest_even);
4233	}
4234
4235	static bool trans_FRINTP(DisasContext s, arg_rpr_esz a)
4236	{
4237	return do_frint_mode(s, a, float_round_up);
4238	}
4239
4240	static bool trans_FRINTM(DisasContext s, arg_rpr_esz a)
4241	{
4242	return do_frint_mode(s, a, float_round_down);
4243	}
4244
4245	static bool trans_FRINTZ(DisasContext s, arg_rpr_esz a)
4246	{
4247	return do_frint_mode(s, a, float_round_to_zero);
4248	}
4249
4250	static bool trans_FRINTA(DisasContext s, arg_rpr_esz a)
4251	{
4252	return do_frint_mode(s, a, float_round_ties_away);
4253	}
4254
4255	static bool trans_FRECPX(DisasContext s, arg_rpr_esz a)
4256	{
4257	static gen_helper_gvec_3_ptr * const fns[`3`] = {
4258	gen_helper_sve_frecpx_h,
4259	gen_helper_sve_frecpx_s,
4260	gen_helper_sve_frecpx_d
4261	};
4262	if (a->esz == `0`) {
4263	return false;
4264	}
4265	return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - `1`]);
4266	}
4267
4268	static bool trans_FSQRT(DisasContext s, arg_rpr_esz a)
4269	{
4270	static gen_helper_gvec_3_ptr * const fns[`3`] = {
4271	gen_helper_sve_fsqrt_h,
4272	gen_helper_sve_fsqrt_s,
4273	gen_helper_sve_fsqrt_d
4274	};
4275	if (a->esz == `0`) {
4276	return false;
4277	}
4278	return do_zpz_ptr(s, a->rd, a->rn, a->pg, a->esz == MO_16, fns[a->esz - `1`]);
4279	}
4280
4281	static bool trans_SCVTF_hh(DisasContext s, arg_rpr_esz a)
4282	{
4283	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_hh);
4284	}
4285
4286	static bool trans_SCVTF_sh(DisasContext s, arg_rpr_esz a)
4287	{
4288	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_sh);
4289	}
4290
4291	static bool trans_SCVTF_dh(DisasContext s, arg_rpr_esz a)
4292	{
4293	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_scvt_dh);
4294	}
4295
4296	static bool trans_SCVTF_ss(DisasContext s, arg_rpr_esz a)
4297	{
4298	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ss);
4299	}
4300
4301	static bool trans_SCVTF_ds(DisasContext s, arg_rpr_esz a)
4302	{
4303	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_ds);
4304	}
4305
4306	static bool trans_SCVTF_sd(DisasContext s, arg_rpr_esz a)
4307	{
4308	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_sd);
4309	}
4310
4311	static bool trans_SCVTF_dd(DisasContext s, arg_rpr_esz a)
4312	{
4313	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_scvt_dd);
4314	}
4315
4316	static bool trans_UCVTF_hh(DisasContext s, arg_rpr_esz a)
4317	{
4318	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_hh);
4319	}
4320
4321	static bool trans_UCVTF_sh(DisasContext s, arg_rpr_esz a)
4322	{
4323	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_sh);
4324	}
4325
4326	static bool trans_UCVTF_dh(DisasContext s, arg_rpr_esz a)
4327	{
4328	return do_zpz_ptr(s, a->rd, a->rn, a->pg, true, gen_helper_sve_ucvt_dh);
4329	}
4330
4331	static bool trans_UCVTF_ss(DisasContext s, arg_rpr_esz a)
4332	{
4333	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ss);
4334	}
4335
4336	static bool trans_UCVTF_ds(DisasContext s, arg_rpr_esz a)
4337	{
4338	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_ds);
4339	}
4340
4341	static bool trans_UCVTF_sd(DisasContext s, arg_rpr_esz a)
4342	{
4343	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_sd);
4344	}
4345
4346	static bool trans_UCVTF_dd(DisasContext s, arg_rpr_esz a)
4347	{
4348	return do_zpz_ptr(s, a->rd, a->rn, a->pg, false, gen_helper_sve_ucvt_dd);
4349	}
4350
4351	/*
4352	*** SVE Memory - 32-bit Gather and Unsized Contiguous Group
4353	*/
4354
4355	/ Subroutine loading a vector register at VOFS of LEN bytes.*
4356	* The load should begin at the address Rn + IMM.
4357	*/
4358
4359	static void do_ldr(DisasContext s, uint32_t vofs, int* len, int rn, int imm)
4360	{
4361	int len_align = QEMU_ALIGN_DOWN(len, `8`);
4362	int len_remain = len % `8`;
4363	int nparts = len / `8` + ctpop8(len_remain);
4364	int midx = get_mem_index(s);
4365	TCGv_i64 addr, t0, t1;
4366
4367	addr = tcg_temp_new_i64();
4368	t0 = tcg_temp_new_i64();
4369
4370	/ Note that unpredicated load/store of vector/predicate registers*
4371	* are defined as a stream of bytes, which equates to little-endian
4372	* operations on larger quantities. There is no nice way to force
4373	* a little-endian load for aarch64_be-linux-user out of line.
4374	*
4375	* Attempt to keep code expansion to a minimum by limiting the
4376	* amount of unrolling done.
4377	*/
4378	if (nparts <= `4`) {
4379	int i;
4380
4381	for (i = `0`; i < len_align; i += `8`) {
4382	tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4383	tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4384	tcg_gen_st_i64(t0, cpu_env, vofs + i);
4385	}
4386	} else {
4387	TCGLabel *loop = gen_new_label();
4388	TCGv_ptr tp, i = tcg_const_local_ptr(`0`);
4389
4390	gen_set_label(loop);
4391
4392	/ Minimize the number of local temps that must be re-read from*
4393	* the stack each iteration. Instead, re-compute values other
4394	* than the loop counter.
4395	*/
4396	tp = tcg_temp_new_ptr();
4397	tcg_gen_addi_ptr(tp, i, imm);
4398	tcg_gen_extu_ptr_i64(addr, tp);
4399	tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4400
4401	tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEQ);
4402
4403	tcg_gen_add_ptr(tp, cpu_env, i);
4404	tcg_gen_addi_ptr(i, i, `8`);
4405	tcg_gen_st_i64(t0, tp, vofs);
4406	tcg_temp_free_ptr(tp);
4407
4408	tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4409	tcg_temp_free_ptr(i);
4410	}
4411
4412	/ Predicate register loads can be any multiple of 2.*
4413	* Note that we still store the entire 64-bit unit into cpu_env.
4414	*/
4415	if (len_remain) {
4416	tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4417
4418	switch (len_remain) {
4419	case `2`:
4420	case `4`:
4421	case `8`:
4422	tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LE \| ctz32(len_remain));
4423	break;
4424
4425	case `6`:
4426	t1 = tcg_temp_new_i64();
4427	tcg_gen_qemu_ld_i64(t0, addr, midx, MO_LEUL);
4428	tcg_gen_addi_i64(addr, addr, `4`);
4429	tcg_gen_qemu_ld_i64(t1, addr, midx, MO_LEUW);
4430	tcg_gen_deposit_i64(t0, t0, t1, `32`, `32`);
4431	tcg_temp_free_i64(t1);
4432	break;
4433
4434	default:
4435	g_assert_not_reached();
4436	}
4437	tcg_gen_st_i64(t0, cpu_env, vofs + len_align);
4438	}
4439	tcg_temp_free_i64(addr);
4440	tcg_temp_free_i64(t0);
4441	}
4442
4443	/ Similarly for stores. /
4444	static void do_str(DisasContext s, uint32_t vofs, int* len, int rn, int imm)
4445	{
4446	int len_align = QEMU_ALIGN_DOWN(len, `8`);
4447	int len_remain = len % `8`;
4448	int nparts = len / `8` + ctpop8(len_remain);
4449	int midx = get_mem_index(s);
4450	TCGv_i64 addr, t0;
4451
4452	addr = tcg_temp_new_i64();
4453	t0 = tcg_temp_new_i64();
4454
4455	/ Note that unpredicated load/store of vector/predicate registers*
4456	* are defined as a stream of bytes, which equates to little-endian
4457	* operations on larger quantities. There is no nice way to force
4458	* a little-endian store for aarch64_be-linux-user out of line.
4459	*
4460	* Attempt to keep code expansion to a minimum by limiting the
4461	* amount of unrolling done.
4462	*/
4463	if (nparts <= `4`) {
4464	int i;
4465
4466	for (i = `0`; i < len_align; i += `8`) {
4467	tcg_gen_ld_i64(t0, cpu_env, vofs + i);
4468	tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + i);
4469	tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4470	}
4471	} else {
4472	TCGLabel *loop = gen_new_label();
4473	TCGv_ptr t2, i = tcg_const_local_ptr(`0`);
4474
4475	gen_set_label(loop);
4476
4477	t2 = tcg_temp_new_ptr();
4478	tcg_gen_add_ptr(t2, cpu_env, i);
4479	tcg_gen_ld_i64(t0, t2, vofs);
4480
4481	/ Minimize the number of local temps that must be re-read from*
4482	* the stack each iteration. Instead, re-compute values other
4483	* than the loop counter.
4484	*/
4485	tcg_gen_addi_ptr(t2, i, imm);
4486	tcg_gen_extu_ptr_i64(addr, t2);
4487	tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, rn));
4488	tcg_temp_free_ptr(t2);
4489
4490	tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEQ);
4491
4492	tcg_gen_addi_ptr(i, i, `8`);
4493
4494	tcg_gen_brcondi_ptr(TCG_COND_LTU, i, len_align, loop);
4495	tcg_temp_free_ptr(i);
4496	}
4497
4498	/ Predicate register stores can be any multiple of 2. /
4499	if (len_remain) {
4500	tcg_gen_ld_i64(t0, cpu_env, vofs + len_align);
4501	tcg_gen_addi_i64(addr, cpu_reg_sp(s, rn), imm + len_align);
4502
4503	switch (len_remain) {
4504	case `2`:
4505	case `4`:
4506	case `8`:
4507	tcg_gen_qemu_st_i64(t0, addr, midx, MO_LE \| ctz32(len_remain));
4508	break;
4509
4510	case `6`:
4511	tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUL);
4512	tcg_gen_addi_i64(addr, addr, `4`);
4513	tcg_gen_shri_i64(t0, t0, `32`);
4514	tcg_gen_qemu_st_i64(t0, addr, midx, MO_LEUW);
4515	break;
4516
4517	default:
4518	g_assert_not_reached();
4519	}
4520	}
4521	tcg_temp_free_i64(addr);
4522	tcg_temp_free_i64(t0);
4523	}
4524
4525	static bool trans_LDR_zri(DisasContext s, arg_rri a)
4526	{
4527	if (sve_access_check(s)) {
4528	int size = vec_full_reg_size(s);
4529	int off = vec_full_reg_offset(s, a->rd);
4530	do_ldr(s, off, size, a->rn, a->imm * size);
4531	}
4532	return true;
4533	}
4534
4535	static bool trans_LDR_pri(DisasContext s, arg_rri a)
4536	{
4537	if (sve_access_check(s)) {
4538	int size = pred_full_reg_size(s);
4539	int off = pred_full_reg_offset(s, a->rd);
4540	do_ldr(s, off, size, a->rn, a->imm * size);
4541	}
4542	return true;
4543	}
4544
4545	static bool trans_STR_zri(DisasContext s, arg_rri a)
4546	{
4547	if (sve_access_check(s)) {
4548	int size = vec_full_reg_size(s);
4549	int off = vec_full_reg_offset(s, a->rd);
4550	do_str(s, off, size, a->rn, a->imm * size);
4551	}
4552	return true;
4553	}
4554
4555	static bool trans_STR_pri(DisasContext s, arg_rri a)
4556	{
4557	if (sve_access_check(s)) {
4558	int size = pred_full_reg_size(s);
4559	int off = pred_full_reg_offset(s, a->rd);
4560	do_str(s, off, size, a->rn, a->imm * size);
4561	}
4562	return true;
4563	}
4564
4565	/*
4566	*** SVE Memory - Contiguous Load Group
4567	*/
4568
4569	/ The memory mode of the dtype. /
4570	static const MemOp dtype_mop[`16`] = {
4571	MO_UB, MO_UB, MO_UB, MO_UB,
4572	MO_SL, MO_UW, MO_UW, MO_UW,
4573	MO_SW, MO_SW, MO_UL, MO_UL,
4574	MO_SB, MO_SB, MO_SB, MO_Q
4575	};
4576
4577	#define dtype_msz(x) (dtype_mop[x] & MO_SIZE)
4578
/*
 * The vector element size of dtype.
 * Indexed by the 4-bit dtype value.  The translators in this file use
 * the entry as a shift count (vsz >> dtype_esz[dtype]) to obtain the
 * number of elements in a vector.
 */
static const uint8_t dtype_esz[16] = {
    0, 1, 2, 3,
    3, 1, 2, 3,
    3, 2, 2, 3,
    3, 2, 1, 3
};
4586
4587	static TCGMemOpIdx sve_memopidx(DisasContext s, int* dtype)
4588	{
4589	return make_memop_idx(s->be_data \| dtype_mop[dtype], get_mem_index(s));
4590	}
4591
4592	static void do_mem_zpa(DisasContext s, int* zt, int pg, TCGv_i64 addr,
4593	int dtype, gen_helper_gvec_mem *fn)
4594	{
4595	unsigned vsz = vec_full_reg_size(s);
4596	TCGv_ptr t_pg;
4597	TCGv_i32 t_desc;
4598	int desc;
4599
4600	/ For e.g. LD4, there are not enough arguments to pass all 4*
4601	* registers as pointers, so encode the regno into the data field.
4602	* For consistency, do this even for LD1.
4603	*/
4604	desc = sve_memopidx(s, dtype);
4605	desc \|= zt << MEMOPIDX_SHIFT;
4606	desc = simd_desc(vsz, vsz, desc);
4607	t_desc = tcg_const_i32(desc);
4608	t_pg = tcg_temp_new_ptr();
4609
4610	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
4611	fn(cpu_env, t_pg, addr, t_desc);
4612
4613	tcg_temp_free_ptr(t_pg);
4614	tcg_temp_free_i32(t_desc);
4615	}
4616
4617	static void do_ld_zpa(DisasContext s, int* zt, int pg,
4618	TCGv_i64 addr, int dtype, int nreg)
4619	{
4620	static gen_helper_gvec_mem * const fns[`2`][`16`][`4`] = {
4621	/ Little-endian /
4622	{ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4623	gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4624	{ gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4625	{ gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4626	{ gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4627
4628	{ gen_helper_sve_ld1sds_le_r, NULL, NULL, NULL },
4629	{ gen_helper_sve_ld1hh_le_r, gen_helper_sve_ld2hh_le_r,
4630	gen_helper_sve_ld3hh_le_r, gen_helper_sve_ld4hh_le_r },
4631	{ gen_helper_sve_ld1hsu_le_r, NULL, NULL, NULL },
4632	{ gen_helper_sve_ld1hdu_le_r, NULL, NULL, NULL },
4633
4634	{ gen_helper_sve_ld1hds_le_r, NULL, NULL, NULL },
4635	{ gen_helper_sve_ld1hss_le_r, NULL, NULL, NULL },
4636	{ gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld2ss_le_r,
4637	gen_helper_sve_ld3ss_le_r, gen_helper_sve_ld4ss_le_r },
4638	{ gen_helper_sve_ld1sdu_le_r, NULL, NULL, NULL },
4639
4640	{ gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4641	{ gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4642	{ gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4643	{ gen_helper_sve_ld1dd_le_r, gen_helper_sve_ld2dd_le_r,
4644	gen_helper_sve_ld3dd_le_r, gen_helper_sve_ld4dd_le_r } },
4645
4646	/ Big-endian /
4647	{ { gen_helper_sve_ld1bb_r, gen_helper_sve_ld2bb_r,
4648	gen_helper_sve_ld3bb_r, gen_helper_sve_ld4bb_r },
4649	{ gen_helper_sve_ld1bhu_r, NULL, NULL, NULL },
4650	{ gen_helper_sve_ld1bsu_r, NULL, NULL, NULL },
4651	{ gen_helper_sve_ld1bdu_r, NULL, NULL, NULL },
4652
4653	{ gen_helper_sve_ld1sds_be_r, NULL, NULL, NULL },
4654	{ gen_helper_sve_ld1hh_be_r, gen_helper_sve_ld2hh_be_r,
4655	gen_helper_sve_ld3hh_be_r, gen_helper_sve_ld4hh_be_r },
4656	{ gen_helper_sve_ld1hsu_be_r, NULL, NULL, NULL },
4657	{ gen_helper_sve_ld1hdu_be_r, NULL, NULL, NULL },
4658
4659	{ gen_helper_sve_ld1hds_be_r, NULL, NULL, NULL },
4660	{ gen_helper_sve_ld1hss_be_r, NULL, NULL, NULL },
4661	{ gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld2ss_be_r,
4662	gen_helper_sve_ld3ss_be_r, gen_helper_sve_ld4ss_be_r },
4663	{ gen_helper_sve_ld1sdu_be_r, NULL, NULL, NULL },
4664
4665	{ gen_helper_sve_ld1bds_r, NULL, NULL, NULL },
4666	{ gen_helper_sve_ld1bss_r, NULL, NULL, NULL },
4667	{ gen_helper_sve_ld1bhs_r, NULL, NULL, NULL },
4668	{ gen_helper_sve_ld1dd_be_r, gen_helper_sve_ld2dd_be_r,
4669	gen_helper_sve_ld3dd_be_r, gen_helper_sve_ld4dd_be_r } }
4670	};
4671	gen_helper_gvec_mem *fn = fns[s->be_data == MO_BE][dtype][nreg];
4672
4673	/ While there are holes in the table, they are not*
4674	* accessible via the instruction encoding.
4675	*/
4676	assert(fn != NULL);
4677	do_mem_zpa(s, zt, pg, addr, dtype, fn);
4678	}
4679
4680	static bool trans_LD_zprr(DisasContext s, arg_rprr_load a)
4681	{
4682	if (a->rm == `31`) {
4683	return false;
4684	}
4685	if (sve_access_check(s)) {
4686	TCGv_i64 addr = new_tmp_a64(s);
4687	tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4688	tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4689	do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4690	}
4691	return true;
4692	}
4693
4694	static bool trans_LD_zpri(DisasContext s, arg_rpri_load a)
4695	{
4696	if (sve_access_check(s)) {
4697	int vsz = vec_full_reg_size(s);
4698	int elements = vsz >> dtype_esz[a->dtype];
4699	TCGv_i64 addr = new_tmp_a64(s);
4700
4701	tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
4702	(a->imm * elements * (a->nreg + `1`))
4703	<< dtype_msz(a->dtype));
4704	do_ld_zpa(s, a->rd, a->pg, addr, a->dtype, a->nreg);
4705	}
4706	return true;
4707	}
4708
4709	static bool trans_LDFF1_zprr(DisasContext s, arg_rprr_load a)
4710	{
4711	static gen_helper_gvec_mem * const fns[`2`][`16`] = {
4712	/ Little-endian /
4713	{ gen_helper_sve_ldff1bb_r,
4714	gen_helper_sve_ldff1bhu_r,
4715	gen_helper_sve_ldff1bsu_r,
4716	gen_helper_sve_ldff1bdu_r,
4717
4718	gen_helper_sve_ldff1sds_le_r,
4719	gen_helper_sve_ldff1hh_le_r,
4720	gen_helper_sve_ldff1hsu_le_r,
4721	gen_helper_sve_ldff1hdu_le_r,
4722
4723	gen_helper_sve_ldff1hds_le_r,
4724	gen_helper_sve_ldff1hss_le_r,
4725	gen_helper_sve_ldff1ss_le_r,
4726	gen_helper_sve_ldff1sdu_le_r,
4727
4728	gen_helper_sve_ldff1bds_r,
4729	gen_helper_sve_ldff1bss_r,
4730	gen_helper_sve_ldff1bhs_r,
4731	gen_helper_sve_ldff1dd_le_r },
4732
4733	/ Big-endian /
4734	{ gen_helper_sve_ldff1bb_r,
4735	gen_helper_sve_ldff1bhu_r,
4736	gen_helper_sve_ldff1bsu_r,
4737	gen_helper_sve_ldff1bdu_r,
4738
4739	gen_helper_sve_ldff1sds_be_r,
4740	gen_helper_sve_ldff1hh_be_r,
4741	gen_helper_sve_ldff1hsu_be_r,
4742	gen_helper_sve_ldff1hdu_be_r,
4743
4744	gen_helper_sve_ldff1hds_be_r,
4745	gen_helper_sve_ldff1hss_be_r,
4746	gen_helper_sve_ldff1ss_be_r,
4747	gen_helper_sve_ldff1sdu_be_r,
4748
4749	gen_helper_sve_ldff1bds_r,
4750	gen_helper_sve_ldff1bss_r,
4751	gen_helper_sve_ldff1bhs_r,
4752	gen_helper_sve_ldff1dd_be_r },
4753	};
4754
4755	if (sve_access_check(s)) {
4756	TCGv_i64 addr = new_tmp_a64(s);
4757	tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), dtype_msz(a->dtype));
4758	tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4759	do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4760	fns[s->be_data == MO_BE][a->dtype]);
4761	}
4762	return true;
4763	}
4764
4765	static bool trans_LDNF1_zpri(DisasContext s, arg_rpri_load a)
4766	{
4767	static gen_helper_gvec_mem * const fns[`2`][`16`] = {
4768	/ Little-endian /
4769	{ gen_helper_sve_ldnf1bb_r,
4770	gen_helper_sve_ldnf1bhu_r,
4771	gen_helper_sve_ldnf1bsu_r,
4772	gen_helper_sve_ldnf1bdu_r,
4773
4774	gen_helper_sve_ldnf1sds_le_r,
4775	gen_helper_sve_ldnf1hh_le_r,
4776	gen_helper_sve_ldnf1hsu_le_r,
4777	gen_helper_sve_ldnf1hdu_le_r,
4778
4779	gen_helper_sve_ldnf1hds_le_r,
4780	gen_helper_sve_ldnf1hss_le_r,
4781	gen_helper_sve_ldnf1ss_le_r,
4782	gen_helper_sve_ldnf1sdu_le_r,
4783
4784	gen_helper_sve_ldnf1bds_r,
4785	gen_helper_sve_ldnf1bss_r,
4786	gen_helper_sve_ldnf1bhs_r,
4787	gen_helper_sve_ldnf1dd_le_r },
4788
4789	/ Big-endian /
4790	{ gen_helper_sve_ldnf1bb_r,
4791	gen_helper_sve_ldnf1bhu_r,
4792	gen_helper_sve_ldnf1bsu_r,
4793	gen_helper_sve_ldnf1bdu_r,
4794
4795	gen_helper_sve_ldnf1sds_be_r,
4796	gen_helper_sve_ldnf1hh_be_r,
4797	gen_helper_sve_ldnf1hsu_be_r,
4798	gen_helper_sve_ldnf1hdu_be_r,
4799
4800	gen_helper_sve_ldnf1hds_be_r,
4801	gen_helper_sve_ldnf1hss_be_r,
4802	gen_helper_sve_ldnf1ss_be_r,
4803	gen_helper_sve_ldnf1sdu_be_r,
4804
4805	gen_helper_sve_ldnf1bds_r,
4806	gen_helper_sve_ldnf1bss_r,
4807	gen_helper_sve_ldnf1bhs_r,
4808	gen_helper_sve_ldnf1dd_be_r },
4809	};
4810
4811	if (sve_access_check(s)) {
4812	int vsz = vec_full_reg_size(s);
4813	int elements = vsz >> dtype_esz[a->dtype];
4814	int off = (a->imm * elements) << dtype_msz(a->dtype);
4815	TCGv_i64 addr = new_tmp_a64(s);
4816
4817	tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), off);
4818	do_mem_zpa(s, a->rd, a->pg, addr, a->dtype,
4819	fns[s->be_data == MO_BE][a->dtype]);
4820	}
4821	return true;
4822	}
4823
4824	static void do_ldrq(DisasContext s, int* zt, int pg, TCGv_i64 addr, int msz)
4825	{
4826	static gen_helper_gvec_mem * const fns[`2`][`4`] = {
4827	{ gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_le_r,
4828	gen_helper_sve_ld1ss_le_r, gen_helper_sve_ld1dd_le_r },
4829	{ gen_helper_sve_ld1bb_r, gen_helper_sve_ld1hh_be_r,
4830	gen_helper_sve_ld1ss_be_r, gen_helper_sve_ld1dd_be_r },
4831	};
4832	unsigned vsz = vec_full_reg_size(s);
4833	TCGv_ptr t_pg;
4834	TCGv_i32 t_desc;
4835	int desc, poff;
4836
4837	/ Load the first quadword using the normal predicated load helpers. /
4838	desc = sve_memopidx(s, msz_dtype(s, msz));
4839	desc \|= zt << MEMOPIDX_SHIFT;
4840	desc = simd_desc(`16`, `16`, desc);
4841	t_desc = tcg_const_i32(desc);
4842
4843	poff = pred_full_reg_offset(s, pg);
4844	if (vsz > `16`) {
4845	/*
4846	* Zero-extend the first 16 bits of the predicate into a temporary.
4847	* This avoids triggering an assert making sure we don't have bits
4848	* set within a predicate beyond VQ, but we have lowered VQ to 1
4849	* for this load operation.
4850	*/
4851	TCGv_i64 tmp = tcg_temp_new_i64();
4852	#ifdef HOST_WORDS_BIGENDIAN
4853	poff += `6`;
4854	#endif
4855	tcg_gen_ld16u_i64(tmp, cpu_env, poff);
4856
4857	poff = offsetof(CPUARMState, vfp.preg_tmp);
4858	tcg_gen_st_i64(tmp, cpu_env, poff);
4859	tcg_temp_free_i64(tmp);
4860	}
4861
4862	t_pg = tcg_temp_new_ptr();
4863	tcg_gen_addi_ptr(t_pg, cpu_env, poff);
4864
4865	fns[s->be_data == MO_BE][msz](cpu_env, t_pg, addr, t_desc);
4866
4867	tcg_temp_free_ptr(t_pg);
4868	tcg_temp_free_i32(t_desc);
4869
4870	/ Replicate that first quadword. /
4871	if (vsz > `16`) {
4872	unsigned dofs = vec_full_reg_offset(s, zt);
4873	tcg_gen_gvec_dup_mem(`4`, dofs + `16`, dofs, vsz - `16`, vsz - `16`);
4874	}
4875	}
4876
4877	static bool trans_LD1RQ_zprr(DisasContext s, arg_rprr_load a)
4878	{
4879	if (a->rm == `31`) {
4880	return false;
4881	}
4882	if (sve_access_check(s)) {
4883	int msz = dtype_msz(a->dtype);
4884	TCGv_i64 addr = new_tmp_a64(s);
4885	tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), msz);
4886	tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
4887	do_ldrq(s, a->rd, a->pg, addr, msz);
4888	}
4889	return true;
4890	}
4891
4892	static bool trans_LD1RQ_zpri(DisasContext s, arg_rpri_load a)
4893	{
4894	if (sve_access_check(s)) {
4895	TCGv_i64 addr = new_tmp_a64(s);
4896	tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn), a->imm * `16`);
4897	do_ldrq(s, a->rd, a->pg, addr, dtype_msz(a->dtype));
4898	}
4899	return true;
4900	}
4901
4902	/ Load and broadcast element. /
4903	static bool trans_LD1R_zpri(DisasContext s, arg_rpri_load a)
4904	{
4905	if (!sve_access_check(s)) {
4906	return true;
4907	}
4908
4909	unsigned vsz = vec_full_reg_size(s);
4910	unsigned psz = pred_full_reg_size(s);
4911	unsigned esz = dtype_esz[a->dtype];
4912	unsigned msz = dtype_msz(a->dtype);
4913	TCGLabel *over = gen_new_label();
4914	TCGv_i64 temp;
4915
4916	/ If the guarding predicate has no bits set, no load occurs. /
4917	if (psz <= `8`) {
4918	/ Reduce the pred_esz_masks value simply to reduce the*
4919	* size of the code generated here.
4920	*/
4921	uint64_t psz_mask = MAKE_64BIT_MASK(`0`, psz * `8`);
4922	temp = tcg_temp_new_i64();
4923	tcg_gen_ld_i64(temp, cpu_env, pred_full_reg_offset(s, a->pg));
4924	tcg_gen_andi_i64(temp, temp, pred_esz_masks[esz] & psz_mask);
4925	tcg_gen_brcondi_i64(TCG_COND_EQ, temp, `0`, over);
4926	tcg_temp_free_i64(temp);
4927	} else {
4928	TCGv_i32 t32 = tcg_temp_new_i32();
4929	find_last_active(s, t32, esz, a->pg);
4930	tcg_gen_brcondi_i32(TCG_COND_LT, t32, `0`, over);
4931	tcg_temp_free_i32(t32);
4932	}
4933
4934	/ Load the data. /
4935	temp = tcg_temp_new_i64();
4936	tcg_gen_addi_i64(temp, cpu_reg_sp(s, a->rn), a->imm << msz);
4937	tcg_gen_qemu_ld_i64(temp, temp, get_mem_index(s),
4938	s->be_data \| dtype_mop[a->dtype]);
4939
4940	/ Broadcast to all elements. /
4941	tcg_gen_gvec_dup_i64(esz, vec_full_reg_offset(s, a->rd),
4942	vsz, vsz, temp);
4943	tcg_temp_free_i64(temp);
4944
4945	/ Zero the inactive elements. /
4946	gen_set_label(over);
4947	do_movz_zpz(s, a->rd, a->rd, a->pg, esz);
4948	return true;
4949	}
4950
4951	static void do_st_zpa(DisasContext s, int* zt, int pg, TCGv_i64 addr,
4952	int msz, int esz, int nreg)
4953	{
4954	static gen_helper_gvec_mem * const fn_single[`2`][`4`][`4`] = {
4955	{ { gen_helper_sve_st1bb_r,
4956	gen_helper_sve_st1bh_r,
4957	gen_helper_sve_st1bs_r,
4958	gen_helper_sve_st1bd_r },
4959	{ NULL,
4960	gen_helper_sve_st1hh_le_r,
4961	gen_helper_sve_st1hs_le_r,
4962	gen_helper_sve_st1hd_le_r },
4963	{ NULL, NULL,
4964	gen_helper_sve_st1ss_le_r,
4965	gen_helper_sve_st1sd_le_r },
4966	{ NULL, NULL, NULL,
4967	gen_helper_sve_st1dd_le_r } },
4968	{ { gen_helper_sve_st1bb_r,
4969	gen_helper_sve_st1bh_r,
4970	gen_helper_sve_st1bs_r,
4971	gen_helper_sve_st1bd_r },
4972	{ NULL,
4973	gen_helper_sve_st1hh_be_r,
4974	gen_helper_sve_st1hs_be_r,
4975	gen_helper_sve_st1hd_be_r },
4976	{ NULL, NULL,
4977	gen_helper_sve_st1ss_be_r,
4978	gen_helper_sve_st1sd_be_r },
4979	{ NULL, NULL, NULL,
4980	gen_helper_sve_st1dd_be_r } },
4981	};
4982	static gen_helper_gvec_mem * const fn_multiple[`2`][`3`][`4`] = {
4983	{ { gen_helper_sve_st2bb_r,
4984	gen_helper_sve_st2hh_le_r,
4985	gen_helper_sve_st2ss_le_r,
4986	gen_helper_sve_st2dd_le_r },
4987	{ gen_helper_sve_st3bb_r,
4988	gen_helper_sve_st3hh_le_r,
4989	gen_helper_sve_st3ss_le_r,
4990	gen_helper_sve_st3dd_le_r },
4991	{ gen_helper_sve_st4bb_r,
4992	gen_helper_sve_st4hh_le_r,
4993	gen_helper_sve_st4ss_le_r,
4994	gen_helper_sve_st4dd_le_r } },
4995	{ { gen_helper_sve_st2bb_r,
4996	gen_helper_sve_st2hh_be_r,
4997	gen_helper_sve_st2ss_be_r,
4998	gen_helper_sve_st2dd_be_r },
4999	{ gen_helper_sve_st3bb_r,
5000	gen_helper_sve_st3hh_be_r,
5001	gen_helper_sve_st3ss_be_r,
5002	gen_helper_sve_st3dd_be_r },
5003	{ gen_helper_sve_st4bb_r,
5004	gen_helper_sve_st4hh_be_r,
5005	gen_helper_sve_st4ss_be_r,
5006	gen_helper_sve_st4dd_be_r } },
5007	};
5008	gen_helper_gvec_mem *fn;
5009	int be = s->be_data == MO_BE;
5010
5011	if (nreg == `0`) {
5012	/ ST1 /
5013	fn = fn_single[be][msz][esz];
5014	} else {
5015	/ ST2, ST3, ST4 -- msz == esz, enforced by encoding /
5016	assert(msz == esz);
5017	fn = fn_multiple[be][nreg - `1`][msz];
5018	}
5019	assert(fn != NULL);
5020	do_mem_zpa(s, zt, pg, addr, msz_dtype(s, msz), fn);
5021	}
5022
5023	static bool trans_ST_zprr(DisasContext s, arg_rprr_store a)
5024	{
5025	if (a->rm == `31` \|\| a->msz > a->esz) {
5026	return false;
5027	}
5028	if (sve_access_check(s)) {
5029	TCGv_i64 addr = new_tmp_a64(s);
5030	tcg_gen_shli_i64(addr, cpu_reg(s, a->rm), a->msz);
5031	tcg_gen_add_i64(addr, addr, cpu_reg_sp(s, a->rn));
5032	do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5033	}
5034	return true;
5035	}
5036
5037	static bool trans_ST_zpri(DisasContext s, arg_rpri_store a)
5038	{
5039	if (a->msz > a->esz) {
5040	return false;
5041	}
5042	if (sve_access_check(s)) {
5043	int vsz = vec_full_reg_size(s);
5044	int elements = vsz >> a->esz;
5045	TCGv_i64 addr = new_tmp_a64(s);
5046
5047	tcg_gen_addi_i64(addr, cpu_reg_sp(s, a->rn),
5048	(a->imm * elements * (a->nreg + `1`)) << a->msz);
5049	do_st_zpa(s, a->rd, a->pg, addr, a->msz, a->esz, a->nreg);
5050	}
5051	return true;
5052	}
5053
5054	/*
5055	*** SVE gather loads / scatter stores
5056	*/
5057
5058	static void do_mem_zpz(DisasContext s, int* zt, int pg, int zm,
5059	int scale, TCGv_i64 scalar, int msz,
5060	gen_helper_gvec_mem_scatter *fn)
5061	{
5062	unsigned vsz = vec_full_reg_size(s);
5063	TCGv_ptr t_zm = tcg_temp_new_ptr();
5064	TCGv_ptr t_pg = tcg_temp_new_ptr();
5065	TCGv_ptr t_zt = tcg_temp_new_ptr();
5066	TCGv_i32 t_desc;
5067	int desc;
5068
5069	desc = sve_memopidx(s, msz_dtype(s, msz));
5070	desc \|= scale << MEMOPIDX_SHIFT;
5071	desc = simd_desc(vsz, vsz, desc);
5072	t_desc = tcg_const_i32(desc);
5073
5074	tcg_gen_addi_ptr(t_pg, cpu_env, pred_full_reg_offset(s, pg));
5075	tcg_gen_addi_ptr(t_zm, cpu_env, vec_full_reg_offset(s, zm));
5076	tcg_gen_addi_ptr(t_zt, cpu_env, vec_full_reg_offset(s, zt));
5077	fn(cpu_env, t_zt, t_pg, t_zm, scalar, t_desc);
5078
5079	tcg_temp_free_ptr(t_zt);
5080	tcg_temp_free_ptr(t_zm);
5081	tcg_temp_free_ptr(t_pg);
5082	tcg_temp_free_i32(t_desc);
5083	}
5084
5085	/ Indexed by [be][ff][xs][u][msz]. /
5086	static gen_helper_gvec_mem_scatter * const gather_load_fn32[`2`][`2`][`2`][`2`][`3`] = {
5087	/ Little-endian /
5088	{ { { { gen_helper_sve_ldbss_zsu,
5089	gen_helper_sve_ldhss_le_zsu,
5090	NULL, },
5091	{ gen_helper_sve_ldbsu_zsu,
5092	gen_helper_sve_ldhsu_le_zsu,
5093	gen_helper_sve_ldss_le_zsu, } },
5094	{ { gen_helper_sve_ldbss_zss,
5095	gen_helper_sve_ldhss_le_zss,
5096	NULL, },
5097	{ gen_helper_sve_ldbsu_zss,
5098	gen_helper_sve_ldhsu_le_zss,
5099	gen_helper_sve_ldss_le_zss, } } },
5100
5101	/ First-fault /
5102	{ { { gen_helper_sve_ldffbss_zsu,
5103	gen_helper_sve_ldffhss_le_zsu,
5104	NULL, },
5105	{ gen_helper_sve_ldffbsu_zsu,
5106	gen_helper_sve_ldffhsu_le_zsu,
5107	gen_helper_sve_ldffss_le_zsu, } },
5108	{ { gen_helper_sve_ldffbss_zss,
5109	gen_helper_sve_ldffhss_le_zss,
5110	NULL, },
5111	{ gen_helper_sve_ldffbsu_zss,
5112	gen_helper_sve_ldffhsu_le_zss,
5113	gen_helper_sve_ldffss_le_zss, } } } },
5114
5115	/ Big-endian /
5116	{ { { { gen_helper_sve_ldbss_zsu,
5117	gen_helper_sve_ldhss_be_zsu,
5118	NULL, },
5119	{ gen_helper_sve_ldbsu_zsu,
5120	gen_helper_sve_ldhsu_be_zsu,
5121	gen_helper_sve_ldss_be_zsu, } },
5122	{ { gen_helper_sve_ldbss_zss,
5123	gen_helper_sve_ldhss_be_zss,
5124	NULL, },
5125	{ gen_helper_sve_ldbsu_zss,
5126	gen_helper_sve_ldhsu_be_zss,
5127	gen_helper_sve_ldss_be_zss, } } },
5128
5129	/ First-fault /
5130	{ { { gen_helper_sve_ldffbss_zsu,
5131	gen_helper_sve_ldffhss_be_zsu,
5132	NULL, },
5133	{ gen_helper_sve_ldffbsu_zsu,
5134	gen_helper_sve_ldffhsu_be_zsu,
5135	gen_helper_sve_ldffss_be_zsu, } },
5136	{ { gen_helper_sve_ldffbss_zss,
5137	gen_helper_sve_ldffhss_be_zss,
5138	NULL, },
5139	{ gen_helper_sve_ldffbsu_zss,
5140	gen_helper_sve_ldffhsu_be_zss,
5141	gen_helper_sve_ldffss_be_zss, } } } },
5142	};
5143
5144	/ Note that we overload xs=2 to indicate 64-bit offset. /
5145	static gen_helper_gvec_mem_scatter * const gather_load_fn64[`2`][`2`][`3`][`2`][`4`] = {
5146	/ Little-endian /
5147	{ { { { gen_helper_sve_ldbds_zsu,
5148	gen_helper_sve_ldhds_le_zsu,
5149	gen_helper_sve_ldsds_le_zsu,
5150	NULL, },
5151	{ gen_helper_sve_ldbdu_zsu,
5152	gen_helper_sve_ldhdu_le_zsu,
5153	gen_helper_sve_ldsdu_le_zsu,
5154	gen_helper_sve_lddd_le_zsu, } },
5155	{ { gen_helper_sve_ldbds_zss,
5156	gen_helper_sve_ldhds_le_zss,
5157	gen_helper_sve_ldsds_le_zss,
5158	NULL, },
5159	{ gen_helper_sve_ldbdu_zss,
5160	gen_helper_sve_ldhdu_le_zss,
5161	gen_helper_sve_ldsdu_le_zss,
5162	gen_helper_sve_lddd_le_zss, } },
5163	{ { gen_helper_sve_ldbds_zd,
5164	gen_helper_sve_ldhds_le_zd,
5165	gen_helper_sve_ldsds_le_zd,
5166	NULL, },
5167	{ gen_helper_sve_ldbdu_zd,
5168	gen_helper_sve_ldhdu_le_zd,
5169	gen_helper_sve_ldsdu_le_zd,
5170	gen_helper_sve_lddd_le_zd, } } },
5171
5172	/ First-fault /
5173	{ { { gen_helper_sve_ldffbds_zsu,
5174	gen_helper_sve_ldffhds_le_zsu,
5175	gen_helper_sve_ldffsds_le_zsu,
5176	NULL, },
5177	{ gen_helper_sve_ldffbdu_zsu,
5178	gen_helper_sve_ldffhdu_le_zsu,
5179	gen_helper_sve_ldffsdu_le_zsu,
5180	gen_helper_sve_ldffdd_le_zsu, } },
5181	{ { gen_helper_sve_ldffbds_zss,
5182	gen_helper_sve_ldffhds_le_zss,
5183	gen_helper_sve_ldffsds_le_zss,
5184	NULL, },
5185	{ gen_helper_sve_ldffbdu_zss,
5186	gen_helper_sve_ldffhdu_le_zss,
5187	gen_helper_sve_ldffsdu_le_zss,
5188	gen_helper_sve_ldffdd_le_zss, } },
5189	{ { gen_helper_sve_ldffbds_zd,
5190	gen_helper_sve_ldffhds_le_zd,
5191	gen_helper_sve_ldffsds_le_zd,
5192	NULL, },
5193	{ gen_helper_sve_ldffbdu_zd,
5194	gen_helper_sve_ldffhdu_le_zd,
5195	gen_helper_sve_ldffsdu_le_zd,
5196	gen_helper_sve_ldffdd_le_zd, } } } },
5197
5198	/ Big-endian /
5199	{ { { { gen_helper_sve_ldbds_zsu,
5200	gen_helper_sve_ldhds_be_zsu,
5201	gen_helper_sve_ldsds_be_zsu,
5202	NULL, },
5203	{ gen_helper_sve_ldbdu_zsu,
5204	gen_helper_sve_ldhdu_be_zsu,
5205	gen_helper_sve_ldsdu_be_zsu,
5206	gen_helper_sve_lddd_be_zsu, } },
5207	{ { gen_helper_sve_ldbds_zss,
5208	gen_helper_sve_ldhds_be_zss,
5209	gen_helper_sve_ldsds_be_zss,
5210	NULL, },
5211	{ gen_helper_sve_ldbdu_zss,
5212	gen_helper_sve_ldhdu_be_zss,
5213	gen_helper_sve_ldsdu_be_zss,
5214	gen_helper_sve_lddd_be_zss, } },
5215	{ { gen_helper_sve_ldbds_zd,
5216	gen_helper_sve_ldhds_be_zd,
5217	gen_helper_sve_ldsds_be_zd,
5218	NULL, },
5219	{ gen_helper_sve_ldbdu_zd,
5220	gen_helper_sve_ldhdu_be_zd,
5221	gen_helper_sve_ldsdu_be_zd,
5222	gen_helper_sve_lddd_be_zd, } } },
5223
5224	/ First-fault /
5225	{ { { gen_helper_sve_ldffbds_zsu,
5226	gen_helper_sve_ldffhds_be_zsu,
5227	gen_helper_sve_ldffsds_be_zsu,
5228	NULL, },
5229	{ gen_helper_sve_ldffbdu_zsu,
5230	gen_helper_sve_ldffhdu_be_zsu,
5231	gen_helper_sve_ldffsdu_be_zsu,
5232	gen_helper_sve_ldffdd_be_zsu, } },
5233	{ { gen_helper_sve_ldffbds_zss,
5234	gen_helper_sve_ldffhds_be_zss,
5235	gen_helper_sve_ldffsds_be_zss,
5236	NULL, },
5237	{ gen_helper_sve_ldffbdu_zss,
5238	gen_helper_sve_ldffhdu_be_zss,
5239	gen_helper_sve_ldffsdu_be_zss,
5240	gen_helper_sve_ldffdd_be_zss, } },
5241	{ { gen_helper_sve_ldffbds_zd,
5242	gen_helper_sve_ldffhds_be_zd,
5243	gen_helper_sve_ldffsds_be_zd,
5244	NULL, },
5245	{ gen_helper_sve_ldffbdu_zd,
5246	gen_helper_sve_ldffhdu_be_zd,
5247	gen_helper_sve_ldffsdu_be_zd,
5248	gen_helper_sve_ldffdd_be_zd, } } } },
5249	};
5250
5251	static bool trans_LD1_zprz(DisasContext s, arg_LD1_zprz a)
5252	{
5253	gen_helper_gvec_mem_scatter *fn = NULL;
5254	int be = s->be_data == MO_BE;
5255
5256	if (!sve_access_check(s)) {
5257	return true;
5258	}
5259
5260	switch (a->esz) {
5261	case MO_32:
5262	fn = gather_load_fn32[be][a->ff][a->xs][a->u][a->msz];
5263	break;
5264	case MO_64:
5265	fn = gather_load_fn64[be][a->ff][a->xs][a->u][a->msz];
5266	break;
5267	}
5268	assert(fn != NULL);
5269
5270	do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5271	cpu_reg_sp(s, a->rn), a->msz, fn);
5272	return true;
5273	}
5274
5275	static bool trans_LD1_zpiz(DisasContext s, arg_LD1_zpiz a)
5276	{
5277	gen_helper_gvec_mem_scatter *fn = NULL;
5278	int be = s->be_data == MO_BE;
5279	TCGv_i64 imm;
5280
5281	if (a->esz < a->msz \|\| (a->esz == a->msz && !a->u)) {
5282	return false;
5283	}
5284	if (!sve_access_check(s)) {
5285	return true;
5286	}
5287
5288	switch (a->esz) {
5289	case MO_32:
5290	fn = gather_load_fn32[be][a->ff][`0`][a->u][a->msz];
5291	break;
5292	case MO_64:
5293	fn = gather_load_fn64[be][a->ff][`2`][a->u][a->msz];
5294	break;
5295	}
5296	assert(fn != NULL);
5297
5298	/ Treat LD1_zpiz (zn[x] + imm) the same way as LD1_zprz (rn + zm[x])*
5299	* by loading the immediate into the scalar parameter.
5300	*/
5301	imm = tcg_const_i64(a->imm << a->msz);
5302	do_mem_zpz(s, a->rd, a->pg, a->rn, `0`, imm, a->msz, fn);
5303	tcg_temp_free_i64(imm);
5304	return true;
5305	}
5306
5307	/ Indexed by [be][xs][msz]. /
5308	static gen_helper_gvec_mem_scatter * const scatter_store_fn32[`2`][`2`][`3`] = {
5309	/ Little-endian /
5310	{ { gen_helper_sve_stbs_zsu,
5311	gen_helper_sve_sths_le_zsu,
5312	gen_helper_sve_stss_le_zsu, },
5313	{ gen_helper_sve_stbs_zss,
5314	gen_helper_sve_sths_le_zss,
5315	gen_helper_sve_stss_le_zss, } },
5316	/ Big-endian /
5317	{ { gen_helper_sve_stbs_zsu,
5318	gen_helper_sve_sths_be_zsu,
5319	gen_helper_sve_stss_be_zsu, },
5320	{ gen_helper_sve_stbs_zss,
5321	gen_helper_sve_sths_be_zss,
5322	gen_helper_sve_stss_be_zss, } },
5323	};
5324
5325	/ Note that we overload xs=2 to indicate 64-bit offset. /
5326	static gen_helper_gvec_mem_scatter * const scatter_store_fn64[`2`][`3`][`4`] = {
5327	/ Little-endian /
5328	{ { gen_helper_sve_stbd_zsu,
5329	gen_helper_sve_sthd_le_zsu,
5330	gen_helper_sve_stsd_le_zsu,
5331	gen_helper_sve_stdd_le_zsu, },
5332	{ gen_helper_sve_stbd_zss,
5333	gen_helper_sve_sthd_le_zss,
5334	gen_helper_sve_stsd_le_zss,
5335	gen_helper_sve_stdd_le_zss, },
5336	{ gen_helper_sve_stbd_zd,
5337	gen_helper_sve_sthd_le_zd,
5338	gen_helper_sve_stsd_le_zd,
5339	gen_helper_sve_stdd_le_zd, } },
5340	/ Big-endian /
5341	{ { gen_helper_sve_stbd_zsu,
5342	gen_helper_sve_sthd_be_zsu,
5343	gen_helper_sve_stsd_be_zsu,
5344	gen_helper_sve_stdd_be_zsu, },
5345	{ gen_helper_sve_stbd_zss,
5346	gen_helper_sve_sthd_be_zss,
5347	gen_helper_sve_stsd_be_zss,
5348	gen_helper_sve_stdd_be_zss, },
5349	{ gen_helper_sve_stbd_zd,
5350	gen_helper_sve_sthd_be_zd,
5351	gen_helper_sve_stsd_be_zd,
5352	gen_helper_sve_stdd_be_zd, } },
5353	};
5354
5355	static bool trans_ST1_zprz(DisasContext s, arg_ST1_zprz a)
5356	{
5357	gen_helper_gvec_mem_scatter *fn;
5358	int be = s->be_data == MO_BE;
5359
5360	if (a->esz < a->msz \|\| (a->msz == `0` && a->scale)) {
5361	return false;
5362	}
5363	if (!sve_access_check(s)) {
5364	return true;
5365	}
5366	switch (a->esz) {
5367	case MO_32:
5368	fn = scatter_store_fn32[be][a->xs][a->msz];
5369	break;
5370	case MO_64:
5371	fn = scatter_store_fn64[be][a->xs][a->msz];
5372	break;
5373	default:
5374	g_assert_not_reached();
5375	}
5376	do_mem_zpz(s, a->rd, a->pg, a->rm, a->scale * a->msz,
5377	cpu_reg_sp(s, a->rn), a->msz, fn);
5378	return true;
5379	}
5380
5381	static bool trans_ST1_zpiz(DisasContext s, arg_ST1_zpiz a)
5382	{
5383	gen_helper_gvec_mem_scatter *fn = NULL;
5384	int be = s->be_data == MO_BE;
5385	TCGv_i64 imm;
5386
5387	if (a->esz < a->msz) {
5388	return false;
5389	}
5390	if (!sve_access_check(s)) {
5391	return true;
5392	}
5393
5394	switch (a->esz) {
5395	case MO_32:
5396	fn = scatter_store_fn32[be][`0`][a->msz];
5397	break;
5398	case MO_64:
5399	fn = scatter_store_fn64[be][`2`][a->msz];
5400	break;
5401	}
5402	assert(fn != NULL);
5403
5404	/ Treat ST1_zpiz (zn[x] + imm) the same way as ST1_zprz (rn + zm[x])*
5405	* by loading the immediate into the scalar parameter.
5406	*/
5407	imm = tcg_const_i64(a->imm << a->msz);
5408	do_mem_zpz(s, a->rd, a->pg, a->rn, `0`, imm, a->msz, fn);
5409	tcg_temp_free_i64(imm);
5410	return true;
5411	}
5412
5413	/*
5414	* Prefetches
5415	*/
5416
5417	static bool trans_PRF(DisasContext s, arg_PRF a)
5418	{
5419	/ Prefetch is a nop within QEMU. /
5420	(void)sve_access_check(s);
5421	return true;
5422	}
5423
5424	static bool trans_PRF_rr(DisasContext s, arg_PRF_rr a)
5425	{
5426	if (a->rm == `31`) {
5427	return false;
5428	}
5429	/ Prefetch is a nop within QEMU. /
5430	(void)sve_access_check(s);
5431	return true;
5432	}
5433
5434	/*
5435	* Move Prefix
5436	*
5437	* TODO: The implementation so far could handle predicated merging movprfx.
5438	* The helper functions as written take an extra source register to
5439	* use in the operation, but the result is only written when predication
5440	* succeeds. For unpredicated movprfx, we need to rearrange the helpers
5441	* to allow the final write back to the destination to be unconditional.
5442	* For predicated zeroing movprfx, we need to rearrange the helpers to
5443	* allow the final write back to zero inactives.
5444	*
5445	* In the meantime, just emit the moves.
5446	*/
5447
5448	static bool trans_MOVPRFX(DisasContext s, arg_MOVPRFX a)
5449	{
5450	return do_mov_z(s, a->rd, a->rn);
5451	}
5452
5453	static bool trans_MOVPRFX_m(DisasContext s, arg_rpr_esz a)
5454	{
5455	if (sve_access_check(s)) {
5456	do_sel_z(s, a->rd, a->rn, a->rd, a->pg, a->esz);
5457	}
5458	return true;
5459	}
5460
5461	static bool trans_MOVPRFX_z(DisasContext s, arg_rpr_esz a)
5462	{
5463	if (sve_access_check(s)) {
5464	do_movz_zpz(s, a->rd, a->rn, a->pg, a->esz);
5465	}
5466	return true;
5467	}
5468

/* Browse the source code of qemu/target/arm/translate-sve.c */