/*
 * AArch64 specific helpers
 *
 * Copyright (c) 2013 Alexander Graf <agraf@suse.de>
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "cpu.h"
#include "exec/gdbstub.h"
#include "exec/helper-proto.h"
#include "qemu/host-utils.h"
#include "qemu/log.h"
#include "qemu/main-loop.h"
#include "qemu/bitops.h"
#include "internals.h"
#include "qemu/crc32c.h"
#include "exec/exec-all.h"
#include "exec/cpu_ldst.h"
#include "qemu/int128.h"
#include "qemu/atomic128.h"
#include "tcg.h"
#include "fpu/softfloat.h"
#include <zlib.h> /* For crc32 */

/* C2.4.7 Multiply and divide */
/* special cases for 0 and LLONG_MIN are mandated by the standard */
uint64_t HELPER(udiv64)(uint64_t num, uint64_t den)
{
    if (den == 0) {
        return 0;
    }
    return num / den;
}

int64_t HELPER(sdiv64)(int64_t num, int64_t den)
{
    if (den == 0) {
        return 0;
    }
    if (num == LLONG_MIN && den == -1) {
        return LLONG_MIN;
    }
    return num / den;
}
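
/*
 * Note that AArch64 integer division never traps: dividing by zero
 * yields 0, and INT64_MIN / -1 wraps back to INT64_MIN, exactly the
 * special cases handled above.
 */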

uint64_t HELPER(rbit64)(uint64_t x)
{
    return revbit64(x);
}

void HELPER(msr_i_spsel)(CPUARMState *env, uint32_t imm)
{
    update_spsel(env, imm);
}

static void daif_check(CPUARMState *env, uint32_t op,
                       uint32_t imm, uintptr_t ra)
{
    /* DAIF update to PSTATE. This is OK from EL0 only if UMA is set. */
    if (arm_current_el(env) == 0 && !(env->cp15.sctlr_el[1] & SCTLR_UMA)) {
        raise_exception_ra(env, EXCP_UDEF,
                           syn_aa64_sysregtrap(0, extract32(op, 0, 3),
                                               extract32(op, 3, 3), 4,
                                               imm, 0x1f, 0),
                           exception_target_el(env), ra);
    }
}

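/*
 * The 4-bit CRm immediate of MSR DAIFSet/DAIFClr lines up with
 * PSTATE.<D,A,I,F> at bits [9:6], hence the shift by 6 below.
 */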
void HELPER(msr_i_daifset)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1e, imm, GETPC());
    env->daif |= (imm << 6) & PSTATE_DAIF;
}

void HELPER(msr_i_daifclear)(CPUARMState *env, uint32_t imm)
{
    daif_check(env, 0x1f, imm, GETPC());
    env->daif &= ~((imm << 6) & PSTATE_DAIF);
}

/* Convert a softfloat float_relation_* result (as returned by
 * the float*_compare functions) to the correct ARM
 * NZCV flag state.
 */
static inline uint32_t float_rel_to_flags(int res)
{
    uint64_t flags;
    switch (res) {
    case float_relation_equal:
        flags = PSTATE_Z | PSTATE_C;
        break;
    case float_relation_less:
        flags = PSTATE_N;
        break;
    case float_relation_greater:
        flags = PSTATE_C;
        break;
    case float_relation_unordered:
    default:
        flags = PSTATE_C | PSTATE_V;
        break;
    }
    return flags;
}

uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status)
{
    return float_rel_to_flags(float16_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmps_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpes_a64)(float32 x, float32 y, void *fp_status)
{
    return float_rel_to_flags(float32_compare(x, y, fp_status));
}

uint64_t HELPER(vfp_cmpd_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare_quiet(x, y, fp_status));
}

uint64_t HELPER(vfp_cmped_a64)(float64 x, float64 y, void *fp_status)
{
    return float_rel_to_flags(float64_compare(x, y, fp_status));
}

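/*
 * FMULX differs from an ordinary multiply only in its handling of
 * 0 * infinity: rather than the default NaN, it returns 2.0 with
 * the sign set to the XOR of the operands' signs, as the special
 * cases below implement.
 */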
float32 HELPER(vfp_mulxs)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    if ((float32_is_zero(a) && float32_is_infinity(b)) ||
        (float32_is_infinity(a) && float32_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float32((1U << 30) |
                            ((float32_val(a) ^ float32_val(b)) & (1U << 31)));
    }
    return float32_mul(a, b, fpst);
}

float64 HELPER(vfp_mulxd)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    if ((float64_is_zero(a) && float64_is_infinity(b)) ||
        (float64_is_infinity(a) && float64_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float64((1ULL << 62) |
                            ((float64_val(a) ^ float64_val(b)) & (1ULL << 63)));
    }
    return float64_mul(a, b, fpst);
}

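/*
 * Worked example: for TBL with numregs == 2 the virtual table is
 * Vn:V(n+1), 32 bytes long. An index byte of 17 selects byte 1 of
 * V(n+1) (elt = rn * 2 + 2, bitidx = 8), while an index of 32 or
 * more leaves the destination byte untouched (zero for TBL, the
 * original value for TBX).
 */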
uint64_t HELPER(simd_tbl)(CPUARMState *env, uint64_t result, uint64_t indices,
                          uint32_t rn, uint32_t numregs)
{
    /* Helper function for SIMD TBL and TBX. We have to do the table
     * lookup part for the 64 bits' worth of indices we're passed in.
     * result is the initial result vector (either zeroes for TBL
     * or some guest values for TBX), rn the register number where
     * the table starts, and numregs the number of registers in the table.
     * We return the results of the lookups.
     */
    int shift;

    for (shift = 0; shift < 64; shift += 8) {
        int index = extract64(indices, shift, 8);
        if (index < 16 * numregs) {
            /* Convert index (a byte offset into the virtual table
             * which is a series of 128-bit vectors concatenated)
             * into the correct register element plus a bit offset
             * into that element, bearing in mind that the table
             * can wrap around from V31 to V0.
             */
            int elt = (rn * 2 + (index >> 3)) % 64;
            int bitidx = (index & 7) * 8;
            uint64_t *q = aa64_vfp_qreg(env, elt >> 1);
            uint64_t val = extract64(q[elt & 1], bitidx, 8);

            result = deposit64(result, shift, 8, val);
        }
    }
    return result;
}

/* 64bit/double versions of the neon float compare functions */
uint64_t HELPER(neon_ceq_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_eq_quiet(a, b, fpst);
}

uint64_t HELPER(neon_cge_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_le(b, a, fpst);
}

uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;
    return -float64_lt(b, a, fpst);
}

/* Reciprocal step and sqrt step. Note that unlike the A32/T32
 * versions, these do a fully fused multiply-add or
 * multiply-add-and-halve.
 */
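/*
 * FPRecipStepFused computes 2.0 - a * b and FPRSqrtStepFused
 * (3.0 - a * b) / 2; below, both are implemented by negating 'a'
 * and using a single fused muladd, with float_muladd_halve_result
 * providing the divide-by-two for the rsqrt case.
 */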
#define float16_two make_float16(0x4000)
#define float16_three make_float16(0x4200)
#define float16_one_point_five make_float16(0x3e00)

#define float32_two make_float32(0x40000000)
#define float32_three make_float32(0x40400000)
#define float32_one_point_five make_float32(0x3fc00000)

#define float64_two make_float64(0x4000000000000000ULL)
#define float64_three make_float64(0x4008000000000000ULL)
#define float64_one_point_five make_float64(0x3FF8000000000000ULL)

uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_two;
    }
    return float16_muladd(a, b, float16_two, 0, fpst);
}

float32 HELPER(recpsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_two;
    }
    return float32_muladd(a, b, float32_two, 0, fpst);
}

float64 HELPER(recpsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_two;
    }
    return float64_muladd(a, b, float64_two, 0, fpst);
}

uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    a = float16_chs(a);
    if ((float16_is_infinity(a) && float16_is_zero(b)) ||
        (float16_is_infinity(b) && float16_is_zero(a))) {
        return float16_one_point_five;
    }
    return float16_muladd(a, b, float16_three, float_muladd_halve_result, fpst);
}

float32 HELPER(rsqrtsf_f32)(float32 a, float32 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float32_squash_input_denormal(a, fpst);
    b = float32_squash_input_denormal(b, fpst);

    a = float32_chs(a);
    if ((float32_is_infinity(a) && float32_is_zero(b)) ||
        (float32_is_infinity(b) && float32_is_zero(a))) {
        return float32_one_point_five;
    }
    return float32_muladd(a, b, float32_three, float_muladd_halve_result, fpst);
}

float64 HELPER(rsqrtsf_f64)(float64 a, float64 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float64_squash_input_denormal(a, fpst);
    b = float64_squash_input_denormal(b, fpst);

    a = float64_chs(a);
    if ((float64_is_infinity(a) && float64_is_zero(b)) ||
        (float64_is_infinity(b) && float64_is_zero(a))) {
        return float64_one_point_five;
    }
    return float64_muladd(a, b, float64_three, float_muladd_halve_result, fpst);
}

/* Pairwise long add: add pairs of adjacent elements into
 * double-width elements in the result (e.g. _s8 is an 8x8->16 op)
 */
uint64_t HELPER(neon_addlp_s8)(uint64_t a)
{
    uint64_t nsignmask = 0x0080008000800080ULL;
    uint64_t wsignmask = 0x8000800080008000ULL;
    uint64_t elementmask = 0x00ff00ff00ff00ffULL;
    uint64_t tmp1, tmp2;
    uint64_t res, signres;

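    /*
     * The lane-parallel sign extension below uses the identity
     * (x ^ 0x80) - 0x80 == (int8_t)x, with the wide sign bit pre-set
     * so the subtraction can never borrow out of a 16-bit lane.
     * E.g. for the byte 0xff:
     *   0x00ff ^ 0x0080 = 0x007f; | 0x8000 = 0x807f;
     *   - 0x0080 = 0x7fff; ^ 0x8000 = 0xffff == -1.
     */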
    /* Extract odd elements, sign extend each to a 16 bit field */
    tmp1 = a & elementmask;
    tmp1 ^= nsignmask;
    tmp1 |= wsignmask;
    tmp1 = (tmp1 - nsignmask) ^ wsignmask;
    /* Ditto for the even elements */
    tmp2 = (a >> 8) & elementmask;
    tmp2 ^= nsignmask;
    tmp2 |= wsignmask;
    tmp2 = (tmp2 - nsignmask) ^ wsignmask;

    /* calculate the result by summing bits 0..14, 16..30, etc,
     * and then adjusting the sign bits 15, 31, etc manually.
     * This ensures the addition can't overflow the 16 bit field.
     */
    signres = (tmp1 ^ tmp2) & wsignmask;
    res = (tmp1 & ~wsignmask) + (tmp2 & ~wsignmask);
    res ^= signres;

    return res;
}

uint64_t HELPER(neon_addlp_u8)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x00ff00ff00ff00ffULL;
    tmp += (a >> 8) & 0x00ff00ff00ff00ffULL;
    return tmp;
}

uint64_t HELPER(neon_addlp_s16)(uint64_t a)
{
    int32_t reslo, reshi;

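    /* The sum of two int16_t values always fits in an int32_t, so no
     * lane-carry tricks are needed for the 16-bit variant.
     */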
    reslo = (int32_t)(int16_t)a + (int32_t)(int16_t)(a >> 16);
    reshi = (int32_t)(int16_t)(a >> 32) + (int32_t)(int16_t)(a >> 48);

    return (uint32_t)reslo | (((uint64_t)reshi) << 32);
}

uint64_t HELPER(neon_addlp_u16)(uint64_t a)
{
    uint64_t tmp;

    tmp = a & 0x0000ffff0000ffffULL;
    tmp += (a >> 16) & 0x0000ffff0000ffffULL;
    return tmp;
}

/* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */
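/*
 * FRECPX keeps the input's sign, zeroes the fraction and replaces the
 * exponent with its bitwise NOT; a zero exponent (zero or denormal
 * input) instead yields the maximum non-infinite exponent, as the
 * exp == 0 cases below show.
 */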
uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint16_t val16, sbit;
    int16_t exp;

    if (float16_is_any_nan(a)) {
        float16 nan = a;
        if (float16_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float16_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float16_default_nan(fpst);
        }
        return nan;
    }

    a = float16_squash_input_denormal(a, fpst);

    val16 = float16_val(a);
    sbit = 0x8000 & val16;
    exp = extract32(val16, 10, 5);

    if (exp == 0) {
        return make_float16(deposit32(sbit, 10, 5, 0x1e));
    } else {
        return make_float16(deposit32(sbit, 10, 5, ~exp));
    }
}

float32 HELPER(frecpx_f32)(float32 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint32_t val32, sbit;
    int32_t exp;

    if (float32_is_any_nan(a)) {
        float32 nan = a;
        if (float32_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float32_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float32_default_nan(fpst);
        }
        return nan;
    }

    a = float32_squash_input_denormal(a, fpst);

    val32 = float32_val(a);
    sbit = 0x80000000ULL & val32;
    exp = extract32(val32, 23, 8);

    if (exp == 0) {
        return make_float32(sbit | (0xfe << 23));
    } else {
        return make_float32(sbit | (~exp & 0xff) << 23);
    }
}

float64 HELPER(frecpx_f64)(float64 a, void *fpstp)
{
    float_status *fpst = fpstp;
    uint64_t val64, sbit;
    int64_t exp;

    if (float64_is_any_nan(a)) {
        float64 nan = a;
        if (float64_is_signaling_nan(a, fpst)) {
            float_raise(float_flag_invalid, fpst);
            nan = float64_silence_nan(a, fpst);
        }
        if (fpst->default_nan_mode) {
            nan = float64_default_nan(fpst);
        }
        return nan;
    }

    a = float64_squash_input_denormal(a, fpst);

    val64 = float64_val(a);
    sbit = 0x8000000000000000ULL & val64;
    exp = extract64(float64_val(a), 52, 11);

    if (exp == 0) {
        return make_float64(sbit | (0x7feULL << 52));
    } else {
        return make_float64(sbit | (~exp & 0x7ffULL) << 52);
    }
}

float32 HELPER(fcvtx_f64_to_f32)(float64 a, CPUARMState *env)
{
    /* Von Neumann rounding is implemented by using round-to-zero
     * and then setting the LSB of the result if Inexact was raised.
     */
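    /* This "round to odd" scheme avoids double-rounding error when the
     * FCVTXN result is subsequently rounded again to half precision.
     */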
    float32 r;
    float_status *fpst = &env->vfp.fp_status;
    float_status tstat = *fpst;
    int exflags;

    set_float_rounding_mode(float_round_to_zero, &tstat);
    set_float_exception_flags(0, &tstat);
    r = float64_to_float32(a, &tstat);
    exflags = get_float_exception_flags(&tstat);
    if (exflags & float_flag_inexact) {
        r = make_float32(float32_val(r) | 1);
    }
    exflags |= get_float_exception_flags(fpst);
    set_float_exception_flags(exflags, fpst);
    return r;
}

/* 64-bit versions of the CRC helpers. Note that although the operation
 * (and the prototypes of crc32c() and crc32()) means that only the bottom
 * 32 bits of the accumulator and result are used, we pass and return
 * uint64_t for convenience of the generated code. Unlike the 32-bit
 * instruction set versions, val may genuinely have 64 bits of data in it.
 * The upper bytes of val (above the number specified by 'bytes') must have
 * been zeroed out by the caller.
 */
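/*
 * For example, CRC32X consumes all eight bytes of Xm (bytes == 8),
 * while CRC32B consumes a single zero-extended byte (bytes == 1).
 */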
uint64_t HELPER(crc32_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* zlib crc32 converts the accumulator and output to one's complement. */
    return crc32(acc ^ 0xffffffff, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(crc32c_64)(uint64_t acc, uint64_t val, uint32_t bytes)
{
    uint8_t buf[8];

    stq_le_p(buf, val);

    /* Linux crc32c converts the output to one's complement. */
    return crc32c(acc, buf, bytes) ^ 0xffffffff;
}

uint64_t HELPER(paired_cmpxchg64_le)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    Int128 cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    Int128 newv = int128_make128(new_lo, new_hi);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment. */
    uint64_t *haddr = g2h(addr);

    set_helper_retaddr(ra);
    o0 = ldq_le_p(haddr + 0);
    o1 = ldq_le_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_le_p(haddr + 0, int128_getlo(newv));
        stq_le_p(haddr + 1, int128_gethi(newv));
    }
    clear_helper_retaddr();
#else
    int mem_idx = cpu_mmu_index(env, false);
    TCGMemOpIdx oi0 = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);
    TCGMemOpIdx oi1 = make_memop_idx(MO_LEQ, mem_idx);
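    /*
     * Only the first access is marked MO_ALIGN_16: once addr is known
     * to be 16-byte aligned, the second doubleword at addr + 8 needs
     * no further alignment check, so oi1 omits it.
     */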

    o0 = helper_le_ldq_mmu(env, addr + 0, oi0, ra);
    o1 = helper_le_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_le_stq_mmu(env, addr + 0, int128_getlo(newv), oi1, ra);
        helper_le_stq_mmu(env, addr + 8, int128_gethi(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_le_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->exclusive_val, env->exclusive_high);
    newv = int128_make128(new_lo, new_hi);
    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be)(CPUARMState *env, uint64_t addr,
                                     uint64_t new_lo, uint64_t new_hi)
{
    /*
     * High and low need to be switched here because this is not actually a
     * 128-bit store but two doublewords stored consecutively
     */
    Int128 cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    Int128 newv = int128_make128(new_hi, new_lo);
    Int128 oldv;
    uintptr_t ra = GETPC();
    uint64_t o0, o1;
    bool success;

#ifdef CONFIG_USER_ONLY
    /* ??? Enforce alignment. */
    uint64_t *haddr = g2h(addr);

    set_helper_retaddr(ra);
    o1 = ldq_be_p(haddr + 0);
    o0 = ldq_be_p(haddr + 1);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        stq_be_p(haddr + 0, int128_gethi(newv));
        stq_be_p(haddr + 1, int128_getlo(newv));
    }
    clear_helper_retaddr();
#else
    int mem_idx = cpu_mmu_index(env, false);
    TCGMemOpIdx oi0 = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);
    TCGMemOpIdx oi1 = make_memop_idx(MO_BEQ, mem_idx);

    o1 = helper_be_ldq_mmu(env, addr + 0, oi0, ra);
    o0 = helper_be_ldq_mmu(env, addr + 8, oi1, ra);
    oldv = int128_make128(o0, o1);

    success = int128_eq(oldv, cmpv);
    if (success) {
        helper_be_stq_mmu(env, addr + 0, int128_gethi(newv), oi1, ra);
        helper_be_stq_mmu(env, addr + 8, int128_getlo(newv), oi1, ra);
    }
#endif

    return !success;
}

uint64_t HELPER(paired_cmpxchg64_be_parallel)(CPUARMState *env, uint64_t addr,
                                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    bool success;
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);

    /*
     * High and low need to be switched here because this is not actually a
     * 128-bit store but two doublewords stored consecutively
     */
    cmpv = int128_make128(env->exclusive_high, env->exclusive_val);
    newv = int128_make128(new_hi, new_lo);
    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    success = int128_eq(oldv, cmpv);
    return !success;
}

/* Writes back the old data into Rs. */
void HELPER(casp_le_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_lo, uint64_t new_hi)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_LEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs], env->xregs[rs + 1]);
    newv = int128_make128(new_lo, new_hi);
    oldv = helper_atomic_cmpxchgo_le_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs] = int128_getlo(oldv);
    env->xregs[rs + 1] = int128_gethi(oldv);
}

void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t rs, uint64_t addr,
                              uint64_t new_hi, uint64_t new_lo)
{
    Int128 oldv, cmpv, newv;
    uintptr_t ra = GETPC();
    int mem_idx;
    TCGMemOpIdx oi;

    assert(HAVE_CMPXCHG128);

    mem_idx = cpu_mmu_index(env, false);
    oi = make_memop_idx(MO_BEQ | MO_ALIGN_16, mem_idx);

    cmpv = int128_make128(env->xregs[rs + 1], env->xregs[rs]);
    newv = int128_make128(new_lo, new_hi);
    oldv = helper_atomic_cmpxchgo_be_mmu(env, addr, cmpv, newv, oi, ra);

    env->xregs[rs + 1] = int128_getlo(oldv);
    env->xregs[rs] = int128_gethi(oldv);
}

/*
 * AdvSIMD half-precision
 */

#define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), suffix))

#define ADVSIMD_HALFOP(name) \
uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \
{ \
    float_status *fpst = fpstp; \
    return float16_ ## name(a, b, fpst); \
}

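/*
 * For example, ADVSIMD_HALFOP(add) defines HELPER(advsimd_addh),
 * a thin wrapper around softfloat's float16_add().
 */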
ADVSIMD_HALFOP(add)
ADVSIMD_HALFOP(sub)
ADVSIMD_HALFOP(mul)
ADVSIMD_HALFOP(div)
ADVSIMD_HALFOP(min)
ADVSIMD_HALFOP(max)
ADVSIMD_HALFOP(minnum)
ADVSIMD_HALFOP(maxnum)

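/*
 * The 2h variants operate on a pair of half-precision values packed
 * into each 32-bit argument and return two packed results.
 */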
#define ADVSIMD_TWOHALFOP(name) \
uint32_t ADVSIMD_HELPER(name, 2h)(uint32_t two_a, uint32_t two_b, void *fpstp) \
{ \
    float16 a1, a2, b1, b2; \
    uint32_t r1, r2; \
    float_status *fpst = fpstp; \
    a1 = extract32(two_a, 0, 16); \
    a2 = extract32(two_a, 16, 16); \
    b1 = extract32(two_b, 0, 16); \
    b2 = extract32(two_b, 16, 16); \
    r1 = float16_ ## name(a1, b1, fpst); \
    r2 = float16_ ## name(a2, b2, fpst); \
    return deposit32(r1, 16, 16, r2); \
}

ADVSIMD_TWOHALFOP(add)
ADVSIMD_TWOHALFOP(sub)
ADVSIMD_TWOHALFOP(mul)
ADVSIMD_TWOHALFOP(div)
ADVSIMD_TWOHALFOP(min)
ADVSIMD_TWOHALFOP(max)
ADVSIMD_TWOHALFOP(minnum)
ADVSIMD_TWOHALFOP(maxnum)

/* Data processing - scalar floating-point and advanced SIMD */
static float16 float16_mulx(float16 a, float16 b, void *fpstp)
{
    float_status *fpst = fpstp;

    a = float16_squash_input_denormal(a, fpst);
    b = float16_squash_input_denormal(b, fpst);

    if ((float16_is_zero(a) && float16_is_infinity(b)) ||
        (float16_is_infinity(a) && float16_is_zero(b))) {
        /* 2.0 with the sign bit set to sign(A) XOR sign(B) */
        return make_float16((1U << 14) |
                            ((float16_val(a) ^ float16_val(b)) & (1U << 15)));
    }
    return float16_mul(a, b, fpst);
}

ADVSIMD_HALFOP(mulx)
ADVSIMD_TWOHALFOP(mulx)

/* fused multiply-accumulate */
uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c,
                                 void *fpstp)
{
    float_status *fpst = fpstp;
    return float16_muladd(a, b, c, 0, fpst);
}

uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, uint32_t two_b,
                                  uint32_t two_c, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 a1, a2, b1, b2, c1, c2;
    uint32_t r1, r2;
    a1 = extract32(two_a, 0, 16);
    a2 = extract32(two_a, 16, 16);
    b1 = extract32(two_b, 0, 16);
    b2 = extract32(two_b, 16, 16);
    c1 = extract32(two_c, 0, 16);
    c2 = extract32(two_c, 16, 16);
    r1 = float16_muladd(a1, b1, c1, 0, fpst);
    r2 = float16_muladd(a2, b2, c2, 0, fpst);
    return deposit32(r1, 16, 16, r2);
}

/*
 * Floating point comparisons produce an integer result. Softfloat
 * routines return float_relation types, which we convert to the
 * all-zeroes/all-ones (0/-1) result that Neon requires.
 */

#define ADVSIMD_CMPRES(test) ((test) ? 0xffff : 0)

uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare_quiet(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    int compare = float16_compare(a, b, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater ||
                          compare == float_relation_equal);
}

uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp)
{
    float_status *fpst = fpstp;
    float16 f0 = float16_abs(a);
    float16 f1 = float16_abs(b);
    int compare = float16_compare(f0, f1, fpst);
    return ADVSIMD_CMPRES(compare == float_relation_greater);
}

/* round to integral */
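/*
 * The _exact variant (used for FRINTX) leaves any Inexact flag raised
 * by the rounding visible; the plain variant below restores the prior
 * Inexact state, as required for the other FRINT* instructions.
 */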
uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status)
{
    return float16_round_to_int(x, fp_status);
}

uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status)
{
    int old_flags = get_float_exception_flags(fp_status), new_flags;
    float16 ret;

    ret = float16_round_to_int(x, fp_status);

    /* Suppress any inexact exceptions the conversion produced */
    if (!(old_flags & float_flag_inexact)) {
        new_flags = get_float_exception_flags(fp_status);
        set_float_exception_flags(new_flags & ~float_flag_inexact, fp_status);
    }

    return ret;
}

/*
 * Half-precision floating point conversion functions
 *
 * There are a multitude of conversion functions with various
 * different rounding modes. This is dealt with by the calling code
 * setting the mode appropriately before calling the helper.
 */

uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_int16(a, fpst);
}

uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp)
{
    float_status *fpst = fpstp;

    /* Invalid if we are passed a NaN */
    if (float16_is_any_nan(a)) {
        float_raise(float_flag_invalid, fpst);
        return 0;
    }
    return float16_to_uint16(a, fpst);
}

static int el_from_spsr(uint32_t spsr)
{
    /* Return the exception level that this SPSR is requesting a return to,
     * or -1 if it is invalid (an illegal return)
     */
    if (spsr & PSTATE_nRW) {
        switch (spsr & CPSR_M) {
        case ARM_CPU_MODE_USR:
            return 0;
        case ARM_CPU_MODE_HYP:
            return 2;
        case ARM_CPU_MODE_FIQ:
        case ARM_CPU_MODE_IRQ:
        case ARM_CPU_MODE_SVC:
        case ARM_CPU_MODE_ABT:
        case ARM_CPU_MODE_UND:
        case ARM_CPU_MODE_SYS:
            return 1;
        case ARM_CPU_MODE_MON:
            /* Returning to Mon from AArch64 is never possible,
             * so this is an illegal return.
             */
        default:
            return -1;
        }
    } else {
        if (extract32(spsr, 1, 1)) {
            /* Return with reserved M[1] bit set */
            return -1;
        }
        if (extract32(spsr, 0, 4) == 1) {
            /* return to EL0 with M[0] bit set */
            return -1;
        }
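        /* For an AArch64 SPSR, M[3:2] encodes the target EL and M[0]
         * the stack pointer selection (SP_EL0 vs SP_ELx).
         */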
        return extract32(spsr, 2, 2);
    }
}

void HELPER(exception_return)(CPUARMState *env, uint64_t new_pc)
{
    int cur_el = arm_current_el(env);
    unsigned int spsr_idx = aarch64_banked_spsr_index(cur_el);
    uint32_t spsr = env->banked_spsr[spsr_idx];
    int new_el;
    bool return_to_aa64 = (spsr & PSTATE_nRW) == 0;

    aarch64_save_sp(env, cur_el);

    arm_clear_exclusive(env);

    /* We must squash the PSTATE.SS bit to zero unless both of the
     * following hold:
     *  1. debug exceptions are currently disabled
     *  2. singlestep will be active in the EL we return to
     * We check 1 here and 2 after we've done the pstate/cpsr write() to
     * transition to the EL we're going to.
     */
    if (arm_generate_debug_exceptions(env)) {
        spsr &= ~PSTATE_SS;
    }

    new_el = el_from_spsr(spsr);
    if (new_el == -1) {
        goto illegal_return;
    }
    if (new_el > cur_el
        || (new_el == 2 && !arm_feature(env, ARM_FEATURE_EL2))) {
        /* Disallow return to an EL which is unimplemented or higher
         * than the current one.
         */
        goto illegal_return;
    }

    if (new_el != 0 && arm_el_is_aa64(env, new_el) != return_to_aa64) {
        /* Return to an EL which is configured for a different register width */
        goto illegal_return;
    }

    if (new_el == 2 && arm_is_secure_below_el3(env)) {
        /* Return to the non-existent secure-EL2 */
        goto illegal_return;
    }

    if (new_el == 1 && (arm_hcr_el2_eff(env) & HCR_TGE)) {
        goto illegal_return;
    }

    qemu_mutex_lock_iothread();
    arm_call_pre_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    if (!return_to_aa64) {
        env->aarch64 = 0;
        /* We do a raw CPSR write because aarch64_sync_64_to_32()
         * will sort the register banks out for us, and we've already
         * caught all the bad-mode cases in el_from_spsr().
         */
        cpsr_write(env, spsr, ~0, CPSRWriteRaw);
        if (!arm_singlestep_active(env)) {
            env->uncached_cpsr &= ~PSTATE_SS;
        }
        aarch64_sync_64_to_32(env);

        if (spsr & CPSR_T) {
            env->regs[15] = new_pc & ~0x1;
        } else {
            env->regs[15] = new_pc & ~0x3;
        }
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch32 EL%d PC 0x%" PRIx32 "\n",
                      cur_el, new_el, env->regs[15]);
    } else {
        env->aarch64 = 1;
        pstate_write(env, spsr);
        if (!arm_singlestep_active(env)) {
            env->pstate &= ~PSTATE_SS;
        }
        aarch64_restore_sp(env, new_el);
        env->pc = new_pc;
        qemu_log_mask(CPU_LOG_INT, "Exception return from AArch64 EL%d to "
                      "AArch64 EL%d PC 0x%" PRIx64 "\n",
                      cur_el, new_el, env->pc);
    }
    /*
     * Note that cur_el can never be 0. If new_el is 0, then
     * el0_a64 is return_to_aa64, else el0_a64 is ignored.
     */
    aarch64_sve_change_el(env, cur_el, new_el, return_to_aa64);

    qemu_mutex_lock_iothread();
    arm_call_el_change_hook(env_archcpu(env));
    qemu_mutex_unlock_iothread();

    return;

illegal_return:
    /* Illegal return events of various kinds have architecturally
     * mandated behaviour:
     *  restore NZCV and DAIF from SPSR_ELx
     *  set PSTATE.IL
     *  restore PC from ELR_ELx
     *  no change to exception level, execution state or stack pointer
     */
    env->pstate |= PSTATE_IL;
    env->pc = new_pc;
    spsr &= PSTATE_NZCV | PSTATE_DAIF;
    spsr |= pstate_read(env) & ~(PSTATE_NZCV | PSTATE_DAIF);
    pstate_write(env, spsr);
    if (!arm_singlestep_active(env)) {
        env->pstate &= ~PSTATE_SS;
    }
    qemu_log_mask(LOG_GUEST_ERROR, "Illegal exception return at EL%d: "
                  "resuming execution at 0x%" PRIx64 "\n", cur_el, env->pc);
}

/*
 * Square Root and Reciprocal square root
 */

uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp)
{
    float_status *s = fpstp;

    return float16_sqrt(a, s);
}