fpu_helper.c source code [qemu/target/i386/fpu_helper.c]

1	/*
2	* x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3	*
4	* Copyright (c) 2003 Fabrice Bellard
5	*
6	* This library is free software; you can redistribute it and/or
7	* modify it under the terms of the GNU Lesser General Public
8	* License as published by the Free Software Foundation; either
9	* version 2 of the License, or (at your option) any later version.
10	*
11	* This library is distributed in the hope that it will be useful,
12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	* Lesser General Public License for more details.
15	*
16	* You should have received a copy of the GNU Lesser General Public
17	* License along with this library; if not, see <http://www.gnu.org/licenses/>.
18	*/
19
20	#include "qemu/osdep.h"
21	#include <math.h>
22	#include "cpu.h"
23	#include "exec/helper-proto.h"
24	#include "qemu/host-utils.h"
25	#include "exec/exec-all.h"
26	#include "exec/cpu_ldst.h"
27	#include "fpu/softfloat.h"
28
/* Rounding-control field of the x87 control word (bits 10-11) and its values */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000
#define FPU_RC_DOWN         0x400
#define FPU_RC_UP           0x800
#define FPU_RC_CHOP         0xc00

/* 2^63: operand magnitude limit checked by the fptan/fsin/fcos/fsincos helpers */
#define MAXTAN 9223372036854775808.0

/* the following deal with x86 long double-precision numbers */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
/* Macro arguments are parenthesized so any lvalue expression can be passed. */
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       (((fp).l.upper) & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with the bias, keeping the sign bit. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/* Flag bits of the status word (env->fpus) */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B  (1 << 15)

/* Low six bits of the control word, tested against env->fpus exception bits */
#define FPUC_EM 0x3f

#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
60
61	static inline void fpush(CPUX86State *env)
62	{
63	env->fpstt = (env->fpstt - `1`) & `7`;
64	env->fptags[env->fpstt] = `0`; / validate stack entry /
65	}
66
67	static inline void fpop(CPUX86State *env)
68	{
69	env->fptags[env->fpstt] = `1`; / invalidate stack entry /
70	env->fpstt = (env->fpstt + `1`) & `7`;
71	}
72
73	static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
74	uintptr_t retaddr)
75	{
76	CPU_LDoubleU temp;
77
78	temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
79	temp.l.upper = cpu_lduw_data_ra(env, ptr + `8`, retaddr);
80	return temp.d;
81	}
82
83	static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
84	uintptr_t retaddr)
85	{
86	CPU_LDoubleU temp;
87
88	temp.d = f;
89	cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
90	cpu_stw_data_ra(env, ptr + `8`, temp.l.upper, retaddr);
91	}
92
/* x87 FPU helpers */
94
95	static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
96	{
97	union {
98	float64 f64;
99	double d;
100	} u;
101
102	u.f64 = floatx80_to_float64(a, &env->fp_status);
103	return u.d;
104	}
105
106	static inline floatx80 double_to_floatx80(CPUX86State env, double* a)
107	{
108	union {
109	float64 f64;
110	double d;
111	} u;
112
113	u.d = a;
114	return float64_to_floatx80(u.f64, &env->fp_status);
115	}
116
117	static void fpu_set_exception(CPUX86State env, int* mask)
118	{
119	env->fpus \|= mask;
120	if (env->fpus & (~env->fpuc & FPUC_EM)) {
121	env->fpus \|= FPUS_SE \| FPUS_B;
122	}
123	}
124
125	static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
126	{
127	if (floatx80_is_zero(b)) {
128	fpu_set_exception(env, FPUS_ZE);
129	}
130	return floatx80_div(a, b, &env->fp_status);
131	}
132
133	static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
134	{
135	if (env->cr[`0`] & CR0_NE_MASK) {
136	raise_exception_ra(env, EXCP10_COPR, retaddr);
137	}
138	#if !defined(CONFIG_USER_ONLY)
139	else {
140	cpu_set_ferr(env);
141	}
142	#endif
143	}
144
145	void helper_flds_FT0(CPUX86State *env, uint32_t val)
146	{
147	union {
148	float32 f;
149	uint32_t i;
150	} u;
151
152	u.i = val;
153	FT0 = float32_to_floatx80(u.f, &env->fp_status);
154	}
155
156	void helper_fldl_FT0(CPUX86State *env, uint64_t val)
157	{
158	union {
159	float64 f;
160	uint64_t i;
161	} u;
162
163	u.i = val;
164	FT0 = float64_to_floatx80(u.f, &env->fp_status);
165	}
166
167	void helper_fildl_FT0(CPUX86State *env, int32_t val)
168	{
169	FT0 = int32_to_floatx80(val, &env->fp_status);
170	}
171
172	void helper_flds_ST0(CPUX86State *env, uint32_t val)
173	{
174	int new_fpstt;
175	union {
176	float32 f;
177	uint32_t i;
178	} u;
179
180	new_fpstt = (env->fpstt - `1`) & `7`;
181	u.i = val;
182	env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
183	env->fpstt = new_fpstt;
184	env->fptags[new_fpstt] = `0`; / validate stack entry /
185	}
186
187	void helper_fldl_ST0(CPUX86State *env, uint64_t val)
188	{
189	int new_fpstt;
190	union {
191	float64 f;
192	uint64_t i;
193	} u;
194
195	new_fpstt = (env->fpstt - `1`) & `7`;
196	u.i = val;
197	env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
198	env->fpstt = new_fpstt;
199	env->fptags[new_fpstt] = `0`; / validate stack entry /
200	}
201
202	void helper_fildl_ST0(CPUX86State *env, int32_t val)
203	{
204	int new_fpstt;
205
206	new_fpstt = (env->fpstt - `1`) & `7`;
207	env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
208	env->fpstt = new_fpstt;
209	env->fptags[new_fpstt] = `0`; / validate stack entry /
210	}
211
212	void helper_fildll_ST0(CPUX86State *env, int64_t val)
213	{
214	int new_fpstt;
215
216	new_fpstt = (env->fpstt - `1`) & `7`;
217	env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
218	env->fpstt = new_fpstt;
219	env->fptags[new_fpstt] = `0`; / validate stack entry /
220	}
221
222	uint32_t helper_fsts_ST0(CPUX86State *env)
223	{
224	union {
225	float32 f;
226	uint32_t i;
227	} u;
228
229	u.f = floatx80_to_float32(ST0, &env->fp_status);
230	return u.i;
231	}
232
233	uint64_t helper_fstl_ST0(CPUX86State *env)
234	{
235	union {
236	float64 f;
237	uint64_t i;
238	} u;
239
240	u.f = floatx80_to_float64(ST0, &env->fp_status);
241	return u.i;
242	}
243
244	int32_t helper_fist_ST0(CPUX86State *env)
245	{
246	int32_t val;
247
248	val = floatx80_to_int32(ST0, &env->fp_status);
249	if (val != (int16_t)val) {
250	val = -`32768`;
251	}
252	return val;
253	}
254
255	int32_t helper_fistl_ST0(CPUX86State *env)
256	{
257	int32_t val;
258	signed char old_exp_flags;
259
260	old_exp_flags = get_float_exception_flags(&env->fp_status);
261	set_float_exception_flags(`0`, &env->fp_status);
262
263	val = floatx80_to_int32(ST0, &env->fp_status);
264	if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
265	val = `0x80000000`;
266	}
267	set_float_exception_flags(get_float_exception_flags(&env->fp_status)
268	\| old_exp_flags, &env->fp_status);
269	return val;
270	}
271
272	int64_t helper_fistll_ST0(CPUX86State *env)
273	{
274	int64_t val;
275	signed char old_exp_flags;
276
277	old_exp_flags = get_float_exception_flags(&env->fp_status);
278	set_float_exception_flags(`0`, &env->fp_status);
279
280	val = floatx80_to_int64(ST0, &env->fp_status);
281	if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
282	val = `0x8000000000000000ULL`;
283	}
284	set_float_exception_flags(get_float_exception_flags(&env->fp_status)
285	\| old_exp_flags, &env->fp_status);
286	return val;
287	}
288
289	int32_t helper_fistt_ST0(CPUX86State *env)
290	{
291	int32_t val;
292
293	val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
294	if (val != (int16_t)val) {
295	val = -`32768`;
296	}
297	return val;
298	}
299
300	int32_t helper_fisttl_ST0(CPUX86State *env)
301	{
302	return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
303	}
304
305	int64_t helper_fisttll_ST0(CPUX86State *env)
306	{
307	return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
308	}
309
310	void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
311	{
312	int new_fpstt;
313
314	new_fpstt = (env->fpstt - `1`) & `7`;
315	env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
316	env->fpstt = new_fpstt;
317	env->fptags[new_fpstt] = `0`; / validate stack entry /
318	}
319
320	void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
321	{
322	helper_fstt(env, ST0, ptr, GETPC());
323	}
324
325	void helper_fpush(CPUX86State *env)
326	{
327	fpush(env);
328	}
329
330	void helper_fpop(CPUX86State *env)
331	{
332	fpop(env);
333	}
334
335	void helper_fdecstp(CPUX86State *env)
336	{
337	env->fpstt = (env->fpstt - `1`) & `7`;
338	env->fpus &= ~`0x4700`;
339	}
340
341	void helper_fincstp(CPUX86State *env)
342	{
343	env->fpstt = (env->fpstt + `1`) & `7`;
344	env->fpus &= ~`0x4700`;
345	}
346
/* FPU move */
348
349	void helper_ffree_STN(CPUX86State env, int* st_index)
350	{
351	env->fptags[(env->fpstt + st_index) & `7`] = `1`;
352	}
353
354	void helper_fmov_ST0_FT0(CPUX86State *env)
355	{
356	ST0 = FT0;
357	}
358
359	void helper_fmov_FT0_STN(CPUX86State env, int* st_index)
360	{
361	FT0 = ST(st_index);
362	}
363
364	void helper_fmov_ST0_STN(CPUX86State env, int* st_index)
365	{
366	ST0 = ST(st_index);
367	}
368
369	void helper_fmov_STN_ST0(CPUX86State env, int* st_index)
370	{
371	ST(st_index) = ST0;
372	}
373
374	void helper_fxchg_ST0_STN(CPUX86State env, int* st_index)
375	{
376	floatx80 tmp;
377
378	tmp = ST(st_index);
379	ST(st_index) = ST0;
380	ST0 = tmp;
381	}
382
/* FPU operations */
384
/*
 * Status-word condition bits for each compare outcome, indexed by
 * (compare result + 1).  NOTE(review): presumably ordered less, equal,
 * greater, unordered -- confirm against floatx80_compare().
 */
static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500};
386
387	void helper_fcom_ST0_FT0(CPUX86State *env)
388	{
389	int ret;
390
391	ret = floatx80_compare(ST0, FT0, &env->fp_status);
392	env->fpus = (env->fpus & ~`0x4500`) \| fcom_ccval[ret + `1`];
393	}
394
395	void helper_fucom_ST0_FT0(CPUX86State *env)
396	{
397	int ret;
398
399	ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
400	env->fpus = (env->fpus & ~`0x4500`) \| fcom_ccval[ret + `1`];
401	}
402
403	static const int fcomi_ccval[`4`] = {CC_C, CC_Z, `0`, CC_Z \| CC_P \| CC_C};
404
405	void helper_fcomi_ST0_FT0(CPUX86State *env)
406	{
407	int eflags;
408	int ret;
409
410	ret = floatx80_compare(ST0, FT0, &env->fp_status);
411	eflags = cpu_cc_compute_all(env, CC_OP);
412	eflags = (eflags & ~(CC_Z \| CC_P \| CC_C)) \| fcomi_ccval[ret + `1`];
413	CC_SRC = eflags;
414	}
415
416	void helper_fucomi_ST0_FT0(CPUX86State *env)
417	{
418	int eflags;
419	int ret;
420
421	ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
422	eflags = cpu_cc_compute_all(env, CC_OP);
423	eflags = (eflags & ~(CC_Z \| CC_P \| CC_C)) \| fcomi_ccval[ret + `1`];
424	CC_SRC = eflags;
425	}
426
427	void helper_fadd_ST0_FT0(CPUX86State *env)
428	{
429	ST0 = floatx80_add(ST0, FT0, &env->fp_status);
430	}
431
432	void helper_fmul_ST0_FT0(CPUX86State *env)
433	{
434	ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
435	}
436
437	void helper_fsub_ST0_FT0(CPUX86State *env)
438	{
439	ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
440	}
441
442	void helper_fsubr_ST0_FT0(CPUX86State *env)
443	{
444	ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
445	}
446
447	void helper_fdiv_ST0_FT0(CPUX86State *env)
448	{
449	ST0 = helper_fdiv(env, ST0, FT0);
450	}
451
452	void helper_fdivr_ST0_FT0(CPUX86State *env)
453	{
454	ST0 = helper_fdiv(env, FT0, ST0);
455	}
456
/* fp operations between STN and ST0 */
458
459	void helper_fadd_STN_ST0(CPUX86State env, int* st_index)
460	{
461	ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
462	}
463
464	void helper_fmul_STN_ST0(CPUX86State env, int* st_index)
465	{
466	ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
467	}
468
469	void helper_fsub_STN_ST0(CPUX86State env, int* st_index)
470	{
471	ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
472	}
473
474	void helper_fsubr_STN_ST0(CPUX86State env, int* st_index)
475	{
476	ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
477	}
478
479	void helper_fdiv_STN_ST0(CPUX86State env, int* st_index)
480	{
481	floatx80 *p;
482
483	p = &ST(st_index);
484	p = helper_fdiv(env, p, ST0);
485	}
486
487	void helper_fdivr_STN_ST0(CPUX86State env, int* st_index)
488	{
489	floatx80 *p;
490
491	p = &ST(st_index);
492	p = helper_fdiv(env, ST0, p);
493	}
494
/* misc FPU operations */
496	void helper_fchs_ST0(CPUX86State *env)
497	{
498	ST0 = floatx80_chs(ST0);
499	}
500
501	void helper_fabs_ST0(CPUX86State *env)
502	{
503	ST0 = floatx80_abs(ST0);
504	}
505
506	void helper_fld1_ST0(CPUX86State *env)
507	{
508	ST0 = floatx80_one;
509	}
510
511	void helper_fldl2t_ST0(CPUX86State *env)
512	{
513	ST0 = floatx80_l2t;
514	}
515
516	void helper_fldl2e_ST0(CPUX86State *env)
517	{
518	ST0 = floatx80_l2e;
519	}
520
521	void helper_fldpi_ST0(CPUX86State *env)
522	{
523	ST0 = floatx80_pi;
524	}
525
526	void helper_fldlg2_ST0(CPUX86State *env)
527	{
528	ST0 = floatx80_lg2;
529	}
530
531	void helper_fldln2_ST0(CPUX86State *env)
532	{
533	ST0 = floatx80_ln2;
534	}
535
536	void helper_fldz_ST0(CPUX86State *env)
537	{
538	ST0 = floatx80_zero;
539	}
540
541	void helper_fldz_FT0(CPUX86State *env)
542	{
543	FT0 = floatx80_zero;
544	}
545
546	uint32_t helper_fnstsw(CPUX86State *env)
547	{
548	return (env->fpus & ~`0x3800`) \| (env->fpstt & `0x7`) << `11`;
549	}
550
551	uint32_t helper_fnstcw(CPUX86State *env)
552	{
553	return env->fpuc;
554	}
555
556	void update_fp_status(CPUX86State *env)
557	{
558	int rnd_type;
559
560	/ set rounding mode /
561	switch (env->fpuc & FPU_RC_MASK) {
562	default:
563	case FPU_RC_NEAR:
564	rnd_type = float_round_nearest_even;
565	break;
566	case FPU_RC_DOWN:
567	rnd_type = float_round_down;
568	break;
569	case FPU_RC_UP:
570	rnd_type = float_round_up;
571	break;
572	case FPU_RC_CHOP:
573	rnd_type = float_round_to_zero;
574	break;
575	}
576	set_float_rounding_mode(rnd_type, &env->fp_status);
577	switch ((env->fpuc >> `8`) & `3`) {
578	case `0`:
579	rnd_type = `32`;
580	break;
581	case `2`:
582	rnd_type = `64`;
583	break;
584	case `3`:
585	default:
586	rnd_type = `80`;
587	break;
588	}
589	set_floatx80_rounding_precision(rnd_type, &env->fp_status);
590	}
591
592	void helper_fldcw(CPUX86State *env, uint32_t val)
593	{
594	cpu_set_fpuc(env, val);
595	}
596
597	void helper_fclex(CPUX86State *env)
598	{
599	env->fpus &= `0x7f00`;
600	}
601
602	void helper_fwait(CPUX86State *env)
603	{
604	if (env->fpus & FPUS_SE) {
605	fpu_raise_exception(env, GETPC());
606	}
607	}
608
609	void helper_fninit(CPUX86State *env)
610	{
611	env->fpus = `0`;
612	env->fpstt = `0`;
613	cpu_set_fpuc(env, `0x37f`);
614	env->fptags[`0`] = `1`;
615	env->fptags[`1`] = `1`;
616	env->fptags[`2`] = `1`;
617	env->fptags[`3`] = `1`;
618	env->fptags[`4`] = `1`;
619	env->fptags[`5`] = `1`;
620	env->fptags[`6`] = `1`;
621	env->fptags[`7`] = `1`;
622	}
623
/* BCD ops */
625
626	void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
627	{
628	floatx80 tmp;
629	uint64_t val;
630	unsigned int v;
631	int i;
632
633	val = `0`;
634	for (i = `8`; i >= `0`; i--) {
635	v = cpu_ldub_data_ra(env, ptr + i, GETPC());
636	val = (val * `100`) + ((v >> `4`) * `10`) + (v & `0xf`);
637	}
638	tmp = int64_to_floatx80(val, &env->fp_status);
639	if (cpu_ldub_data_ra(env, ptr + `9`, GETPC()) & `0x80`) {
640	tmp = floatx80_chs(tmp);
641	}
642	fpush(env);
643	ST0 = tmp;
644	}
645
646	void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
647	{
648	int v;
649	target_ulong mem_ref, mem_end;
650	int64_t val;
651
652	val = floatx80_to_int64(ST0, &env->fp_status);
653	mem_ref = ptr;
654	mem_end = mem_ref + `9`;
655	if (val < `0`) {
656	cpu_stb_data_ra(env, mem_end, `0x80`, GETPC());
657	val = -val;
658	} else {
659	cpu_stb_data_ra(env, mem_end, `0x00`, GETPC());
660	}
661	while (mem_ref < mem_end) {
662	if (val == `0`) {
663	break;
664	}
665	v = val % `100`;
666	val = val / `100`;
667	v = ((v / `10`) << `4`) \| (v % `10`);
668	cpu_stb_data_ra(env, mem_ref++, v, GETPC());
669	}
670	while (mem_ref < mem_end) {
671	cpu_stb_data_ra(env, mem_ref++, `0`, GETPC());
672	}
673	}
674
675	void helper_f2xm1(CPUX86State *env)
676	{
677	double val = floatx80_to_double(env, ST0);
678
679	val = pow(`2.0`, val) - `1.0`;
680	ST0 = double_to_floatx80(env, val);
681	}
682
683	void helper_fyl2x(CPUX86State *env)
684	{
685	double fptemp = floatx80_to_double(env, ST0);
686
687	if (fptemp > `0.0`) {
688	fptemp = log(fptemp) / log(`2.0`); / log2(ST) /
689	fptemp *= floatx80_to_double(env, ST1);
690	ST1 = double_to_floatx80(env, fptemp);
691	fpop(env);
692	} else {
693	env->fpus &= ~`0x4700`;
694	env->fpus \|= `0x400`;
695	}
696	}
697
698	void helper_fptan(CPUX86State *env)
699	{
700	double fptemp = floatx80_to_double(env, ST0);
701
702	if ((fptemp > MAXTAN) \|\| (fptemp < -MAXTAN)) {
703	env->fpus \|= `0x400`;
704	} else {
705	fptemp = tan(fptemp);
706	ST0 = double_to_floatx80(env, fptemp);
707	fpush(env);
708	ST0 = floatx80_one;
709	env->fpus &= ~`0x400`; / C2 <-- 0 /
710	/ the above code is for \|arg\| < 2*52 only /*
711	}
712	}
713
714	void helper_fpatan(CPUX86State *env)
715	{
716	double fptemp, fpsrcop;
717
718	fpsrcop = floatx80_to_double(env, ST1);
719	fptemp = floatx80_to_double(env, ST0);
720	ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
721	fpop(env);
722	}
723
724	void helper_fxtract(CPUX86State *env)
725	{
726	CPU_LDoubleU temp;
727
728	temp.d = ST0;
729
730	if (floatx80_is_zero(ST0)) {
731	/ Easy way to generate -inf and raising division by 0 exception /
732	ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
733	&env->fp_status);
734	fpush(env);
735	ST0 = temp.d;
736	} else {
737	int expdif;
738
739	expdif = EXPD(temp) - EXPBIAS;
740	/ DP exponent bias /
741	ST0 = int32_to_floatx80(expdif, &env->fp_status);
742	fpush(env);
743	BIASEXPONENT(temp);
744	ST0 = temp.d;
745	}
746	}
747
748	void helper_fprem1(CPUX86State *env)
749	{
750	double st0, st1, dblq, fpsrcop, fptemp;
751	CPU_LDoubleU fpsrcop1, fptemp1;
752	int expdif;
753	signed long long int q;
754
755	st0 = floatx80_to_double(env, ST0);
756	st1 = floatx80_to_double(env, ST1);
757
758	if (isinf(st0) \|\| isnan(st0) \|\| isnan(st1) \|\| (st1 == `0.0`)) {
759	ST0 = double_to_floatx80(env, `0.0` / `0.0`); / NaN /
760	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
761	return;
762	}
763
764	fpsrcop = st0;
765	fptemp = st1;
766	fpsrcop1.d = ST0;
767	fptemp1.d = ST1;
768	expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
769
770	if (expdif < `0`) {
771	/ optimisation? taken from the AMD docs /
772	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
773	/ ST0 is unchanged /
774	return;
775	}
776
777	if (expdif < `53`) {
778	dblq = fpsrcop / fptemp;
779	/ round dblq towards nearest integer /
780	dblq = rint(dblq);
781	st0 = fpsrcop - fptemp * dblq;
782
783	/ convert dblq to q by truncating towards zero /
784	if (dblq < `0.0`) {
785	q = (signed long long int)(-dblq);
786	} else {
787	q = (signed long long int)dblq;
788	}
789
790	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
791	/ (C0,C3,C1) <-- (q2,q1,q0) /
792	env->fpus \|= (q & `0x4`) << (`8` - `2`); / (C0) <-- q2 /
793	env->fpus \|= (q & `0x2`) << (`14` - `1`); / (C3) <-- q1 /
794	env->fpus \|= (q & `0x1`) << (`9` - `0`); / (C1) <-- q0 /
795	} else {
796	env->fpus \|= `0x400`; / C2 <-- 1 /
797	fptemp = pow(`2.0`, expdif - `50`);
798	fpsrcop = (st0 / st1) / fptemp;
799	/ fpsrcop = integer obtained by chopping /
800	fpsrcop = (fpsrcop < `0.0`) ?
801	-(floor(fabs(fpsrcop))) : floor(fpsrcop);
802	st0 -= (st1 * fpsrcop * fptemp);
803	}
804	ST0 = double_to_floatx80(env, st0);
805	}
806
807	void helper_fprem(CPUX86State *env)
808	{
809	double st0, st1, dblq, fpsrcop, fptemp;
810	CPU_LDoubleU fpsrcop1, fptemp1;
811	int expdif;
812	signed long long int q;
813
814	st0 = floatx80_to_double(env, ST0);
815	st1 = floatx80_to_double(env, ST1);
816
817	if (isinf(st0) \|\| isnan(st0) \|\| isnan(st1) \|\| (st1 == `0.0`)) {
818	ST0 = double_to_floatx80(env, `0.0` / `0.0`); / NaN /
819	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
820	return;
821	}
822
823	fpsrcop = st0;
824	fptemp = st1;
825	fpsrcop1.d = ST0;
826	fptemp1.d = ST1;
827	expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
828
829	if (expdif < `0`) {
830	/ optimisation? taken from the AMD docs /
831	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
832	/ ST0 is unchanged /
833	return;
834	}
835
836	if (expdif < `53`) {
837	dblq = fpsrcop / fptemp; / ST0 / ST1 /
838	/ round dblq towards zero /
839	dblq = (dblq < `0.0`) ? ceil(dblq) : floor(dblq);
840	st0 = fpsrcop - fptemp * dblq; / fpsrcop is ST0 /
841
842	/ convert dblq to q by truncating towards zero /
843	if (dblq < `0.0`) {
844	q = (signed long long int)(-dblq);
845	} else {
846	q = (signed long long int)dblq;
847	}
848
849	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
850	/ (C0,C3,C1) <-- (q2,q1,q0) /
851	env->fpus \|= (q & `0x4`) << (`8` - `2`); / (C0) <-- q2 /
852	env->fpus \|= (q & `0x2`) << (`14` - `1`); / (C3) <-- q1 /
853	env->fpus \|= (q & `0x1`) << (`9` - `0`); / (C1) <-- q0 /
854	} else {
855	int N = `32` + (expdif % `32`); / as per AMD docs /
856
857	env->fpus \|= `0x400`; / C2 <-- 1 /
858	fptemp = pow(`2.0`, (double)(expdif - N));
859	fpsrcop = (st0 / st1) / fptemp;
860	/ fpsrcop = integer obtained by chopping /
861	fpsrcop = (fpsrcop < `0.0`) ?
862	-(floor(fabs(fpsrcop))) : floor(fpsrcop);
863	st0 -= (st1 * fpsrcop * fptemp);
864	}
865	ST0 = double_to_floatx80(env, st0);
866	}
867
868	void helper_fyl2xp1(CPUX86State *env)
869	{
870	double fptemp = floatx80_to_double(env, ST0);
871
872	if ((fptemp + `1.0`) > `0.0`) {
873	fptemp = log(fptemp + `1.0`) / log(`2.0`); / log2(ST + 1.0) /
874	fptemp *= floatx80_to_double(env, ST1);
875	ST1 = double_to_floatx80(env, fptemp);
876	fpop(env);
877	} else {
878	env->fpus &= ~`0x4700`;
879	env->fpus \|= `0x400`;
880	}
881	}
882
883	void helper_fsqrt(CPUX86State *env)
884	{
885	if (floatx80_is_neg(ST0)) {
886	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
887	env->fpus \|= `0x400`;
888	}
889	ST0 = floatx80_sqrt(ST0, &env->fp_status);
890	}
891
892	void helper_fsincos(CPUX86State *env)
893	{
894	double fptemp = floatx80_to_double(env, ST0);
895
896	if ((fptemp > MAXTAN) \|\| (fptemp < -MAXTAN)) {
897	env->fpus \|= `0x400`;
898	} else {
899	ST0 = double_to_floatx80(env, sin(fptemp));
900	fpush(env);
901	ST0 = double_to_floatx80(env, cos(fptemp));
902	env->fpus &= ~`0x400`; / C2 <-- 0 /
903	/ the above code is for \|arg\| < 2*63 only /*
904	}
905	}
906
907	void helper_frndint(CPUX86State *env)
908	{
909	ST0 = floatx80_round_to_int(ST0, &env->fp_status);
910	}
911
912	void helper_fscale(CPUX86State *env)
913	{
914	if (floatx80_is_any_nan(ST1)) {
915	ST0 = ST1;
916	} else {
917	int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
918	ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
919	}
920	}
921
922	void helper_fsin(CPUX86State *env)
923	{
924	double fptemp = floatx80_to_double(env, ST0);
925
926	if ((fptemp > MAXTAN) \|\| (fptemp < -MAXTAN)) {
927	env->fpus \|= `0x400`;
928	} else {
929	ST0 = double_to_floatx80(env, sin(fptemp));
930	env->fpus &= ~`0x400`; / C2 <-- 0 /
931	/ the above code is for \|arg\| < 2*53 only /*
932	}
933	}
934
935	void helper_fcos(CPUX86State *env)
936	{
937	double fptemp = floatx80_to_double(env, ST0);
938
939	if ((fptemp > MAXTAN) \|\| (fptemp < -MAXTAN)) {
940	env->fpus \|= `0x400`;
941	} else {
942	ST0 = double_to_floatx80(env, cos(fptemp));
943	env->fpus &= ~`0x400`; / C2 <-- 0 /
944	/ the above code is for \|arg\| < 2*63 only /*
945	}
946	}
947
948	void helper_fxam_ST0(CPUX86State *env)
949	{
950	CPU_LDoubleU temp;
951	int expdif;
952
953	temp.d = ST0;
954
955	env->fpus &= ~`0x4700`; / (C3,C2,C1,C0) <-- 0000 /
956	if (SIGND(temp)) {
957	env->fpus \|= `0x200`; / C1 <-- 1 /
958	}
959
960	/ XXX: test fptags too /
961	expdif = EXPD(temp);
962	if (expdif == MAXEXPD) {
963	if (MANTD(temp) == `0x8000000000000000ULL`) {
964	env->fpus \|= `0x500`; / Infinity /
965	} else {
966	env->fpus \|= `0x100`; / NaN /
967	}
968	} else if (expdif == `0`) {
969	if (MANTD(temp) == `0`) {
970	env->fpus \|= `0x4000`; / Zero /
971	} else {
972	env->fpus \|= `0x4400`; / Denormal /
973	}
974	} else {
975	env->fpus \|= `0x400`;
976	}
977	}
978
979	static void do_fstenv(CPUX86State env, target_ulong ptr, int* data32,
980	uintptr_t retaddr)
981	{
982	int fpus, fptag, exp, i;
983	uint64_t mant;
984	CPU_LDoubleU tmp;
985
986	fpus = (env->fpus & ~`0x3800`) \| (env->fpstt & `0x7`) << `11`;
987	fptag = `0`;
988	for (i = `7`; i >= `0`; i--) {
989	fptag <<= `2`;
990	if (env->fptags[i]) {
991	fptag \|= `3`;
992	} else {
993	tmp.d = env->fpregs[i].d;
994	exp = EXPD(tmp);
995	mant = MANTD(tmp);
996	if (exp == `0` && mant == `0`) {
997	/ zero /
998	fptag \|= `1`;
999	} else if (exp == `0` \|\| exp == MAXEXPD
1000	\|\| (mant & (`1LL` << `63`)) == `0`) {
1001	/ NaNs, infinity, denormal /
1002	fptag \|= `2`;
1003	}
1004	}
1005	}
1006	if (data32) {
1007	/ 32 bit /
1008	cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1009	cpu_stl_data_ra(env, ptr + `4`, fpus, retaddr);
1010	cpu_stl_data_ra(env, ptr + `8`, fptag, retaddr);
1011	cpu_stl_data_ra(env, ptr + `12`, `0`, retaddr); / fpip /
1012	cpu_stl_data_ra(env, ptr + `16`, `0`, retaddr); / fpcs /
1013	cpu_stl_data_ra(env, ptr + `20`, `0`, retaddr); / fpoo /
1014	cpu_stl_data_ra(env, ptr + `24`, `0`, retaddr); / fpos /
1015	} else {
1016	/ 16 bit /
1017	cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1018	cpu_stw_data_ra(env, ptr + `2`, fpus, retaddr);
1019	cpu_stw_data_ra(env, ptr + `4`, fptag, retaddr);
1020	cpu_stw_data_ra(env, ptr + `6`, `0`, retaddr);
1021	cpu_stw_data_ra(env, ptr + `8`, `0`, retaddr);
1022	cpu_stw_data_ra(env, ptr + `10`, `0`, retaddr);
1023	cpu_stw_data_ra(env, ptr + `12`, `0`, retaddr);
1024	}
1025	}
1026
1027	void helper_fstenv(CPUX86State env, target_ulong ptr, int* data32)
1028	{
1029	do_fstenv(env, ptr, data32, GETPC());
1030	}
1031
1032	static void do_fldenv(CPUX86State env, target_ulong ptr, int* data32,
1033	uintptr_t retaddr)
1034	{
1035	int i, fpus, fptag;
1036
1037	if (data32) {
1038	cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1039	fpus = cpu_lduw_data_ra(env, ptr + `4`, retaddr);
1040	fptag = cpu_lduw_data_ra(env, ptr + `8`, retaddr);
1041	} else {
1042	cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043	fpus = cpu_lduw_data_ra(env, ptr + `2`, retaddr);
1044	fptag = cpu_lduw_data_ra(env, ptr + `4`, retaddr);
1045	}
1046	env->fpstt = (fpus >> `11`) & `7`;
1047	env->fpus = fpus & ~`0x3800`;
1048	for (i = `0`; i < `8`; i++) {
1049	env->fptags[i] = ((fptag & `3`) == `3`);
1050	fptag >>= `2`;
1051	}
1052	}
1053
1054	void helper_fldenv(CPUX86State env, target_ulong ptr, int* data32)
1055	{
1056	do_fldenv(env, ptr, data32, GETPC());
1057	}
1058
1059	void helper_fsave(CPUX86State env, target_ulong ptr, int* data32)
1060	{
1061	floatx80 tmp;
1062	int i;
1063
1064	do_fstenv(env, ptr, data32, GETPC());
1065
1066	ptr += (`14` << data32);
1067	for (i = `0`; i < `8`; i++) {
1068	tmp = ST(i);
1069	helper_fstt(env, tmp, ptr, GETPC());
1070	ptr += `10`;
1071	}
1072
1073	/ fninit /
1074	env->fpus = `0`;
1075	env->fpstt = `0`;
1076	cpu_set_fpuc(env, `0x37f`);
1077	env->fptags[`0`] = `1`;
1078	env->fptags[`1`] = `1`;
1079	env->fptags[`2`] = `1`;
1080	env->fptags[`3`] = `1`;
1081	env->fptags[`4`] = `1`;
1082	env->fptags[`5`] = `1`;
1083	env->fptags[`6`] = `1`;
1084	env->fptags[`7`] = `1`;
1085	}
1086
1087	void helper_frstor(CPUX86State env, target_ulong ptr, int* data32)
1088	{
1089	floatx80 tmp;
1090	int i;
1091
1092	do_fldenv(env, ptr, data32, GETPC());
1093	ptr += (`14` << data32);
1094
1095	for (i = `0`; i < `8`; i++) {
1096	tmp = helper_fldt(env, ptr, GETPC());
1097	ST(i) = tmp;
1098	ptr += `10`;
1099	}
1100	}
1101
#if defined(CONFIG_USER_ONLY)
/* User-mode-only wrapper that forwards to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env, ptr, data32);
}

/* User-mode-only wrapper that forwards to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env, ptr, data32);
}
#endif
1113
/* Byte offset of a member (possibly nested) within X86XSaveArea. */
#define XO(field) offsetof(X86XSaveArea, field)
1115
1116	static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1117	{
1118	int fpus, fptag, i;
1119	target_ulong addr;
1120
1121	fpus = (env->fpus & ~`0x3800`) \| (env->fpstt & `0x7`) << `11`;
1122	fptag = `0`;
1123	for (i = `0`; i < `8`; i++) {
1124	fptag \|= (env->fptags[i] << i);
1125	}
1126
1127	cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1128	cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1129	cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ `0xff`, ra);
1130
1131	/ In 32-bit mode this is eip, sel, dp, sel.*
1132	In 64-bit mode this is rip, rdp.
1133	But in either case we don't write actual data, just zeros. /*
1134	cpu_stq_data_ra(env, ptr + XO(legacy.fpip), `0`, ra); / eip+sel; rip /
1135	cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), `0`, ra); / edp+sel; rdp /
1136
1137	addr = ptr + XO(legacy.fpregs);
1138	for (i = `0`; i < `8`; i++) {
1139	floatx80 tmp = ST(i);
1140	helper_fstt(env, tmp, addr, ra);
1141	addr += `16`;
1142	}
1143	}
1144
1145	static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1146	{
1147	cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1148	cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), `0x0000ffff`, ra);
1149	}
1150
1151	static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1152	{
1153	int i, nb_xmm_regs;
1154	target_ulong addr;
1155
1156	if (env->hflags & HF_CS64_MASK) {
1157	nb_xmm_regs = `16`;
1158	} else {
1159	nb_xmm_regs = `8`;
1160	}
1161
1162	addr = ptr + XO(legacy.xmm_regs);
1163	for (i = `0`; i < nb_xmm_regs; i++) {
1164	cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(`0`), ra);
1165	cpu_stq_data_ra(env, addr + `8`, env->xmm_regs[i].ZMM_Q(`1`), ra);
1166	addr += `16`;
1167	}
1168	}
1169
1170	static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1171	{
1172	target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1173	int i;
1174
1175	for (i = `0`; i < `4`; i++, addr += `16`) {
1176	cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177	cpu_stq_data_ra(env, addr + `8`, env->bnd_regs[i].ub, ra);
1178	}
1179	}
1180
1181	static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1182	{
1183	cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1184	env->bndcs_regs.cfgu, ra);
1185	cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1186	env->bndcs_regs.sts, ra);
1187	}
1188
1189	static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1190	{
1191	cpu_stq_data_ra(env, ptr, env->pkru, ra);
1192	}
1193
1194	void helper_fxsave(CPUX86State *env, target_ulong ptr)
1195	{
1196	uintptr_t ra = GETPC();
1197
1198	/ The operand must be 16 byte aligned /
1199	if (ptr & `0xf`) {
1200	raise_exception_ra(env, EXCP0D_GPF, ra);
1201	}
1202
1203	do_xsave_fpu(env, ptr, ra);
1204
1205	if (env->cr[`4`] & CR4_OSFXSR_MASK) {
1206	do_xsave_mxcsr(env, ptr, ra);
1207	/ Fast FXSAVE leaves out the XMM registers /
1208	if (!(env->efer & MSR_EFER_FFXSR)
1209	\|\| (env->hflags & HF_CPL_MASK)
1210	\|\| !(env->hflags & HF_LMA_MASK)) {
1211	do_xsave_sse(env, ptr, ra);
1212	}
1213	}
1214	}
1215
1216	static uint64_t get_xinuse(CPUX86State *env)
1217	{
1218	uint64_t inuse = -`1`;
1219
1220	/ For the most part, we don't track XINUSE. We could calculate it*
1221	here for all components, but it's probably less work to simply
1222	indicate in use. That said, the state of BNDREGS is important
1223	enough to track in HFLAGS, so we might as well use that here. /*
1224	if ((env->hflags & HF_MPX_IU_MASK) == `0`) {
1225	inuse &= ~XSTATE_BNDREGS_MASK;
1226	}
1227	return inuse;
1228	}
1229
1230	static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1231	uint64_t inuse, uint64_t opt, uintptr_t ra)
1232	{
1233	uint64_t old_bv, new_bv;
1234
1235	/ The OS must have enabled XSAVE. /
1236	if (!(env->cr[`4`] & CR4_OSXSAVE_MASK)) {
1237	raise_exception_ra(env, EXCP06_ILLOP, ra);
1238	}
1239
1240	/ The operand must be 64 byte aligned. /
1241	if (ptr & `63`) {
1242	raise_exception_ra(env, EXCP0D_GPF, ra);
1243	}
1244
1245	/ Never save anything not enabled by XCR0. /
1246	rfbm &= env->xcr0;
1247	opt &= rfbm;
1248
1249	if (opt & XSTATE_FP_MASK) {
1250	do_xsave_fpu(env, ptr, ra);
1251	}
1252	if (rfbm & XSTATE_SSE_MASK) {
1253	/ Note that saving MXCSR is not suppressed by XSAVEOPT. /
1254	do_xsave_mxcsr(env, ptr, ra);
1255	}
1256	if (opt & XSTATE_SSE_MASK) {
1257	do_xsave_sse(env, ptr, ra);
1258	}
1259	if (opt & XSTATE_BNDREGS_MASK) {
1260	do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1261	}
1262	if (opt & XSTATE_BNDCSR_MASK) {
1263	do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1264	}
1265	if (opt & XSTATE_PKRU_MASK) {
1266	do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1267	}
1268
1269	/ Update the XSTATE_BV field. /
1270	old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1271	new_bv = (old_bv & ~rfbm) \| (inuse & rfbm);
1272	cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1273	}
1274
1275	void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1276	{
1277	do_xsave(env, ptr, rfbm, get_xinuse(env), -`1`, GETPC());
1278	}
1279
1280	void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1281	{
1282	uint64_t inuse = get_xinuse(env);
1283	do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1284	}
1285
1286	static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1287	{
1288	int i, fpuc, fpus, fptag;
1289	target_ulong addr;
1290
1291	fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1292	fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1293	fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1294	cpu_set_fpuc(env, fpuc);
1295	env->fpstt = (fpus >> `11`) & `7`;
1296	env->fpus = fpus & ~`0x3800`;
1297	fptag ^= `0xff`;
1298	for (i = `0`; i < `8`; i++) {
1299	env->fptags[i] = ((fptag >> i) & `1`);
1300	}
1301
1302	addr = ptr + XO(legacy.fpregs);
1303	for (i = `0`; i < `8`; i++) {
1304	floatx80 tmp = helper_fldt(env, addr, ra);
1305	ST(i) = tmp;
1306	addr += `16`;
1307	}
1308	}
1309
1310	static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1311	{
1312	cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1313	}
1314
1315	static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1316	{
1317	int i, nb_xmm_regs;
1318	target_ulong addr;
1319
1320	if (env->hflags & HF_CS64_MASK) {
1321	nb_xmm_regs = `16`;
1322	} else {
1323	nb_xmm_regs = `8`;
1324	}
1325
1326	addr = ptr + XO(legacy.xmm_regs);
1327	for (i = `0`; i < nb_xmm_regs; i++) {
1328	env->xmm_regs[i].ZMM_Q(`0`) = cpu_ldq_data_ra(env, addr, ra);
1329	env->xmm_regs[i].ZMM_Q(`1`) = cpu_ldq_data_ra(env, addr + `8`, ra);
1330	addr += `16`;
1331	}
1332	}
1333
1334	static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1335	{
1336	target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1337	int i;
1338
1339	for (i = `0`; i < `4`; i++, addr += `16`) {
1340	env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341	env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + `8`, ra);
1342	}
1343	}
1344
1345	static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1346	{
1347	/ FIXME: Extend highest implemented bit of linear address. /
1348	env->bndcs_regs.cfgu
1349	= cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1350	env->bndcs_regs.sts
1351	= cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1352	}
1353
1354	static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1355	{
1356	env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1357	}
1358
1359	void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1360	{
1361	uintptr_t ra = GETPC();
1362
1363	/ The operand must be 16 byte aligned /
1364	if (ptr & `0xf`) {
1365	raise_exception_ra(env, EXCP0D_GPF, ra);
1366	}
1367
1368	do_xrstor_fpu(env, ptr, ra);
1369
1370	if (env->cr[`4`] & CR4_OSFXSR_MASK) {
1371	do_xrstor_mxcsr(env, ptr, ra);
1372	/ Fast FXRSTOR leaves out the XMM registers /
1373	if (!(env->efer & MSR_EFER_FFXSR)
1374	\|\| (env->hflags & HF_CPL_MASK)
1375	\|\| !(env->hflags & HF_LMA_MASK)) {
1376	do_xrstor_sse(env, ptr, ra);
1377	}
1378	}
1379	}
1380
#if defined(CONFIG_USER_ONLY)
/*
 * User-mode-only entry points that forward directly to the FXSAVE and
 * FXRSTOR helpers.
 */

/* Save the FPU/SSE state to guest address @ptr via helper_fxsave(). */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

/* Reload the FPU/SSE state from guest address @ptr via helper_fxrstor(). */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif /* CONFIG_USER_ONLY */
1392
1393	void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1394	{
1395	uintptr_t ra = GETPC();
1396	uint64_t xstate_bv, xcomp_bv, reserve0;
1397
1398	rfbm &= env->xcr0;
1399
1400	/ The OS must have enabled XSAVE. /
1401	if (!(env->cr[`4`] & CR4_OSXSAVE_MASK)) {
1402	raise_exception_ra(env, EXCP06_ILLOP, ra);
1403	}
1404
1405	/ The operand must be 64 byte aligned. /
1406	if (ptr & `63`) {
1407	raise_exception_ra(env, EXCP0D_GPF, ra);
1408	}
1409
1410	xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1411
1412	if ((int64_t)xstate_bv < `0`) {
1413	/ FIXME: Compact form. /
1414	raise_exception_ra(env, EXCP0D_GPF, ra);
1415	}
1416
1417	/ Standard form. /
1418
1419	/ The XSTATE_BV field must not set bits not present in XCR0. /
1420	if (xstate_bv & ~env->xcr0) {
1421	raise_exception_ra(env, EXCP0D_GPF, ra);
1422	}
1423
1424	/ The XCOMP_BV field must be zero. Note that, as of the April 2016*
1425	revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1426	describes only XCOMP_BV, but the description of the standard form
1427	of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1428	includes the next 64-bit field. /*
1429	xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1430	reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1431	if (xcomp_bv \|\| reserve0) {
1432	raise_exception_ra(env, EXCP0D_GPF, ra);
1433	}
1434
1435	if (rfbm & XSTATE_FP_MASK) {
1436	if (xstate_bv & XSTATE_FP_MASK) {
1437	do_xrstor_fpu(env, ptr, ra);
1438	} else {
1439	helper_fninit(env);
1440	memset(env->fpregs, `0`, sizeof(env->fpregs));
1441	}
1442	}
1443	if (rfbm & XSTATE_SSE_MASK) {
1444	/ Note that the standard form of XRSTOR loads MXCSR from memory*
1445	whether or not the XSTATE_BV bit is set. /*
1446	do_xrstor_mxcsr(env, ptr, ra);
1447	if (xstate_bv & XSTATE_SSE_MASK) {
1448	do_xrstor_sse(env, ptr, ra);
1449	} else {
1450	/ ??? When AVX is implemented, we may have to be more*
1451	selective in the clearing. /*
1452	memset(env->xmm_regs, `0`, sizeof(env->xmm_regs));
1453	}
1454	}
1455	if (rfbm & XSTATE_BNDREGS_MASK) {
1456	if (xstate_bv & XSTATE_BNDREGS_MASK) {
1457	do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1458	env->hflags \|= HF_MPX_IU_MASK;
1459	} else {
1460	memset(env->bnd_regs, `0`, sizeof(env->bnd_regs));
1461	env->hflags &= ~HF_MPX_IU_MASK;
1462	}
1463	}
1464	if (rfbm & XSTATE_BNDCSR_MASK) {
1465	if (xstate_bv & XSTATE_BNDCSR_MASK) {
1466	do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1467	} else {
1468	memset(&env->bndcs_regs, `0`, sizeof(env->bndcs_regs));
1469	}
1470	cpu_sync_bndcs_hflags(env);
1471	}
1472	if (rfbm & XSTATE_PKRU_MASK) {
1473	uint64_t old_pkru = env->pkru;
1474	if (xstate_bv & XSTATE_PKRU_MASK) {
1475	do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1476	} else {
1477	env->pkru = `0`;
1478	}
1479	if (env->pkru != old_pkru) {
1480	CPUState *cs = env_cpu(env);
1481	tlb_flush(cs);
1482	}
1483	}
1484	}
1485
1486	#undef XO
1487
1488	uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1489	{
1490	/ The OS must have enabled XSAVE. /
1491	if (!(env->cr[`4`] & CR4_OSXSAVE_MASK)) {
1492	raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1493	}
1494
1495	switch (ecx) {
1496	case `0`:
1497	return env->xcr0;
1498	case `1`:
1499	if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1500	return env->xcr0 & get_xinuse(env);
1501	}
1502	break;
1503	}
1504	raise_exception_ra(env, EXCP0D_GPF, GETPC());
1505	}
1506
1507	void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1508	{
1509	uint32_t dummy, ena_lo, ena_hi;
1510	uint64_t ena;
1511
1512	/ The OS must have enabled XSAVE. /
1513	if (!(env->cr[`4`] & CR4_OSXSAVE_MASK)) {
1514	raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1515	}
1516
1517	/ Only XCR0 is defined at present; the FPU may not be disabled. /
1518	if (ecx != `0` \|\| (mask & XSTATE_FP_MASK) == `0`) {
1519	goto do_gpf;
1520	}
1521
1522	/ Disallow enabling unimplemented features. /
1523	cpu_x86_cpuid(env, `0x0d`, `0`, &ena_lo, &dummy, &dummy, &ena_hi);
1524	ena = ((uint64_t)ena_hi << `32`) \| ena_lo;
1525	if (mask & ~ena) {
1526	goto do_gpf;
1527	}
1528
1529	/ Disallow enabling only half of MPX. /
1530	if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1531	& XSTATE_BNDCSR_MASK) {
1532	goto do_gpf;
1533	}
1534
1535	env->xcr0 = mask;
1536	cpu_sync_bndcs_hflags(env);
1537	return;
1538
1539	do_gpf:
1540	raise_exception_ra(env, EXCP0D_GPF, GETPC());
1541	}
1542
/* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */

/* MXCSR control bits decoded by update_mxcsr_status(). */
#define SSE_DAZ     0x0040  /* denormals are zero */
#define SSE_RC_MASK 0x6000  /* rounding-control field */
#define SSE_RC_NEAR 0x0000  /*   round to nearest even */
#define SSE_RC_DOWN 0x2000  /*   round down */
#define SSE_RC_UP   0x4000  /*   round up */
#define SSE_RC_CHOP 0x6000  /*   round toward zero */
#define SSE_FZ      0x8000  /* flush to zero */
1553
1554	void update_mxcsr_status(CPUX86State *env)
1555	{
1556	uint32_t mxcsr = env->mxcsr;
1557	int rnd_type;
1558
1559	/ set rounding mode /
1560	switch (mxcsr & SSE_RC_MASK) {
1561	default:
1562	case SSE_RC_NEAR:
1563	rnd_type = float_round_nearest_even;
1564	break;
1565	case SSE_RC_DOWN:
1566	rnd_type = float_round_down;
1567	break;
1568	case SSE_RC_UP:
1569	rnd_type = float_round_up;
1570	break;
1571	case SSE_RC_CHOP:
1572	rnd_type = float_round_to_zero;
1573	break;
1574	}
1575	set_float_rounding_mode(rnd_type, &env->sse_status);
1576
1577	/ set denormals are zero /
1578	set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? `1` : `0`, &env->sse_status);
1579
1580	/ set flush to zero /
1581	set_flush_to_zero((mxcsr & SSE_FZ) ? `1` : `0`, &env->fp_status);
1582	}
1583
1584	void helper_ldmxcsr(CPUX86State *env, uint32_t val)
1585	{
1586	cpu_set_mxcsr(env, val);
1587	}
1588
1589	void helper_enter_mmx(CPUX86State *env)
1590	{
1591	env->fpstt = `0`;
1592	(uint32_t )(env->fptags) = `0`;
1593	(uint32_t )(env->fptags + `4`) = `0`;
1594	}
1595
1596	void helper_emms(CPUX86State *env)
1597	{
1598	/ set to empty state /
1599	(uint32_t )(env->fptags) = `0x01010101`;
1600	(uint32_t )(env->fptags + `4`) = `0x01010101`;
1601	}
1602
1603	/ XXX: suppress /
1604	void helper_movq(CPUX86State env, void* d, void* *s)
1605	{
1606	(uint64_t )d = (uint64_t )s;
1607	}
1608
1609	#define SHIFT 0
1610	#include "ops_sse.h"
1611
1612	#define SHIFT 1
1613	#include "ops_sse.h"
1614

Browse the source code of qemu/target/i386/fpu_helper.c