1/*
2 * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers
3 *
4 * Copyright (c) 2003 Fabrice Bellard
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18 */
19
20#include "qemu/osdep.h"
21#include <math.h>
22#include "cpu.h"
23#include "exec/helper-proto.h"
24#include "qemu/host-utils.h"
25#include "exec/exec-all.h"
26#include "exec/cpu_ldst.h"
27#include "fpu/softfloat.h"
28
/*
 * Rounding-control field of the x87 control word and its four encodings.
 * update_fp_status() maps these onto the softfloat rounding modes.
 */
#define FPU_RC_MASK         0xc00
#define FPU_RC_NEAR         0x000   /* round to nearest even */
#define FPU_RC_DOWN         0x400   /* round down */
#define FPU_RC_UP           0x800   /* round up */
#define FPU_RC_CHOP         0xc00   /* round toward zero */

/*
 * Largest magnitude accepted by the trigonometric helpers in this file;
 * operands beyond it only get C2 set in the status word.
 */
#define MAXTAN 9223372036854775808.0
36
/*
 * Field accessors for x86 80-bit extended-precision values.
 *
 * Each macro takes an lvalue whose layout provides `.l.upper` (sign bit and
 * 15-bit biased exponent) and `.l.lower` (64-bit significand), as
 * CPU_LDoubleU is used throughout this file.  The argument is parenthesized
 * so that expressions such as `*p` expand correctly.
 */
#define MAXEXPD 0x7fff              /* all-ones biased exponent */
#define EXPBIAS 16383               /* exponent bias */
#define EXPD(fp)        ((fp).l.upper & 0x7fff)
#define SIGND(fp)       (((fp).l.upper) & 0x8000)
#define MANTD(fp)       ((fp).l.lower)
/* Replace the exponent field with the bias, keeping the sign bit. */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)
44
/* Bits of the x87 status word (env->fpus) used by this file. */
#define FPUS_IE     (1 << 0)
#define FPUS_DE     (1 << 1)
#define FPUS_ZE     (1 << 2)
#define FPUS_OE     (1 << 3)
#define FPUS_UE     (1 << 4)
#define FPUS_PE     (1 << 5)
#define FPUS_SF     (1 << 6)
#define FPUS_SE     (1 << 7)    /* set by fpu_set_exception() on an unmasked exception */
#define FPUS_B      (1 << 15)   /* set together with FPUS_SE */

/* Exception-mask bits of the control word (env->fpuc). */
#define FPUC_EM     0x3f

/* Extended-precision constants loaded by the fldlg2/fldl2e/fldl2t helpers. */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
60
61static inline void fpush(CPUX86State *env)
62{
63 env->fpstt = (env->fpstt - 1) & 7;
64 env->fptags[env->fpstt] = 0; /* validate stack entry */
65}
66
67static inline void fpop(CPUX86State *env)
68{
69 env->fptags[env->fpstt] = 1; /* invalidate stack entry */
70 env->fpstt = (env->fpstt + 1) & 7;
71}
72
73static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr,
74 uintptr_t retaddr)
75{
76 CPU_LDoubleU temp;
77
78 temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr);
79 temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr);
80 return temp.d;
81}
82
83static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr,
84 uintptr_t retaddr)
85{
86 CPU_LDoubleU temp;
87
88 temp.d = f;
89 cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr);
90 cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr);
91}
92
93/* x87 FPU helpers */
94
95static inline double floatx80_to_double(CPUX86State *env, floatx80 a)
96{
97 union {
98 float64 f64;
99 double d;
100 } u;
101
102 u.f64 = floatx80_to_float64(a, &env->fp_status);
103 return u.d;
104}
105
106static inline floatx80 double_to_floatx80(CPUX86State *env, double a)
107{
108 union {
109 float64 f64;
110 double d;
111 } u;
112
113 u.d = a;
114 return float64_to_floatx80(u.f64, &env->fp_status);
115}
116
117static void fpu_set_exception(CPUX86State *env, int mask)
118{
119 env->fpus |= mask;
120 if (env->fpus & (~env->fpuc & FPUC_EM)) {
121 env->fpus |= FPUS_SE | FPUS_B;
122 }
123}
124
125static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b)
126{
127 if (floatx80_is_zero(b)) {
128 fpu_set_exception(env, FPUS_ZE);
129 }
130 return floatx80_div(a, b, &env->fp_status);
131}
132
133static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr)
134{
135 if (env->cr[0] & CR0_NE_MASK) {
136 raise_exception_ra(env, EXCP10_COPR, retaddr);
137 }
138#if !defined(CONFIG_USER_ONLY)
139 else {
140 cpu_set_ferr(env);
141 }
142#endif
143}
144
145void helper_flds_FT0(CPUX86State *env, uint32_t val)
146{
147 union {
148 float32 f;
149 uint32_t i;
150 } u;
151
152 u.i = val;
153 FT0 = float32_to_floatx80(u.f, &env->fp_status);
154}
155
156void helper_fldl_FT0(CPUX86State *env, uint64_t val)
157{
158 union {
159 float64 f;
160 uint64_t i;
161 } u;
162
163 u.i = val;
164 FT0 = float64_to_floatx80(u.f, &env->fp_status);
165}
166
167void helper_fildl_FT0(CPUX86State *env, int32_t val)
168{
169 FT0 = int32_to_floatx80(val, &env->fp_status);
170}
171
172void helper_flds_ST0(CPUX86State *env, uint32_t val)
173{
174 int new_fpstt;
175 union {
176 float32 f;
177 uint32_t i;
178 } u;
179
180 new_fpstt = (env->fpstt - 1) & 7;
181 u.i = val;
182 env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status);
183 env->fpstt = new_fpstt;
184 env->fptags[new_fpstt] = 0; /* validate stack entry */
185}
186
187void helper_fldl_ST0(CPUX86State *env, uint64_t val)
188{
189 int new_fpstt;
190 union {
191 float64 f;
192 uint64_t i;
193 } u;
194
195 new_fpstt = (env->fpstt - 1) & 7;
196 u.i = val;
197 env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status);
198 env->fpstt = new_fpstt;
199 env->fptags[new_fpstt] = 0; /* validate stack entry */
200}
201
202void helper_fildl_ST0(CPUX86State *env, int32_t val)
203{
204 int new_fpstt;
205
206 new_fpstt = (env->fpstt - 1) & 7;
207 env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status);
208 env->fpstt = new_fpstt;
209 env->fptags[new_fpstt] = 0; /* validate stack entry */
210}
211
212void helper_fildll_ST0(CPUX86State *env, int64_t val)
213{
214 int new_fpstt;
215
216 new_fpstt = (env->fpstt - 1) & 7;
217 env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status);
218 env->fpstt = new_fpstt;
219 env->fptags[new_fpstt] = 0; /* validate stack entry */
220}
221
222uint32_t helper_fsts_ST0(CPUX86State *env)
223{
224 union {
225 float32 f;
226 uint32_t i;
227 } u;
228
229 u.f = floatx80_to_float32(ST0, &env->fp_status);
230 return u.i;
231}
232
233uint64_t helper_fstl_ST0(CPUX86State *env)
234{
235 union {
236 float64 f;
237 uint64_t i;
238 } u;
239
240 u.f = floatx80_to_float64(ST0, &env->fp_status);
241 return u.i;
242}
243
244int32_t helper_fist_ST0(CPUX86State *env)
245{
246 int32_t val;
247
248 val = floatx80_to_int32(ST0, &env->fp_status);
249 if (val != (int16_t)val) {
250 val = -32768;
251 }
252 return val;
253}
254
255int32_t helper_fistl_ST0(CPUX86State *env)
256{
257 int32_t val;
258 signed char old_exp_flags;
259
260 old_exp_flags = get_float_exception_flags(&env->fp_status);
261 set_float_exception_flags(0, &env->fp_status);
262
263 val = floatx80_to_int32(ST0, &env->fp_status);
264 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
265 val = 0x80000000;
266 }
267 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
268 | old_exp_flags, &env->fp_status);
269 return val;
270}
271
272int64_t helper_fistll_ST0(CPUX86State *env)
273{
274 int64_t val;
275 signed char old_exp_flags;
276
277 old_exp_flags = get_float_exception_flags(&env->fp_status);
278 set_float_exception_flags(0, &env->fp_status);
279
280 val = floatx80_to_int64(ST0, &env->fp_status);
281 if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) {
282 val = 0x8000000000000000ULL;
283 }
284 set_float_exception_flags(get_float_exception_flags(&env->fp_status)
285 | old_exp_flags, &env->fp_status);
286 return val;
287}
288
289int32_t helper_fistt_ST0(CPUX86State *env)
290{
291 int32_t val;
292
293 val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
294 if (val != (int16_t)val) {
295 val = -32768;
296 }
297 return val;
298}
299
300int32_t helper_fisttl_ST0(CPUX86State *env)
301{
302 return floatx80_to_int32_round_to_zero(ST0, &env->fp_status);
303}
304
305int64_t helper_fisttll_ST0(CPUX86State *env)
306{
307 return floatx80_to_int64_round_to_zero(ST0, &env->fp_status);
308}
309
310void helper_fldt_ST0(CPUX86State *env, target_ulong ptr)
311{
312 int new_fpstt;
313
314 new_fpstt = (env->fpstt - 1) & 7;
315 env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC());
316 env->fpstt = new_fpstt;
317 env->fptags[new_fpstt] = 0; /* validate stack entry */
318}
319
320void helper_fstt_ST0(CPUX86State *env, target_ulong ptr)
321{
322 helper_fstt(env, ST0, ptr, GETPC());
323}
324
325void helper_fpush(CPUX86State *env)
326{
327 fpush(env);
328}
329
330void helper_fpop(CPUX86State *env)
331{
332 fpop(env);
333}
334
335void helper_fdecstp(CPUX86State *env)
336{
337 env->fpstt = (env->fpstt - 1) & 7;
338 env->fpus &= ~0x4700;
339}
340
341void helper_fincstp(CPUX86State *env)
342{
343 env->fpstt = (env->fpstt + 1) & 7;
344 env->fpus &= ~0x4700;
345}
346
347/* FPU move */
348
349void helper_ffree_STN(CPUX86State *env, int st_index)
350{
351 env->fptags[(env->fpstt + st_index) & 7] = 1;
352}
353
354void helper_fmov_ST0_FT0(CPUX86State *env)
355{
356 ST0 = FT0;
357}
358
359void helper_fmov_FT0_STN(CPUX86State *env, int st_index)
360{
361 FT0 = ST(st_index);
362}
363
364void helper_fmov_ST0_STN(CPUX86State *env, int st_index)
365{
366 ST0 = ST(st_index);
367}
368
369void helper_fmov_STN_ST0(CPUX86State *env, int st_index)
370{
371 ST(st_index) = ST0;
372}
373
374void helper_fxchg_ST0_STN(CPUX86State *env, int st_index)
375{
376 floatx80 tmp;
377
378 tmp = ST(st_index);
379 ST(st_index) = ST0;
380 ST0 = tmp;
381}
382
383/* FPU operations */
384
/*
 * Status-word condition bits for a comparison result, indexed by the
 * softfloat compare return value plus one.
 * NOTE(review): presumably the order is less / equal / greater / unordered,
 * per softfloat's relation codes -- confirm.
 */
static const int fcom_ccval[4] = {
    0x0100,     /* C0 */
    0x4000,     /* C3 */
    0x0000,     /* none */
    0x4500,     /* C3, C2, C0 */
};
386
387void helper_fcom_ST0_FT0(CPUX86State *env)
388{
389 int ret;
390
391 ret = floatx80_compare(ST0, FT0, &env->fp_status);
392 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
393}
394
395void helper_fucom_ST0_FT0(CPUX86State *env)
396{
397 int ret;
398
399 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
400 env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1];
401}
402
403static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C};
404
405void helper_fcomi_ST0_FT0(CPUX86State *env)
406{
407 int eflags;
408 int ret;
409
410 ret = floatx80_compare(ST0, FT0, &env->fp_status);
411 eflags = cpu_cc_compute_all(env, CC_OP);
412 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
413 CC_SRC = eflags;
414}
415
416void helper_fucomi_ST0_FT0(CPUX86State *env)
417{
418 int eflags;
419 int ret;
420
421 ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status);
422 eflags = cpu_cc_compute_all(env, CC_OP);
423 eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1];
424 CC_SRC = eflags;
425}
426
427void helper_fadd_ST0_FT0(CPUX86State *env)
428{
429 ST0 = floatx80_add(ST0, FT0, &env->fp_status);
430}
431
432void helper_fmul_ST0_FT0(CPUX86State *env)
433{
434 ST0 = floatx80_mul(ST0, FT0, &env->fp_status);
435}
436
437void helper_fsub_ST0_FT0(CPUX86State *env)
438{
439 ST0 = floatx80_sub(ST0, FT0, &env->fp_status);
440}
441
442void helper_fsubr_ST0_FT0(CPUX86State *env)
443{
444 ST0 = floatx80_sub(FT0, ST0, &env->fp_status);
445}
446
447void helper_fdiv_ST0_FT0(CPUX86State *env)
448{
449 ST0 = helper_fdiv(env, ST0, FT0);
450}
451
452void helper_fdivr_ST0_FT0(CPUX86State *env)
453{
454 ST0 = helper_fdiv(env, FT0, ST0);
455}
456
457/* fp operations between STN and ST0 */
458
459void helper_fadd_STN_ST0(CPUX86State *env, int st_index)
460{
461 ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status);
462}
463
464void helper_fmul_STN_ST0(CPUX86State *env, int st_index)
465{
466 ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status);
467}
468
469void helper_fsub_STN_ST0(CPUX86State *env, int st_index)
470{
471 ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status);
472}
473
474void helper_fsubr_STN_ST0(CPUX86State *env, int st_index)
475{
476 ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status);
477}
478
479void helper_fdiv_STN_ST0(CPUX86State *env, int st_index)
480{
481 floatx80 *p;
482
483 p = &ST(st_index);
484 *p = helper_fdiv(env, *p, ST0);
485}
486
487void helper_fdivr_STN_ST0(CPUX86State *env, int st_index)
488{
489 floatx80 *p;
490
491 p = &ST(st_index);
492 *p = helper_fdiv(env, ST0, *p);
493}
494
495/* misc FPU operations */
496void helper_fchs_ST0(CPUX86State *env)
497{
498 ST0 = floatx80_chs(ST0);
499}
500
501void helper_fabs_ST0(CPUX86State *env)
502{
503 ST0 = floatx80_abs(ST0);
504}
505
506void helper_fld1_ST0(CPUX86State *env)
507{
508 ST0 = floatx80_one;
509}
510
511void helper_fldl2t_ST0(CPUX86State *env)
512{
513 ST0 = floatx80_l2t;
514}
515
516void helper_fldl2e_ST0(CPUX86State *env)
517{
518 ST0 = floatx80_l2e;
519}
520
521void helper_fldpi_ST0(CPUX86State *env)
522{
523 ST0 = floatx80_pi;
524}
525
526void helper_fldlg2_ST0(CPUX86State *env)
527{
528 ST0 = floatx80_lg2;
529}
530
531void helper_fldln2_ST0(CPUX86State *env)
532{
533 ST0 = floatx80_ln2;
534}
535
536void helper_fldz_ST0(CPUX86State *env)
537{
538 ST0 = floatx80_zero;
539}
540
541void helper_fldz_FT0(CPUX86State *env)
542{
543 FT0 = floatx80_zero;
544}
545
546uint32_t helper_fnstsw(CPUX86State *env)
547{
548 return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
549}
550
551uint32_t helper_fnstcw(CPUX86State *env)
552{
553 return env->fpuc;
554}
555
556void update_fp_status(CPUX86State *env)
557{
558 int rnd_type;
559
560 /* set rounding mode */
561 switch (env->fpuc & FPU_RC_MASK) {
562 default:
563 case FPU_RC_NEAR:
564 rnd_type = float_round_nearest_even;
565 break;
566 case FPU_RC_DOWN:
567 rnd_type = float_round_down;
568 break;
569 case FPU_RC_UP:
570 rnd_type = float_round_up;
571 break;
572 case FPU_RC_CHOP:
573 rnd_type = float_round_to_zero;
574 break;
575 }
576 set_float_rounding_mode(rnd_type, &env->fp_status);
577 switch ((env->fpuc >> 8) & 3) {
578 case 0:
579 rnd_type = 32;
580 break;
581 case 2:
582 rnd_type = 64;
583 break;
584 case 3:
585 default:
586 rnd_type = 80;
587 break;
588 }
589 set_floatx80_rounding_precision(rnd_type, &env->fp_status);
590}
591
592void helper_fldcw(CPUX86State *env, uint32_t val)
593{
594 cpu_set_fpuc(env, val);
595}
596
597void helper_fclex(CPUX86State *env)
598{
599 env->fpus &= 0x7f00;
600}
601
602void helper_fwait(CPUX86State *env)
603{
604 if (env->fpus & FPUS_SE) {
605 fpu_raise_exception(env, GETPC());
606 }
607}
608
609void helper_fninit(CPUX86State *env)
610{
611 env->fpus = 0;
612 env->fpstt = 0;
613 cpu_set_fpuc(env, 0x37f);
614 env->fptags[0] = 1;
615 env->fptags[1] = 1;
616 env->fptags[2] = 1;
617 env->fptags[3] = 1;
618 env->fptags[4] = 1;
619 env->fptags[5] = 1;
620 env->fptags[6] = 1;
621 env->fptags[7] = 1;
622}
623
624/* BCD ops */
625
626void helper_fbld_ST0(CPUX86State *env, target_ulong ptr)
627{
628 floatx80 tmp;
629 uint64_t val;
630 unsigned int v;
631 int i;
632
633 val = 0;
634 for (i = 8; i >= 0; i--) {
635 v = cpu_ldub_data_ra(env, ptr + i, GETPC());
636 val = (val * 100) + ((v >> 4) * 10) + (v & 0xf);
637 }
638 tmp = int64_to_floatx80(val, &env->fp_status);
639 if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) {
640 tmp = floatx80_chs(tmp);
641 }
642 fpush(env);
643 ST0 = tmp;
644}
645
646void helper_fbst_ST0(CPUX86State *env, target_ulong ptr)
647{
648 int v;
649 target_ulong mem_ref, mem_end;
650 int64_t val;
651
652 val = floatx80_to_int64(ST0, &env->fp_status);
653 mem_ref = ptr;
654 mem_end = mem_ref + 9;
655 if (val < 0) {
656 cpu_stb_data_ra(env, mem_end, 0x80, GETPC());
657 val = -val;
658 } else {
659 cpu_stb_data_ra(env, mem_end, 0x00, GETPC());
660 }
661 while (mem_ref < mem_end) {
662 if (val == 0) {
663 break;
664 }
665 v = val % 100;
666 val = val / 100;
667 v = ((v / 10) << 4) | (v % 10);
668 cpu_stb_data_ra(env, mem_ref++, v, GETPC());
669 }
670 while (mem_ref < mem_end) {
671 cpu_stb_data_ra(env, mem_ref++, 0, GETPC());
672 }
673}
674
675void helper_f2xm1(CPUX86State *env)
676{
677 double val = floatx80_to_double(env, ST0);
678
679 val = pow(2.0, val) - 1.0;
680 ST0 = double_to_floatx80(env, val);
681}
682
683void helper_fyl2x(CPUX86State *env)
684{
685 double fptemp = floatx80_to_double(env, ST0);
686
687 if (fptemp > 0.0) {
688 fptemp = log(fptemp) / log(2.0); /* log2(ST) */
689 fptemp *= floatx80_to_double(env, ST1);
690 ST1 = double_to_floatx80(env, fptemp);
691 fpop(env);
692 } else {
693 env->fpus &= ~0x4700;
694 env->fpus |= 0x400;
695 }
696}
697
698void helper_fptan(CPUX86State *env)
699{
700 double fptemp = floatx80_to_double(env, ST0);
701
702 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
703 env->fpus |= 0x400;
704 } else {
705 fptemp = tan(fptemp);
706 ST0 = double_to_floatx80(env, fptemp);
707 fpush(env);
708 ST0 = floatx80_one;
709 env->fpus &= ~0x400; /* C2 <-- 0 */
710 /* the above code is for |arg| < 2**52 only */
711 }
712}
713
714void helper_fpatan(CPUX86State *env)
715{
716 double fptemp, fpsrcop;
717
718 fpsrcop = floatx80_to_double(env, ST1);
719 fptemp = floatx80_to_double(env, ST0);
720 ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp));
721 fpop(env);
722}
723
724void helper_fxtract(CPUX86State *env)
725{
726 CPU_LDoubleU temp;
727
728 temp.d = ST0;
729
730 if (floatx80_is_zero(ST0)) {
731 /* Easy way to generate -inf and raising division by 0 exception */
732 ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero,
733 &env->fp_status);
734 fpush(env);
735 ST0 = temp.d;
736 } else {
737 int expdif;
738
739 expdif = EXPD(temp) - EXPBIAS;
740 /* DP exponent bias */
741 ST0 = int32_to_floatx80(expdif, &env->fp_status);
742 fpush(env);
743 BIASEXPONENT(temp);
744 ST0 = temp.d;
745 }
746}
747
748void helper_fprem1(CPUX86State *env)
749{
750 double st0, st1, dblq, fpsrcop, fptemp;
751 CPU_LDoubleU fpsrcop1, fptemp1;
752 int expdif;
753 signed long long int q;
754
755 st0 = floatx80_to_double(env, ST0);
756 st1 = floatx80_to_double(env, ST1);
757
758 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
759 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
760 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
761 return;
762 }
763
764 fpsrcop = st0;
765 fptemp = st1;
766 fpsrcop1.d = ST0;
767 fptemp1.d = ST1;
768 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
769
770 if (expdif < 0) {
771 /* optimisation? taken from the AMD docs */
772 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
773 /* ST0 is unchanged */
774 return;
775 }
776
777 if (expdif < 53) {
778 dblq = fpsrcop / fptemp;
779 /* round dblq towards nearest integer */
780 dblq = rint(dblq);
781 st0 = fpsrcop - fptemp * dblq;
782
783 /* convert dblq to q by truncating towards zero */
784 if (dblq < 0.0) {
785 q = (signed long long int)(-dblq);
786 } else {
787 q = (signed long long int)dblq;
788 }
789
790 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
791 /* (C0,C3,C1) <-- (q2,q1,q0) */
792 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
793 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
794 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
795 } else {
796 env->fpus |= 0x400; /* C2 <-- 1 */
797 fptemp = pow(2.0, expdif - 50);
798 fpsrcop = (st0 / st1) / fptemp;
799 /* fpsrcop = integer obtained by chopping */
800 fpsrcop = (fpsrcop < 0.0) ?
801 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
802 st0 -= (st1 * fpsrcop * fptemp);
803 }
804 ST0 = double_to_floatx80(env, st0);
805}
806
807void helper_fprem(CPUX86State *env)
808{
809 double st0, st1, dblq, fpsrcop, fptemp;
810 CPU_LDoubleU fpsrcop1, fptemp1;
811 int expdif;
812 signed long long int q;
813
814 st0 = floatx80_to_double(env, ST0);
815 st1 = floatx80_to_double(env, ST1);
816
817 if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) {
818 ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */
819 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
820 return;
821 }
822
823 fpsrcop = st0;
824 fptemp = st1;
825 fpsrcop1.d = ST0;
826 fptemp1.d = ST1;
827 expdif = EXPD(fpsrcop1) - EXPD(fptemp1);
828
829 if (expdif < 0) {
830 /* optimisation? taken from the AMD docs */
831 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
832 /* ST0 is unchanged */
833 return;
834 }
835
836 if (expdif < 53) {
837 dblq = fpsrcop / fptemp; /* ST0 / ST1 */
838 /* round dblq towards zero */
839 dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq);
840 st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */
841
842 /* convert dblq to q by truncating towards zero */
843 if (dblq < 0.0) {
844 q = (signed long long int)(-dblq);
845 } else {
846 q = (signed long long int)dblq;
847 }
848
849 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
850 /* (C0,C3,C1) <-- (q2,q1,q0) */
851 env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */
852 env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */
853 env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */
854 } else {
855 int N = 32 + (expdif % 32); /* as per AMD docs */
856
857 env->fpus |= 0x400; /* C2 <-- 1 */
858 fptemp = pow(2.0, (double)(expdif - N));
859 fpsrcop = (st0 / st1) / fptemp;
860 /* fpsrcop = integer obtained by chopping */
861 fpsrcop = (fpsrcop < 0.0) ?
862 -(floor(fabs(fpsrcop))) : floor(fpsrcop);
863 st0 -= (st1 * fpsrcop * fptemp);
864 }
865 ST0 = double_to_floatx80(env, st0);
866}
867
868void helper_fyl2xp1(CPUX86State *env)
869{
870 double fptemp = floatx80_to_double(env, ST0);
871
872 if ((fptemp + 1.0) > 0.0) {
873 fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */
874 fptemp *= floatx80_to_double(env, ST1);
875 ST1 = double_to_floatx80(env, fptemp);
876 fpop(env);
877 } else {
878 env->fpus &= ~0x4700;
879 env->fpus |= 0x400;
880 }
881}
882
883void helper_fsqrt(CPUX86State *env)
884{
885 if (floatx80_is_neg(ST0)) {
886 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
887 env->fpus |= 0x400;
888 }
889 ST0 = floatx80_sqrt(ST0, &env->fp_status);
890}
891
892void helper_fsincos(CPUX86State *env)
893{
894 double fptemp = floatx80_to_double(env, ST0);
895
896 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
897 env->fpus |= 0x400;
898 } else {
899 ST0 = double_to_floatx80(env, sin(fptemp));
900 fpush(env);
901 ST0 = double_to_floatx80(env, cos(fptemp));
902 env->fpus &= ~0x400; /* C2 <-- 0 */
903 /* the above code is for |arg| < 2**63 only */
904 }
905}
906
907void helper_frndint(CPUX86State *env)
908{
909 ST0 = floatx80_round_to_int(ST0, &env->fp_status);
910}
911
912void helper_fscale(CPUX86State *env)
913{
914 if (floatx80_is_any_nan(ST1)) {
915 ST0 = ST1;
916 } else {
917 int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status);
918 ST0 = floatx80_scalbn(ST0, n, &env->fp_status);
919 }
920}
921
922void helper_fsin(CPUX86State *env)
923{
924 double fptemp = floatx80_to_double(env, ST0);
925
926 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
927 env->fpus |= 0x400;
928 } else {
929 ST0 = double_to_floatx80(env, sin(fptemp));
930 env->fpus &= ~0x400; /* C2 <-- 0 */
931 /* the above code is for |arg| < 2**53 only */
932 }
933}
934
935void helper_fcos(CPUX86State *env)
936{
937 double fptemp = floatx80_to_double(env, ST0);
938
939 if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) {
940 env->fpus |= 0x400;
941 } else {
942 ST0 = double_to_floatx80(env, cos(fptemp));
943 env->fpus &= ~0x400; /* C2 <-- 0 */
944 /* the above code is for |arg| < 2**63 only */
945 }
946}
947
948void helper_fxam_ST0(CPUX86State *env)
949{
950 CPU_LDoubleU temp;
951 int expdif;
952
953 temp.d = ST0;
954
955 env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */
956 if (SIGND(temp)) {
957 env->fpus |= 0x200; /* C1 <-- 1 */
958 }
959
960 /* XXX: test fptags too */
961 expdif = EXPD(temp);
962 if (expdif == MAXEXPD) {
963 if (MANTD(temp) == 0x8000000000000000ULL) {
964 env->fpus |= 0x500; /* Infinity */
965 } else {
966 env->fpus |= 0x100; /* NaN */
967 }
968 } else if (expdif == 0) {
969 if (MANTD(temp) == 0) {
970 env->fpus |= 0x4000; /* Zero */
971 } else {
972 env->fpus |= 0x4400; /* Denormal */
973 }
974 } else {
975 env->fpus |= 0x400;
976 }
977}
978
979static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32,
980 uintptr_t retaddr)
981{
982 int fpus, fptag, exp, i;
983 uint64_t mant;
984 CPU_LDoubleU tmp;
985
986 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
987 fptag = 0;
988 for (i = 7; i >= 0; i--) {
989 fptag <<= 2;
990 if (env->fptags[i]) {
991 fptag |= 3;
992 } else {
993 tmp.d = env->fpregs[i].d;
994 exp = EXPD(tmp);
995 mant = MANTD(tmp);
996 if (exp == 0 && mant == 0) {
997 /* zero */
998 fptag |= 1;
999 } else if (exp == 0 || exp == MAXEXPD
1000 || (mant & (1LL << 63)) == 0) {
1001 /* NaNs, infinity, denormal */
1002 fptag |= 2;
1003 }
1004 }
1005 }
1006 if (data32) {
1007 /* 32 bit */
1008 cpu_stl_data_ra(env, ptr, env->fpuc, retaddr);
1009 cpu_stl_data_ra(env, ptr + 4, fpus, retaddr);
1010 cpu_stl_data_ra(env, ptr + 8, fptag, retaddr);
1011 cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */
1012 cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */
1013 cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */
1014 cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */
1015 } else {
1016 /* 16 bit */
1017 cpu_stw_data_ra(env, ptr, env->fpuc, retaddr);
1018 cpu_stw_data_ra(env, ptr + 2, fpus, retaddr);
1019 cpu_stw_data_ra(env, ptr + 4, fptag, retaddr);
1020 cpu_stw_data_ra(env, ptr + 6, 0, retaddr);
1021 cpu_stw_data_ra(env, ptr + 8, 0, retaddr);
1022 cpu_stw_data_ra(env, ptr + 10, 0, retaddr);
1023 cpu_stw_data_ra(env, ptr + 12, 0, retaddr);
1024 }
1025}
1026
1027void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32)
1028{
1029 do_fstenv(env, ptr, data32, GETPC());
1030}
1031
1032static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32,
1033 uintptr_t retaddr)
1034{
1035 int i, fpus, fptag;
1036
1037 if (data32) {
1038 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1039 fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1040 fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr);
1041 } else {
1042 cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr));
1043 fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr);
1044 fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr);
1045 }
1046 env->fpstt = (fpus >> 11) & 7;
1047 env->fpus = fpus & ~0x3800;
1048 for (i = 0; i < 8; i++) {
1049 env->fptags[i] = ((fptag & 3) == 3);
1050 fptag >>= 2;
1051 }
1052}
1053
1054void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32)
1055{
1056 do_fldenv(env, ptr, data32, GETPC());
1057}
1058
1059void helper_fsave(CPUX86State *env, target_ulong ptr, int data32)
1060{
1061 floatx80 tmp;
1062 int i;
1063
1064 do_fstenv(env, ptr, data32, GETPC());
1065
1066 ptr += (14 << data32);
1067 for (i = 0; i < 8; i++) {
1068 tmp = ST(i);
1069 helper_fstt(env, tmp, ptr, GETPC());
1070 ptr += 10;
1071 }
1072
1073 /* fninit */
1074 env->fpus = 0;
1075 env->fpstt = 0;
1076 cpu_set_fpuc(env, 0x37f);
1077 env->fptags[0] = 1;
1078 env->fptags[1] = 1;
1079 env->fptags[2] = 1;
1080 env->fptags[3] = 1;
1081 env->fptags[4] = 1;
1082 env->fptags[5] = 1;
1083 env->fptags[6] = 1;
1084 env->fptags[7] = 1;
1085}
1086
1087void helper_frstor(CPUX86State *env, target_ulong ptr, int data32)
1088{
1089 floatx80 tmp;
1090 int i;
1091
1092 do_fldenv(env, ptr, data32, GETPC());
1093 ptr += (14 << data32);
1094
1095 for (i = 0; i < 8; i++) {
1096 tmp = helper_fldt(env, ptr, GETPC());
1097 ST(i) = tmp;
1098 ptr += 10;
1099 }
1100}
1101
1102#if defined(CONFIG_USER_ONLY)
1103void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
1104{
1105 helper_fsave(env, ptr, data32);
1106}
1107
1108void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
1109{
1110 helper_frstor(env, ptr, data32);
1111}
1112#endif
1113
/* Byte offset of member X within the X86XSaveArea layout. */
#define XO(X)  offsetof(X86XSaveArea, X)
1115
1116static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1117{
1118 int fpus, fptag, i;
1119 target_ulong addr;
1120
1121 fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11;
1122 fptag = 0;
1123 for (i = 0; i < 8; i++) {
1124 fptag |= (env->fptags[i] << i);
1125 }
1126
1127 cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra);
1128 cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra);
1129 cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra);
1130
1131 /* In 32-bit mode this is eip, sel, dp, sel.
1132 In 64-bit mode this is rip, rdp.
1133 But in either case we don't write actual data, just zeros. */
1134 cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */
1135 cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */
1136
1137 addr = ptr + XO(legacy.fpregs);
1138 for (i = 0; i < 8; i++) {
1139 floatx80 tmp = ST(i);
1140 helper_fstt(env, tmp, addr, ra);
1141 addr += 16;
1142 }
1143}
1144
1145static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1146{
1147 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra);
1148 cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra);
1149}
1150
1151static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1152{
1153 int i, nb_xmm_regs;
1154 target_ulong addr;
1155
1156 if (env->hflags & HF_CS64_MASK) {
1157 nb_xmm_regs = 16;
1158 } else {
1159 nb_xmm_regs = 8;
1160 }
1161
1162 addr = ptr + XO(legacy.xmm_regs);
1163 for (i = 0; i < nb_xmm_regs; i++) {
1164 cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra);
1165 cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra);
1166 addr += 16;
1167 }
1168}
1169
1170static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1171{
1172 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1173 int i;
1174
1175 for (i = 0; i < 4; i++, addr += 16) {
1176 cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra);
1177 cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra);
1178 }
1179}
1180
1181static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1182{
1183 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu),
1184 env->bndcs_regs.cfgu, ra);
1185 cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts),
1186 env->bndcs_regs.sts, ra);
1187}
1188
1189static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1190{
1191 cpu_stq_data_ra(env, ptr, env->pkru, ra);
1192}
1193
1194void helper_fxsave(CPUX86State *env, target_ulong ptr)
1195{
1196 uintptr_t ra = GETPC();
1197
1198 /* The operand must be 16 byte aligned */
1199 if (ptr & 0xf) {
1200 raise_exception_ra(env, EXCP0D_GPF, ra);
1201 }
1202
1203 do_xsave_fpu(env, ptr, ra);
1204
1205 if (env->cr[4] & CR4_OSFXSR_MASK) {
1206 do_xsave_mxcsr(env, ptr, ra);
1207 /* Fast FXSAVE leaves out the XMM registers */
1208 if (!(env->efer & MSR_EFER_FFXSR)
1209 || (env->hflags & HF_CPL_MASK)
1210 || !(env->hflags & HF_LMA_MASK)) {
1211 do_xsave_sse(env, ptr, ra);
1212 }
1213 }
1214}
1215
1216static uint64_t get_xinuse(CPUX86State *env)
1217{
1218 uint64_t inuse = -1;
1219
1220 /* For the most part, we don't track XINUSE. We could calculate it
1221 here for all components, but it's probably less work to simply
1222 indicate in use. That said, the state of BNDREGS is important
1223 enough to track in HFLAGS, so we might as well use that here. */
1224 if ((env->hflags & HF_MPX_IU_MASK) == 0) {
1225 inuse &= ~XSTATE_BNDREGS_MASK;
1226 }
1227 return inuse;
1228}
1229
1230static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm,
1231 uint64_t inuse, uint64_t opt, uintptr_t ra)
1232{
1233 uint64_t old_bv, new_bv;
1234
1235 /* The OS must have enabled XSAVE. */
1236 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1237 raise_exception_ra(env, EXCP06_ILLOP, ra);
1238 }
1239
1240 /* The operand must be 64 byte aligned. */
1241 if (ptr & 63) {
1242 raise_exception_ra(env, EXCP0D_GPF, ra);
1243 }
1244
1245 /* Never save anything not enabled by XCR0. */
1246 rfbm &= env->xcr0;
1247 opt &= rfbm;
1248
1249 if (opt & XSTATE_FP_MASK) {
1250 do_xsave_fpu(env, ptr, ra);
1251 }
1252 if (rfbm & XSTATE_SSE_MASK) {
1253 /* Note that saving MXCSR is not suppressed by XSAVEOPT. */
1254 do_xsave_mxcsr(env, ptr, ra);
1255 }
1256 if (opt & XSTATE_SSE_MASK) {
1257 do_xsave_sse(env, ptr, ra);
1258 }
1259 if (opt & XSTATE_BNDREGS_MASK) {
1260 do_xsave_bndregs(env, ptr + XO(bndreg_state), ra);
1261 }
1262 if (opt & XSTATE_BNDCSR_MASK) {
1263 do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra);
1264 }
1265 if (opt & XSTATE_PKRU_MASK) {
1266 do_xsave_pkru(env, ptr + XO(pkru_state), ra);
1267 }
1268
1269 /* Update the XSTATE_BV field. */
1270 old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1271 new_bv = (old_bv & ~rfbm) | (inuse & rfbm);
1272 cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra);
1273}
1274
1275void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1276{
1277 do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC());
1278}
1279
1280void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1281{
1282 uint64_t inuse = get_xinuse(env);
1283 do_xsave(env, ptr, rfbm, inuse, inuse, GETPC());
1284}
1285
1286static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1287{
1288 int i, fpuc, fpus, fptag;
1289 target_ulong addr;
1290
1291 fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra);
1292 fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra);
1293 fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra);
1294 cpu_set_fpuc(env, fpuc);
1295 env->fpstt = (fpus >> 11) & 7;
1296 env->fpus = fpus & ~0x3800;
1297 fptag ^= 0xff;
1298 for (i = 0; i < 8; i++) {
1299 env->fptags[i] = ((fptag >> i) & 1);
1300 }
1301
1302 addr = ptr + XO(legacy.fpregs);
1303 for (i = 0; i < 8; i++) {
1304 floatx80 tmp = helper_fldt(env, addr, ra);
1305 ST(i) = tmp;
1306 addr += 16;
1307 }
1308}
1309
1310static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1311{
1312 cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra));
1313}
1314
1315static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1316{
1317 int i, nb_xmm_regs;
1318 target_ulong addr;
1319
1320 if (env->hflags & HF_CS64_MASK) {
1321 nb_xmm_regs = 16;
1322 } else {
1323 nb_xmm_regs = 8;
1324 }
1325
1326 addr = ptr + XO(legacy.xmm_regs);
1327 for (i = 0; i < nb_xmm_regs; i++) {
1328 env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra);
1329 env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra);
1330 addr += 16;
1331 }
1332}
1333
1334static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1335{
1336 target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs);
1337 int i;
1338
1339 for (i = 0; i < 4; i++, addr += 16) {
1340 env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra);
1341 env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra);
1342 }
1343}
1344
1345static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1346{
1347 /* FIXME: Extend highest implemented bit of linear address. */
1348 env->bndcs_regs.cfgu
1349 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra);
1350 env->bndcs_regs.sts
1351 = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra);
1352}
1353
1354static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra)
1355{
1356 env->pkru = cpu_ldq_data_ra(env, ptr, ra);
1357}
1358
1359void helper_fxrstor(CPUX86State *env, target_ulong ptr)
1360{
1361 uintptr_t ra = GETPC();
1362
1363 /* The operand must be 16 byte aligned */
1364 if (ptr & 0xf) {
1365 raise_exception_ra(env, EXCP0D_GPF, ra);
1366 }
1367
1368 do_xrstor_fpu(env, ptr, ra);
1369
1370 if (env->cr[4] & CR4_OSFXSR_MASK) {
1371 do_xrstor_mxcsr(env, ptr, ra);
1372 /* Fast FXRSTOR leaves out the XMM registers */
1373 if (!(env->efer & MSR_EFER_FFXSR)
1374 || (env->hflags & HF_CPL_MASK)
1375 || !(env->hflags & HF_LMA_MASK)) {
1376 do_xrstor_sse(env, ptr, ra);
1377 }
1378 }
1379}
1380
1381#if defined(CONFIG_USER_ONLY)
/*
 * Entry point compiled only for CONFIG_USER_ONLY builds: performs an
 * FXSAVE to guest address @ptr by forwarding to helper_fxsave().
 */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}
1386
/*
 * Entry point compiled only for CONFIG_USER_ONLY builds: performs an
 * FXRSTOR from guest address @ptr by forwarding to helper_fxrstor().
 */
void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
1391#endif
1392
1393void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm)
1394{
1395 uintptr_t ra = GETPC();
1396 uint64_t xstate_bv, xcomp_bv, reserve0;
1397
1398 rfbm &= env->xcr0;
1399
1400 /* The OS must have enabled XSAVE. */
1401 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1402 raise_exception_ra(env, EXCP06_ILLOP, ra);
1403 }
1404
1405 /* The operand must be 64 byte aligned. */
1406 if (ptr & 63) {
1407 raise_exception_ra(env, EXCP0D_GPF, ra);
1408 }
1409
1410 xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra);
1411
1412 if ((int64_t)xstate_bv < 0) {
1413 /* FIXME: Compact form. */
1414 raise_exception_ra(env, EXCP0D_GPF, ra);
1415 }
1416
1417 /* Standard form. */
1418
1419 /* The XSTATE_BV field must not set bits not present in XCR0. */
1420 if (xstate_bv & ~env->xcr0) {
1421 raise_exception_ra(env, EXCP0D_GPF, ra);
1422 }
1423
1424 /* The XCOMP_BV field must be zero. Note that, as of the April 2016
1425 revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2)
1426 describes only XCOMP_BV, but the description of the standard form
1427 of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which
1428 includes the next 64-bit field. */
1429 xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra);
1430 reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra);
1431 if (xcomp_bv || reserve0) {
1432 raise_exception_ra(env, EXCP0D_GPF, ra);
1433 }
1434
1435 if (rfbm & XSTATE_FP_MASK) {
1436 if (xstate_bv & XSTATE_FP_MASK) {
1437 do_xrstor_fpu(env, ptr, ra);
1438 } else {
1439 helper_fninit(env);
1440 memset(env->fpregs, 0, sizeof(env->fpregs));
1441 }
1442 }
1443 if (rfbm & XSTATE_SSE_MASK) {
1444 /* Note that the standard form of XRSTOR loads MXCSR from memory
1445 whether or not the XSTATE_BV bit is set. */
1446 do_xrstor_mxcsr(env, ptr, ra);
1447 if (xstate_bv & XSTATE_SSE_MASK) {
1448 do_xrstor_sse(env, ptr, ra);
1449 } else {
1450 /* ??? When AVX is implemented, we may have to be more
1451 selective in the clearing. */
1452 memset(env->xmm_regs, 0, sizeof(env->xmm_regs));
1453 }
1454 }
1455 if (rfbm & XSTATE_BNDREGS_MASK) {
1456 if (xstate_bv & XSTATE_BNDREGS_MASK) {
1457 do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra);
1458 env->hflags |= HF_MPX_IU_MASK;
1459 } else {
1460 memset(env->bnd_regs, 0, sizeof(env->bnd_regs));
1461 env->hflags &= ~HF_MPX_IU_MASK;
1462 }
1463 }
1464 if (rfbm & XSTATE_BNDCSR_MASK) {
1465 if (xstate_bv & XSTATE_BNDCSR_MASK) {
1466 do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra);
1467 } else {
1468 memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs));
1469 }
1470 cpu_sync_bndcs_hflags(env);
1471 }
1472 if (rfbm & XSTATE_PKRU_MASK) {
1473 uint64_t old_pkru = env->pkru;
1474 if (xstate_bv & XSTATE_PKRU_MASK) {
1475 do_xrstor_pkru(env, ptr + XO(pkru_state), ra);
1476 } else {
1477 env->pkru = 0;
1478 }
1479 if (env->pkru != old_pkru) {
1480 CPUState *cs = env_cpu(env);
1481 tlb_flush(cs);
1482 }
1483 }
1484}
1485
1486#undef XO
1487
1488uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx)
1489{
1490 /* The OS must have enabled XSAVE. */
1491 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1492 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1493 }
1494
1495 switch (ecx) {
1496 case 0:
1497 return env->xcr0;
1498 case 1:
1499 if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) {
1500 return env->xcr0 & get_xinuse(env);
1501 }
1502 break;
1503 }
1504 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1505}
1506
1507void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask)
1508{
1509 uint32_t dummy, ena_lo, ena_hi;
1510 uint64_t ena;
1511
1512 /* The OS must have enabled XSAVE. */
1513 if (!(env->cr[4] & CR4_OSXSAVE_MASK)) {
1514 raise_exception_ra(env, EXCP06_ILLOP, GETPC());
1515 }
1516
1517 /* Only XCR0 is defined at present; the FPU may not be disabled. */
1518 if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) {
1519 goto do_gpf;
1520 }
1521
1522 /* Disallow enabling unimplemented features. */
1523 cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi);
1524 ena = ((uint64_t)ena_hi << 32) | ena_lo;
1525 if (mask & ~ena) {
1526 goto do_gpf;
1527 }
1528
1529 /* Disallow enabling only half of MPX. */
1530 if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK)))
1531 & XSTATE_BNDCSR_MASK) {
1532 goto do_gpf;
1533 }
1534
1535 env->xcr0 = mask;
1536 cpu_sync_bndcs_hflags(env);
1537 return;
1538
1539 do_gpf:
1540 raise_exception_ra(env, EXCP0D_GPF, GETPC());
1541}
1542
1543/* MMX/SSE */
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
1545
/* MXCSR control bits decoded by update_mxcsr_status(). */
#define SSE_DAZ             0x0040  /* denormals are zero */
#define SSE_RC_MASK         0x6000  /* rounding-control field */
#define SSE_RC_NEAR         0x0000  /* round to nearest even */
#define SSE_RC_DOWN         0x2000  /* round down */
#define SSE_RC_UP           0x4000  /* round up */
#define SSE_RC_CHOP         0x6000  /* round toward zero */
#define SSE_FZ              0x8000  /* flush to zero */
1553
1554void update_mxcsr_status(CPUX86State *env)
1555{
1556 uint32_t mxcsr = env->mxcsr;
1557 int rnd_type;
1558
1559 /* set rounding mode */
1560 switch (mxcsr & SSE_RC_MASK) {
1561 default:
1562 case SSE_RC_NEAR:
1563 rnd_type = float_round_nearest_even;
1564 break;
1565 case SSE_RC_DOWN:
1566 rnd_type = float_round_down;
1567 break;
1568 case SSE_RC_UP:
1569 rnd_type = float_round_up;
1570 break;
1571 case SSE_RC_CHOP:
1572 rnd_type = float_round_to_zero;
1573 break;
1574 }
1575 set_float_rounding_mode(rnd_type, &env->sse_status);
1576
1577 /* set denormals are zero */
1578 set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status);
1579
1580 /* set flush to zero */
1581 set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status);
1582}
1583
/* Helper that installs @val as the new MXCSR by way of cpu_set_mxcsr(). */
void helper_ldmxcsr(CPUX86State *env, uint32_t val)
{
    cpu_set_mxcsr(env, val);
}
1588
1589void helper_enter_mmx(CPUX86State *env)
1590{
1591 env->fpstt = 0;
1592 *(uint32_t *)(env->fptags) = 0;
1593 *(uint32_t *)(env->fptags + 4) = 0;
1594}
1595
1596void helper_emms(CPUX86State *env)
1597{
1598 /* set to empty state */
1599 *(uint32_t *)(env->fptags) = 0x01010101;
1600 *(uint32_t *)(env->fptags + 4) = 0x01010101;
1601}
1602
1603/* XXX: suppress */
1604void helper_movq(CPUX86State *env, void *d, void *s)
1605{
1606 *(uint64_t *)d = *(uint64_t *)s;
1607}
1608
/*
 * ops_sse.h is included twice, once per SHIFT value, so each inclusion
 * expands into its own set of helpers.
 * NOTE(review): presumably SHIFT 0 selects the 64-bit MMX variants and
 * SHIFT 1 the 128-bit SSE variants, and the header #undefs SHIFT at its
 * end (it is redefined below without an #undef) - confirm in ops_sse.h.
 */
#define SHIFT 0
#include "ops_sse.h"

#define SHIFT 1
#include "ops_sse.h"
1614