1 | /* |
2 | * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers |
3 | * |
4 | * Copyright (c) 2003 Fabrice Bellard |
5 | * |
6 | * This library is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2 of the License, or (at your option) any later version. |
10 | * |
11 | * This library is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
18 | */ |
19 | |
20 | #include "qemu/osdep.h" |
21 | #include <math.h> |
22 | #include "cpu.h" |
23 | #include "exec/helper-proto.h" |
24 | #include "qemu/host-utils.h" |
25 | #include "exec/exec-all.h" |
26 | #include "exec/cpu_ldst.h" |
27 | #include "fpu/softfloat.h" |
28 | |
/* Rounding-control field of the x87 control word (bits 10-11) and its values */
#define FPU_RC_MASK 0xc00
#define FPU_RC_NEAR 0x000
#define FPU_RC_DOWN 0x400
#define FPU_RC_UP 0x800
#define FPU_RC_CHOP 0xc00

/* 2^63: operand magnitude above which fptan/fsin/fcos/fsincos only set C2 */
#define MAXTAN 9223372036854775808.0

/*
 * The following deal with x86 long double-precision numbers.
 * "fp" is an object with an .l.upper (sign + 15-bit exponent) and an
 * .l.lower (64-bit mantissa) view, such as CPU_LDoubleU.  Arguments and
 * expansions are parenthesized so that any lvalue expression (e.g. *p)
 * can be passed and the result can be used inside a larger expression.
 */
#define MAXEXPD 0x7fff
#define EXPBIAS 16383
#define EXPD(fp) ((fp).l.upper & 0x7fff)
#define SIGND(fp) ((fp).l.upper & 0x8000)
#define MANTD(fp) ((fp).l.lower)
/* Replace the exponent field with the bias, keeping the sign bit */
#define BIASEXPONENT(fp) \
    ((fp).l.upper = ((fp).l.upper & ~(0x7fff)) | EXPBIAS)

/* Bits of the status word kept in env->fpus */
#define FPUS_IE (1 << 0)
#define FPUS_DE (1 << 1)
#define FPUS_ZE (1 << 2)
#define FPUS_OE (1 << 3)
#define FPUS_UE (1 << 4)
#define FPUS_PE (1 << 5)
#define FPUS_SF (1 << 6)
#define FPUS_SE (1 << 7)
#define FPUS_B (1 << 15)

/* Exception-mask bits of the control word kept in env->fpuc */
#define FPUC_EM 0x3f

/* log10(2), log2(e) and log2(10), loaded by fldlg2 / fldl2e / fldl2t */
#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL)
#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL)
#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL)
60 | |
61 | static inline void fpush(CPUX86State *env) |
62 | { |
63 | env->fpstt = (env->fpstt - 1) & 7; |
64 | env->fptags[env->fpstt] = 0; /* validate stack entry */ |
65 | } |
66 | |
67 | static inline void fpop(CPUX86State *env) |
68 | { |
69 | env->fptags[env->fpstt] = 1; /* invalidate stack entry */ |
70 | env->fpstt = (env->fpstt + 1) & 7; |
71 | } |
72 | |
73 | static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr, |
74 | uintptr_t retaddr) |
75 | { |
76 | CPU_LDoubleU temp; |
77 | |
78 | temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); |
79 | temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); |
80 | return temp.d; |
81 | } |
82 | |
83 | static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, |
84 | uintptr_t retaddr) |
85 | { |
86 | CPU_LDoubleU temp; |
87 | |
88 | temp.d = f; |
89 | cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); |
90 | cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); |
91 | } |
92 | |
93 | /* x87 FPU helpers */ |
94 | |
95 | static inline double floatx80_to_double(CPUX86State *env, floatx80 a) |
96 | { |
97 | union { |
98 | float64 f64; |
99 | double d; |
100 | } u; |
101 | |
102 | u.f64 = floatx80_to_float64(a, &env->fp_status); |
103 | return u.d; |
104 | } |
105 | |
106 | static inline floatx80 double_to_floatx80(CPUX86State *env, double a) |
107 | { |
108 | union { |
109 | float64 f64; |
110 | double d; |
111 | } u; |
112 | |
113 | u.d = a; |
114 | return float64_to_floatx80(u.f64, &env->fp_status); |
115 | } |
116 | |
117 | static void fpu_set_exception(CPUX86State *env, int mask) |
118 | { |
119 | env->fpus |= mask; |
120 | if (env->fpus & (~env->fpuc & FPUC_EM)) { |
121 | env->fpus |= FPUS_SE | FPUS_B; |
122 | } |
123 | } |
124 | |
125 | static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) |
126 | { |
127 | if (floatx80_is_zero(b)) { |
128 | fpu_set_exception(env, FPUS_ZE); |
129 | } |
130 | return floatx80_div(a, b, &env->fp_status); |
131 | } |
132 | |
133 | static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) |
134 | { |
135 | if (env->cr[0] & CR0_NE_MASK) { |
136 | raise_exception_ra(env, EXCP10_COPR, retaddr); |
137 | } |
138 | #if !defined(CONFIG_USER_ONLY) |
139 | else { |
140 | cpu_set_ferr(env); |
141 | } |
142 | #endif |
143 | } |
144 | |
145 | void helper_flds_FT0(CPUX86State *env, uint32_t val) |
146 | { |
147 | union { |
148 | float32 f; |
149 | uint32_t i; |
150 | } u; |
151 | |
152 | u.i = val; |
153 | FT0 = float32_to_floatx80(u.f, &env->fp_status); |
154 | } |
155 | |
156 | void helper_fldl_FT0(CPUX86State *env, uint64_t val) |
157 | { |
158 | union { |
159 | float64 f; |
160 | uint64_t i; |
161 | } u; |
162 | |
163 | u.i = val; |
164 | FT0 = float64_to_floatx80(u.f, &env->fp_status); |
165 | } |
166 | |
167 | void helper_fildl_FT0(CPUX86State *env, int32_t val) |
168 | { |
169 | FT0 = int32_to_floatx80(val, &env->fp_status); |
170 | } |
171 | |
172 | void helper_flds_ST0(CPUX86State *env, uint32_t val) |
173 | { |
174 | int new_fpstt; |
175 | union { |
176 | float32 f; |
177 | uint32_t i; |
178 | } u; |
179 | |
180 | new_fpstt = (env->fpstt - 1) & 7; |
181 | u.i = val; |
182 | env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); |
183 | env->fpstt = new_fpstt; |
184 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
185 | } |
186 | |
187 | void helper_fldl_ST0(CPUX86State *env, uint64_t val) |
188 | { |
189 | int new_fpstt; |
190 | union { |
191 | float64 f; |
192 | uint64_t i; |
193 | } u; |
194 | |
195 | new_fpstt = (env->fpstt - 1) & 7; |
196 | u.i = val; |
197 | env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); |
198 | env->fpstt = new_fpstt; |
199 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
200 | } |
201 | |
202 | void helper_fildl_ST0(CPUX86State *env, int32_t val) |
203 | { |
204 | int new_fpstt; |
205 | |
206 | new_fpstt = (env->fpstt - 1) & 7; |
207 | env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); |
208 | env->fpstt = new_fpstt; |
209 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
210 | } |
211 | |
212 | void helper_fildll_ST0(CPUX86State *env, int64_t val) |
213 | { |
214 | int new_fpstt; |
215 | |
216 | new_fpstt = (env->fpstt - 1) & 7; |
217 | env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); |
218 | env->fpstt = new_fpstt; |
219 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
220 | } |
221 | |
222 | uint32_t helper_fsts_ST0(CPUX86State *env) |
223 | { |
224 | union { |
225 | float32 f; |
226 | uint32_t i; |
227 | } u; |
228 | |
229 | u.f = floatx80_to_float32(ST0, &env->fp_status); |
230 | return u.i; |
231 | } |
232 | |
233 | uint64_t helper_fstl_ST0(CPUX86State *env) |
234 | { |
235 | union { |
236 | float64 f; |
237 | uint64_t i; |
238 | } u; |
239 | |
240 | u.f = floatx80_to_float64(ST0, &env->fp_status); |
241 | return u.i; |
242 | } |
243 | |
244 | int32_t helper_fist_ST0(CPUX86State *env) |
245 | { |
246 | int32_t val; |
247 | |
248 | val = floatx80_to_int32(ST0, &env->fp_status); |
249 | if (val != (int16_t)val) { |
250 | val = -32768; |
251 | } |
252 | return val; |
253 | } |
254 | |
255 | int32_t helper_fistl_ST0(CPUX86State *env) |
256 | { |
257 | int32_t val; |
258 | signed char old_exp_flags; |
259 | |
260 | old_exp_flags = get_float_exception_flags(&env->fp_status); |
261 | set_float_exception_flags(0, &env->fp_status); |
262 | |
263 | val = floatx80_to_int32(ST0, &env->fp_status); |
264 | if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { |
265 | val = 0x80000000; |
266 | } |
267 | set_float_exception_flags(get_float_exception_flags(&env->fp_status) |
268 | | old_exp_flags, &env->fp_status); |
269 | return val; |
270 | } |
271 | |
272 | int64_t helper_fistll_ST0(CPUX86State *env) |
273 | { |
274 | int64_t val; |
275 | signed char old_exp_flags; |
276 | |
277 | old_exp_flags = get_float_exception_flags(&env->fp_status); |
278 | set_float_exception_flags(0, &env->fp_status); |
279 | |
280 | val = floatx80_to_int64(ST0, &env->fp_status); |
281 | if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { |
282 | val = 0x8000000000000000ULL; |
283 | } |
284 | set_float_exception_flags(get_float_exception_flags(&env->fp_status) |
285 | | old_exp_flags, &env->fp_status); |
286 | return val; |
287 | } |
288 | |
289 | int32_t helper_fistt_ST0(CPUX86State *env) |
290 | { |
291 | int32_t val; |
292 | |
293 | val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); |
294 | if (val != (int16_t)val) { |
295 | val = -32768; |
296 | } |
297 | return val; |
298 | } |
299 | |
300 | int32_t helper_fisttl_ST0(CPUX86State *env) |
301 | { |
302 | return floatx80_to_int32_round_to_zero(ST0, &env->fp_status); |
303 | } |
304 | |
305 | int64_t helper_fisttll_ST0(CPUX86State *env) |
306 | { |
307 | return floatx80_to_int64_round_to_zero(ST0, &env->fp_status); |
308 | } |
309 | |
310 | void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) |
311 | { |
312 | int new_fpstt; |
313 | |
314 | new_fpstt = (env->fpstt - 1) & 7; |
315 | env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC()); |
316 | env->fpstt = new_fpstt; |
317 | env->fptags[new_fpstt] = 0; /* validate stack entry */ |
318 | } |
319 | |
320 | void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) |
321 | { |
322 | helper_fstt(env, ST0, ptr, GETPC()); |
323 | } |
324 | |
325 | void helper_fpush(CPUX86State *env) |
326 | { |
327 | fpush(env); |
328 | } |
329 | |
330 | void helper_fpop(CPUX86State *env) |
331 | { |
332 | fpop(env); |
333 | } |
334 | |
335 | void helper_fdecstp(CPUX86State *env) |
336 | { |
337 | env->fpstt = (env->fpstt - 1) & 7; |
338 | env->fpus &= ~0x4700; |
339 | } |
340 | |
341 | void helper_fincstp(CPUX86State *env) |
342 | { |
343 | env->fpstt = (env->fpstt + 1) & 7; |
344 | env->fpus &= ~0x4700; |
345 | } |
346 | |
347 | /* FPU move */ |
348 | |
349 | void helper_ffree_STN(CPUX86State *env, int st_index) |
350 | { |
351 | env->fptags[(env->fpstt + st_index) & 7] = 1; |
352 | } |
353 | |
354 | void helper_fmov_ST0_FT0(CPUX86State *env) |
355 | { |
356 | ST0 = FT0; |
357 | } |
358 | |
359 | void helper_fmov_FT0_STN(CPUX86State *env, int st_index) |
360 | { |
361 | FT0 = ST(st_index); |
362 | } |
363 | |
364 | void helper_fmov_ST0_STN(CPUX86State *env, int st_index) |
365 | { |
366 | ST0 = ST(st_index); |
367 | } |
368 | |
369 | void helper_fmov_STN_ST0(CPUX86State *env, int st_index) |
370 | { |
371 | ST(st_index) = ST0; |
372 | } |
373 | |
374 | void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) |
375 | { |
376 | floatx80 tmp; |
377 | |
378 | tmp = ST(st_index); |
379 | ST(st_index) = ST0; |
380 | ST0 = tmp; |
381 | } |
382 | |
383 | /* FPU operations */ |
384 | |
385 | static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; |
386 | |
387 | void helper_fcom_ST0_FT0(CPUX86State *env) |
388 | { |
389 | int ret; |
390 | |
391 | ret = floatx80_compare(ST0, FT0, &env->fp_status); |
392 | env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; |
393 | } |
394 | |
395 | void helper_fucom_ST0_FT0(CPUX86State *env) |
396 | { |
397 | int ret; |
398 | |
399 | ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); |
400 | env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; |
401 | } |
402 | |
403 | static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; |
404 | |
405 | void helper_fcomi_ST0_FT0(CPUX86State *env) |
406 | { |
407 | int eflags; |
408 | int ret; |
409 | |
410 | ret = floatx80_compare(ST0, FT0, &env->fp_status); |
411 | eflags = cpu_cc_compute_all(env, CC_OP); |
412 | eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; |
413 | CC_SRC = eflags; |
414 | } |
415 | |
416 | void helper_fucomi_ST0_FT0(CPUX86State *env) |
417 | { |
418 | int eflags; |
419 | int ret; |
420 | |
421 | ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); |
422 | eflags = cpu_cc_compute_all(env, CC_OP); |
423 | eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; |
424 | CC_SRC = eflags; |
425 | } |
426 | |
427 | void helper_fadd_ST0_FT0(CPUX86State *env) |
428 | { |
429 | ST0 = floatx80_add(ST0, FT0, &env->fp_status); |
430 | } |
431 | |
432 | void helper_fmul_ST0_FT0(CPUX86State *env) |
433 | { |
434 | ST0 = floatx80_mul(ST0, FT0, &env->fp_status); |
435 | } |
436 | |
437 | void helper_fsub_ST0_FT0(CPUX86State *env) |
438 | { |
439 | ST0 = floatx80_sub(ST0, FT0, &env->fp_status); |
440 | } |
441 | |
442 | void helper_fsubr_ST0_FT0(CPUX86State *env) |
443 | { |
444 | ST0 = floatx80_sub(FT0, ST0, &env->fp_status); |
445 | } |
446 | |
447 | void helper_fdiv_ST0_FT0(CPUX86State *env) |
448 | { |
449 | ST0 = helper_fdiv(env, ST0, FT0); |
450 | } |
451 | |
452 | void helper_fdivr_ST0_FT0(CPUX86State *env) |
453 | { |
454 | ST0 = helper_fdiv(env, FT0, ST0); |
455 | } |
456 | |
457 | /* fp operations between STN and ST0 */ |
458 | |
459 | void helper_fadd_STN_ST0(CPUX86State *env, int st_index) |
460 | { |
461 | ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); |
462 | } |
463 | |
464 | void helper_fmul_STN_ST0(CPUX86State *env, int st_index) |
465 | { |
466 | ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); |
467 | } |
468 | |
469 | void helper_fsub_STN_ST0(CPUX86State *env, int st_index) |
470 | { |
471 | ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); |
472 | } |
473 | |
474 | void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) |
475 | { |
476 | ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); |
477 | } |
478 | |
479 | void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) |
480 | { |
481 | floatx80 *p; |
482 | |
483 | p = &ST(st_index); |
484 | *p = helper_fdiv(env, *p, ST0); |
485 | } |
486 | |
487 | void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) |
488 | { |
489 | floatx80 *p; |
490 | |
491 | p = &ST(st_index); |
492 | *p = helper_fdiv(env, ST0, *p); |
493 | } |
494 | |
495 | /* misc FPU operations */ |
496 | void helper_fchs_ST0(CPUX86State *env) |
497 | { |
498 | ST0 = floatx80_chs(ST0); |
499 | } |
500 | |
501 | void helper_fabs_ST0(CPUX86State *env) |
502 | { |
503 | ST0 = floatx80_abs(ST0); |
504 | } |
505 | |
506 | void helper_fld1_ST0(CPUX86State *env) |
507 | { |
508 | ST0 = floatx80_one; |
509 | } |
510 | |
511 | void helper_fldl2t_ST0(CPUX86State *env) |
512 | { |
513 | ST0 = floatx80_l2t; |
514 | } |
515 | |
516 | void helper_fldl2e_ST0(CPUX86State *env) |
517 | { |
518 | ST0 = floatx80_l2e; |
519 | } |
520 | |
521 | void helper_fldpi_ST0(CPUX86State *env) |
522 | { |
523 | ST0 = floatx80_pi; |
524 | } |
525 | |
526 | void helper_fldlg2_ST0(CPUX86State *env) |
527 | { |
528 | ST0 = floatx80_lg2; |
529 | } |
530 | |
531 | void helper_fldln2_ST0(CPUX86State *env) |
532 | { |
533 | ST0 = floatx80_ln2; |
534 | } |
535 | |
536 | void helper_fldz_ST0(CPUX86State *env) |
537 | { |
538 | ST0 = floatx80_zero; |
539 | } |
540 | |
541 | void helper_fldz_FT0(CPUX86State *env) |
542 | { |
543 | FT0 = floatx80_zero; |
544 | } |
545 | |
546 | uint32_t helper_fnstsw(CPUX86State *env) |
547 | { |
548 | return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; |
549 | } |
550 | |
551 | uint32_t helper_fnstcw(CPUX86State *env) |
552 | { |
553 | return env->fpuc; |
554 | } |
555 | |
556 | void update_fp_status(CPUX86State *env) |
557 | { |
558 | int rnd_type; |
559 | |
560 | /* set rounding mode */ |
561 | switch (env->fpuc & FPU_RC_MASK) { |
562 | default: |
563 | case FPU_RC_NEAR: |
564 | rnd_type = float_round_nearest_even; |
565 | break; |
566 | case FPU_RC_DOWN: |
567 | rnd_type = float_round_down; |
568 | break; |
569 | case FPU_RC_UP: |
570 | rnd_type = float_round_up; |
571 | break; |
572 | case FPU_RC_CHOP: |
573 | rnd_type = float_round_to_zero; |
574 | break; |
575 | } |
576 | set_float_rounding_mode(rnd_type, &env->fp_status); |
577 | switch ((env->fpuc >> 8) & 3) { |
578 | case 0: |
579 | rnd_type = 32; |
580 | break; |
581 | case 2: |
582 | rnd_type = 64; |
583 | break; |
584 | case 3: |
585 | default: |
586 | rnd_type = 80; |
587 | break; |
588 | } |
589 | set_floatx80_rounding_precision(rnd_type, &env->fp_status); |
590 | } |
591 | |
592 | void helper_fldcw(CPUX86State *env, uint32_t val) |
593 | { |
594 | cpu_set_fpuc(env, val); |
595 | } |
596 | |
597 | void helper_fclex(CPUX86State *env) |
598 | { |
599 | env->fpus &= 0x7f00; |
600 | } |
601 | |
602 | void helper_fwait(CPUX86State *env) |
603 | { |
604 | if (env->fpus & FPUS_SE) { |
605 | fpu_raise_exception(env, GETPC()); |
606 | } |
607 | } |
608 | |
609 | void helper_fninit(CPUX86State *env) |
610 | { |
611 | env->fpus = 0; |
612 | env->fpstt = 0; |
613 | cpu_set_fpuc(env, 0x37f); |
614 | env->fptags[0] = 1; |
615 | env->fptags[1] = 1; |
616 | env->fptags[2] = 1; |
617 | env->fptags[3] = 1; |
618 | env->fptags[4] = 1; |
619 | env->fptags[5] = 1; |
620 | env->fptags[6] = 1; |
621 | env->fptags[7] = 1; |
622 | } |
623 | |
624 | /* BCD ops */ |
625 | |
626 | void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) |
627 | { |
628 | floatx80 tmp; |
629 | uint64_t val; |
630 | unsigned int v; |
631 | int i; |
632 | |
633 | val = 0; |
634 | for (i = 8; i >= 0; i--) { |
635 | v = cpu_ldub_data_ra(env, ptr + i, GETPC()); |
636 | val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); |
637 | } |
638 | tmp = int64_to_floatx80(val, &env->fp_status); |
639 | if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { |
640 | tmp = floatx80_chs(tmp); |
641 | } |
642 | fpush(env); |
643 | ST0 = tmp; |
644 | } |
645 | |
646 | void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) |
647 | { |
648 | int v; |
649 | target_ulong mem_ref, mem_end; |
650 | int64_t val; |
651 | |
652 | val = floatx80_to_int64(ST0, &env->fp_status); |
653 | mem_ref = ptr; |
654 | mem_end = mem_ref + 9; |
655 | if (val < 0) { |
656 | cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); |
657 | val = -val; |
658 | } else { |
659 | cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); |
660 | } |
661 | while (mem_ref < mem_end) { |
662 | if (val == 0) { |
663 | break; |
664 | } |
665 | v = val % 100; |
666 | val = val / 100; |
667 | v = ((v / 10) << 4) | (v % 10); |
668 | cpu_stb_data_ra(env, mem_ref++, v, GETPC()); |
669 | } |
670 | while (mem_ref < mem_end) { |
671 | cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); |
672 | } |
673 | } |
674 | |
675 | void helper_f2xm1(CPUX86State *env) |
676 | { |
677 | double val = floatx80_to_double(env, ST0); |
678 | |
679 | val = pow(2.0, val) - 1.0; |
680 | ST0 = double_to_floatx80(env, val); |
681 | } |
682 | |
683 | void helper_fyl2x(CPUX86State *env) |
684 | { |
685 | double fptemp = floatx80_to_double(env, ST0); |
686 | |
687 | if (fptemp > 0.0) { |
688 | fptemp = log(fptemp) / log(2.0); /* log2(ST) */ |
689 | fptemp *= floatx80_to_double(env, ST1); |
690 | ST1 = double_to_floatx80(env, fptemp); |
691 | fpop(env); |
692 | } else { |
693 | env->fpus &= ~0x4700; |
694 | env->fpus |= 0x400; |
695 | } |
696 | } |
697 | |
698 | void helper_fptan(CPUX86State *env) |
699 | { |
700 | double fptemp = floatx80_to_double(env, ST0); |
701 | |
702 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { |
703 | env->fpus |= 0x400; |
704 | } else { |
705 | fptemp = tan(fptemp); |
706 | ST0 = double_to_floatx80(env, fptemp); |
707 | fpush(env); |
708 | ST0 = floatx80_one; |
709 | env->fpus &= ~0x400; /* C2 <-- 0 */ |
710 | /* the above code is for |arg| < 2**52 only */ |
711 | } |
712 | } |
713 | |
714 | void helper_fpatan(CPUX86State *env) |
715 | { |
716 | double fptemp, fpsrcop; |
717 | |
718 | fpsrcop = floatx80_to_double(env, ST1); |
719 | fptemp = floatx80_to_double(env, ST0); |
720 | ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp)); |
721 | fpop(env); |
722 | } |
723 | |
724 | void helper_fxtract(CPUX86State *env) |
725 | { |
726 | CPU_LDoubleU temp; |
727 | |
728 | temp.d = ST0; |
729 | |
730 | if (floatx80_is_zero(ST0)) { |
731 | /* Easy way to generate -inf and raising division by 0 exception */ |
732 | ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, |
733 | &env->fp_status); |
734 | fpush(env); |
735 | ST0 = temp.d; |
736 | } else { |
737 | int expdif; |
738 | |
739 | expdif = EXPD(temp) - EXPBIAS; |
740 | /* DP exponent bias */ |
741 | ST0 = int32_to_floatx80(expdif, &env->fp_status); |
742 | fpush(env); |
743 | BIASEXPONENT(temp); |
744 | ST0 = temp.d; |
745 | } |
746 | } |
747 | |
748 | void helper_fprem1(CPUX86State *env) |
749 | { |
750 | double st0, st1, dblq, fpsrcop, fptemp; |
751 | CPU_LDoubleU fpsrcop1, fptemp1; |
752 | int expdif; |
753 | signed long long int q; |
754 | |
755 | st0 = floatx80_to_double(env, ST0); |
756 | st1 = floatx80_to_double(env, ST1); |
757 | |
758 | if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { |
759 | ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */ |
760 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
761 | return; |
762 | } |
763 | |
764 | fpsrcop = st0; |
765 | fptemp = st1; |
766 | fpsrcop1.d = ST0; |
767 | fptemp1.d = ST1; |
768 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); |
769 | |
770 | if (expdif < 0) { |
771 | /* optimisation? taken from the AMD docs */ |
772 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
773 | /* ST0 is unchanged */ |
774 | return; |
775 | } |
776 | |
777 | if (expdif < 53) { |
778 | dblq = fpsrcop / fptemp; |
779 | /* round dblq towards nearest integer */ |
780 | dblq = rint(dblq); |
781 | st0 = fpsrcop - fptemp * dblq; |
782 | |
783 | /* convert dblq to q by truncating towards zero */ |
784 | if (dblq < 0.0) { |
785 | q = (signed long long int)(-dblq); |
786 | } else { |
787 | q = (signed long long int)dblq; |
788 | } |
789 | |
790 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
791 | /* (C0,C3,C1) <-- (q2,q1,q0) */ |
792 | env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ |
793 | env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ |
794 | env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ |
795 | } else { |
796 | env->fpus |= 0x400; /* C2 <-- 1 */ |
797 | fptemp = pow(2.0, expdif - 50); |
798 | fpsrcop = (st0 / st1) / fptemp; |
799 | /* fpsrcop = integer obtained by chopping */ |
800 | fpsrcop = (fpsrcop < 0.0) ? |
801 | -(floor(fabs(fpsrcop))) : floor(fpsrcop); |
802 | st0 -= (st1 * fpsrcop * fptemp); |
803 | } |
804 | ST0 = double_to_floatx80(env, st0); |
805 | } |
806 | |
807 | void helper_fprem(CPUX86State *env) |
808 | { |
809 | double st0, st1, dblq, fpsrcop, fptemp; |
810 | CPU_LDoubleU fpsrcop1, fptemp1; |
811 | int expdif; |
812 | signed long long int q; |
813 | |
814 | st0 = floatx80_to_double(env, ST0); |
815 | st1 = floatx80_to_double(env, ST1); |
816 | |
817 | if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { |
818 | ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */ |
819 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
820 | return; |
821 | } |
822 | |
823 | fpsrcop = st0; |
824 | fptemp = st1; |
825 | fpsrcop1.d = ST0; |
826 | fptemp1.d = ST1; |
827 | expdif = EXPD(fpsrcop1) - EXPD(fptemp1); |
828 | |
829 | if (expdif < 0) { |
830 | /* optimisation? taken from the AMD docs */ |
831 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
832 | /* ST0 is unchanged */ |
833 | return; |
834 | } |
835 | |
836 | if (expdif < 53) { |
837 | dblq = fpsrcop / fptemp; /* ST0 / ST1 */ |
838 | /* round dblq towards zero */ |
839 | dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq); |
840 | st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ |
841 | |
842 | /* convert dblq to q by truncating towards zero */ |
843 | if (dblq < 0.0) { |
844 | q = (signed long long int)(-dblq); |
845 | } else { |
846 | q = (signed long long int)dblq; |
847 | } |
848 | |
849 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
850 | /* (C0,C3,C1) <-- (q2,q1,q0) */ |
851 | env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ |
852 | env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ |
853 | env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ |
854 | } else { |
855 | int N = 32 + (expdif % 32); /* as per AMD docs */ |
856 | |
857 | env->fpus |= 0x400; /* C2 <-- 1 */ |
858 | fptemp = pow(2.0, (double)(expdif - N)); |
859 | fpsrcop = (st0 / st1) / fptemp; |
860 | /* fpsrcop = integer obtained by chopping */ |
861 | fpsrcop = (fpsrcop < 0.0) ? |
862 | -(floor(fabs(fpsrcop))) : floor(fpsrcop); |
863 | st0 -= (st1 * fpsrcop * fptemp); |
864 | } |
865 | ST0 = double_to_floatx80(env, st0); |
866 | } |
867 | |
868 | void helper_fyl2xp1(CPUX86State *env) |
869 | { |
870 | double fptemp = floatx80_to_double(env, ST0); |
871 | |
872 | if ((fptemp + 1.0) > 0.0) { |
873 | fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ |
874 | fptemp *= floatx80_to_double(env, ST1); |
875 | ST1 = double_to_floatx80(env, fptemp); |
876 | fpop(env); |
877 | } else { |
878 | env->fpus &= ~0x4700; |
879 | env->fpus |= 0x400; |
880 | } |
881 | } |
882 | |
883 | void helper_fsqrt(CPUX86State *env) |
884 | { |
885 | if (floatx80_is_neg(ST0)) { |
886 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
887 | env->fpus |= 0x400; |
888 | } |
889 | ST0 = floatx80_sqrt(ST0, &env->fp_status); |
890 | } |
891 | |
892 | void helper_fsincos(CPUX86State *env) |
893 | { |
894 | double fptemp = floatx80_to_double(env, ST0); |
895 | |
896 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { |
897 | env->fpus |= 0x400; |
898 | } else { |
899 | ST0 = double_to_floatx80(env, sin(fptemp)); |
900 | fpush(env); |
901 | ST0 = double_to_floatx80(env, cos(fptemp)); |
902 | env->fpus &= ~0x400; /* C2 <-- 0 */ |
903 | /* the above code is for |arg| < 2**63 only */ |
904 | } |
905 | } |
906 | |
907 | void helper_frndint(CPUX86State *env) |
908 | { |
909 | ST0 = floatx80_round_to_int(ST0, &env->fp_status); |
910 | } |
911 | |
912 | void helper_fscale(CPUX86State *env) |
913 | { |
914 | if (floatx80_is_any_nan(ST1)) { |
915 | ST0 = ST1; |
916 | } else { |
917 | int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); |
918 | ST0 = floatx80_scalbn(ST0, n, &env->fp_status); |
919 | } |
920 | } |
921 | |
922 | void helper_fsin(CPUX86State *env) |
923 | { |
924 | double fptemp = floatx80_to_double(env, ST0); |
925 | |
926 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { |
927 | env->fpus |= 0x400; |
928 | } else { |
929 | ST0 = double_to_floatx80(env, sin(fptemp)); |
930 | env->fpus &= ~0x400; /* C2 <-- 0 */ |
931 | /* the above code is for |arg| < 2**53 only */ |
932 | } |
933 | } |
934 | |
935 | void helper_fcos(CPUX86State *env) |
936 | { |
937 | double fptemp = floatx80_to_double(env, ST0); |
938 | |
939 | if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { |
940 | env->fpus |= 0x400; |
941 | } else { |
942 | ST0 = double_to_floatx80(env, cos(fptemp)); |
943 | env->fpus &= ~0x400; /* C2 <-- 0 */ |
944 | /* the above code is for |arg| < 2**63 only */ |
945 | } |
946 | } |
947 | |
948 | void helper_fxam_ST0(CPUX86State *env) |
949 | { |
950 | CPU_LDoubleU temp; |
951 | int expdif; |
952 | |
953 | temp.d = ST0; |
954 | |
955 | env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ |
956 | if (SIGND(temp)) { |
957 | env->fpus |= 0x200; /* C1 <-- 1 */ |
958 | } |
959 | |
960 | /* XXX: test fptags too */ |
961 | expdif = EXPD(temp); |
962 | if (expdif == MAXEXPD) { |
963 | if (MANTD(temp) == 0x8000000000000000ULL) { |
964 | env->fpus |= 0x500; /* Infinity */ |
965 | } else { |
966 | env->fpus |= 0x100; /* NaN */ |
967 | } |
968 | } else if (expdif == 0) { |
969 | if (MANTD(temp) == 0) { |
970 | env->fpus |= 0x4000; /* Zero */ |
971 | } else { |
972 | env->fpus |= 0x4400; /* Denormal */ |
973 | } |
974 | } else { |
975 | env->fpus |= 0x400; |
976 | } |
977 | } |
978 | |
979 | static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, |
980 | uintptr_t retaddr) |
981 | { |
982 | int fpus, fptag, exp, i; |
983 | uint64_t mant; |
984 | CPU_LDoubleU tmp; |
985 | |
986 | fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; |
987 | fptag = 0; |
988 | for (i = 7; i >= 0; i--) { |
989 | fptag <<= 2; |
990 | if (env->fptags[i]) { |
991 | fptag |= 3; |
992 | } else { |
993 | tmp.d = env->fpregs[i].d; |
994 | exp = EXPD(tmp); |
995 | mant = MANTD(tmp); |
996 | if (exp == 0 && mant == 0) { |
997 | /* zero */ |
998 | fptag |= 1; |
999 | } else if (exp == 0 || exp == MAXEXPD |
1000 | || (mant & (1LL << 63)) == 0) { |
1001 | /* NaNs, infinity, denormal */ |
1002 | fptag |= 2; |
1003 | } |
1004 | } |
1005 | } |
1006 | if (data32) { |
1007 | /* 32 bit */ |
1008 | cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); |
1009 | cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); |
1010 | cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); |
1011 | cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */ |
1012 | cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */ |
1013 | cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */ |
1014 | cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */ |
1015 | } else { |
1016 | /* 16 bit */ |
1017 | cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); |
1018 | cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); |
1019 | cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); |
1020 | cpu_stw_data_ra(env, ptr + 6, 0, retaddr); |
1021 | cpu_stw_data_ra(env, ptr + 8, 0, retaddr); |
1022 | cpu_stw_data_ra(env, ptr + 10, 0, retaddr); |
1023 | cpu_stw_data_ra(env, ptr + 12, 0, retaddr); |
1024 | } |
1025 | } |
1026 | |
1027 | void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) |
1028 | { |
1029 | do_fstenv(env, ptr, data32, GETPC()); |
1030 | } |
1031 | |
1032 | static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, |
1033 | uintptr_t retaddr) |
1034 | { |
1035 | int i, fpus, fptag; |
1036 | |
1037 | if (data32) { |
1038 | cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); |
1039 | fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); |
1040 | fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); |
1041 | } else { |
1042 | cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); |
1043 | fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); |
1044 | fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); |
1045 | } |
1046 | env->fpstt = (fpus >> 11) & 7; |
1047 | env->fpus = fpus & ~0x3800; |
1048 | for (i = 0; i < 8; i++) { |
1049 | env->fptags[i] = ((fptag & 3) == 3); |
1050 | fptag >>= 2; |
1051 | } |
1052 | } |
1053 | |
1054 | void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) |
1055 | { |
1056 | do_fldenv(env, ptr, data32, GETPC()); |
1057 | } |
1058 | |
1059 | void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) |
1060 | { |
1061 | floatx80 tmp; |
1062 | int i; |
1063 | |
1064 | do_fstenv(env, ptr, data32, GETPC()); |
1065 | |
1066 | ptr += (14 << data32); |
1067 | for (i = 0; i < 8; i++) { |
1068 | tmp = ST(i); |
1069 | helper_fstt(env, tmp, ptr, GETPC()); |
1070 | ptr += 10; |
1071 | } |
1072 | |
1073 | /* fninit */ |
1074 | env->fpus = 0; |
1075 | env->fpstt = 0; |
1076 | cpu_set_fpuc(env, 0x37f); |
1077 | env->fptags[0] = 1; |
1078 | env->fptags[1] = 1; |
1079 | env->fptags[2] = 1; |
1080 | env->fptags[3] = 1; |
1081 | env->fptags[4] = 1; |
1082 | env->fptags[5] = 1; |
1083 | env->fptags[6] = 1; |
1084 | env->fptags[7] = 1; |
1085 | } |
1086 | |
1087 | void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) |
1088 | { |
1089 | floatx80 tmp; |
1090 | int i; |
1091 | |
1092 | do_fldenv(env, ptr, data32, GETPC()); |
1093 | ptr += (14 << data32); |
1094 | |
1095 | for (i = 0; i < 8; i++) { |
1096 | tmp = helper_fldt(env, ptr, GETPC()); |
1097 | ST(i) = tmp; |
1098 | ptr += 10; |
1099 | } |
1100 | } |
1101 | |
#if defined(CONFIG_USER_ONLY)
/* User-mode entry point: forwards unchanged to helper_fsave(). */
void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_fsave(env,
                 ptr, data32);
}

/* User-mode entry point: forwards unchanged to helper_frstor(). */
void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32)
{
    helper_frstor(env,
                  ptr, data32);
}
#endif
1113 | |
1114 | #define XO(X) offsetof(X86XSaveArea, X) |
1115 | |
1116 | static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1117 | { |
1118 | int fpus, fptag, i; |
1119 | target_ulong addr; |
1120 | |
1121 | fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; |
1122 | fptag = 0; |
1123 | for (i = 0; i < 8; i++) { |
1124 | fptag |= (env->fptags[i] << i); |
1125 | } |
1126 | |
1127 | cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); |
1128 | cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); |
1129 | cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); |
1130 | |
1131 | /* In 32-bit mode this is eip, sel, dp, sel. |
1132 | In 64-bit mode this is rip, rdp. |
1133 | But in either case we don't write actual data, just zeros. */ |
1134 | cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ |
1135 | cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ |
1136 | |
1137 | addr = ptr + XO(legacy.fpregs); |
1138 | for (i = 0; i < 8; i++) { |
1139 | floatx80 tmp = ST(i); |
1140 | helper_fstt(env, tmp, addr, ra); |
1141 | addr += 16; |
1142 | } |
1143 | } |
1144 | |
1145 | static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1146 | { |
1147 | cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); |
1148 | cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); |
1149 | } |
1150 | |
1151 | static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1152 | { |
1153 | int i, nb_xmm_regs; |
1154 | target_ulong addr; |
1155 | |
1156 | if (env->hflags & HF_CS64_MASK) { |
1157 | nb_xmm_regs = 16; |
1158 | } else { |
1159 | nb_xmm_regs = 8; |
1160 | } |
1161 | |
1162 | addr = ptr + XO(legacy.xmm_regs); |
1163 | for (i = 0; i < nb_xmm_regs; i++) { |
1164 | cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); |
1165 | cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); |
1166 | addr += 16; |
1167 | } |
1168 | } |
1169 | |
1170 | static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1171 | { |
1172 | target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); |
1173 | int i; |
1174 | |
1175 | for (i = 0; i < 4; i++, addr += 16) { |
1176 | cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); |
1177 | cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); |
1178 | } |
1179 | } |
1180 | |
1181 | static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1182 | { |
1183 | cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), |
1184 | env->bndcs_regs.cfgu, ra); |
1185 | cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), |
1186 | env->bndcs_regs.sts, ra); |
1187 | } |
1188 | |
1189 | static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1190 | { |
1191 | cpu_stq_data_ra(env, ptr, env->pkru, ra); |
1192 | } |
1193 | |
1194 | void helper_fxsave(CPUX86State *env, target_ulong ptr) |
1195 | { |
1196 | uintptr_t ra = GETPC(); |
1197 | |
1198 | /* The operand must be 16 byte aligned */ |
1199 | if (ptr & 0xf) { |
1200 | raise_exception_ra(env, EXCP0D_GPF, ra); |
1201 | } |
1202 | |
1203 | do_xsave_fpu(env, ptr, ra); |
1204 | |
1205 | if (env->cr[4] & CR4_OSFXSR_MASK) { |
1206 | do_xsave_mxcsr(env, ptr, ra); |
1207 | /* Fast FXSAVE leaves out the XMM registers */ |
1208 | if (!(env->efer & MSR_EFER_FFXSR) |
1209 | || (env->hflags & HF_CPL_MASK) |
1210 | || !(env->hflags & HF_LMA_MASK)) { |
1211 | do_xsave_sse(env, ptr, ra); |
1212 | } |
1213 | } |
1214 | } |
1215 | |
1216 | static uint64_t get_xinuse(CPUX86State *env) |
1217 | { |
1218 | uint64_t inuse = -1; |
1219 | |
1220 | /* For the most part, we don't track XINUSE. We could calculate it |
1221 | here for all components, but it's probably less work to simply |
1222 | indicate in use. That said, the state of BNDREGS is important |
1223 | enough to track in HFLAGS, so we might as well use that here. */ |
1224 | if ((env->hflags & HF_MPX_IU_MASK) == 0) { |
1225 | inuse &= ~XSTATE_BNDREGS_MASK; |
1226 | } |
1227 | return inuse; |
1228 | } |
1229 | |
1230 | static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, |
1231 | uint64_t inuse, uint64_t opt, uintptr_t ra) |
1232 | { |
1233 | uint64_t old_bv, new_bv; |
1234 | |
1235 | /* The OS must have enabled XSAVE. */ |
1236 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { |
1237 | raise_exception_ra(env, EXCP06_ILLOP, ra); |
1238 | } |
1239 | |
1240 | /* The operand must be 64 byte aligned. */ |
1241 | if (ptr & 63) { |
1242 | raise_exception_ra(env, EXCP0D_GPF, ra); |
1243 | } |
1244 | |
1245 | /* Never save anything not enabled by XCR0. */ |
1246 | rfbm &= env->xcr0; |
1247 | opt &= rfbm; |
1248 | |
1249 | if (opt & XSTATE_FP_MASK) { |
1250 | do_xsave_fpu(env, ptr, ra); |
1251 | } |
1252 | if (rfbm & XSTATE_SSE_MASK) { |
1253 | /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ |
1254 | do_xsave_mxcsr(env, ptr, ra); |
1255 | } |
1256 | if (opt & XSTATE_SSE_MASK) { |
1257 | do_xsave_sse(env, ptr, ra); |
1258 | } |
1259 | if (opt & XSTATE_BNDREGS_MASK) { |
1260 | do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); |
1261 | } |
1262 | if (opt & XSTATE_BNDCSR_MASK) { |
1263 | do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); |
1264 | } |
1265 | if (opt & XSTATE_PKRU_MASK) { |
1266 | do_xsave_pkru(env, ptr + XO(pkru_state), ra); |
1267 | } |
1268 | |
1269 | /* Update the XSTATE_BV field. */ |
1270 | old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); |
1271 | new_bv = (old_bv & ~rfbm) | (inuse & rfbm); |
1272 | cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); |
1273 | } |
1274 | |
1275 | void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) |
1276 | { |
1277 | do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); |
1278 | } |
1279 | |
1280 | void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) |
1281 | { |
1282 | uint64_t inuse = get_xinuse(env); |
1283 | do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); |
1284 | } |
1285 | |
1286 | static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1287 | { |
1288 | int i, fpuc, fpus, fptag; |
1289 | target_ulong addr; |
1290 | |
1291 | fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); |
1292 | fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); |
1293 | fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); |
1294 | cpu_set_fpuc(env, fpuc); |
1295 | env->fpstt = (fpus >> 11) & 7; |
1296 | env->fpus = fpus & ~0x3800; |
1297 | fptag ^= 0xff; |
1298 | for (i = 0; i < 8; i++) { |
1299 | env->fptags[i] = ((fptag >> i) & 1); |
1300 | } |
1301 | |
1302 | addr = ptr + XO(legacy.fpregs); |
1303 | for (i = 0; i < 8; i++) { |
1304 | floatx80 tmp = helper_fldt(env, addr, ra); |
1305 | ST(i) = tmp; |
1306 | addr += 16; |
1307 | } |
1308 | } |
1309 | |
1310 | static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1311 | { |
1312 | cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); |
1313 | } |
1314 | |
1315 | static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1316 | { |
1317 | int i, nb_xmm_regs; |
1318 | target_ulong addr; |
1319 | |
1320 | if (env->hflags & HF_CS64_MASK) { |
1321 | nb_xmm_regs = 16; |
1322 | } else { |
1323 | nb_xmm_regs = 8; |
1324 | } |
1325 | |
1326 | addr = ptr + XO(legacy.xmm_regs); |
1327 | for (i = 0; i < nb_xmm_regs; i++) { |
1328 | env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); |
1329 | env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); |
1330 | addr += 16; |
1331 | } |
1332 | } |
1333 | |
1334 | static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1335 | { |
1336 | target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); |
1337 | int i; |
1338 | |
1339 | for (i = 0; i < 4; i++, addr += 16) { |
1340 | env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); |
1341 | env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); |
1342 | } |
1343 | } |
1344 | |
1345 | static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1346 | { |
1347 | /* FIXME: Extend highest implemented bit of linear address. */ |
1348 | env->bndcs_regs.cfgu |
1349 | = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); |
1350 | env->bndcs_regs.sts |
1351 | = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); |
1352 | } |
1353 | |
1354 | static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) |
1355 | { |
1356 | env->pkru = cpu_ldq_data_ra(env, ptr, ra); |
1357 | } |
1358 | |
1359 | void helper_fxrstor(CPUX86State *env, target_ulong ptr) |
1360 | { |
1361 | uintptr_t ra = GETPC(); |
1362 | |
1363 | /* The operand must be 16 byte aligned */ |
1364 | if (ptr & 0xf) { |
1365 | raise_exception_ra(env, EXCP0D_GPF, ra); |
1366 | } |
1367 | |
1368 | do_xrstor_fpu(env, ptr, ra); |
1369 | |
1370 | if (env->cr[4] & CR4_OSFXSR_MASK) { |
1371 | do_xrstor_mxcsr(env, ptr, ra); |
1372 | /* Fast FXRSTOR leaves out the XMM registers */ |
1373 | if (!(env->efer & MSR_EFER_FFXSR) |
1374 | || (env->hflags & HF_CPL_MASK) |
1375 | || !(env->hflags & HF_LMA_MASK)) { |
1376 | do_xrstor_sse(env, ptr, ra); |
1377 | } |
1378 | } |
1379 | } |
1380 | |
#ifdef CONFIG_USER_ONLY
/*
 * Only built when CONFIG_USER_ONLY is defined: plain wrappers that forward
 * their arguments unchanged to helper_fxsave() and helper_fxrstor().
 */
void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr)
{
    helper_fxsave(env, ptr);
}

void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr)
{
    helper_fxrstor(env, ptr);
}
#endif /* CONFIG_USER_ONLY */
1392 | |
1393 | void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) |
1394 | { |
1395 | uintptr_t ra = GETPC(); |
1396 | uint64_t xstate_bv, xcomp_bv, reserve0; |
1397 | |
1398 | rfbm &= env->xcr0; |
1399 | |
1400 | /* The OS must have enabled XSAVE. */ |
1401 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { |
1402 | raise_exception_ra(env, EXCP06_ILLOP, ra); |
1403 | } |
1404 | |
1405 | /* The operand must be 64 byte aligned. */ |
1406 | if (ptr & 63) { |
1407 | raise_exception_ra(env, EXCP0D_GPF, ra); |
1408 | } |
1409 | |
1410 | xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); |
1411 | |
1412 | if ((int64_t)xstate_bv < 0) { |
1413 | /* FIXME: Compact form. */ |
1414 | raise_exception_ra(env, EXCP0D_GPF, ra); |
1415 | } |
1416 | |
1417 | /* Standard form. */ |
1418 | |
1419 | /* The XSTATE_BV field must not set bits not present in XCR0. */ |
1420 | if (xstate_bv & ~env->xcr0) { |
1421 | raise_exception_ra(env, EXCP0D_GPF, ra); |
1422 | } |
1423 | |
1424 | /* The XCOMP_BV field must be zero. Note that, as of the April 2016 |
1425 | revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) |
1426 | describes only XCOMP_BV, but the description of the standard form |
1427 | of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which |
1428 | includes the next 64-bit field. */ |
1429 | xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); |
1430 | reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); |
1431 | if (xcomp_bv || reserve0) { |
1432 | raise_exception_ra(env, EXCP0D_GPF, ra); |
1433 | } |
1434 | |
1435 | if (rfbm & XSTATE_FP_MASK) { |
1436 | if (xstate_bv & XSTATE_FP_MASK) { |
1437 | do_xrstor_fpu(env, ptr, ra); |
1438 | } else { |
1439 | helper_fninit(env); |
1440 | memset(env->fpregs, 0, sizeof(env->fpregs)); |
1441 | } |
1442 | } |
1443 | if (rfbm & XSTATE_SSE_MASK) { |
1444 | /* Note that the standard form of XRSTOR loads MXCSR from memory |
1445 | whether or not the XSTATE_BV bit is set. */ |
1446 | do_xrstor_mxcsr(env, ptr, ra); |
1447 | if (xstate_bv & XSTATE_SSE_MASK) { |
1448 | do_xrstor_sse(env, ptr, ra); |
1449 | } else { |
1450 | /* ??? When AVX is implemented, we may have to be more |
1451 | selective in the clearing. */ |
1452 | memset(env->xmm_regs, 0, sizeof(env->xmm_regs)); |
1453 | } |
1454 | } |
1455 | if (rfbm & XSTATE_BNDREGS_MASK) { |
1456 | if (xstate_bv & XSTATE_BNDREGS_MASK) { |
1457 | do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); |
1458 | env->hflags |= HF_MPX_IU_MASK; |
1459 | } else { |
1460 | memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); |
1461 | env->hflags &= ~HF_MPX_IU_MASK; |
1462 | } |
1463 | } |
1464 | if (rfbm & XSTATE_BNDCSR_MASK) { |
1465 | if (xstate_bv & XSTATE_BNDCSR_MASK) { |
1466 | do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); |
1467 | } else { |
1468 | memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); |
1469 | } |
1470 | cpu_sync_bndcs_hflags(env); |
1471 | } |
1472 | if (rfbm & XSTATE_PKRU_MASK) { |
1473 | uint64_t old_pkru = env->pkru; |
1474 | if (xstate_bv & XSTATE_PKRU_MASK) { |
1475 | do_xrstor_pkru(env, ptr + XO(pkru_state), ra); |
1476 | } else { |
1477 | env->pkru = 0; |
1478 | } |
1479 | if (env->pkru != old_pkru) { |
1480 | CPUState *cs = env_cpu(env); |
1481 | tlb_flush(cs); |
1482 | } |
1483 | } |
1484 | } |
1485 | |
1486 | #undef XO |
1487 | |
1488 | uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) |
1489 | { |
1490 | /* The OS must have enabled XSAVE. */ |
1491 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { |
1492 | raise_exception_ra(env, EXCP06_ILLOP, GETPC()); |
1493 | } |
1494 | |
1495 | switch (ecx) { |
1496 | case 0: |
1497 | return env->xcr0; |
1498 | case 1: |
1499 | if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { |
1500 | return env->xcr0 & get_xinuse(env); |
1501 | } |
1502 | break; |
1503 | } |
1504 | raise_exception_ra(env, EXCP0D_GPF, GETPC()); |
1505 | } |
1506 | |
1507 | void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) |
1508 | { |
1509 | uint32_t dummy, ena_lo, ena_hi; |
1510 | uint64_t ena; |
1511 | |
1512 | /* The OS must have enabled XSAVE. */ |
1513 | if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { |
1514 | raise_exception_ra(env, EXCP06_ILLOP, GETPC()); |
1515 | } |
1516 | |
1517 | /* Only XCR0 is defined at present; the FPU may not be disabled. */ |
1518 | if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { |
1519 | goto do_gpf; |
1520 | } |
1521 | |
1522 | /* Disallow enabling unimplemented features. */ |
1523 | cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); |
1524 | ena = ((uint64_t)ena_hi << 32) | ena_lo; |
1525 | if (mask & ~ena) { |
1526 | goto do_gpf; |
1527 | } |
1528 | |
1529 | /* Disallow enabling only half of MPX. */ |
1530 | if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) |
1531 | & XSTATE_BNDCSR_MASK) { |
1532 | goto do_gpf; |
1533 | } |
1534 | |
1535 | env->xcr0 = mask; |
1536 | cpu_sync_bndcs_hflags(env); |
1537 | return; |
1538 | |
1539 | do_gpf: |
1540 | raise_exception_ra(env, EXCP0D_GPF, GETPC()); |
1541 | } |
1542 | |
1543 | /* MMX/SSE */ |
/* XXX: optimize by storing fpstt and fptags in the static cpu state */
1545 | |
1546 | #define SSE_DAZ 0x0040 |
1547 | #define SSE_RC_MASK 0x6000 |
1548 | #define SSE_RC_NEAR 0x0000 |
1549 | #define SSE_RC_DOWN 0x2000 |
1550 | #define SSE_RC_UP 0x4000 |
1551 | #define SSE_RC_CHOP 0x6000 |
1552 | #define SSE_FZ 0x8000 |
1553 | |
1554 | void update_mxcsr_status(CPUX86State *env) |
1555 | { |
1556 | uint32_t mxcsr = env->mxcsr; |
1557 | int rnd_type; |
1558 | |
1559 | /* set rounding mode */ |
1560 | switch (mxcsr & SSE_RC_MASK) { |
1561 | default: |
1562 | case SSE_RC_NEAR: |
1563 | rnd_type = float_round_nearest_even; |
1564 | break; |
1565 | case SSE_RC_DOWN: |
1566 | rnd_type = float_round_down; |
1567 | break; |
1568 | case SSE_RC_UP: |
1569 | rnd_type = float_round_up; |
1570 | break; |
1571 | case SSE_RC_CHOP: |
1572 | rnd_type = float_round_to_zero; |
1573 | break; |
1574 | } |
1575 | set_float_rounding_mode(rnd_type, &env->sse_status); |
1576 | |
1577 | /* set denormals are zero */ |
1578 | set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); |
1579 | |
1580 | /* set flush to zero */ |
1581 | set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->fp_status); |
1582 | } |
1583 | |
1584 | void helper_ldmxcsr(CPUX86State *env, uint32_t val) |
1585 | { |
1586 | cpu_set_mxcsr(env, val); |
1587 | } |
1588 | |
1589 | void helper_enter_mmx(CPUX86State *env) |
1590 | { |
1591 | env->fpstt = 0; |
1592 | *(uint32_t *)(env->fptags) = 0; |
1593 | *(uint32_t *)(env->fptags + 4) = 0; |
1594 | } |
1595 | |
1596 | void helper_emms(CPUX86State *env) |
1597 | { |
1598 | /* set to empty state */ |
1599 | *(uint32_t *)(env->fptags) = 0x01010101; |
1600 | *(uint32_t *)(env->fptags + 4) = 0x01010101; |
1601 | } |
1602 | |
1603 | /* XXX: suppress */ |
1604 | void helper_movq(CPUX86State *env, void *d, void *s) |
1605 | { |
1606 | *(uint64_t *)d = *(uint64_t *)s; |
1607 | } |
1608 | |
1609 | #define SHIFT 0 |
1610 | #include "ops_sse.h" |
1611 | |
1612 | #define SHIFT 1 |
1613 | #include "ops_sse.h" |
1614 | |