1 | /* |
2 | * Ported from a work by Andreas Grabher for Previous, NeXT Computer Emulator, |
3 | * derived from NetBSD M68040 FPSP functions, |
4 | * derived from release 2a of the SoftFloat IEC/IEEE Floating-point Arithmetic |
5 | * Package. Those parts of the code (and some later contributions) are |
6 | * provided under that license, as detailed below. |
7 | * It has subsequently been modified by contributors to the QEMU Project, |
8 | * so some portions are provided under: |
9 | * the SoftFloat-2a license |
10 | * the BSD license |
11 | * GPL-v2-or-later |
12 | * |
13 | * Any future contributions to this file will be taken to be licensed under |
14 | * the Softfloat-2a license unless specifically indicated otherwise. |
15 | */ |
16 | |
17 | /* |
18 | * Portions of this work are licensed under the terms of the GNU GPL, |
19 | * version 2 or later. See the COPYING file in the top-level directory. |
20 | */ |
21 | |
22 | #include "qemu/osdep.h" |
23 | #include "softfloat.h" |
24 | #include "fpu/softfloat-macros.h" |
25 | #include "softfloat_fpsp_tables.h" |
26 | |
27 | #define pi_exp 0x4000 |
28 | #define piby2_exp 0x3FFF |
29 | #define pi_sig UINT64_C(0xc90fdaa22168c235) |
30 | |
31 | static floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) |
32 | { |
33 | if (floatx80_is_signaling_nan(a, status)) { |
34 | float_raise(float_flag_invalid, status); |
35 | a = floatx80_silence_nan(a, status); |
36 | } |
37 | |
38 | if (status->default_nan_mode) { |
39 | return floatx80_default_nan(status); |
40 | } |
41 | |
42 | return a; |
43 | } |
44 | |
45 | /* |
46 | * Returns the modulo remainder of the extended double-precision floating-point |
47 | * value `a' with respect to the corresponding value `b'. |
48 | */ |
49 | |
floatx80 floatx80_mod(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, zSign;
    int32_t aExp, bExp, expDiff;
    uint64_t aSig0, aSig1, bSig;
    uint64_t qTemp, term0, term1;

    aSig0 = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);

    if (aExp == 0x7FFF) {
        /* a is NaN or infinity: NaNs propagate, inf mod anything is invalid */
        if ((uint64_t) (aSig0 << 1)
            || ((bExp == 0x7FFF) && (uint64_t) (bSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        goto invalid;
    }
    if (bExp == 0x7FFF) {
        if ((uint64_t) (bSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        return a; /* finite mod infinity is the dividend itself */
    }
    if (bExp == 0) {
        if (bSig == 0) {
            /* a mod 0 is invalid */
        invalid:
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
    }
    if (aExp == 0) {
        if ((uint64_t) (aSig0 << 1) == 0) {
            return a; /* +/-0 mod b is +/-0 */
        }
        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
    }
    bSig |= UINT64_C(0x8000000000000000); /* make the integer bit explicit */
    /* the remainder carries the sign of the dividend */
    zSign = aSign;
    expDiff = aExp - bExp;
    aSig1 = 0;
    if (expDiff < 0) {
        return a; /* |a| < |b|: the remainder is a unchanged */
    }
    /* first 1-bit quotient step so that aSig0 < bSig afterwards */
    qTemp = (bSig <= aSig0);
    if (qTemp) {
        aSig0 -= bSig;
    }
    expDiff -= 64;
    /* long division, retiring 62 quotient bits per iteration */
    while (0 < expDiff) {
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        /* under-estimate the quotient so the partial remainder stays >= 0 */
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        mul64To128(bSig, qTemp, &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(aSig0, aSig1, 62, &aSig0, &aSig1);
        expDiff -= 62;
    }
    expDiff += 64;
    if (0 < expDiff) {
        /* final partial step: produce the remaining expDiff quotient bits */
        qTemp = estimateDiv128To64(aSig0, aSig1, bSig);
        qTemp = (2 < qTemp) ? qTemp - 2 : 0;
        qTemp >>= 64 - expDiff;
        mul64To128(bSig, qTemp << (64 - expDiff), &term0, &term1);
        sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        shortShift128Left(0, bSig, 64 - expDiff, &term0, &term1);
        /* correct the deliberate under-estimate above */
        while (le128(term0, term1, aSig0, aSig1)) {
            ++qTemp;
            sub128(aSig0, aSig1, term0, term1, &aSig0, &aSig1);
        }
    }
    /* remainder is exact, hence full 80-bit precision regardless of mode */
    return
        normalizeRoundAndPackFloatx80(
            80, zSign, bExp + expDiff, aSig0, aSig1, status);
}
127 | |
128 | /* |
129 | * Returns the mantissa of the extended double-precision floating-point |
130 | * value `a'. |
131 | */ |
132 | |
133 | floatx80 floatx80_getman(floatx80 a, float_status *status) |
134 | { |
135 | flag aSign; |
136 | int32_t aExp; |
137 | uint64_t aSig; |
138 | |
139 | aSig = extractFloatx80Frac(a); |
140 | aExp = extractFloatx80Exp(a); |
141 | aSign = extractFloatx80Sign(a); |
142 | |
143 | if (aExp == 0x7FFF) { |
144 | if ((uint64_t) (aSig << 1)) { |
145 | return propagateFloatx80NaNOneArg(a , status); |
146 | } |
147 | float_raise(float_flag_invalid , status); |
148 | return floatx80_default_nan(status); |
149 | } |
150 | |
151 | if (aExp == 0) { |
152 | if (aSig == 0) { |
153 | return packFloatx80(aSign, 0, 0); |
154 | } |
155 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); |
156 | } |
157 | |
158 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, |
159 | 0x3FFF, aSig, 0, status); |
160 | } |
161 | |
162 | /* |
163 | * Returns the exponent of the extended double-precision floating-point |
164 | * value `a' as an extended double-precision value. |
165 | */ |
166 | |
167 | floatx80 floatx80_getexp(floatx80 a, float_status *status) |
168 | { |
169 | flag aSign; |
170 | int32_t aExp; |
171 | uint64_t aSig; |
172 | |
173 | aSig = extractFloatx80Frac(a); |
174 | aExp = extractFloatx80Exp(a); |
175 | aSign = extractFloatx80Sign(a); |
176 | |
177 | if (aExp == 0x7FFF) { |
178 | if ((uint64_t) (aSig << 1)) { |
179 | return propagateFloatx80NaNOneArg(a , status); |
180 | } |
181 | float_raise(float_flag_invalid , status); |
182 | return floatx80_default_nan(status); |
183 | } |
184 | |
185 | if (aExp == 0) { |
186 | if (aSig == 0) { |
187 | return packFloatx80(aSign, 0, 0); |
188 | } |
189 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); |
190 | } |
191 | |
192 | return int32_to_floatx80(aExp - 0x3FFF, status); |
193 | } |
194 | |
195 | /* |
196 | * Scales extended double-precision floating-point value in operand `a' by |
197 | * value `b'. The function truncates the value in the second operand 'b' to |
198 | * an integral value and adds that value to the exponent of the operand 'a'. |
199 | * The operation performed according to the IEC/IEEE Standard for Binary |
200 | * Floating-Point Arithmetic. |
201 | */ |
202 | |
floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
{
    flag aSign, bSign;
    int32_t aExp, bExp, shiftCount;
    uint64_t aSig, bSig;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);
    bSig = extractFloatx80Frac(b);
    bExp = extractFloatx80Exp(b);
    bSign = extractFloatx80Sign(b);

    if (bExp == 0x7FFF) {
        /* b is NaN or inf: NaNs propagate, scaling by infinity is invalid */
        if ((uint64_t) (bSig << 1) ||
            ((aExp == 0x7FFF) && (uint64_t) (aSig << 1))) {
            return propagateFloatx80NaN(a, b, status);
        }
        float_raise(float_flag_invalid , status);
        return floatx80_default_nan(status);
    }
    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaN(a, b, status);
        }
        /* scaling +/-inf by a finite value leaves it unchanged */
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }
    if (aExp == 0) {
        if (aSig == 0) {
            return packFloatx80(aSign, 0, 0); /* signed zero is unchanged */
        }
        /* bExp < 0x3FFF means |b| < 1, which truncates to 0: no scaling */
        if (bExp < 0x3FFF) {
            return a;
        }
        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
    }

    if (bExp < 0x3FFF) {
        /* trunc(b) == 0: a is returned unchanged */
        return a;
    }

    if (0x400F < bExp) {
        /*
         * |b| >= 2^17: trunc(b) exceeds any representable exponent range,
         * so force a guaranteed-overflow (0xE000) or guaranteed-underflow
         * (-0x6001) exponent and let the rounding code raise the
         * appropriate exceptions and deliver inf/zero.
         */
        aExp = bSign ? -0x6001 : 0xE000;
        return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                    aSign, aExp, aSig, 0, status);
    }

    /* convert b's significand into the integer trunc(|b|) (up to 17 bits) */
    shiftCount = 0x403E - bExp;
    bSig >>= shiftCount;
    aExp = bSign ? (aExp - bSig) : (aExp + bSig);

    return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                aSign, aExp, aSig, 0, status);
}
258 | |
259 | floatx80 floatx80_move(floatx80 a, float_status *status) |
260 | { |
261 | flag aSign; |
262 | int32_t aExp; |
263 | uint64_t aSig; |
264 | |
265 | aSig = extractFloatx80Frac(a); |
266 | aExp = extractFloatx80Exp(a); |
267 | aSign = extractFloatx80Sign(a); |
268 | |
269 | if (aExp == 0x7FFF) { |
270 | if ((uint64_t)(aSig << 1)) { |
271 | return propagateFloatx80NaNOneArg(a, status); |
272 | } |
273 | return a; |
274 | } |
275 | if (aExp == 0) { |
276 | if (aSig == 0) { |
277 | return a; |
278 | } |
279 | normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, |
280 | aSign, aExp, aSig, 0, status); |
281 | } |
282 | return roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, |
283 | aExp, aSig, 0, status); |
284 | } |
285 | |
286 | /* |
287 | * Algorithms for transcendental functions supported by MC68881 and MC68882 |
288 | * mathematical coprocessors. The functions are derived from FPSP library. |
289 | */ |
290 | |
291 | #define one_exp 0x3FFF |
292 | #define one_sig UINT64_C(0x8000000000000000) |
293 | |
294 | /* |
295 | * Function for compactifying extended double-precision floating point values. |
296 | */ |
297 | |
static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig)
{
    /*
     * Pack the exponent and the top 16 fraction bits into a single
     * 32-bit value, so that range tests on a floatx80 reduce to plain
     * integer comparisons.
     */
    uint32_t fracTop = (uint32_t)(aSig >> 48);

    return (int32_t)(((uint32_t)aExp << 16) | fracTop);
}
302 | |
303 | /* |
304 | * Log base e of x plus 1 |
305 | */ |
306 | |
307 | floatx80 floatx80_lognp1(floatx80 a, float_status *status) |
308 | { |
309 | flag aSign; |
310 | int32_t aExp; |
311 | uint64_t aSig, fSig; |
312 | |
313 | int8_t user_rnd_mode, user_rnd_prec; |
314 | |
315 | int32_t compact, j, k; |
316 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; |
317 | |
318 | aSig = extractFloatx80Frac(a); |
319 | aExp = extractFloatx80Exp(a); |
320 | aSign = extractFloatx80Sign(a); |
321 | |
322 | if (aExp == 0x7FFF) { |
323 | if ((uint64_t) (aSig << 1)) { |
324 | propagateFloatx80NaNOneArg(a, status); |
325 | } |
326 | if (aSign) { |
327 | float_raise(float_flag_invalid, status); |
328 | return floatx80_default_nan(status); |
329 | } |
330 | return packFloatx80(0, floatx80_infinity.high, floatx80_infinity.low); |
331 | } |
332 | |
333 | if (aExp == 0 && aSig == 0) { |
334 | return packFloatx80(aSign, 0, 0); |
335 | } |
336 | |
337 | if (aSign && aExp >= one_exp) { |
338 | if (aExp == one_exp && aSig == one_sig) { |
339 | float_raise(float_flag_divbyzero, status); |
340 | return packFloatx80(aSign, floatx80_infinity.high, |
341 | floatx80_infinity.low); |
342 | } |
343 | float_raise(float_flag_invalid, status); |
344 | return floatx80_default_nan(status); |
345 | } |
346 | |
347 | if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { |
348 | /* <= min threshold */ |
349 | float_raise(float_flag_inexact, status); |
350 | return floatx80_move(a, status); |
351 | } |
352 | |
353 | user_rnd_mode = status->float_rounding_mode; |
354 | user_rnd_prec = status->floatx80_rounding_precision; |
355 | status->float_rounding_mode = float_round_nearest_even; |
356 | status->floatx80_rounding_precision = 80; |
357 | |
358 | compact = floatx80_make_compact(aExp, aSig); |
359 | |
360 | fp0 = a; /* Z */ |
361 | fp1 = a; |
362 | |
363 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), |
364 | status), status); /* X = (1+Z) */ |
365 | |
366 | aExp = extractFloatx80Exp(fp0); |
367 | aSig = extractFloatx80Frac(fp0); |
368 | |
369 | compact = floatx80_make_compact(aExp, aSig); |
370 | |
371 | if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { |
372 | /* |X| < 1/2 or |X| > 3/2 */ |
373 | k = aExp - 0x3FFF; |
374 | fp1 = int32_to_floatx80(k, status); |
375 | |
376 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
377 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ |
378 | |
379 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ |
380 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ |
381 | |
382 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ |
383 | |
384 | lp1cont1: |
385 | /* LP1CONT1 */ |
386 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ |
387 | logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); |
388 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ |
389 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ |
390 | |
391 | fp3 = fp2; |
392 | fp1 = fp2; |
393 | |
394 | fp1 = floatx80_mul(fp1, float64_to_floatx80( |
395 | make_float64(0x3FC2499AB5E4040B), status), |
396 | status); /* V*A6 */ |
397 | fp2 = floatx80_mul(fp2, float64_to_floatx80( |
398 | make_float64(0xBFC555B5848CB7DB), status), |
399 | status); /* V*A5 */ |
400 | fp1 = floatx80_add(fp1, float64_to_floatx80( |
401 | make_float64(0x3FC99999987D8730), status), |
402 | status); /* A4+V*A6 */ |
403 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
404 | make_float64(0xBFCFFFFFFF6F7E97), status), |
405 | status); /* A3+V*A5 */ |
406 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ |
407 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ |
408 | fp1 = floatx80_add(fp1, float64_to_floatx80( |
409 | make_float64(0x3FD55555555555A4), status), |
410 | status); /* A2+V*(A4+V*A6) */ |
411 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
412 | make_float64(0xBFE0000000000008), status), |
413 | status); /* A1+V*(A3+V*A5) */ |
414 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ |
415 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ |
416 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ |
417 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ |
418 | |
419 | fp1 = floatx80_add(fp1, log_tbl[j + 1], |
420 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ |
421 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ |
422 | |
423 | status->float_rounding_mode = user_rnd_mode; |
424 | status->floatx80_rounding_precision = user_rnd_prec; |
425 | |
426 | a = floatx80_add(fp0, klog2, status); |
427 | |
428 | float_raise(float_flag_inexact, status); |
429 | |
430 | return a; |
431 | } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { |
432 | /* |X| < 1/16 or |X| > -1/16 */ |
433 | /* LP1CARE */ |
434 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
435 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ |
436 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ |
437 | |
438 | if (compact >= 0x3FFF8000) { /* 1+Z >= 1 */ |
439 | /* KISZERO */ |
440 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x3F800000), |
441 | status), f, status); /* 1-F */ |
442 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (1-F)+Z */ |
443 | fp1 = packFloatx80(0, 0, 0); /* K = 0 */ |
444 | } else { |
445 | /* KISNEG */ |
446 | fp0 = floatx80_sub(float32_to_floatx80(make_float32(0x40000000), |
447 | status), f, status); /* 2-F */ |
448 | fp1 = floatx80_add(fp1, fp1, status); /* 2Z */ |
449 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS Y-F = (2-F)+2Z */ |
450 | fp1 = packFloatx80(1, one_exp, one_sig); /* K = -1 */ |
451 | } |
452 | goto lp1cont1; |
453 | } else { |
454 | /* LP1ONE16 */ |
455 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2Z */ |
456 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), |
457 | status), status); /* FP0 IS 1+X */ |
458 | |
459 | /* LP1CONT2 */ |
460 | fp1 = floatx80_div(fp1, fp0, status); /* U */ |
461 | saveu = fp1; |
462 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ |
463 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ |
464 | |
465 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), |
466 | status); /* B5 */ |
467 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), |
468 | status); /* B4 */ |
469 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ |
470 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ |
471 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
472 | make_float64(0x3F624924928BCCFF), status), |
473 | status); /* B3+W*B5 */ |
474 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
475 | make_float64(0x3F899999999995EC), status), |
476 | status); /* B2+W*B4 */ |
477 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ |
478 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ |
479 | fp1 = floatx80_add(fp1, float64_to_floatx80( |
480 | make_float64(0x3FB5555555555555), status), |
481 | status); /* B1+W*(B3+W*B5) */ |
482 | |
483 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ |
484 | fp1 = floatx80_add(fp1, fp2, |
485 | status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ |
486 | fp0 = floatx80_mul(fp0, fp1, |
487 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ |
488 | |
489 | status->float_rounding_mode = user_rnd_mode; |
490 | status->floatx80_rounding_precision = user_rnd_prec; |
491 | |
492 | a = floatx80_add(fp0, saveu, status); |
493 | |
494 | /*if (!floatx80_is_zero(a)) { */ |
495 | float_raise(float_flag_inexact, status); |
496 | /*} */ |
497 | |
498 | return a; |
499 | } |
500 | } |
501 | |
502 | /* |
503 | * Log base e |
504 | */ |
505 | |
506 | floatx80 floatx80_logn(floatx80 a, float_status *status) |
507 | { |
508 | flag aSign; |
509 | int32_t aExp; |
510 | uint64_t aSig, fSig; |
511 | |
512 | int8_t user_rnd_mode, user_rnd_prec; |
513 | |
514 | int32_t compact, j, k, adjk; |
515 | floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; |
516 | |
517 | aSig = extractFloatx80Frac(a); |
518 | aExp = extractFloatx80Exp(a); |
519 | aSign = extractFloatx80Sign(a); |
520 | |
521 | if (aExp == 0x7FFF) { |
522 | if ((uint64_t) (aSig << 1)) { |
523 | propagateFloatx80NaNOneArg(a, status); |
524 | } |
525 | if (aSign == 0) { |
526 | return packFloatx80(0, floatx80_infinity.high, |
527 | floatx80_infinity.low); |
528 | } |
529 | } |
530 | |
531 | adjk = 0; |
532 | |
533 | if (aExp == 0) { |
534 | if (aSig == 0) { /* zero */ |
535 | float_raise(float_flag_divbyzero, status); |
536 | return packFloatx80(1, floatx80_infinity.high, |
537 | floatx80_infinity.low); |
538 | } |
539 | if ((aSig & one_sig) == 0) { /* denormal */ |
540 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); |
541 | adjk = -100; |
542 | aExp += 100; |
543 | a = packFloatx80(aSign, aExp, aSig); |
544 | } |
545 | } |
546 | |
547 | if (aSign) { |
548 | float_raise(float_flag_invalid, status); |
549 | return floatx80_default_nan(status); |
550 | } |
551 | |
552 | user_rnd_mode = status->float_rounding_mode; |
553 | user_rnd_prec = status->floatx80_rounding_precision; |
554 | status->float_rounding_mode = float_round_nearest_even; |
555 | status->floatx80_rounding_precision = 80; |
556 | |
557 | compact = floatx80_make_compact(aExp, aSig); |
558 | |
559 | if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { |
560 | /* |X| < 15/16 or |X| > 17/16 */ |
561 | k = aExp - 0x3FFF; |
562 | k += adjk; |
563 | fp1 = int32_to_floatx80(k, status); |
564 | |
565 | fSig = (aSig & UINT64_C(0xFE00000000000000)) | UINT64_C(0x0100000000000000); |
566 | j = (fSig >> 56) & 0x7E; /* DISPLACEMENT FOR 1/F */ |
567 | |
568 | f = packFloatx80(0, 0x3FFF, fSig); /* F */ |
569 | fp0 = packFloatx80(0, 0x3FFF, aSig); /* Y */ |
570 | |
571 | fp0 = floatx80_sub(fp0, f, status); /* Y-F */ |
572 | |
573 | /* LP1CONT1 */ |
574 | fp0 = floatx80_mul(fp0, log_tbl[j], status); /* FP0 IS U = (Y-F)/F */ |
575 | logof2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); |
576 | klog2 = floatx80_mul(fp1, logof2, status); /* FP1 IS K*LOG2 */ |
577 | fp2 = floatx80_mul(fp0, fp0, status); /* FP2 IS V=U*U */ |
578 | |
579 | fp3 = fp2; |
580 | fp1 = fp2; |
581 | |
582 | fp1 = floatx80_mul(fp1, float64_to_floatx80( |
583 | make_float64(0x3FC2499AB5E4040B), status), |
584 | status); /* V*A6 */ |
585 | fp2 = floatx80_mul(fp2, float64_to_floatx80( |
586 | make_float64(0xBFC555B5848CB7DB), status), |
587 | status); /* V*A5 */ |
588 | fp1 = floatx80_add(fp1, float64_to_floatx80( |
589 | make_float64(0x3FC99999987D8730), status), |
590 | status); /* A4+V*A6 */ |
591 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
592 | make_float64(0xBFCFFFFFFF6F7E97), status), |
593 | status); /* A3+V*A5 */ |
594 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A4+V*A6) */ |
595 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A3+V*A5) */ |
596 | fp1 = floatx80_add(fp1, float64_to_floatx80( |
597 | make_float64(0x3FD55555555555A4), status), |
598 | status); /* A2+V*(A4+V*A6) */ |
599 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
600 | make_float64(0xBFE0000000000008), status), |
601 | status); /* A1+V*(A3+V*A5) */ |
602 | fp1 = floatx80_mul(fp1, fp3, status); /* V*(A2+V*(A4+V*A6)) */ |
603 | fp2 = floatx80_mul(fp2, fp3, status); /* V*(A1+V*(A3+V*A5)) */ |
604 | fp1 = floatx80_mul(fp1, fp0, status); /* U*V*(A2+V*(A4+V*A6)) */ |
605 | fp0 = floatx80_add(fp0, fp2, status); /* U+V*(A1+V*(A3+V*A5)) */ |
606 | |
607 | fp1 = floatx80_add(fp1, log_tbl[j + 1], |
608 | status); /* LOG(F)+U*V*(A2+V*(A4+V*A6)) */ |
609 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 IS LOG(F) + LOG(1+U) */ |
610 | |
611 | status->float_rounding_mode = user_rnd_mode; |
612 | status->floatx80_rounding_precision = user_rnd_prec; |
613 | |
614 | a = floatx80_add(fp0, klog2, status); |
615 | |
616 | float_raise(float_flag_inexact, status); |
617 | |
618 | return a; |
619 | } else { /* |X-1| >= 1/16 */ |
620 | fp0 = a; |
621 | fp1 = a; |
622 | fp1 = floatx80_sub(fp1, float32_to_floatx80(make_float32(0x3F800000), |
623 | status), status); /* FP1 IS X-1 */ |
624 | fp0 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000), |
625 | status), status); /* FP0 IS X+1 */ |
626 | fp1 = floatx80_add(fp1, fp1, status); /* FP1 IS 2(X-1) */ |
627 | |
628 | /* LP1CONT2 */ |
629 | fp1 = floatx80_div(fp1, fp0, status); /* U */ |
630 | saveu = fp1; |
631 | fp0 = floatx80_mul(fp1, fp1, status); /* FP0 IS V = U*U */ |
632 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS W = V*V */ |
633 | |
634 | fp3 = float64_to_floatx80(make_float64(0x3F175496ADD7DAD6), |
635 | status); /* B5 */ |
636 | fp2 = float64_to_floatx80(make_float64(0x3F3C71C2FE80C7E0), |
637 | status); /* B4 */ |
638 | fp3 = floatx80_mul(fp3, fp1, status); /* W*B5 */ |
639 | fp2 = floatx80_mul(fp2, fp1, status); /* W*B4 */ |
640 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
641 | make_float64(0x3F624924928BCCFF), status), |
642 | status); /* B3+W*B5 */ |
643 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
644 | make_float64(0x3F899999999995EC), status), |
645 | status); /* B2+W*B4 */ |
646 | fp1 = floatx80_mul(fp1, fp3, status); /* W*(B3+W*B5) */ |
647 | fp2 = floatx80_mul(fp2, fp0, status); /* V*(B2+W*B4) */ |
648 | fp1 = floatx80_add(fp1, float64_to_floatx80( |
649 | make_float64(0x3FB5555555555555), status), |
650 | status); /* B1+W*(B3+W*B5) */ |
651 | |
652 | fp0 = floatx80_mul(fp0, saveu, status); /* FP0 IS U*V */ |
653 | fp1 = floatx80_add(fp1, fp2, status); /* B1+W*(B3+W*B5) + V*(B2+W*B4) */ |
654 | fp0 = floatx80_mul(fp0, fp1, |
655 | status); /* U*V*([B1+W*(B3+W*B5)] + [V*(B2+W*B4)]) */ |
656 | |
657 | status->float_rounding_mode = user_rnd_mode; |
658 | status->floatx80_rounding_precision = user_rnd_prec; |
659 | |
660 | a = floatx80_add(fp0, saveu, status); |
661 | |
662 | /*if (!floatx80_is_zero(a)) { */ |
663 | float_raise(float_flag_inexact, status); |
664 | /*} */ |
665 | |
666 | return a; |
667 | } |
668 | } |
669 | |
670 | /* |
671 | * Log base 10 |
672 | */ |
673 | |
674 | floatx80 floatx80_log10(floatx80 a, float_status *status) |
675 | { |
676 | flag aSign; |
677 | int32_t aExp; |
678 | uint64_t aSig; |
679 | |
680 | int8_t user_rnd_mode, user_rnd_prec; |
681 | |
682 | floatx80 fp0, fp1; |
683 | |
684 | aSig = extractFloatx80Frac(a); |
685 | aExp = extractFloatx80Exp(a); |
686 | aSign = extractFloatx80Sign(a); |
687 | |
688 | if (aExp == 0x7FFF) { |
689 | if ((uint64_t) (aSig << 1)) { |
690 | propagateFloatx80NaNOneArg(a, status); |
691 | } |
692 | if (aSign == 0) { |
693 | return packFloatx80(0, floatx80_infinity.high, |
694 | floatx80_infinity.low); |
695 | } |
696 | } |
697 | |
698 | if (aExp == 0 && aSig == 0) { |
699 | float_raise(float_flag_divbyzero, status); |
700 | return packFloatx80(1, floatx80_infinity.high, |
701 | floatx80_infinity.low); |
702 | } |
703 | |
704 | if (aSign) { |
705 | float_raise(float_flag_invalid, status); |
706 | return floatx80_default_nan(status); |
707 | } |
708 | |
709 | user_rnd_mode = status->float_rounding_mode; |
710 | user_rnd_prec = status->floatx80_rounding_precision; |
711 | status->float_rounding_mode = float_round_nearest_even; |
712 | status->floatx80_rounding_precision = 80; |
713 | |
714 | fp0 = floatx80_logn(a, status); |
715 | fp1 = packFloatx80(0, 0x3FFD, UINT64_C(0xDE5BD8A937287195)); /* INV_L10 */ |
716 | |
717 | status->float_rounding_mode = user_rnd_mode; |
718 | status->floatx80_rounding_precision = user_rnd_prec; |
719 | |
720 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L10 */ |
721 | |
722 | float_raise(float_flag_inexact, status); |
723 | |
724 | return a; |
725 | } |
726 | |
727 | /* |
728 | * Log base 2 |
729 | */ |
730 | |
731 | floatx80 floatx80_log2(floatx80 a, float_status *status) |
732 | { |
733 | flag aSign; |
734 | int32_t aExp; |
735 | uint64_t aSig; |
736 | |
737 | int8_t user_rnd_mode, user_rnd_prec; |
738 | |
739 | floatx80 fp0, fp1; |
740 | |
741 | aSig = extractFloatx80Frac(a); |
742 | aExp = extractFloatx80Exp(a); |
743 | aSign = extractFloatx80Sign(a); |
744 | |
745 | if (aExp == 0x7FFF) { |
746 | if ((uint64_t) (aSig << 1)) { |
747 | propagateFloatx80NaNOneArg(a, status); |
748 | } |
749 | if (aSign == 0) { |
750 | return packFloatx80(0, floatx80_infinity.high, |
751 | floatx80_infinity.low); |
752 | } |
753 | } |
754 | |
755 | if (aExp == 0) { |
756 | if (aSig == 0) { |
757 | float_raise(float_flag_divbyzero, status); |
758 | return packFloatx80(1, floatx80_infinity.high, |
759 | floatx80_infinity.low); |
760 | } |
761 | normalizeFloatx80Subnormal(aSig, &aExp, &aSig); |
762 | } |
763 | |
764 | if (aSign) { |
765 | float_raise(float_flag_invalid, status); |
766 | return floatx80_default_nan(status); |
767 | } |
768 | |
769 | user_rnd_mode = status->float_rounding_mode; |
770 | user_rnd_prec = status->floatx80_rounding_precision; |
771 | status->float_rounding_mode = float_round_nearest_even; |
772 | status->floatx80_rounding_precision = 80; |
773 | |
774 | if (aSig == one_sig) { /* X is 2^k */ |
775 | status->float_rounding_mode = user_rnd_mode; |
776 | status->floatx80_rounding_precision = user_rnd_prec; |
777 | |
778 | a = int32_to_floatx80(aExp - 0x3FFF, status); |
779 | } else { |
780 | fp0 = floatx80_logn(a, status); |
781 | fp1 = packFloatx80(0, 0x3FFF, UINT64_C(0xB8AA3B295C17F0BC)); /* INV_L2 */ |
782 | |
783 | status->float_rounding_mode = user_rnd_mode; |
784 | status->floatx80_rounding_precision = user_rnd_prec; |
785 | |
786 | a = floatx80_mul(fp0, fp1, status); /* LOGN(X)*INV_L2 */ |
787 | } |
788 | |
789 | float_raise(float_flag_inexact, status); |
790 | |
791 | return a; |
792 | } |
793 | |
794 | /* |
795 | * e to x |
796 | */ |
797 | |
floatx80 floatx80_etox(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, k, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale;
    flag adjflag;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            return packFloatx80(0, 0, 0); /* exp(-inf) = +0 */
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low); /* exp(+inf) = +inf */
    }

    if (aExp == 0 && aSig == 0) {
        return packFloatx80(0, one_exp, one_sig); /* exp(+/-0) = 1 */
    }

    /* evaluate internally in round-to-nearest, full extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    adjflag = 0;

    if (aExp >= 0x3FBE) { /* |X| >= 2^(-65) */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact < 0x400CB167) { /* |X| < 16380 log2 */
            /* reduce X = N*log2/64 + R, then exp(X) = 2^(N/64) * exp(R) */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            adjflag = 0;
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /*
                 * arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m += 0x3FFF; /* biased exponent of 2^(M) */

        expcont1:
            /* R = X - N*log2/64, using the two-part constant L1+L2 */
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            /* polynomial approximation of exp(R) - 1 */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3AB60B70),
                                      status); /* A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A5 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3C088895),
                               status), fp1,
                               status); /* fp3 is S*A4 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(make_float64(
                               0x3FA5555555554431), status),
                               status); /* fp2 is A3+S*A5 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(make_float64(
                               0x3FC5555555554018), status),
                               status); /* fp3 is A2+S*A4 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 is S*(A2+S*A4) */
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp2 is A1+S*(A3+S*A5) */
            fp3 = floatx80_mul(fp3, fp0, status); /* fp3 IS R*S*(A2+S*A4) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp3, status); /* fp0 IS R+R*S*(A2+S*A4) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            /* combine with the table value of 2^(J/64) */
            fp1 = exp_tbl[j];
            fp0 = floatx80_mul(fp0, fp1, status); /* 2^(J/64)*(Exp(R)-1) */
            fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status),
                               status); /* accurate 2^(J/64) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* 2^(J/64) + 2^(J/64)*(Exp(R)-1) */

            scale = packFloatx80(0, m, one_sig); /* 2^(M) */
            if (adjflag) {
                /* large |X|: scale in two steps, first by 2^(M1) */
                adjscale = packFloatx80(0, m1, one_sig);
                fp0 = floatx80_mul(fp0, adjscale, status);
            }

            /* restore the caller's rounding before the final operation */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, scale, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| >= 16380 log2 */
            if (compact > 0x400CB27C) { /* |X| >= 16480 log2 */
                /*
                 * guaranteed out of range: force a huge (0x8000) or tiny
                 * (-0x1000) exponent so rounding delivers inf/zero with
                 * the appropriate exception flags
                 */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;
                if (aSign) {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, -0x1000, aSig, 0, status);
                } else {
                    a = roundAndPackFloatx80(
                        status->floatx80_rounding_precision,
                        0, 0x8000, aSig, 0, status);
                }
                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* near the range limit: split the 2^(N/64) scaling in two */
                fp0 = a;
                fp1 = a;
                fp0 = floatx80_mul(fp0, float32_to_floatx80(
                                   make_float32(0x42B8AA3B), status),
                                   status); /* 64/log2 * X */
                adjflag = 1;
                n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
                fp0 = int32_to_floatx80(n, status);

                j = n & 0x3F; /* J = N mod 64 */
                /* NOTE: this is really arithmetic right shift by 6 */
                k = n / 64;
                if (n < 0 && j) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    k--;
                }
                /* NOTE: this is really arithmetic right shift by 1 */
                m1 = k / 2;
                if (k < 0 && (k & 1)) {
                    /* arithmetic right shift is division and
                     * round towards minus infinity
                     */
                    m1--;
                }
                m = k - m1;
                m1 += 0x3FFF; /* biased exponent of 2^(M1) */
                m += 0x3FFF; /* biased exponent of 2^(M) */

                goto expcont1;
            }
        }
    } else { /* |X| < 2^(-65) */
        /* exp(X) rounds to 1 + X at this magnitude */
        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(a, float32_to_floatx80(make_float32(0x3F800000),
                         status), status); /* 1 + X */

        float_raise(float_flag_inexact, status);

        return a;
    }
}
977 | |
978 | /* |
979 | * 2 to x |
980 | */ |
981 | |
982 | floatx80 floatx80_twotox(floatx80 a, float_status *status) |
983 | { |
984 | flag aSign; |
985 | int32_t aExp; |
986 | uint64_t aSig; |
987 | |
988 | int8_t user_rnd_mode, user_rnd_prec; |
989 | |
990 | int32_t compact, n, j, l, m, m1; |
991 | floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; |
992 | |
993 | aSig = extractFloatx80Frac(a); |
994 | aExp = extractFloatx80Exp(a); |
995 | aSign = extractFloatx80Sign(a); |
996 | |
997 | if (aExp == 0x7FFF) { |
998 | if ((uint64_t) (aSig << 1)) { |
999 | return propagateFloatx80NaNOneArg(a, status); |
1000 | } |
1001 | if (aSign) { |
1002 | return packFloatx80(0, 0, 0); |
1003 | } |
1004 | return packFloatx80(0, floatx80_infinity.high, |
1005 | floatx80_infinity.low); |
1006 | } |
1007 | |
1008 | if (aExp == 0 && aSig == 0) { |
1009 | return packFloatx80(0, one_exp, one_sig); |
1010 | } |
1011 | |
1012 | user_rnd_mode = status->float_rounding_mode; |
1013 | user_rnd_prec = status->floatx80_rounding_precision; |
1014 | status->float_rounding_mode = float_round_nearest_even; |
1015 | status->floatx80_rounding_precision = 80; |
1016 | |
1017 | fp0 = a; |
1018 | |
1019 | compact = floatx80_make_compact(aExp, aSig); |
1020 | |
1021 | if (compact < 0x3FB98000 || compact > 0x400D80C0) { |
1022 | /* |X| > 16480 or |X| < 2^(-70) */ |
1023 | if (compact > 0x3FFF8000) { /* |X| > 16480 */ |
1024 | status->float_rounding_mode = user_rnd_mode; |
1025 | status->floatx80_rounding_precision = user_rnd_prec; |
1026 | |
1027 | if (aSign) { |
1028 | return roundAndPackFloatx80(status->floatx80_rounding_precision, |
1029 | 0, -0x1000, aSig, 0, status); |
1030 | } else { |
1031 | return roundAndPackFloatx80(status->floatx80_rounding_precision, |
1032 | 0, 0x8000, aSig, 0, status); |
1033 | } |
1034 | } else { /* |X| < 2^(-70) */ |
1035 | status->float_rounding_mode = user_rnd_mode; |
1036 | status->floatx80_rounding_precision = user_rnd_prec; |
1037 | |
1038 | a = floatx80_add(fp0, float32_to_floatx80( |
1039 | make_float32(0x3F800000), status), |
1040 | status); /* 1 + X */ |
1041 | |
1042 | float_raise(float_flag_inexact, status); |
1043 | |
1044 | return a; |
1045 | } |
1046 | } else { /* 2^(-70) <= |X| <= 16480 */ |
1047 | fp1 = fp0; /* X */ |
1048 | fp1 = floatx80_mul(fp1, float32_to_floatx80( |
1049 | make_float32(0x42800000), status), |
1050 | status); /* X * 64 */ |
1051 | n = floatx80_to_int32(fp1, status); |
1052 | fp1 = int32_to_floatx80(n, status); |
1053 | j = n & 0x3F; |
1054 | l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ |
1055 | if (n < 0 && j) { |
1056 | /* |
1057 | * arithmetic right shift is division and |
1058 | * round towards minus infinity |
1059 | */ |
1060 | l--; |
1061 | } |
1062 | m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */ |
1063 | if (l < 0 && (l & 1)) { |
1064 | /* |
1065 | * arithmetic right shift is division and |
1066 | * round towards minus infinity |
1067 | */ |
1068 | m--; |
1069 | } |
1070 | m1 = l - m; |
1071 | m1 += 0x3FFF; /* ADJFACT IS 2^(M') */ |
1072 | |
1073 | adjfact = packFloatx80(0, m1, one_sig); |
1074 | fact1 = exp2_tbl[j]; |
1075 | fact1.high += m; |
1076 | fact2.high = exp2_tbl2[j] >> 16; |
1077 | fact2.high += m; |
1078 | fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); |
1079 | fact2.low <<= 48; |
1080 | |
1081 | fp1 = floatx80_mul(fp1, float32_to_floatx80( |
1082 | make_float32(0x3C800000), status), |
1083 | status); /* (1/64)*N */ |
1084 | fp0 = floatx80_sub(fp0, fp1, status); /* X - (1/64)*INT(64 X) */ |
1085 | fp2 = packFloatx80(0, 0x3FFE, UINT64_C(0xB17217F7D1CF79AC)); /* LOG2 */ |
1086 | fp0 = floatx80_mul(fp0, fp2, status); /* R */ |
1087 | |
1088 | /* EXPR */ |
1089 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ |
1090 | fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2), |
1091 | status); /* A5 */ |
1092 | fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C), |
1093 | status); /* A4 */ |
1094 | fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */ |
1095 | fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */ |
1096 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1097 | make_float64(0x3FA5555555554CC1), status), |
1098 | status); /* A3+S*A5 */ |
1099 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1100 | make_float64(0x3FC5555555554A54), status), |
1101 | status); /* A2+S*A4 */ |
1102 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */ |
1103 | fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */ |
1104 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1105 | make_float64(0x3FE0000000000000), status), |
1106 | status); /* A1+S*(A3+S*A5) */ |
1107 | fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */ |
1108 | |
1109 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */ |
1110 | fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */ |
1111 | fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */ |
1112 | |
1113 | fp0 = floatx80_mul(fp0, fact1, status); |
1114 | fp0 = floatx80_add(fp0, fact2, status); |
1115 | fp0 = floatx80_add(fp0, fact1, status); |
1116 | |
1117 | status->float_rounding_mode = user_rnd_mode; |
1118 | status->floatx80_rounding_precision = user_rnd_prec; |
1119 | |
1120 | a = floatx80_mul(fp0, adjfact, status); |
1121 | |
1122 | float_raise(float_flag_inexact, status); |
1123 | |
1124 | return a; |
1125 | } |
1126 | } |
1127 | |
1128 | /* |
1129 | * 10 to x |
1130 | */ |
1131 | |
1132 | floatx80 floatx80_tentox(floatx80 a, float_status *status) |
1133 | { |
1134 | flag aSign; |
1135 | int32_t aExp; |
1136 | uint64_t aSig; |
1137 | |
1138 | int8_t user_rnd_mode, user_rnd_prec; |
1139 | |
1140 | int32_t compact, n, j, l, m, m1; |
1141 | floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; |
1142 | |
1143 | aSig = extractFloatx80Frac(a); |
1144 | aExp = extractFloatx80Exp(a); |
1145 | aSign = extractFloatx80Sign(a); |
1146 | |
1147 | if (aExp == 0x7FFF) { |
1148 | if ((uint64_t) (aSig << 1)) { |
1149 | return propagateFloatx80NaNOneArg(a, status); |
1150 | } |
1151 | if (aSign) { |
1152 | return packFloatx80(0, 0, 0); |
1153 | } |
1154 | return packFloatx80(0, floatx80_infinity.high, |
1155 | floatx80_infinity.low); |
1156 | } |
1157 | |
1158 | if (aExp == 0 && aSig == 0) { |
1159 | return packFloatx80(0, one_exp, one_sig); |
1160 | } |
1161 | |
1162 | user_rnd_mode = status->float_rounding_mode; |
1163 | user_rnd_prec = status->floatx80_rounding_precision; |
1164 | status->float_rounding_mode = float_round_nearest_even; |
1165 | status->floatx80_rounding_precision = 80; |
1166 | |
1167 | fp0 = a; |
1168 | |
1169 | compact = floatx80_make_compact(aExp, aSig); |
1170 | |
1171 | if (compact < 0x3FB98000 || compact > 0x400B9B07) { |
1172 | /* |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) */ |
1173 | if (compact > 0x3FFF8000) { /* |X| > 16480 */ |
1174 | status->float_rounding_mode = user_rnd_mode; |
1175 | status->floatx80_rounding_precision = user_rnd_prec; |
1176 | |
1177 | if (aSign) { |
1178 | return roundAndPackFloatx80(status->floatx80_rounding_precision, |
1179 | 0, -0x1000, aSig, 0, status); |
1180 | } else { |
1181 | return roundAndPackFloatx80(status->floatx80_rounding_precision, |
1182 | 0, 0x8000, aSig, 0, status); |
1183 | } |
1184 | } else { /* |X| < 2^(-70) */ |
1185 | status->float_rounding_mode = user_rnd_mode; |
1186 | status->floatx80_rounding_precision = user_rnd_prec; |
1187 | |
1188 | a = floatx80_add(fp0, float32_to_floatx80( |
1189 | make_float32(0x3F800000), status), |
1190 | status); /* 1 + X */ |
1191 | |
1192 | float_raise(float_flag_inexact, status); |
1193 | |
1194 | return a; |
1195 | } |
1196 | } else { /* 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 */ |
1197 | fp1 = fp0; /* X */ |
1198 | fp1 = floatx80_mul(fp1, float64_to_floatx80( |
1199 | make_float64(0x406A934F0979A371), |
1200 | status), status); /* X*64*LOG10/LOG2 */ |
1201 | n = floatx80_to_int32(fp1, status); /* N=INT(X*64*LOG10/LOG2) */ |
1202 | fp1 = int32_to_floatx80(n, status); |
1203 | |
1204 | j = n & 0x3F; |
1205 | l = n / 64; /* NOTE: this is really arithmetic right shift by 6 */ |
1206 | if (n < 0 && j) { |
1207 | /* |
1208 | * arithmetic right shift is division and |
1209 | * round towards minus infinity |
1210 | */ |
1211 | l--; |
1212 | } |
1213 | m = l / 2; /* NOTE: this is really arithmetic right shift by 1 */ |
1214 | if (l < 0 && (l & 1)) { |
1215 | /* |
1216 | * arithmetic right shift is division and |
1217 | * round towards minus infinity |
1218 | */ |
1219 | m--; |
1220 | } |
1221 | m1 = l - m; |
1222 | m1 += 0x3FFF; /* ADJFACT IS 2^(M') */ |
1223 | |
1224 | adjfact = packFloatx80(0, m1, one_sig); |
1225 | fact1 = exp2_tbl[j]; |
1226 | fact1.high += m; |
1227 | fact2.high = exp2_tbl2[j] >> 16; |
1228 | fact2.high += m; |
1229 | fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); |
1230 | fact2.low <<= 48; |
1231 | |
1232 | fp2 = fp1; /* N */ |
1233 | fp1 = floatx80_mul(fp1, float64_to_floatx80( |
1234 | make_float64(0x3F734413509F8000), status), |
1235 | status); /* N*(LOG2/64LOG10)_LEAD */ |
1236 | fp3 = packFloatx80(1, 0x3FCD, UINT64_C(0xC0219DC1DA994FD2)); |
1237 | fp2 = floatx80_mul(fp2, fp3, status); /* N*(LOG2/64LOG10)_TRAIL */ |
1238 | fp0 = floatx80_sub(fp0, fp1, status); /* X - N L_LEAD */ |
1239 | fp0 = floatx80_sub(fp0, fp2, status); /* X - N L_TRAIL */ |
1240 | fp2 = packFloatx80(0, 0x4000, UINT64_C(0x935D8DDDAAA8AC17)); /* LOG10 */ |
1241 | fp0 = floatx80_mul(fp0, fp2, status); /* R */ |
1242 | |
1243 | /* EXPR */ |
1244 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ |
1245 | fp2 = float64_to_floatx80(make_float64(0x3F56C16D6F7BD0B2), |
1246 | status); /* A5 */ |
1247 | fp3 = float64_to_floatx80(make_float64(0x3F811112302C712C), |
1248 | status); /* A4 */ |
1249 | fp2 = floatx80_mul(fp2, fp1, status); /* S*A5 */ |
1250 | fp3 = floatx80_mul(fp3, fp1, status); /* S*A4 */ |
1251 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1252 | make_float64(0x3FA5555555554CC1), status), |
1253 | status); /* A3+S*A5 */ |
1254 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1255 | make_float64(0x3FC5555555554A54), status), |
1256 | status); /* A2+S*A4 */ |
1257 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A3+S*A5) */ |
1258 | fp3 = floatx80_mul(fp3, fp1, status); /* S*(A2+S*A4) */ |
1259 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1260 | make_float64(0x3FE0000000000000), status), |
1261 | status); /* A1+S*(A3+S*A5) */ |
1262 | fp3 = floatx80_mul(fp3, fp0, status); /* R*S*(A2+S*A4) */ |
1263 | |
1264 | fp2 = floatx80_mul(fp2, fp1, status); /* S*(A1+S*(A3+S*A5)) */ |
1265 | fp0 = floatx80_add(fp0, fp3, status); /* R+R*S*(A2+S*A4) */ |
1266 | fp0 = floatx80_add(fp0, fp2, status); /* EXP(R) - 1 */ |
1267 | |
1268 | fp0 = floatx80_mul(fp0, fact1, status); |
1269 | fp0 = floatx80_add(fp0, fact2, status); |
1270 | fp0 = floatx80_add(fp0, fact1, status); |
1271 | |
1272 | status->float_rounding_mode = user_rnd_mode; |
1273 | status->floatx80_rounding_precision = user_rnd_prec; |
1274 | |
1275 | a = floatx80_mul(fp0, adjfact, status); |
1276 | |
1277 | float_raise(float_flag_inexact, status); |
1278 | |
1279 | return a; |
1280 | } |
1281 | } |
1282 | |
1283 | /* |
1284 | * Tangent |
1285 | */ |
1286 | |
1287 | floatx80 floatx80_tan(floatx80 a, float_status *status) |
1288 | { |
1289 | flag aSign, xSign; |
1290 | int32_t aExp, xExp; |
1291 | uint64_t aSig, xSig; |
1292 | |
1293 | int8_t user_rnd_mode, user_rnd_prec; |
1294 | |
1295 | int32_t compact, l, n, j; |
1296 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; |
1297 | float32 twoto63; |
1298 | flag endflag; |
1299 | |
1300 | aSig = extractFloatx80Frac(a); |
1301 | aExp = extractFloatx80Exp(a); |
1302 | aSign = extractFloatx80Sign(a); |
1303 | |
1304 | if (aExp == 0x7FFF) { |
1305 | if ((uint64_t) (aSig << 1)) { |
1306 | return propagateFloatx80NaNOneArg(a, status); |
1307 | } |
1308 | float_raise(float_flag_invalid, status); |
1309 | return floatx80_default_nan(status); |
1310 | } |
1311 | |
1312 | if (aExp == 0 && aSig == 0) { |
1313 | return packFloatx80(aSign, 0, 0); |
1314 | } |
1315 | |
1316 | user_rnd_mode = status->float_rounding_mode; |
1317 | user_rnd_prec = status->floatx80_rounding_precision; |
1318 | status->float_rounding_mode = float_round_nearest_even; |
1319 | status->floatx80_rounding_precision = 80; |
1320 | |
1321 | compact = floatx80_make_compact(aExp, aSig); |
1322 | |
1323 | fp0 = a; |
1324 | |
1325 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { |
1326 | /* 2^(-40) > |X| > 15 PI */ |
1327 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ |
1328 | /* REDUCEX */ |
1329 | fp1 = packFloatx80(0, 0, 0); |
1330 | if (compact == 0x7FFEFFFF) { |
1331 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, |
1332 | UINT64_C(0xC90FDAA200000000)); |
1333 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, |
1334 | UINT64_C(0x85A308D300000000)); |
1335 | fp0 = floatx80_add(fp0, twopi1, status); |
1336 | fp1 = fp0; |
1337 | fp0 = floatx80_add(fp0, twopi2, status); |
1338 | fp1 = floatx80_sub(fp1, fp0, status); |
1339 | fp1 = floatx80_add(fp1, twopi2, status); |
1340 | } |
1341 | loop: |
1342 | xSign = extractFloatx80Sign(fp0); |
1343 | xExp = extractFloatx80Exp(fp0); |
1344 | xExp -= 0x3FFF; |
1345 | if (xExp <= 28) { |
1346 | l = 0; |
1347 | endflag = 1; |
1348 | } else { |
1349 | l = xExp - 27; |
1350 | endflag = 0; |
1351 | } |
1352 | invtwopi = packFloatx80(0, 0x3FFE - l, |
1353 | UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ |
1354 | twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000)); |
1355 | twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000)); |
1356 | |
1357 | /* SIGN(INARG)*2^63 IN SGL */ |
1358 | twoto63 = packFloat32(xSign, 0xBE, 0); |
1359 | |
1360 | fp2 = floatx80_mul(fp0, invtwopi, status); |
1361 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), |
1362 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ |
1363 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), |
1364 | status); /* FP2 is N */ |
1365 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ |
1366 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ |
1367 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ |
1368 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ |
1369 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ |
1370 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ |
1371 | fp3 = fp0; /* FP3 is A */ |
1372 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ |
1373 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ |
1374 | |
1375 | if (endflag > 0) { |
1376 | n = floatx80_to_int32(fp2, status); |
1377 | goto tancont; |
1378 | } |
1379 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ |
1380 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ |
1381 | goto loop; |
1382 | } else { |
1383 | status->float_rounding_mode = user_rnd_mode; |
1384 | status->floatx80_rounding_precision = user_rnd_prec; |
1385 | |
1386 | a = floatx80_move(a, status); |
1387 | |
1388 | float_raise(float_flag_inexact, status); |
1389 | |
1390 | return a; |
1391 | } |
1392 | } else { |
1393 | fp1 = floatx80_mul(fp0, float64_to_floatx80( |
1394 | make_float64(0x3FE45F306DC9C883), status), |
1395 | status); /* X*2/PI */ |
1396 | |
1397 | n = floatx80_to_int32(fp1, status); |
1398 | j = 32 + n; |
1399 | |
1400 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ |
1401 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), |
1402 | status); /* FP0 IS R = (X-Y1)-Y2 */ |
1403 | |
1404 | tancont: |
1405 | if (n & 1) { |
1406 | /* NODD */ |
1407 | fp1 = fp0; /* R */ |
1408 | fp0 = floatx80_mul(fp0, fp0, status); /* S = R*R */ |
1409 | fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688), |
1410 | status); /* Q4 */ |
1411 | fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04), |
1412 | status); /* P3 */ |
1413 | fp3 = floatx80_mul(fp3, fp0, status); /* SQ4 */ |
1414 | fp2 = floatx80_mul(fp2, fp0, status); /* SP3 */ |
1415 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1416 | make_float64(0xBF346F59B39BA65F), status), |
1417 | status); /* Q3+SQ4 */ |
1418 | fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00)); |
1419 | fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */ |
1420 | fp3 = floatx80_mul(fp3, fp0, status); /* S(Q3+SQ4) */ |
1421 | fp2 = floatx80_mul(fp2, fp0, status); /* S(P2+SP3) */ |
1422 | fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1)); |
1423 | fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */ |
1424 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA)); |
1425 | fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */ |
1426 | fp3 = floatx80_mul(fp3, fp0, status); /* S(Q2+S(Q3+SQ4)) */ |
1427 | fp2 = floatx80_mul(fp2, fp0, status); /* S(P1+S(P2+SP3)) */ |
1428 | fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE)); |
1429 | fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */ |
1430 | fp2 = floatx80_mul(fp2, fp1, status); /* RS(P1+S(P2+SP3)) */ |
1431 | fp0 = floatx80_mul(fp0, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */ |
1432 | fp1 = floatx80_add(fp1, fp2, status); /* R+RS(P1+S(P2+SP3)) */ |
1433 | fp0 = floatx80_add(fp0, float32_to_floatx80( |
1434 | make_float32(0x3F800000), status), |
1435 | status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */ |
1436 | |
1437 | xSign = extractFloatx80Sign(fp1); |
1438 | xExp = extractFloatx80Exp(fp1); |
1439 | xSig = extractFloatx80Frac(fp1); |
1440 | xSign ^= 1; |
1441 | fp1 = packFloatx80(xSign, xExp, xSig); |
1442 | |
1443 | status->float_rounding_mode = user_rnd_mode; |
1444 | status->floatx80_rounding_precision = user_rnd_prec; |
1445 | |
1446 | a = floatx80_div(fp0, fp1, status); |
1447 | |
1448 | float_raise(float_flag_inexact, status); |
1449 | |
1450 | return a; |
1451 | } else { |
1452 | fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */ |
1453 | fp3 = float64_to_floatx80(make_float64(0x3EA0B759F50F8688), |
1454 | status); /* Q4 */ |
1455 | fp2 = float64_to_floatx80(make_float64(0xBEF2BAA5A8924F04), |
1456 | status); /* P3 */ |
1457 | fp3 = floatx80_mul(fp3, fp1, status); /* SQ4 */ |
1458 | fp2 = floatx80_mul(fp2, fp1, status); /* SP3 */ |
1459 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1460 | make_float64(0xBF346F59B39BA65F), status), |
1461 | status); /* Q3+SQ4 */ |
1462 | fp4 = packFloatx80(0, 0x3FF6, UINT64_C(0xE073D3FC199C4A00)); |
1463 | fp2 = floatx80_add(fp2, fp4, status); /* P2+SP3 */ |
1464 | fp3 = floatx80_mul(fp3, fp1, status); /* S(Q3+SQ4) */ |
1465 | fp2 = floatx80_mul(fp2, fp1, status); /* S(P2+SP3) */ |
1466 | fp4 = packFloatx80(0, 0x3FF9, UINT64_C(0xD23CD68415D95FA1)); |
1467 | fp3 = floatx80_add(fp3, fp4, status); /* Q2+S(Q3+SQ4) */ |
1468 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0x8895A6C5FB423BCA)); |
1469 | fp2 = floatx80_add(fp2, fp4, status); /* P1+S(P2+SP3) */ |
1470 | fp3 = floatx80_mul(fp3, fp1, status); /* S(Q2+S(Q3+SQ4)) */ |
1471 | fp2 = floatx80_mul(fp2, fp1, status); /* S(P1+S(P2+SP3)) */ |
1472 | fp4 = packFloatx80(1, 0x3FFD, UINT64_C(0xEEF57E0DA84BC8CE)); |
1473 | fp3 = floatx80_add(fp3, fp4, status); /* Q1+S(Q2+S(Q3+SQ4)) */ |
1474 | fp2 = floatx80_mul(fp2, fp0, status); /* RS(P1+S(P2+SP3)) */ |
1475 | fp1 = floatx80_mul(fp1, fp3, status); /* S(Q1+S(Q2+S(Q3+SQ4))) */ |
1476 | fp0 = floatx80_add(fp0, fp2, status); /* R+RS(P1+S(P2+SP3)) */ |
1477 | fp1 = floatx80_add(fp1, float32_to_floatx80( |
1478 | make_float32(0x3F800000), status), |
1479 | status); /* 1+S(Q1+S(Q2+S(Q3+SQ4))) */ |
1480 | |
1481 | status->float_rounding_mode = user_rnd_mode; |
1482 | status->floatx80_rounding_precision = user_rnd_prec; |
1483 | |
1484 | a = floatx80_div(fp0, fp1, status); |
1485 | |
1486 | float_raise(float_flag_inexact, status); |
1487 | |
1488 | return a; |
1489 | } |
1490 | } |
1491 | } |
1492 | |
1493 | /* |
1494 | * Sine |
1495 | */ |
1496 | |
1497 | floatx80 floatx80_sin(floatx80 a, float_status *status) |
1498 | { |
1499 | flag aSign, xSign; |
1500 | int32_t aExp, xExp; |
1501 | uint64_t aSig, xSig; |
1502 | |
1503 | int8_t user_rnd_mode, user_rnd_prec; |
1504 | |
1505 | int32_t compact, l, n, j; |
1506 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; |
1507 | float32 posneg1, twoto63; |
1508 | flag endflag; |
1509 | |
1510 | aSig = extractFloatx80Frac(a); |
1511 | aExp = extractFloatx80Exp(a); |
1512 | aSign = extractFloatx80Sign(a); |
1513 | |
1514 | if (aExp == 0x7FFF) { |
1515 | if ((uint64_t) (aSig << 1)) { |
1516 | return propagateFloatx80NaNOneArg(a, status); |
1517 | } |
1518 | float_raise(float_flag_invalid, status); |
1519 | return floatx80_default_nan(status); |
1520 | } |
1521 | |
1522 | if (aExp == 0 && aSig == 0) { |
1523 | return packFloatx80(aSign, 0, 0); |
1524 | } |
1525 | |
1526 | user_rnd_mode = status->float_rounding_mode; |
1527 | user_rnd_prec = status->floatx80_rounding_precision; |
1528 | status->float_rounding_mode = float_round_nearest_even; |
1529 | status->floatx80_rounding_precision = 80; |
1530 | |
1531 | compact = floatx80_make_compact(aExp, aSig); |
1532 | |
1533 | fp0 = a; |
1534 | |
1535 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { |
1536 | /* 2^(-40) > |X| > 15 PI */ |
1537 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ |
1538 | /* REDUCEX */ |
1539 | fp1 = packFloatx80(0, 0, 0); |
1540 | if (compact == 0x7FFEFFFF) { |
1541 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, |
1542 | UINT64_C(0xC90FDAA200000000)); |
1543 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, |
1544 | UINT64_C(0x85A308D300000000)); |
1545 | fp0 = floatx80_add(fp0, twopi1, status); |
1546 | fp1 = fp0; |
1547 | fp0 = floatx80_add(fp0, twopi2, status); |
1548 | fp1 = floatx80_sub(fp1, fp0, status); |
1549 | fp1 = floatx80_add(fp1, twopi2, status); |
1550 | } |
1551 | loop: |
1552 | xSign = extractFloatx80Sign(fp0); |
1553 | xExp = extractFloatx80Exp(fp0); |
1554 | xExp -= 0x3FFF; |
1555 | if (xExp <= 28) { |
1556 | l = 0; |
1557 | endflag = 1; |
1558 | } else { |
1559 | l = xExp - 27; |
1560 | endflag = 0; |
1561 | } |
1562 | invtwopi = packFloatx80(0, 0x3FFE - l, |
1563 | UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ |
1564 | twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000)); |
1565 | twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000)); |
1566 | |
1567 | /* SIGN(INARG)*2^63 IN SGL */ |
1568 | twoto63 = packFloat32(xSign, 0xBE, 0); |
1569 | |
1570 | fp2 = floatx80_mul(fp0, invtwopi, status); |
1571 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), |
1572 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ |
1573 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), |
1574 | status); /* FP2 is N */ |
1575 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ |
1576 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ |
1577 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ |
1578 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ |
1579 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ |
1580 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ |
1581 | fp3 = fp0; /* FP3 is A */ |
1582 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ |
1583 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ |
1584 | |
1585 | if (endflag > 0) { |
1586 | n = floatx80_to_int32(fp2, status); |
1587 | goto sincont; |
1588 | } |
1589 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ |
1590 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ |
1591 | goto loop; |
1592 | } else { |
1593 | /* SINSM */ |
1594 | fp0 = float32_to_floatx80(make_float32(0x3F800000), |
1595 | status); /* 1 */ |
1596 | |
1597 | status->float_rounding_mode = user_rnd_mode; |
1598 | status->floatx80_rounding_precision = user_rnd_prec; |
1599 | |
1600 | /* SINTINY */ |
1601 | a = floatx80_move(a, status); |
1602 | float_raise(float_flag_inexact, status); |
1603 | |
1604 | return a; |
1605 | } |
1606 | } else { |
1607 | fp1 = floatx80_mul(fp0, float64_to_floatx80( |
1608 | make_float64(0x3FE45F306DC9C883), status), |
1609 | status); /* X*2/PI */ |
1610 | |
1611 | n = floatx80_to_int32(fp1, status); |
1612 | j = 32 + n; |
1613 | |
1614 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ |
1615 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), |
1616 | status); /* FP0 IS R = (X-Y1)-Y2 */ |
1617 | |
1618 | sincont: |
1619 | if (n & 1) { |
1620 | /* COSPOLY */ |
1621 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ |
1622 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ |
1623 | fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), |
1624 | status); /* B8 */ |
1625 | fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), |
1626 | status); /* B7 */ |
1627 | |
1628 | xSign = extractFloatx80Sign(fp0); /* X IS S */ |
1629 | xExp = extractFloatx80Exp(fp0); |
1630 | xSig = extractFloatx80Frac(fp0); |
1631 | |
1632 | if ((n >> 1) & 1) { |
1633 | xSign ^= 1; |
1634 | posneg1 = make_float32(0xBF800000); /* -1 */ |
1635 | } else { |
1636 | xSign ^= 0; |
1637 | posneg1 = make_float32(0x3F800000); /* 1 */ |
1638 | } /* X IS NOW R'= SGN*R */ |
1639 | |
1640 | fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ |
1641 | fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ |
1642 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1643 | make_float64(0x3E21EED90612C972), status), |
1644 | status); /* B6+TB8 */ |
1645 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1646 | make_float64(0xBE927E4FB79D9FCF), status), |
1647 | status); /* B5+TB7 */ |
1648 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ |
1649 | fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ |
1650 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1651 | make_float64(0x3EFA01A01A01D423), status), |
1652 | status); /* B4+T(B6+TB8) */ |
1653 | fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438)); |
1654 | fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ |
1655 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ |
1656 | fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ |
1657 | fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E)); |
1658 | fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ |
1659 | fp1 = floatx80_add(fp1, float32_to_floatx80( |
1660 | make_float32(0xBF000000), status), |
1661 | status); /* B1+T(B3+T(B5+TB7)) */ |
1662 | fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ |
1663 | fp0 = floatx80_add(fp0, fp1, status); /* [B1+T(B3+T(B5+TB7))]+ |
1664 | * [S(B2+T(B4+T(B6+TB8)))] |
1665 | */ |
1666 | |
1667 | x = packFloatx80(xSign, xExp, xSig); |
1668 | fp0 = floatx80_mul(fp0, x, status); |
1669 | |
1670 | status->float_rounding_mode = user_rnd_mode; |
1671 | status->floatx80_rounding_precision = user_rnd_prec; |
1672 | |
1673 | a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); |
1674 | |
1675 | float_raise(float_flag_inexact, status); |
1676 | |
1677 | return a; |
1678 | } else { |
1679 | /* SINPOLY */ |
1680 | xSign = extractFloatx80Sign(fp0); /* X IS R */ |
1681 | xExp = extractFloatx80Exp(fp0); |
1682 | xSig = extractFloatx80Frac(fp0); |
1683 | |
1684 | xSign ^= (n >> 1) & 1; /* X IS NOW R'= SGN*R */ |
1685 | |
1686 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ |
1687 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ |
1688 | fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), |
1689 | status); /* A7 */ |
1690 | fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), |
1691 | status); /* A6 */ |
1692 | fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ |
1693 | fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ |
1694 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1695 | make_float64(0xBE5AE6452A118AE4), status), |
1696 | status); /* A5+T*A7 */ |
1697 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1698 | make_float64(0x3EC71DE3A5341531), status), |
1699 | status); /* A4+T*A6 */ |
1700 | fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ |
1701 | fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ |
1702 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1703 | make_float64(0xBF2A01A01A018B59), status), |
1704 | status); /* A3+T(A5+TA7) */ |
1705 | fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF)); |
1706 | fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ |
1707 | fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ |
1708 | fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ |
1709 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99)); |
1710 | fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ |
1711 | fp1 = floatx80_add(fp1, fp2, |
1712 | status); /* [A1+T(A3+T(A5+TA7))]+ |
1713 | * [S(A2+T(A4+TA6))] |
1714 | */ |
1715 | |
1716 | x = packFloatx80(xSign, xExp, xSig); |
1717 | fp0 = floatx80_mul(fp0, x, status); /* R'*S */ |
1718 | fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ |
1719 | |
1720 | status->float_rounding_mode = user_rnd_mode; |
1721 | status->floatx80_rounding_precision = user_rnd_prec; |
1722 | |
1723 | a = floatx80_add(fp0, x, status); |
1724 | |
1725 | float_raise(float_flag_inexact, status); |
1726 | |
1727 | return a; |
1728 | } |
1729 | } |
1730 | } |
1731 | |
1732 | /* |
1733 | * Cosine |
1734 | */ |
1735 | |
1736 | floatx80 floatx80_cos(floatx80 a, float_status *status) |
1737 | { |
1738 | flag aSign, xSign; |
1739 | int32_t aExp, xExp; |
1740 | uint64_t aSig, xSig; |
1741 | |
1742 | int8_t user_rnd_mode, user_rnd_prec; |
1743 | |
1744 | int32_t compact, l, n, j; |
1745 | floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; |
1746 | float32 posneg1, twoto63; |
1747 | flag endflag; |
1748 | |
1749 | aSig = extractFloatx80Frac(a); |
1750 | aExp = extractFloatx80Exp(a); |
1751 | aSign = extractFloatx80Sign(a); |
1752 | |
1753 | if (aExp == 0x7FFF) { |
1754 | if ((uint64_t) (aSig << 1)) { |
1755 | return propagateFloatx80NaNOneArg(a, status); |
1756 | } |
1757 | float_raise(float_flag_invalid, status); |
1758 | return floatx80_default_nan(status); |
1759 | } |
1760 | |
1761 | if (aExp == 0 && aSig == 0) { |
1762 | return packFloatx80(0, one_exp, one_sig); |
1763 | } |
1764 | |
1765 | user_rnd_mode = status->float_rounding_mode; |
1766 | user_rnd_prec = status->floatx80_rounding_precision; |
1767 | status->float_rounding_mode = float_round_nearest_even; |
1768 | status->floatx80_rounding_precision = 80; |
1769 | |
1770 | compact = floatx80_make_compact(aExp, aSig); |
1771 | |
1772 | fp0 = a; |
1773 | |
1774 | if (compact < 0x3FD78000 || compact > 0x4004BC7E) { |
1775 | /* 2^(-40) > |X| > 15 PI */ |
1776 | if (compact > 0x3FFF8000) { /* |X| >= 15 PI */ |
1777 | /* REDUCEX */ |
1778 | fp1 = packFloatx80(0, 0, 0); |
1779 | if (compact == 0x7FFEFFFF) { |
1780 | twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, |
1781 | UINT64_C(0xC90FDAA200000000)); |
1782 | twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, |
1783 | UINT64_C(0x85A308D300000000)); |
1784 | fp0 = floatx80_add(fp0, twopi1, status); |
1785 | fp1 = fp0; |
1786 | fp0 = floatx80_add(fp0, twopi2, status); |
1787 | fp1 = floatx80_sub(fp1, fp0, status); |
1788 | fp1 = floatx80_add(fp1, twopi2, status); |
1789 | } |
1790 | loop: |
1791 | xSign = extractFloatx80Sign(fp0); |
1792 | xExp = extractFloatx80Exp(fp0); |
1793 | xExp -= 0x3FFF; |
1794 | if (xExp <= 28) { |
1795 | l = 0; |
1796 | endflag = 1; |
1797 | } else { |
1798 | l = xExp - 27; |
1799 | endflag = 0; |
1800 | } |
1801 | invtwopi = packFloatx80(0, 0x3FFE - l, |
1802 | UINT64_C(0xA2F9836E4E44152A)); /* INVTWOPI */ |
1803 | twopi1 = packFloatx80(0, 0x3FFF + l, UINT64_C(0xC90FDAA200000000)); |
1804 | twopi2 = packFloatx80(0, 0x3FDD + l, UINT64_C(0x85A308D300000000)); |
1805 | |
1806 | /* SIGN(INARG)*2^63 IN SGL */ |
1807 | twoto63 = packFloat32(xSign, 0xBE, 0); |
1808 | |
1809 | fp2 = floatx80_mul(fp0, invtwopi, status); |
1810 | fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), |
1811 | status); /* THE FRACT PART OF FP2 IS ROUNDED */ |
1812 | fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), |
1813 | status); /* FP2 is N */ |
1814 | fp4 = floatx80_mul(twopi1, fp2, status); /* W = N*P1 */ |
1815 | fp5 = floatx80_mul(twopi2, fp2, status); /* w = N*P2 */ |
1816 | fp3 = floatx80_add(fp4, fp5, status); /* FP3 is P */ |
1817 | fp4 = floatx80_sub(fp4, fp3, status); /* W-P */ |
1818 | fp0 = floatx80_sub(fp0, fp3, status); /* FP0 is A := R - P */ |
1819 | fp4 = floatx80_add(fp4, fp5, status); /* FP4 is p = (W-P)+w */ |
1820 | fp3 = fp0; /* FP3 is A */ |
1821 | fp1 = floatx80_sub(fp1, fp4, status); /* FP1 is a := r - p */ |
1822 | fp0 = floatx80_add(fp0, fp1, status); /* FP0 is R := A+a */ |
1823 | |
1824 | if (endflag > 0) { |
1825 | n = floatx80_to_int32(fp2, status); |
1826 | goto sincont; |
1827 | } |
1828 | fp3 = floatx80_sub(fp3, fp0, status); /* A-R */ |
1829 | fp1 = floatx80_add(fp1, fp3, status); /* FP1 is r := (A-R)+a */ |
1830 | goto loop; |
1831 | } else { |
1832 | /* SINSM */ |
1833 | fp0 = float32_to_floatx80(make_float32(0x3F800000), status); /* 1 */ |
1834 | |
1835 | status->float_rounding_mode = user_rnd_mode; |
1836 | status->floatx80_rounding_precision = user_rnd_prec; |
1837 | |
1838 | /* COSTINY */ |
1839 | a = floatx80_sub(fp0, float32_to_floatx80( |
1840 | make_float32(0x00800000), status), |
1841 | status); |
1842 | float_raise(float_flag_inexact, status); |
1843 | |
1844 | return a; |
1845 | } |
1846 | } else { |
1847 | fp1 = floatx80_mul(fp0, float64_to_floatx80( |
1848 | make_float64(0x3FE45F306DC9C883), status), |
1849 | status); /* X*2/PI */ |
1850 | |
1851 | n = floatx80_to_int32(fp1, status); |
1852 | j = 32 + n; |
1853 | |
1854 | fp0 = floatx80_sub(fp0, pi_tbl[j], status); /* X-Y1 */ |
1855 | fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), |
1856 | status); /* FP0 IS R = (X-Y1)-Y2 */ |
1857 | |
1858 | sincont: |
1859 | if ((n + 1) & 1) { |
1860 | /* COSPOLY */ |
1861 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ |
1862 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ |
1863 | fp2 = float64_to_floatx80(make_float64(0x3D2AC4D0D6011EE3), |
1864 | status); /* B8 */ |
1865 | fp3 = float64_to_floatx80(make_float64(0xBDA9396F9F45AC19), |
1866 | status); /* B7 */ |
1867 | |
1868 | xSign = extractFloatx80Sign(fp0); /* X IS S */ |
1869 | xExp = extractFloatx80Exp(fp0); |
1870 | xSig = extractFloatx80Frac(fp0); |
1871 | |
1872 | if (((n + 1) >> 1) & 1) { |
1873 | xSign ^= 1; |
1874 | posneg1 = make_float32(0xBF800000); /* -1 */ |
1875 | } else { |
1876 | xSign ^= 0; |
1877 | posneg1 = make_float32(0x3F800000); /* 1 */ |
1878 | } /* X IS NOW R'= SGN*R */ |
1879 | |
1880 | fp2 = floatx80_mul(fp2, fp1, status); /* TB8 */ |
1881 | fp3 = floatx80_mul(fp3, fp1, status); /* TB7 */ |
1882 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1883 | make_float64(0x3E21EED90612C972), status), |
1884 | status); /* B6+TB8 */ |
1885 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1886 | make_float64(0xBE927E4FB79D9FCF), status), |
1887 | status); /* B5+TB7 */ |
1888 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B6+TB8) */ |
1889 | fp3 = floatx80_mul(fp3, fp1, status); /* T(B5+TB7) */ |
1890 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1891 | make_float64(0x3EFA01A01A01D423), status), |
1892 | status); /* B4+T(B6+TB8) */ |
1893 | fp4 = packFloatx80(1, 0x3FF5, UINT64_C(0xB60B60B60B61D438)); |
1894 | fp3 = floatx80_add(fp3, fp4, status); /* B3+T(B5+TB7) */ |
1895 | fp2 = floatx80_mul(fp2, fp1, status); /* T(B4+T(B6+TB8)) */ |
1896 | fp1 = floatx80_mul(fp1, fp3, status); /* T(B3+T(B5+TB7)) */ |
1897 | fp4 = packFloatx80(0, 0x3FFA, UINT64_C(0xAAAAAAAAAAAAAB5E)); |
1898 | fp2 = floatx80_add(fp2, fp4, status); /* B2+T(B4+T(B6+TB8)) */ |
1899 | fp1 = floatx80_add(fp1, float32_to_floatx80( |
1900 | make_float32(0xBF000000), status), |
1901 | status); /* B1+T(B3+T(B5+TB7)) */ |
1902 | fp0 = floatx80_mul(fp0, fp2, status); /* S(B2+T(B4+T(B6+TB8))) */ |
1903 | fp0 = floatx80_add(fp0, fp1, status); |
1904 | /* [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] */ |
1905 | |
1906 | x = packFloatx80(xSign, xExp, xSig); |
1907 | fp0 = floatx80_mul(fp0, x, status); |
1908 | |
1909 | status->float_rounding_mode = user_rnd_mode; |
1910 | status->floatx80_rounding_precision = user_rnd_prec; |
1911 | |
1912 | a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); |
1913 | |
1914 | float_raise(float_flag_inexact, status); |
1915 | |
1916 | return a; |
1917 | } else { |
1918 | /* SINPOLY */ |
1919 | xSign = extractFloatx80Sign(fp0); /* X IS R */ |
1920 | xExp = extractFloatx80Exp(fp0); |
1921 | xSig = extractFloatx80Frac(fp0); |
1922 | |
1923 | xSign ^= ((n + 1) >> 1) & 1; /* X IS NOW R'= SGN*R */ |
1924 | |
1925 | fp0 = floatx80_mul(fp0, fp0, status); /* FP0 IS S */ |
1926 | fp1 = floatx80_mul(fp0, fp0, status); /* FP1 IS T */ |
1927 | fp3 = float64_to_floatx80(make_float64(0xBD6AAA77CCC994F5), |
1928 | status); /* A7 */ |
1929 | fp2 = float64_to_floatx80(make_float64(0x3DE612097AAE8DA1), |
1930 | status); /* A6 */ |
1931 | fp3 = floatx80_mul(fp3, fp1, status); /* T*A7 */ |
1932 | fp2 = floatx80_mul(fp2, fp1, status); /* T*A6 */ |
1933 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1934 | make_float64(0xBE5AE6452A118AE4), status), |
1935 | status); /* A5+T*A7 */ |
1936 | fp2 = floatx80_add(fp2, float64_to_floatx80( |
1937 | make_float64(0x3EC71DE3A5341531), status), |
1938 | status); /* A4+T*A6 */ |
1939 | fp3 = floatx80_mul(fp3, fp1, status); /* T(A5+TA7) */ |
1940 | fp2 = floatx80_mul(fp2, fp1, status); /* T(A4+TA6) */ |
1941 | fp3 = floatx80_add(fp3, float64_to_floatx80( |
1942 | make_float64(0xBF2A01A01A018B59), status), |
1943 | status); /* A3+T(A5+TA7) */ |
1944 | fp4 = packFloatx80(0, 0x3FF8, UINT64_C(0x88888888888859AF)); |
1945 | fp2 = floatx80_add(fp2, fp4, status); /* A2+T(A4+TA6) */ |
1946 | fp1 = floatx80_mul(fp1, fp3, status); /* T(A3+T(A5+TA7)) */ |
1947 | fp2 = floatx80_mul(fp2, fp0, status); /* S(A2+T(A4+TA6)) */ |
1948 | fp4 = packFloatx80(1, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAA99)); |
1949 | fp1 = floatx80_add(fp1, fp4, status); /* A1+T(A3+T(A5+TA7)) */ |
1950 | fp1 = floatx80_add(fp1, fp2, status); |
1951 | /* [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] */ |
1952 | |
1953 | x = packFloatx80(xSign, xExp, xSig); |
1954 | fp0 = floatx80_mul(fp0, x, status); /* R'*S */ |
1955 | fp0 = floatx80_mul(fp0, fp1, status); /* SIN(R')-R' */ |
1956 | |
1957 | status->float_rounding_mode = user_rnd_mode; |
1958 | status->floatx80_rounding_precision = user_rnd_prec; |
1959 | |
1960 | a = floatx80_add(fp0, x, status); |
1961 | |
1962 | float_raise(float_flag_inexact, status); |
1963 | |
1964 | return a; |
1965 | } |
1966 | } |
1967 | } |
1968 | |
1969 | /* |
1970 | * Arc tangent |
1971 | */ |
1972 | |
/*
 * FPSP FATAN algorithm: tiny and huge arguments use direct
 * approximations; mid-range arguments look up ATAN(F) in atan_tbl for a
 * nearby point F and evaluate a short polynomial in U = (X-F)/(1+X*F).
 */
floatx80 floatx80_atan(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, tbl_index;
    floatx80 fp0, fp1, fp2, fp3, xsave;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        /* atan(+/-Inf) = +/-pi/2, inexact */
        a = packFloatx80(aSign, piby2_exp, pi_sig);
        float_raise(float_flag_inexact, status);
        return floatx80_move(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        /* atan(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    /* Do all intermediate work in round-to-nearest, extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (compact < 0x3FFB8000 || compact > 0x4002FFFF) {
        /* |X| >= 16 or |X| < 1/16 */
        if (compact > 0x3FFF8000) { /* |X| >= 16 */
            if (compact > 0x40638000) { /* |X| > 2^(100) */
                /* atan(X) ~= +/-pi/2, nudged inward to force inexact */
                fp0 = packFloatx80(aSign, piby2_exp, pi_sig);
                fp1 = packFloatx80(aSign, 0x0001, one_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_sub(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /*
                 * 16 <= |X| <= 2^100: use
                 * atan(X) = sign(X)*pi/2 + atan(-1/X)
                 * with atan(X') evaluated as X' + X'*Y*poly(Y), Y = X'^2.
                 */
                fp0 = a;
                fp1 = packFloatx80(1, one_exp, one_sig); /* -1 */
                fp1 = floatx80_div(fp1, fp0, status); /* X' = -1/X */
                xsave = fp1;
                fp0 = floatx80_mul(fp1, fp1, status); /* Y = X'*X' */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp3 = float64_to_floatx80(make_float64(0xBFB70BF398539E6A),
                                          status); /* C5 */
                fp2 = float64_to_floatx80(make_float64(0x3FBC7187962D1D7D),
                                          status); /* C4 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*C5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*C4 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924827107B8), status),
                                   status); /* C3+Z*C5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC999999996263E), status),
                                   status); /* C2+Z*C4 */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(C3+Z*C5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(C2+Z*C4) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555536), status),
                                   status); /* C1+Z*(C3+Z*C5) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X'*Y */
                /* [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X'*Y*([C1+Z*(C3+Z*C5)]+[Y*(C2+Z*C4)]) */
                fp0 = floatx80_mul(fp0, fp1, status);
                fp0 = floatx80_add(fp0, xsave, status);
                fp1 = packFloatx80(aSign, piby2_exp, pi_sig);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, fp1, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        } else { /* |X| < 1/16 */
            if (compact < 0x3FD78000) { /* |X| < 2^(-40) */
                /* atan(X) ~= X to working precision */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_move(a, status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /*
                 * 2^(-40) <= |X| < 1/16:
                 * atan(X) = X + X*Y*poly(Y), Y = X^2, Z = Y^2.
                 */
                fp0 = a;
                xsave = a;
                fp0 = floatx80_mul(fp0, fp0, status); /* Y = X*X */
                fp1 = floatx80_mul(fp0, fp0, status); /* Z = Y*Y */
                fp2 = float64_to_floatx80(make_float64(0x3FB344447F876989),
                                          status); /* B6 */
                fp3 = float64_to_floatx80(make_float64(0xBFB744EE7FAF45DB),
                                          status); /* B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*B6 */
                fp3 = floatx80_mul(fp3, fp1, status); /* Z*B5 */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FBC71C646940220), status),
                                   status); /* B4+Z*B6 */
                fp3 = floatx80_add(fp3, float64_to_floatx80(
                                   make_float64(0xBFC24924921872F9),
                                   status), status); /* B3+Z*B5 */
                fp2 = floatx80_mul(fp2, fp1, status); /* Z*(B4+Z*B6) */
                fp1 = floatx80_mul(fp1, fp3, status); /* Z*(B3+Z*B5) */
                fp2 = floatx80_add(fp2, float64_to_floatx80(
                                   make_float64(0x3FC9999999998FA9), status),
                                   status); /* B2+Z*(B4+Z*B6) */
                fp1 = floatx80_add(fp1, float64_to_floatx80(
                                   make_float64(0xBFD5555555555555), status),
                                   status); /* B1+Z*(B3+Z*B5) */
                fp2 = floatx80_mul(fp2, fp0, status); /* Y*(B2+Z*(B4+Z*B6)) */
                fp0 = floatx80_mul(fp0, xsave, status); /* X*Y */
                /* [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] */
                fp1 = floatx80_add(fp1, fp2, status);
                /* X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) */
                fp0 = floatx80_mul(fp0, fp1, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, xsave, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else {
        /*
         * 1/16 <= |X| < 16: split X around a table point F whose
         * significand keeps the top 5 fraction bits of X plus a forced
         * midpoint bit, then atan(X) = atan(F) + atan(U) with
         * U = (X-F)/(1+X*F).
         */
        aSig &= UINT64_C(0xF800000000000000);
        aSig |= UINT64_C(0x0400000000000000);
        xsave = packFloatx80(aSign, aExp, aSig); /* F */
        fp0 = a;
        fp1 = a; /* X */
        fp2 = packFloatx80(0, one_exp, one_sig); /* 1 */
        fp1 = floatx80_mul(fp1, xsave, status); /* X*F */
        fp0 = floatx80_sub(fp0, xsave, status); /* X-F */
        fp1 = floatx80_add(fp1, fp2, status); /* 1 + X*F */
        fp0 = floatx80_div(fp0, fp1, status); /* U = (X-F)/(1+X*F) */

        /* Build the atan_tbl index from F's exponent and top bits */
        tbl_index = compact;

        tbl_index &= 0x7FFF0000;
        tbl_index -= 0x3FFB0000;
        tbl_index >>= 1;
        tbl_index += compact & 0x00007800;
        tbl_index >>= 11;

        fp3 = atan_tbl[tbl_index];

        fp3.high |= aSign ? 0x8000 : 0; /* ATAN(F) */

        fp0 = floatx80_mul(fp0, fp0, status); /* V = U*U */
        fp2 = float64_to_floatx80(make_float64(0xBFF6687E314987D8),
                                  status); /* A3 */
        fp2 = floatx80_add(fp2, fp1, status); /* A3+V */
        fp2 = floatx80_mul(fp2, fp1, status); /* V*(A3+V) */
        fp1 = floatx80_mul(fp1, fp0, status); /* U*V */
        fp2 = floatx80_add(fp2, float64_to_floatx80(
                           make_float64(0x4002AC6934A26DB3), status),
                           status); /* A2+V*(A3+V) */
        fp1 = floatx80_mul(fp1, float64_to_floatx80(
                           make_float64(0xBFC2476F4E1DA28E), status),
                           status); /* A1*U*V */
        fp1 = floatx80_mul(fp1, fp2, status); /* A1*U*V*(A2+V*(A3+V)) */
        fp0 = floatx80_add(fp0, fp1, status); /* ATAN(U) */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_add(fp0, fp3, status); /* ATAN(X) */

        float_raise(float_flag_inexact, status);

        return a;
    }
}
2165 | |
2166 | /* |
2167 | * Arc sine |
2168 | */ |
2169 | |
/*
 * FPSP FASIN algorithm: for |X| < 1, use the identity
 * asin(X) = atan(X / sqrt((1-X)*(1+X))).
 */
floatx80 floatx80_asin(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        /* asin(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            /* asin(+/-1) = +/-pi/2, inexact */
            float_raise(float_flag_inexact, status);
            a = packFloatx80(aSign, piby2_exp, pi_sig);
            return floatx80_move(a, status);
        } else { /* |X| > 1 */
            /* Outside the domain (also covers +/-Inf) */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }

    } /* |X| < 1 */

    /* Do all intermediate work in round-to-nearest, extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp2 = floatx80_add(one, fp0, status); /* 1 + X */
    fp1 = floatx80_mul(fp2, fp1, status); /* (1+X)*(1-X) */
    fp1 = floatx80_sqrt(fp1, status); /* SQRT((1+X)*(1-X)) */
    fp0 = floatx80_div(fp0, fp1, status); /* X/SQRT((1+X)*(1-X)) */

    /* The final atan rounds in the caller's mode and precision */
    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_atan(fp0, status); /* ATAN(X/SQRT((1+X)*(1-X))) */

    float_raise(float_flag_inexact, status);

    return a;
}
2230 | |
2231 | /* |
2232 | * Arc cosine |
2233 | */ |
2234 | |
/*
 * FPSP FACOS algorithm: for |X| < 1, use the identity
 * acos(X) = 2 * atan(sqrt((1-X)/(1+X))).
 */
floatx80 floatx80_acos(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }
    if (aExp == 0 && aSig == 0) {
        /* acos(+/-0) = pi/2, rounded to the current precision */
        float_raise(float_flag_inexact, status);
        return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                    piby2_exp, pi_sig, 0, status);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            if (aSign) { /* X == -1 */
                /* acos(-1) = pi, inexact */
                a = packFloatx80(0, pi_exp, pi_sig);
                float_raise(float_flag_inexact, status);
                return floatx80_move(a, status);
            } else { /* X == +1 */
                /* acos(+1) = +0, exact */
                return packFloatx80(0, 0, 0);
            }
        } else { /* |X| > 1 */
            /* Outside the domain (also covers +/-Inf) */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    /* Do all intermediate work in round-to-nearest, extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp0 = a;

    fp1 = floatx80_add(one, fp0, status); /* 1 + X */
    fp0 = floatx80_sub(one, fp0, status); /* 1 - X */
    fp0 = floatx80_div(fp0, fp1, status); /* (1-X)/(1+X) */
    fp0 = floatx80_sqrt(fp0, status); /* SQRT((1-X)/(1+X)) */
    fp0 = floatx80_atan(fp0, status); /* ATAN(SQRT((1-X)/(1+X))) */

    /* The final doubling rounds in the caller's mode and precision */
    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp0, status); /* 2 * ATAN(SQRT((1-X)/(1+X))) */

    float_raise(float_flag_inexact, status);

    return a;
}
2299 | |
2300 | /* |
2301 | * Hyperbolic arc tangent |
2302 | */ |
2303 | |
/*
 * FPSP FATANH algorithm: for 0 < |X| < 1, use the identity
 * atanh(X) = sign(X) * (1/2) * log1p(2|X| / (1-|X|)).
 */
floatx80 floatx80_atanh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2, one;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF && (uint64_t) (aSig << 1)) {
        return propagateFloatx80NaNOneArg(a, status);
    }

    if (aExp == 0 && aSig == 0) {
        /* atanh(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    compact = floatx80_make_compact(aExp, aSig);

    if (compact >= 0x3FFF8000) { /* |X| >= 1 */
        if (aExp == one_exp && aSig == one_sig) { /* |X| == 1 */
            /* atanh(+/-1) = +/-Inf with divide-by-zero */
            float_raise(float_flag_divbyzero, status);
            return packFloatx80(aSign, floatx80_infinity.high,
                                floatx80_infinity.low);
        } else { /* |X| > 1 */
            /* Outside the domain (also covers +/-Inf) */
            float_raise(float_flag_invalid, status);
            return floatx80_default_nan(status);
        }
    } /* |X| < 1 */

    /* Do all intermediate work in round-to-nearest, extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    one = packFloatx80(0, one_exp, one_sig);
    fp2 = packFloatx80(aSign, 0x3FFE, one_sig); /* SIGN(X) * (1/2) */
    fp0 = packFloatx80(0, aExp, aSig); /* Y = |X| */
    fp1 = packFloatx80(1, aExp, aSig); /* -Y */
    fp0 = floatx80_add(fp0, fp0, status); /* 2Y */
    fp1 = floatx80_add(fp1, one, status); /* 1-Y */
    fp0 = floatx80_div(fp0, fp1, status); /* Z = 2Y/(1-Y) */
    fp0 = floatx80_lognp1(fp0, status); /* LOG1P(Z) */

    /* The final scaling rounds in the caller's mode and precision */
    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_mul(fp0, fp2,
                     status); /* ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) */

    float_raise(float_flag_inexact, status);

    return a;
}
2364 | |
2365 | /* |
2366 | * e to x minus 1 |
2367 | */ |
2368 | |
/*
 * FPSP FETOXM1 algorithm: mid-range arguments are reduced as
 * X = R + N*(log2/64), N = 64*M + J, so that
 * exp(X)-1 = 2^M * (2^(J/64)*(exp(R)-1) + (2^(J/64) - 2^(-M))),
 * using the exp_tbl/exp_tbl2 tables for 2^(J/64); small arguments use a
 * direct polynomial, and huge arguments saturate to -1 or defer to
 * floatx80_etox().
 */
floatx80 floatx80_etoxm1(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact, n, j, m, m1;
    floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        if (aSign) {
            /* expm1(-Inf) = -1, exact */
            return packFloatx80(aSign, one_exp, one_sig);
        }
        /* expm1(+Inf) = +Inf */
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* expm1(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    /* Do all intermediate work in round-to-nearest, extended precision */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    if (aExp >= 0x3FFD) { /* |X| >= 1/4 */
        compact = floatx80_make_compact(aExp, aSig);

        if (compact <= 0x4004C215) { /* |X| <= 70 log2 */
            fp0 = a;
            fp1 = a;
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x42B8AA3B), status),
                               status); /* 64/log2 * X */
            n = floatx80_to_int32(fp0, status); /* int(64/log2*X) */
            fp0 = int32_to_floatx80(n, status);

            j = n & 0x3F; /* J = N mod 64 */
            m = n / 64; /* NOTE: this is really arithmetic right shift by 6 */
            if (n < 0 && j) {
                /*
                 * arithmetic right shift is division and
                 * round towards minus infinity
                 */
                m--;
            }
            m1 = -m;
            /*m += 0x3FFF; // biased exponent of 2^(M) */
            /*m1 += 0x3FFF; // biased exponent of -2^(-M) */

            /* R = X - N*log2/64, with log2/64 split as L1 + L2 */
            fp2 = fp0; /* N */
            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0xBC317218), status),
                               status); /* N * L1, L1 = lead(-log2/64) */
            l2 = packFloatx80(0, 0x3FDC, UINT64_C(0x82E308654361C4C6));
            fp2 = floatx80_mul(fp2, l2, status); /* N * L2, L1+L2 = -log2/64 */
            fp0 = floatx80_add(fp0, fp1, status); /* X + N*L1 */
            fp0 = floatx80_add(fp0, fp2, status); /* R */

            /* Polynomial for exp(R)-1 in S = R*R */
            fp1 = floatx80_mul(fp0, fp0, status); /* S = R*R */
            fp2 = float32_to_floatx80(make_float32(0x3950097B),
                                      status); /* A6 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 is S*A6 */
            fp3 = floatx80_mul(float32_to_floatx80(make_float32(0x3AB60B6A),
                               status), fp1, status); /* fp3 is S*A5 */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F81111111174385), status),
                               status); /* fp2 IS A4+S*A6 */
            fp3 = floatx80_add(fp3, float64_to_floatx80(
                               make_float64(0x3FA5555555554F5A), status),
                               status); /* fp3 is A3+S*A5 */
            fp2 = floatx80_mul(fp2, fp1, status); /* fp2 IS S*(A4+S*A6) */
            fp3 = floatx80_mul(fp3, fp1, status); /* fp3 IS S*(A3+S*A5) */
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FC5555555555555), status),
                               status); /* fp2 IS A2+S*(A4+S*A6) */
            fp3 = floatx80_add(fp3, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* fp3 IS A1+S*(A3+S*A5) */
            fp2 = floatx80_mul(fp2, fp1,
                               status); /* fp2 IS S*(A2+S*(A4+S*A6)) */
            fp1 = floatx80_mul(fp1, fp3,
                               status); /* fp1 IS S*(A1+S*(A3+S*A5)) */
            fp2 = floatx80_mul(fp2, fp0,
                               status); /* fp2 IS R*S*(A2+S*(A4+S*A6)) */
            fp0 = floatx80_add(fp0, fp1,
                               status); /* fp0 IS R+S*(A1+S*(A3+S*A5)) */
            fp0 = floatx80_add(fp0, fp2, status); /* fp0 IS EXP(R) - 1 */

            fp0 = floatx80_mul(fp0, exp_tbl[j],
                               status); /* 2^(J/64)*(Exp(R)-1) */

            /*
             * Add 2^(J/64) - 2^(-M) in the order that preserves the most
             * precision for the size of M (2^(-M) may be negligible,
             * dominant, or comparable to 2^(J/64)).
             */
            if (m >= 64) {
                fp1 = float32_to_floatx80(exp_tbl2[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
            } else if (m < -3) {
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                fp0 = floatx80_add(fp0, exp_tbl[j], status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp0 = floatx80_add(fp0, onebysc, status);
            } else { /* -3 <= m <= 63 */
                fp1 = exp_tbl[j];
                fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j],
                                   status), status);
                onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); /* -2^(-M) */
                fp1 = floatx80_add(fp1, onebysc, status);
                fp0 = floatx80_add(fp0, fp1, status);
            }

            sc = packFloatx80(0, m + 0x3FFF, one_sig); /* 2^M */

            /* Final scaling by 2^M in the caller's rounding mode */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, sc, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| > 70 log2 */
            if (aSign) {
                /* expm1(X) ~= -1, nudged to force inexact */
                fp0 = float32_to_floatx80(make_float32(0xBF800000),
                                          status); /* -1 */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, float32_to_floatx80(
                                 make_float32(0x00800000), status),
                                 status); /* -1 + 2^(-126) */

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /* exp(X) dwarfs the -1 term; compute exp(X) directly */
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                return floatx80_etox(a, status);
            }
        }
    } else { /* |X| < 1/4 */
        if (aExp >= 0x3FBE) {
            /*
             * 2^(-65) <= |X| < 1/4: direct polynomial
             * expm1(X) = X + S*B1 + Q, S = X*X, evaluated as two
             * interleaved Horner chains (fp1/fp2).
             */
            fp0 = a;
            fp0 = floatx80_mul(fp0, fp0, status); /* S = X*X */
            fp1 = float32_to_floatx80(make_float32(0x2F30CAA8),
                                      status); /* B12 */
            fp1 = floatx80_mul(fp1, fp0, status); /* S * B12 */
            fp2 = float32_to_floatx80(make_float32(0x310F8290),
                                      status); /* B11 */
            fp1 = floatx80_add(fp1, float32_to_floatx80(
                               make_float32(0x32D73220), status),
                               status); /* B10 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float32_to_floatx80(
                               make_float32(0x3493F281), status),
                               status); /* B9 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3EC71DE3A5774682), status),
                               status); /* B8 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3EFA01A019D7CB68), status),
                               status); /* B7 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F2A01A01A019DF3), status),
                               status); /* B6 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3F56C16C16C170E2), status),
                               status); /* B5 */
            fp1 = floatx80_add(fp1, float64_to_floatx80(
                               make_float64(0x3F81111111111111), status),
                               status); /* B4 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);
            fp2 = floatx80_add(fp2, float64_to_floatx80(
                               make_float64(0x3FA5555555555555), status),
                               status); /* B3 */
            fp3 = packFloatx80(0, 0x3FFC, UINT64_C(0xAAAAAAAAAAAAAAAB));
            fp1 = floatx80_add(fp1, fp3, status); /* B2 */
            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, fp0, status);

            fp2 = floatx80_mul(fp2, fp0, status);
            fp1 = floatx80_mul(fp1, a, status);

            fp0 = floatx80_mul(fp0, float32_to_floatx80(
                               make_float32(0x3F000000), status),
                               status); /* S*B1 */
            fp1 = floatx80_add(fp1, fp2, status); /* Q */
            fp0 = floatx80_add(fp0, fp1, status); /* S*B1+Q */

            /* Final add of X in the caller's rounding mode */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_add(fp0, a, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else { /* |X| < 2^(-65) */
            /* expm1(X) ~= X; compute X + (-2) + 2 to force inexact */
            sc = packFloatx80(1, 1, one_sig);
            fp0 = a;

            if (aExp < 0x0033) { /* |X| < 2^(-16382) */
                /* Scale up by 2^140, operate, then scale back by 2^(-140)
                 * to avoid losing the tiny X against the constant. */
                fp0 = floatx80_mul(fp0, float64_to_floatx80(
                                   make_float64(0x48B0000000000000), status),
                                   status);
                fp0 = floatx80_add(fp0, sc, status);

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_mul(fp0, float64_to_floatx80(
                                 make_float64(0x3730000000000000), status),
                                 status);
            } else {
                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, sc, status);
            }

            float_raise(float_flag_inexact, status);

            return a;
        }
    }
}
2616 | |
2617 | /* |
2618 | * Hyperbolic tangent |
2619 | */ |
2620 | |
floatx80 floatx80_tanh(floatx80 a, float_status *status)
{
    flag aSign, vSign;
    int32_t aExp, vExp;
    uint64_t aSig, vSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;
    uint32_t sign;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN or infinity: propagate NaNs; tanh(+/-inf) = +/-1 */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return packFloatx80(aSign, one_exp, one_sig);
    }

    if (aExp == 0 && aSig == 0) {
        /* tanh(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    /*
     * Intermediate steps run in round-to-nearest, full extended
     * precision; the caller's mode/precision is restored just before
     * the final operation so only that one is rounded as requested.
     */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    /* 32-bit "compact" magnitude key (exp:16 | top frac bits) used to
     * select between the argument-range cases below. */
    compact = floatx80_make_compact(aExp, aSig);

    if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) {
        /* TANHBORS */
        if (compact < 0x3FFF8000) {
            /*
             * TANHSM: |X| is tiny (below ~2**(-40) — TODO confirm the
             * exact bound encoded by 0x3FD78000), so tanh(X) ~= X.
             * The move re-rounds X to the user's precision and mode.
             */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_move(a, status);

            float_raise(float_flag_inexact, status);

            return a;
        } else {
            if (compact > 0x40048AA1) {
                /*
                 * TANHHUGE: |X| is large enough that tanh(X) rounds to
                 * +/-1; deliver SIGN(X)*1 nudged by -SIGN(X)*EPS so the
                 * result is inexact and correctly rounded in all modes.
                 */
                sign = 0x3F800000;                  /* float32 1.0 */
                sign |= aSign ? 0x80000000 : 0x00000000; /* SIGN(X)*1 */
                fp0 = float32_to_floatx80(make_float32(sign), status);
                sign &= 0x80000000;                 /* keep only the sign bit */
                sign ^= 0x80800000; /* -SIGN(X)*EPS */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp0, float32_to_floatx80(make_float32(sign),
                                 status), status);

                float_raise(float_flag_inexact, status);

                return a;
            } else {
                /*
                 * (5/2)LOG2 < |X| < TANHHUGE bound:
                 * tanh(X) = SIGN(X) - SIGN(X)*2 / [exp(2|X|) + 1]
                 */
                fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
                fp0 = floatx80_etox(fp0, status); /* FP0 IS EXP(Y) */
                fp0 = floatx80_add(fp0, float32_to_floatx80(
                                   make_float32(0x3F800000),
                                   status), status); /* EXP(Y)+1 */
                sign = aSign ? 0x80000000 : 0x00000000;
                /* 0xC0000000 is float32 -2.0; XOR with sign gives -SIGN(X)*2 */
                fp1 = floatx80_div(float32_to_floatx80(make_float32(
                                   sign ^ 0xC0000000), status), fp0,
                                   status); /* -SIGN(X)*2 / [EXP(Y)+1] */
                fp0 = float32_to_floatx80(make_float32(sign | 0x3F800000),
                                          status); /* SIGN */

                status->float_rounding_mode = user_rnd_mode;
                status->floatx80_rounding_precision = user_rnd_prec;

                a = floatx80_add(fp1, fp0, status);

                float_raise(float_flag_inexact, status);

                return a;
            }
        }
    } else { /* 2**(-40) < |X| < (5/2)LOG2 */
        /*
         * Main range: tanh(X) = Z / (Z+2) with SIGN(X) folded into the
         * divisor, where Z = expm1(2|X|).
         */
        fp0 = packFloatx80(0, aExp + 1, aSig); /* Y = 2|X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x40000000),
                           status),
                           status); /* Z+2 */

        /* Flip the sign of (Z+2) when X is negative, by repacking. */
        vSign = extractFloatx80Sign(fp1);
        vExp = extractFloatx80Exp(fp1);
        vSig = extractFloatx80Frac(fp1);

        fp1 = packFloatx80(vSign ^ aSign, vExp, vSig);

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_div(fp0, fp1, status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
2731 | |
2732 | /* |
2733 | * Hyperbolic sine |
2734 | */ |
2735 | |
floatx80 floatx80_sinh(floatx80 a, float_status *status)
{
    flag aSign;
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1, fp2;
    float32 fact;

    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);
    aSign = extractFloatx80Sign(a);

    if (aExp == 0x7FFF) {
        /* NaN or infinity: propagate NaNs; sinh(+/-inf) = +/-inf */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return packFloatx80(aSign, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* sinh(+/-0) = +/-0, exact */
        return packFloatx80(aSign, 0, 0);
    }

    /*
     * Intermediate steps run in round-to-nearest, full extended
     * precision; the caller's mode/precision is restored just before
     * the final operation so only that one is rounded as requested.
     */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    /* 32-bit "compact" magnitude key (exp:16 | top frac bits). */
    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        /* SINHBIG: |X| >= 16380 LOG2 -- exp(|X|) would overflow. */
        if (compact > 0x400CB2B3) {
            /*
             * Certain overflow: hand an out-of-range exponent (0x8000)
             * to roundAndPackFloatx80 so it delivers the correctly
             * signed overflow result for the user's rounding mode.
             */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            return roundAndPackFloatx80(status->floatx80_rounding_precision,
                                        aSign, 0x8000, aSig, 0, status);
        } else {
            /*
             * Near-overflow: compute exp(|X| - 16381*LOG2) so the
             * intermediate stays in range, then scale back up by
             * 2**16380 (exponent 0x7FFB) with SIGN(X) attached.
             * The 16381*LOG2 constant is split into a leading and a
             * trailing double for extra accuracy.
             */
            fp0 = floatx80_abs(a); /* Y = |X| */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status); /* (|X|-16381LOG2_LEAD) */
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status); /* |X| - 16381 LOG2, ACCURATE */
            fp0 = floatx80_etox(fp0, status);
            fp2 = packFloatx80(aSign, 0x7FFB, one_sig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp2, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    } else { /* |X| < 16380 LOG2 */
        /*
         * Main range: with Z = expm1(|X|),
         *   sinh(|X|) = (Z + Z/(1+Z)) / 2
         * then multiply by SIGN(X)*1/2 (float32 0x7E exponent = 0.5).
         */
        fp0 = floatx80_abs(a); /* Y = |X| */
        fp0 = floatx80_etoxm1(fp0, status); /* FP0 IS Z = EXPM1(Y) */
        fp1 = floatx80_add(fp0, float32_to_floatx80(make_float32(0x3F800000),
                           status), status); /* 1+Z */
        fp2 = fp0;
        fp0 = floatx80_div(fp0, fp1, status); /* Z/(1+Z) */
        fp0 = floatx80_add(fp0, fp2, status);

        fact = packFloat32(aSign, 0x7E, 0); /* SIGN(X) * 0.5 */

        status->float_rounding_mode = user_rnd_mode;
        status->floatx80_rounding_precision = user_rnd_prec;

        a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status);

        float_raise(float_flag_inexact, status);

        return a;
    }
}
2819 | } |
2820 | |
2821 | /* |
2822 | * Hyperbolic cosine |
2823 | */ |
2824 | |
floatx80 floatx80_cosh(floatx80 a, float_status *status)
{
    int32_t aExp;
    uint64_t aSig;

    int8_t user_rnd_mode, user_rnd_prec;

    int32_t compact;
    floatx80 fp0, fp1;

    /* cosh is even, so the sign of the argument is never consulted
     * (note: no aSign extraction, unlike sinh/tanh). */
    aSig = extractFloatx80Frac(a);
    aExp = extractFloatx80Exp(a);

    if (aExp == 0x7FFF) {
        /* NaN or infinity: propagate NaNs; cosh(+/-inf) = +inf */
        if ((uint64_t) (aSig << 1)) {
            return propagateFloatx80NaNOneArg(a, status);
        }
        return packFloatx80(0, floatx80_infinity.high,
                            floatx80_infinity.low);
    }

    if (aExp == 0 && aSig == 0) {
        /* cosh(+/-0) = +1, exact */
        return packFloatx80(0, one_exp, one_sig);
    }

    /*
     * Intermediate steps run in round-to-nearest, full extended
     * precision; the caller's mode/precision is restored just before
     * the final operation so only that one is rounded as requested.
     */
    user_rnd_mode = status->float_rounding_mode;
    user_rnd_prec = status->floatx80_rounding_precision;
    status->float_rounding_mode = float_round_nearest_even;
    status->floatx80_rounding_precision = 80;

    /* 32-bit "compact" magnitude key (exp:16 | top frac bits). */
    compact = floatx80_make_compact(aExp, aSig);

    if (compact > 0x400CB167) {
        /* |X| is large -- exp(|X|) would overflow (same thresholds as
         * sinh, but the result is always positive). */
        if (compact > 0x400CB2B3) {
            /*
             * Certain overflow: out-of-range exponent 0x8000 makes
             * roundAndPackFloatx80 deliver +overflow for the user's
             * rounding mode.
             */
            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;
            return roundAndPackFloatx80(status->floatx80_rounding_precision, 0,
                                        0x8000, one_sig, 0, status);
        } else {
            /*
             * Near-overflow: compute exp(|X| - 16381*LOG2) in range,
             * then scale back up by 2**16380 (exponent 0x7FFB).  The
             * 16381*LOG2 constant is split into a leading and a
             * trailing double for accuracy.
             */
            fp0 = packFloatx80(0, aExp, aSig);
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x40C62D38D3D64634), status),
                               status);
            fp0 = floatx80_sub(fp0, float64_to_floatx80(
                               make_float64(0x3D6F90AEB1E75CC7), status),
                               status);
            fp0 = floatx80_etox(fp0, status);
            fp1 = packFloatx80(0, 0x7FFB, one_sig);

            status->float_rounding_mode = user_rnd_mode;
            status->floatx80_rounding_precision = user_rnd_prec;

            a = floatx80_mul(fp0, fp1, status);

            float_raise(float_flag_inexact, status);

            return a;
        }
    }

    /* Main range: cosh(X) = exp(|X|)/2 + 1/(2*exp(|X|)). */
    fp0 = packFloatx80(0, aExp, aSig); /* |X| */
    fp0 = floatx80_etox(fp0, status); /* EXP(|X|) */
    fp0 = floatx80_mul(fp0, float32_to_floatx80(make_float32(0x3F000000),
                       status), status); /* (1/2)*EXP(|X|) */
    fp1 = float32_to_floatx80(make_float32(0x3E800000), status); /* 1/4 */
    fp1 = floatx80_div(fp1, fp0, status); /* 1/(2*EXP(|X|)) */

    status->float_rounding_mode = user_rnd_mode;
    status->floatx80_rounding_precision = user_rnd_prec;

    a = floatx80_add(fp0, fp1, status);

    float_raise(float_flag_inexact, status);

    return a;
}
2901 | |