1/*
2 * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "jni.h"
27#include "runtime/interfaceSupport.inline.hpp"
28#include "runtime/sharedRuntime.hpp"
29#include "runtime/sharedRuntimeMath.hpp"
30
31// This file contains copies of the fdlibm routines used by
32// StrictMath. It turns out that it is almost always required to use
33// these runtime routines; the Intel CPU doesn't meet the Java
34// specification for sin/cos outside a certain limited argument range,
35// and the SPARC CPU doesn't appear to have sin/cos instructions. It
36// also turns out that avoiding the indirect call through function
37// pointer out to libjava.so in SharedRuntime speeds these routines up
38// by roughly 15% on both Win32/x86 and Solaris/SPARC.
39
40/*
41 * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
42 * double x[],y[]; int e0,nx,prec; int ipio2[];
43 *
44 * __kernel_rem_pio2 return the last three digits of N with
45 * y = x - N*pi/2
46 * so that |y| < pi/2.
47 *
48 * The method is to compute the integer (mod 8) and fraction parts of
49 * (2/pi)*x without doing the full multiplication. In general we
50 * skip the part of the product that are known to be a huge integer (
51 * more accurately, = 0 mod 8 ). Thus the number of operations are
52 * independent of the exponent of the input.
53 *
54 * (2/pi) is represented by an array of 24-bit integers in ipio2[].
55 *
56 * Input parameters:
57 * x[] The input value (must be positive) is broken into nx
58 * pieces of 24-bit integers in double precision format.
59 * x[i] will be the i-th 24 bit of x. The scaled exponent
60 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
61 * match x's up to 24 bits.
62 *
63 * Example of breaking a double positive z into x[0]+x[1]+x[2]:
64 * e0 = ilogb(z)-23
65 * z = scalbn(z,-e0)
66 * for i = 0,1,2
67 * x[i] = floor(z)
68 * z = (z-x[i])*2**24
69 *
70 *
71 * y[] ouput result in an array of double precision numbers.
72 * The dimension of y[] is:
73 * 24-bit precision 1
74 * 53-bit precision 2
75 * 64-bit precision 2
76 * 113-bit precision 3
77 * The actual value is the sum of them. Thus for 113-bit
78 * precsion, one may have to do something like:
79 *
80 * long double t,w,r_head, r_tail;
81 * t = (long double)y[2] + (long double)y[1];
82 * w = (long double)y[0];
83 * r_head = t+w;
84 * r_tail = w - (r_head - t);
85 *
86 * e0 The exponent of x[0]
87 *
88 * nx dimension of x[]
89 *
90 * prec an interger indicating the precision:
91 * 0 24 bits (single)
92 * 1 53 bits (double)
93 * 2 64 bits (extended)
94 * 3 113 bits (quad)
95 *
96 * ipio2[]
97 * integer array, contains the (24*i)-th to (24*i+23)-th
98 * bit of 2/pi after binary point. The corresponding
99 * floating value is
100 *
101 * ipio2[i] * 2^(-24(i+1)).
102 *
103 * External function:
104 * double scalbn(), floor();
105 *
106 *
107 * Here is the description of some local variables:
108 *
109 * jk jk+1 is the initial number of terms of ipio2[] needed
110 * in the computation. The recommended value is 2,3,4,
111 * 6 for single, double, extended,and quad.
112 *
113 * jz local integer variable indicating the number of
114 * terms of ipio2[] used.
115 *
116 * jx nx - 1
117 *
118 * jv index for pointing to the suitable ipio2[] for the
119 * computation. In general, we want
120 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
121 * is an integer. Thus
122 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv
123 * Hence jv = max(0,(e0-3)/24).
124 *
125 * jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
126 *
127 * q[] double array with integral value, representing the
128 * 24-bits chunk of the product of x and 2/pi.
129 *
130 * q0 the corresponding exponent of q[0]. Note that the
131 * exponent for q[i] would be q0-24*i.
132 *
133 * PIo2[] double precision array, obtained by cutting pi/2
134 * into 24 bits chunks.
135 *
136 * f[] ipio2[] in floating point
137 *
138 * iq[] integer array by breaking up q[] in 24-bits chunk.
139 *
140 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
141 *
142 * ih integer. If >0 it indicates q[] is >= 0.5, hence
143 * it also indicates the *sign* of the result.
144 *
145 */
146
147
148/*
149 * Constants:
150 * The hexadecimal values are the intended ones for the following
151 * constants. The decimal values may be used, provided that the
152 * compiler will convert from decimal to binary accurately enough
153 * to produce the hexadecimal values shown.
154 */
155
156
157static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
158
159static const double PIo2[] = {
160 1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
161 7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
162 5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
163 3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
164 1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
165 1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
166 2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
167 2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
168};
169
170static const double
171zeroB = 0.0,
172one = 1.0,
173two24B = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
174twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
175
176static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, const int *ipio2) {
177 int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih;
178 double z,fw,f[20],fq[20],q[20];
179
180 /* initialize jk*/
181 jk = init_jk[prec];
182 jp = jk;
183
184 /* determine jx,jv,q0, note that 3>q0 */
185 jx = nx-1;
186 jv = (e0-3)/24; if(jv<0) jv=0;
187 q0 = e0-24*(jv+1);
188
189 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
190 j = jv-jx; m = jx+jk;
191 for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : (double) ipio2[j];
192
193 /* compute q[0],q[1],...q[jk] */
194 for (i=0;i<=jk;i++) {
195 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
196 }
197
198 jz = jk;
199recompute:
200 /* distill q[] into iq[] reversingly */
201 for(i=0,j=jz,z=q[jz];j>0;i++,j--) {
202 fw = (double)((int)(twon24* z));
203 iq[i] = (int)(z-two24B*fw);
204 z = q[j-1]+fw;
205 }
206
207 /* compute n */
208 z = scalbnA(z,q0); /* actual value of z */
209 z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */
210 n = (int) z;
211 z -= (double)n;
212 ih = 0;
213 if(q0>0) { /* need iq[jz-1] to determine n */
214 i = (iq[jz-1]>>(24-q0)); n += i;
215 iq[jz-1] -= i<<(24-q0);
216 ih = iq[jz-1]>>(23-q0);
217 }
218 else if(q0==0) ih = iq[jz-1]>>23;
219 else if(z>=0.5) ih=2;
220
221 if(ih>0) { /* q > 0.5 */
222 n += 1; carry = 0;
223 for(i=0;i<jz ;i++) { /* compute 1-q */
224 j = iq[i];
225 if(carry==0) {
226 if(j!=0) {
227 carry = 1; iq[i] = 0x1000000- j;
228 }
229 } else iq[i] = 0xffffff - j;
230 }
231 if(q0>0) { /* rare case: chance is 1 in 12 */
232 switch(q0) {
233 case 1:
234 iq[jz-1] &= 0x7fffff; break;
235 case 2:
236 iq[jz-1] &= 0x3fffff; break;
237 }
238 }
239 if(ih==2) {
240 z = one - z;
241 if(carry!=0) z -= scalbnA(one,q0);
242 }
243 }
244
245 /* check if recomputation is needed */
246 if(z==zeroB) {
247 j = 0;
248 for (i=jz-1;i>=jk;i--) j |= iq[i];
249 if(j==0) { /* need recomputation */
250 for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */
251
252 for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */
253 f[jx+i] = (double) ipio2[jv+i];
254 for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j];
255 q[i] = fw;
256 }
257 jz += k;
258 goto recompute;
259 }
260 }
261
262 /* chop off zero terms */
263 if(z==0.0) {
264 jz -= 1; q0 -= 24;
265 while(iq[jz]==0) { jz--; q0-=24;}
266 } else { /* break z into 24-bit if necessary */
267 z = scalbnA(z,-q0);
268 if(z>=two24B) {
269 fw = (double)((int)(twon24*z));
270 iq[jz] = (int)(z-two24B*fw);
271 jz += 1; q0 += 24;
272 iq[jz] = (int) fw;
273 } else iq[jz] = (int) z ;
274 }
275
276 /* convert integer "bit" chunk to floating-point value */
277 fw = scalbnA(one,q0);
278 for(i=jz;i>=0;i--) {
279 q[i] = fw*(double)iq[i]; fw*=twon24;
280 }
281
282 /* compute PIo2[0,...,jp]*q[jz,...,0] */
283 for(i=jz;i>=0;i--) {
284 for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
285 fq[jz-i] = fw;
286 }
287
288 /* compress fq[] into y[] */
289 switch(prec) {
290 case 0:
291 fw = 0.0;
292 for (i=jz;i>=0;i--) fw += fq[i];
293 y[0] = (ih==0)? fw: -fw;
294 break;
295 case 1:
296 case 2:
297 fw = 0.0;
298 for (i=jz;i>=0;i--) fw += fq[i];
299 y[0] = (ih==0)? fw: -fw;
300 fw = fq[0]-fw;
301 for (i=1;i<=jz;i++) fw += fq[i];
302 y[1] = (ih==0)? fw: -fw;
303 break;
304 case 3: /* painful */
305 for (i=jz;i>0;i--) {
306 fw = fq[i-1]+fq[i];
307 fq[i] += fq[i-1]-fw;
308 fq[i-1] = fw;
309 }
310 for (i=jz;i>1;i--) {
311 fw = fq[i-1]+fq[i];
312 fq[i] += fq[i-1]-fw;
313 fq[i-1] = fw;
314 }
315 for (fw=0.0,i=jz;i>=2;i--) fw += fq[i];
316 if(ih==0) {
317 y[0] = fq[0]; y[1] = fq[1]; y[2] = fw;
318 } else {
319 y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw;
320 }
321 }
322 return n&7;
323}
324
325
326/*
327 * ====================================================
328 * Copyright (c) 1993 Oracle and/or its affiliates. All rights reserved.
329 *
330 * Developed at SunPro, a Sun Microsystems, Inc. business.
331 * Permission to use, copy, modify, and distribute this
332 * software is freely granted, provided that this notice
333 * is preserved.
334 * ====================================================
335 *
336 */
337
338/* __ieee754_rem_pio2(x,y)
339 *
340 * return the remainder of x rem pi/2 in y[0]+y[1]
341 * use __kernel_rem_pio2()
342 */
343
344/*
345 * Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
346 */
347static const int two_over_pi[] = {
348 0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
349 0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
350 0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
351 0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
352 0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
353 0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
354 0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
355 0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
356 0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
357 0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
358 0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
359};
360
361static const int npio2_hw[] = {
362 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
363 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
364 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
365 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
366 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
367 0x404858EB, 0x404921FB,
368};
369
370/*
371 * invpio2: 53 bits of 2/pi
372 * pio2_1: first 33 bit of pi/2
373 * pio2_1t: pi/2 - pio2_1
374 * pio2_2: second 33 bit of pi/2
375 * pio2_2t: pi/2 - (pio2_1+pio2_2)
376 * pio2_3: third 33 bit of pi/2
377 * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
378 */
379
380static const double
381zeroA = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
382half = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
383two24A = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
384invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
385pio2_1 = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
386pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
387pio2_2 = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
388pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
389pio2_3 = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
390pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
391
392static int __ieee754_rem_pio2(double x, double *y) {
393 double z,w,t,r,fn;
394 double tx[3];
395 int e0,i,j,nx,n,ix,hx,i0;
396
397 i0 = ((*(int*)&two24A)>>30)^1; /* high word index */
398 hx = *(i0+(int*)&x); /* high word of x */
399 ix = hx&0x7fffffff;
400 if(ix<=0x3fe921fb) /* |x| ~<= pi/4 , no need for reduction */
401 {y[0] = x; y[1] = 0; return 0;}
402 if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */
403 if(hx>0) {
404 z = x - pio2_1;
405 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
406 y[0] = z - pio2_1t;
407 y[1] = (z-y[0])-pio2_1t;
408 } else { /* near pi/2, use 33+33+53 bit pi */
409 z -= pio2_2;
410 y[0] = z - pio2_2t;
411 y[1] = (z-y[0])-pio2_2t;
412 }
413 return 1;
414 } else { /* negative x */
415 z = x + pio2_1;
416 if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */
417 y[0] = z + pio2_1t;
418 y[1] = (z-y[0])+pio2_1t;
419 } else { /* near pi/2, use 33+33+53 bit pi */
420 z += pio2_2;
421 y[0] = z + pio2_2t;
422 y[1] = (z-y[0])+pio2_2t;
423 }
424 return -1;
425 }
426 }
427 if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */
428 t = fabsd(x);
429 n = (int) (t*invpio2+half);
430 fn = (double)n;
431 r = t-fn*pio2_1;
432 w = fn*pio2_1t; /* 1st round good to 85 bit */
433 if(n<32&&ix!=npio2_hw[n-1]) {
434 y[0] = r-w; /* quick check no cancellation */
435 } else {
436 j = ix>>20;
437 y[0] = r-w;
438 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
439 if(i>16) { /* 2nd iteration needed, good to 118 */
440 t = r;
441 w = fn*pio2_2;
442 r = t-w;
443 w = fn*pio2_2t-((t-r)-w);
444 y[0] = r-w;
445 i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff);
446 if(i>49) { /* 3rd iteration need, 151 bits acc */
447 t = r; /* will cover all possible cases */
448 w = fn*pio2_3;
449 r = t-w;
450 w = fn*pio2_3t-((t-r)-w);
451 y[0] = r-w;
452 }
453 }
454 }
455 y[1] = (r-y[0])-w;
456 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
457 else return n;
458 }
459 /*
460 * all other (large) arguments
461 */
462 if(ix>=0x7ff00000) { /* x is inf or NaN */
463 y[0]=y[1]=x-x; return 0;
464 }
465 /* set z = scalbn(|x|,ilogb(x)-23) */
466 *(1-i0+(int*)&z) = *(1-i0+(int*)&x);
467 e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */
468 *(i0+(int*)&z) = ix - (e0<<20);
469 for(i=0;i<2;i++) {
470 tx[i] = (double)((int)(z));
471 z = (z-tx[i])*two24A;
472 }
473 tx[2] = z;
474 nx = 3;
475 while(tx[nx-1]==zeroA) nx--; /* skip zero term */
476 n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi);
477 if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;}
478 return n;
479}
480
481
482/* __kernel_sin( x, y, iy)
483 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
484 * Input x is assumed to be bounded by ~pi/4 in magnitude.
485 * Input y is the tail of x.
486 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
487 *
488 * Algorithm
489 * 1. Since sin(-x) = -sin(x), we need only to consider positive x.
490 * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
491 * 3. sin(x) is approximated by a polynomial of degree 13 on
492 * [0,pi/4]
493 * 3 13
494 * sin(x) ~ x + S1*x + ... + S6*x
495 * where
496 *
497 * |sin(x) 2 4 6 8 10 12 | -58
498 * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2
499 * | x |
500 *
501 * 4. sin(x+y) = sin(x) + sin'(x')*y
502 * ~ sin(x) + (1-x*x/2)*y
503 * For better accuracy, let
504 * 3 2 2 2 2
505 * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
506 * then 3 2
507 * sin(x) = x + (S1*x + (x *(r-y/2)+y))
508 */
509
510static const double
511S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
512S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
513S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
514S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
515S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
516S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
517
518static double __kernel_sin(double x, double y, int iy)
519{
520 double z,r,v;
521 int ix;
522 ix = high(x)&0x7fffffff; /* high word of x */
523 if(ix<0x3e400000) /* |x| < 2**-27 */
524 {if((int)x==0) return x;} /* generate inexact */
525 z = x*x;
526 v = z*x;
527 r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
528 if(iy==0) return x+v*(S1+z*r);
529 else return x-((z*(half*y-v*r)-y)-v*S1);
530}
531
532/*
533 * __kernel_cos( x, y )
534 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
535 * Input x is assumed to be bounded by ~pi/4 in magnitude.
536 * Input y is the tail of x.
537 *
538 * Algorithm
539 * 1. Since cos(-x) = cos(x), we need only to consider positive x.
540 * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
541 * 3. cos(x) is approximated by a polynomial of degree 14 on
542 * [0,pi/4]
543 * 4 14
544 * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x
545 * where the remez error is
546 *
547 * | 2 4 6 8 10 12 14 | -58
548 * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2
549 * | |
550 *
551 * 4 6 8 10 12 14
552 * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then
553 * cos(x) = 1 - x*x/2 + r
554 * since cos(x+y) ~ cos(x) - sin(x)*y
555 * ~ cos(x) - x*y,
556 * a correction term is necessary in cos(x) and hence
557 * cos(x+y) = 1 - (x*x/2 - (r - x*y))
558 * For better accuracy when x > 0.3, let qx = |x|/4 with
559 * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
560 * Then
561 * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
562 * Note that 1-qx and (x*x/2-qx) is EXACT here, and the
563 * magnitude of the latter is at least a quarter of x*x/2,
564 * thus, reducing the rounding error in the subtraction.
565 */
566
567static const double
568C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */
569C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */
570C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */
571C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */
572C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */
573C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
574
575static double __kernel_cos(double x, double y)
576{
577 double a,h,z,r,qx=0;
578 int ix;
579 ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/
580 if(ix<0x3e400000) { /* if x < 2**27 */
581 if(((int)x)==0) return one; /* generate inexact */
582 }
583 z = x*x;
584 r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))));
585 if(ix < 0x3FD33333) /* if |x| < 0.3 */
586 return one - (0.5*z - (z*r - x*y));
587 else {
588 if(ix > 0x3fe90000) { /* x > 0.78125 */
589 qx = 0.28125;
590 } else {
591 set_high(&qx, ix-0x00200000); /* x/4 */
592 set_low(&qx, 0);
593 }
594 h = 0.5*z-qx;
595 a = one-qx;
596 return a - (h - (z*r-x*y));
597 }
598}
599
600/* __kernel_tan( x, y, k )
601 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
602 * Input x is assumed to be bounded by ~pi/4 in magnitude.
603 * Input y is the tail of x.
604 * Input k indicates whether tan (if k=1) or
605 * -1/tan (if k= -1) is returned.
606 *
607 * Algorithm
608 * 1. Since tan(-x) = -tan(x), we need only to consider positive x.
609 * 2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
610 * 3. tan(x) is approximated by a odd polynomial of degree 27 on
611 * [0,0.67434]
612 * 3 27
613 * tan(x) ~ x + T1*x + ... + T13*x
614 * where
615 *
616 * |tan(x) 2 4 26 | -59.2
617 * |----- - (1+T1*x +T2*x +.... +T13*x )| <= 2
618 * | x |
619 *
620 * Note: tan(x+y) = tan(x) + tan'(x)*y
621 * ~ tan(x) + (1+x*x)*y
622 * Therefore, for better accuracy in computing tan(x+y), let
623 * 3 2 2 2 2
624 * r = x *(T2+x *(T3+x *(...+x *(T12+x *T13))))
625 * then
626 * 3 2
627 * tan(x+y) = x + (T1*x + (x *(r+y)+y))
628 *
629 * 4. For x in [0.67434,pi/4], let y = pi/4 - x, then
630 * tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
631 * = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
632 */
633
634static const double
635pio4 = 7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
636pio4lo= 3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
637T[] = {
638 3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
639 1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
640 5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
641 2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
642 8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
643 3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
644 1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
645 5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
646 2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
647 7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
648 7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
649 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
650 2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
651};
652
653static double __kernel_tan(double x, double y, int iy)
654{
655 double z,r,v,w,s;
656 int ix,hx;
657 hx = high(x); /* high word of x */
658 ix = hx&0x7fffffff; /* high word of |x| */
659 if(ix<0x3e300000) { /* x < 2**-28 */
660 if((int)x==0) { /* generate inexact */
661 if (((ix | low(x)) | (iy + 1)) == 0)
662 return one / fabsd(x);
663 else {
664 if (iy == 1)
665 return x;
666 else { /* compute -1 / (x+y) carefully */
667 double a, t;
668
669 z = w = x + y;
670 set_low(&z, 0);
671 v = y - (z - x);
672 t = a = -one / w;
673 set_low(&t, 0);
674 s = one + t * z;
675 return t + a * (s + t * v);
676 }
677 }
678 }
679 }
680 if(ix>=0x3FE59428) { /* |x|>=0.6744 */
681 if(hx<0) {x = -x; y = -y;}
682 z = pio4-x;
683 w = pio4lo-y;
684 x = z+w; y = 0.0;
685 }
686 z = x*x;
687 w = z*z;
688 /* Break x^5*(T[1]+x^2*T[2]+...) into
689 * x^5(T[1]+x^4*T[3]+...+x^20*T[11]) +
690 * x^5(x^2*(T[2]+x^4*T[4]+...+x^22*[T12]))
691 */
692 r = T[1]+w*(T[3]+w*(T[5]+w*(T[7]+w*(T[9]+w*T[11]))));
693 v = z*(T[2]+w*(T[4]+w*(T[6]+w*(T[8]+w*(T[10]+w*T[12])))));
694 s = z*x;
695 r = y + z*(s*(r+v)+y);
696 r += T[0]*s;
697 w = x+r;
698 if(ix>=0x3FE59428) {
699 v = (double)iy;
700 return (double)(1-((hx>>30)&2))*(v-2.0*(x-(w*w/(w+v)-r)));
701 }
702 if(iy==1) return w;
703 else { /* if allow error up to 2 ulp,
704 simply return -1.0/(x+r) here */
705 /* compute -1.0/(x+r) accurately */
706 double a,t;
707 z = w;
708 set_low(&z, 0);
709 v = r-(z - x); /* z+v = r+x */
710 t = a = -1.0/w; /* a = -1.0/w */
711 set_low(&t, 0);
712 s = 1.0+t*z;
713 return t+a*(s+t*v);
714 }
715}
716
717
718//----------------------------------------------------------------------
719//
720// Routines for new sin/cos implementation
721//
722//----------------------------------------------------------------------
723
724/* sin(x)
725 * Return sine function of x.
726 *
727 * kernel function:
728 * __kernel_sin ... sine function on [-pi/4,pi/4]
729 * __kernel_cos ... cose function on [-pi/4,pi/4]
730 * __ieee754_rem_pio2 ... argument reduction routine
731 *
732 * Method.
733 * Let S,C and T denote the sin, cos and tan respectively on
734 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
735 * in [-pi/4 , +pi/4], and let n = k mod 4.
736 * We have
737 *
738 * n sin(x) cos(x) tan(x)
739 * ----------------------------------------------------------
740 * 0 S C T
741 * 1 C -S -1/T
742 * 2 -S -C T
743 * 3 -C S -1/T
744 * ----------------------------------------------------------
745 *
746 * Special cases:
747 * Let trig be any of sin, cos, or tan.
748 * trig(+-INF) is NaN, with signals;
749 * trig(NaN) is that NaN;
750 *
751 * Accuracy:
752 * TRIG(x) returns trig(x) nearly rounded
753 */
754
755JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
756 double y[2],z=0.0;
757 int n, ix;
758
759 /* High word of x. */
760 ix = high(x);
761
762 /* |x| ~< pi/4 */
763 ix &= 0x7fffffff;
764 if(ix <= 0x3fe921fb) return __kernel_sin(x,z,0);
765
766 /* sin(Inf or NaN) is NaN */
767 else if (ix>=0x7ff00000) return x-x;
768
769 /* argument reduction needed */
770 else {
771 n = __ieee754_rem_pio2(x,y);
772 switch(n&3) {
773 case 0: return __kernel_sin(y[0],y[1],1);
774 case 1: return __kernel_cos(y[0],y[1]);
775 case 2: return -__kernel_sin(y[0],y[1],1);
776 default:
777 return -__kernel_cos(y[0],y[1]);
778 }
779 }
780JRT_END
781
782/* cos(x)
783 * Return cosine function of x.
784 *
785 * kernel function:
786 * __kernel_sin ... sine function on [-pi/4,pi/4]
787 * __kernel_cos ... cosine function on [-pi/4,pi/4]
788 * __ieee754_rem_pio2 ... argument reduction routine
789 *
790 * Method.
791 * Let S,C and T denote the sin, cos and tan respectively on
792 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
793 * in [-pi/4 , +pi/4], and let n = k mod 4.
794 * We have
795 *
796 * n sin(x) cos(x) tan(x)
797 * ----------------------------------------------------------
798 * 0 S C T
799 * 1 C -S -1/T
800 * 2 -S -C T
801 * 3 -C S -1/T
802 * ----------------------------------------------------------
803 *
804 * Special cases:
805 * Let trig be any of sin, cos, or tan.
806 * trig(+-INF) is NaN, with signals;
807 * trig(NaN) is that NaN;
808 *
809 * Accuracy:
810 * TRIG(x) returns trig(x) nearly rounded
811 */
812
813JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
814 double y[2],z=0.0;
815 int n, ix;
816
817 /* High word of x. */
818 ix = high(x);
819
820 /* |x| ~< pi/4 */
821 ix &= 0x7fffffff;
822 if(ix <= 0x3fe921fb) return __kernel_cos(x,z);
823
824 /* cos(Inf or NaN) is NaN */
825 else if (ix>=0x7ff00000) return x-x;
826
827 /* argument reduction needed */
828 else {
829 n = __ieee754_rem_pio2(x,y);
830 switch(n&3) {
831 case 0: return __kernel_cos(y[0],y[1]);
832 case 1: return -__kernel_sin(y[0],y[1],1);
833 case 2: return -__kernel_cos(y[0],y[1]);
834 default:
835 return __kernel_sin(y[0],y[1],1);
836 }
837 }
838JRT_END
839
840/* tan(x)
841 * Return tangent function of x.
842 *
843 * kernel function:
844 * __kernel_tan ... tangent function on [-pi/4,pi/4]
845 * __ieee754_rem_pio2 ... argument reduction routine
846 *
847 * Method.
848 * Let S,C and T denote the sin, cos and tan respectively on
849 * [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
850 * in [-pi/4 , +pi/4], and let n = k mod 4.
851 * We have
852 *
853 * n sin(x) cos(x) tan(x)
854 * ----------------------------------------------------------
855 * 0 S C T
856 * 1 C -S -1/T
857 * 2 -S -C T
858 * 3 -C S -1/T
859 * ----------------------------------------------------------
860 *
861 * Special cases:
862 * Let trig be any of sin, cos, or tan.
863 * trig(+-INF) is NaN, with signals;
864 * trig(NaN) is that NaN;
865 *
866 * Accuracy:
867 * TRIG(x) returns trig(x) nearly rounded
868 */
869
870JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
871 double y[2],z=0.0;
872 int n, ix;
873
874 /* High word of x. */
875 ix = high(x);
876
877 /* |x| ~< pi/4 */
878 ix &= 0x7fffffff;
879 if(ix <= 0x3fe921fb) return __kernel_tan(x,z,1);
880
881 /* tan(Inf or NaN) is NaN */
882 else if (ix>=0x7ff00000) return x-x; /* NaN */
883
884 /* argument reduction needed */
885 else {
886 n = __ieee754_rem_pio2(x,y);
887 return __kernel_tan(y[0],y[1],1-((n&1)<<1)); /* 1 -- n even
888 -1 -- n odd */
889 }
890JRT_END
891