sharedRuntimeTrig.cpp source code [OpenJDK/src/hotspot/share/runtime/sharedRuntimeTrig.cpp]

1	/*
2	* Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4	*
5	* This code is free software; you can redistribute it and/or modify it
6	* under the terms of the GNU General Public License version 2 only, as
7	* published by the Free Software Foundation.
8	*
9	* This code is distributed in the hope that it will be useful, but WITHOUT
10	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12	* version 2 for more details (a copy is included in the LICENSE file that
13	* accompanied this code).
14	*
15	* You should have received a copy of the GNU General Public License version
16	* 2 along with this work; if not, write to the Free Software Foundation,
17	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18	*
19	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20	* or visit www.oracle.com if you need additional information or have any
21	* questions.
22	*
23	*/
24
25	#include "precompiled.hpp"
26	#include "jni.h"
27	#include "runtime/interfaceSupport.inline.hpp"
28	#include "runtime/sharedRuntime.hpp"
29	#include "runtime/sharedRuntimeMath.hpp"
30
31	// This file contains copies of the fdlibm routines used by
32	// StrictMath. It turns out that it is almost always required to use
33	// these runtime routines; the Intel CPU doesn't meet the Java
34	// specification for sin/cos outside a certain limited argument range,
35	// and the SPARC CPU doesn't appear to have sin/cos instructions. It
36	// also turns out that avoiding the indirect call through function
37	// pointer out to libjava.so in SharedRuntime speeds these routines up
38	// by roughly 15% on both Win32/x86 and Solaris/SPARC.
39
40	/*
41	* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
42	* double x[],y[]; int e0,nx,prec; int ipio2[];
43	*
 * __kernel_rem_pio2 returns the last three digits of N with
 *              y = x - N*pi/2
 * so that |y| < pi/2.
47	*
48	* The method is to compute the integer (mod 8) and fraction parts of
49	* (2/pi)*x without doing the full multiplication. In general we
50	* skip the part of the product that are known to be a huge integer (
51	* more accurately, = 0 mod 8 ). Thus the number of operations are
52	* independent of the exponent of the input.
53	*
54	* (2/pi) is represented by an array of 24-bit integers in ipio2[].
55	*
56	* Input parameters:
 *      x[]     The input value (must be positive) is broken into nx
 *              pieces of 24-bit integers in double precision format.
 *              x[i] will be the i-th 24 bit of x. The scaled exponent
 *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
 *              matches x up to 24 bits).
 *
 *              Example of breaking a double positive z into x[0]+x[1]+x[2]:
 *                      e0 = ilogb(z)-23
 *                      z  = scalbn(z,-e0)
 *              for i = 0,1,2
 *                      x[i] = floor(z)
 *                      z    = (z-x[i])*2**24
69	*
70	*
 *      y[]     output result in an array of double precision numbers.
 *              The dimension of y[] is:
 *                      24-bit  precision       1
 *                      53-bit  precision       2
 *                      64-bit  precision       2
 *                      113-bit precision       3
 *              The actual value is the sum of them. Thus for 113-bit
 *              precision, one may have to do something like:
79	*
80	* long double t,w,r_head, r_tail;
81	* t = (long double)y[2] + (long double)y[1];
82	* w = (long double)y[0];
83	* r_head = t+w;
84	* r_tail = w - (r_head - t);
85	*
86	* e0 The exponent of x[0]
87	*
88	* nx dimension of x[]
89	*
 *      prec    an integer indicating the precision:
91	* 0 24 bits (single)
92	* 1 53 bits (double)
93	* 2 64 bits (extended)
94	* 3 113 bits (quad)
95	*
96	* ipio2[]
97	* integer array, contains the (24i)-th to (24i+23)-th
98	* bit of 2/pi after binary point. The corresponding
99	* floating value is
100	*
101	* ipio2[i] * 2^(-24(i+1)).
102	*
103	* External function:
104	* double scalbn(), floor();
105	*
106	*
107	* Here is the description of some local variables:
108	*
109	* jk jk+1 is the initial number of terms of ipio2[] needed
110	* in the computation. The recommended value is 2,3,4,
111	* 6 for single, double, extended,and quad.
112	*
113	* jz local integer variable indicating the number of
114	* terms of ipio2[] used.
115	*
116	* jx nx - 1
117	*
 *      jv      index for pointing to the suitable ipio2[] for the
 *              computation. In general, we want
 *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 *              is an integer. Thus
 *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 *              Hence jv = max(0,(e0-3)/24).
124	*
125	* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
126	*
127	* q[] double array with integral value, representing the
128	* 24-bits chunk of the product of x and 2/pi.
129	*
130	* q0 the corresponding exponent of q[0]. Note that the
131	* exponent for q[i] would be q0-24*i.
132	*
133	* PIo2[] double precision array, obtained by cutting pi/2
134	* into 24 bits chunks.
135	*
136	* f[] ipio2[] in floating point
137	*
138	* iq[] integer array by breaking up q[] in 24-bits chunk.
139	*
140	* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
141	*
142	* ih integer. If >0 it indicates q[] is >= 0.5, hence
143	* it also indicates the sign of the result.
144	*
145	*/
146
147
148	/*
149	* Constants:
150	* The hexadecimal values are the intended ones for the following
151	* constants. The decimal values may be used, provided that the
152	* compiler will convert from decimal to binary accurately enough
153	* to produce the hexadecimal values shown.
154	*/
155
156
/* Initial value of jk in __kernel_rem_pio2, indexed by the prec argument. */
static const int init_jk[] = {2,3,4,6};

/* pi/2 cut into 24-bit chunks, leading chunk first. */
static const double PIo2[] = {
  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
};

static const double
zeroB   = 0.0,
one     = 1.0,
two24B  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 : 2^24  */
twon24  = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 : 2^-24 */
175
176	static int __kernel_rem_pio2(double x, double* y, int* e0, int nx, int prec, const int *ipio2) {
177	int jz,jx,jv,jp,jk,carry,n,iq[`20`],i,j,k,m,q0,ih;
178	double z,fw,f[`20`],fq[`20`],q[`20`];
179
180	/ initialize jk/
181	jk = init_jk[prec];
182	jp = jk;
183
184	/ determine jx,jv,q0, note that 3>q0 /
185	jx = nx-`1`;
186	jv = (e0-`3`)/`24`; if(jv<`0`) jv=`0`;
187	q0 = e0-`24`*(jv+`1`);
188
189	/ set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] /
190	j = jv-jx; m = jx+jk;
191	for(i=`0`;i<=m;i++,j++) f[i] = (j<`0`)? zeroB : (double) ipio2[j];
192
193	/ compute q[0],q[1],...q[jk] /
194	for (i=`0`;i<=jk;i++) {
195	for(j=`0`,fw=`0.0`;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
196	}
197
198	jz = jk;
199	recompute:
200	/ distill q[] into iq[] reversingly /
201	for(i=`0`,j=jz,z=q[jz];j>`0`;i++,j--) {
202	fw = (double)((int)(twon24* z));
203	iq[i] = (int)(z-two24B*fw);
204	z = q[j-`1`]+fw;
205	}
206
207	/ compute n /
208	z = scalbnA(z,q0); / actual value of z /
209	z -= `8.0`floor(z`0.125`); / trim off integer >= 8 /
210	n = (int) z;
211	z -= (double)n;
212	ih = `0`;
213	if(q0>`0`) { / need iq[jz-1] to determine n /
214	i = (iq[jz-`1`]>>(`24`-q0)); n += i;
215	iq[jz-`1`] -= i<<(`24`-q0);
216	ih = iq[jz-`1`]>>(`23`-q0);
217	}
218	else if(q0==`0`) ih = iq[jz-`1`]>>`23`;
219	else if(z>=`0.5`) ih=`2`;
220
221	if(ih>`0`) { / q > 0.5 /
222	n += `1`; carry = `0`;
223	for(i=`0`;i<jz ;i++) { / compute 1-q /
224	j = iq[i];
225	if(carry==`0`) {
226	if(j!=`0`) {
227	carry = `1`; iq[i] = `0x1000000`- j;
228	}
229	} else iq[i] = `0xffffff` - j;
230	}
231	if(q0>`0`) { / rare case: chance is 1 in 12 /
232	switch(q0) {
233	case `1`:
234	iq[jz-`1`] &= `0x7fffff`; break;
235	case `2`:
236	iq[jz-`1`] &= `0x3fffff`; break;
237	}
238	}
239	if(ih==`2`) {
240	z = one - z;
241	if(carry!=`0`) z -= scalbnA(one,q0);
242	}
243	}
244
245	/ check if recomputation is needed /
246	if(z==zeroB) {
247	j = `0`;
248	for (i=jz-`1`;i>=jk;i--) j \|= iq[i];
249	if(j==`0`) { / need recomputation /
250	for(k=`1`;iq[jk-k]==`0`;k++); / k = no. of terms needed /
251
252	for(i=jz+`1`;i<=jz+k;i++) { / add q[jz+1] to q[jz+k] /
253	f[jx+i] = (double) ipio2[jv+i];
254	for(j=`0`,fw=`0.0`;j<=jx;j++) fw += x[j]*f[jx+i-j];
255	q[i] = fw;
256	}
257	jz += k;
258	goto recompute;
259	}
260	}
261
262	/ chop off zero terms /
263	if(z==`0.0`) {
264	jz -= `1`; q0 -= `24`;
265	while(iq[jz]==`0`) { jz--; q0-=`24`;}
266	} else { / break z into 24-bit if necessary /
267	z = scalbnA(z,-q0);
268	if(z>=two24B) {
269	fw = (double)((int)(twon24*z));
270	iq[jz] = (int)(z-two24B*fw);
271	jz += `1`; q0 += `24`;
272	iq[jz] = (int) fw;
273	} else iq[jz] = (int) z ;
274	}
275
276	/ convert integer "bit" chunk to floating-point value /
277	fw = scalbnA(one,q0);
278	for(i=jz;i>=`0`;i--) {
279	q[i] = fw(double)iq[i]; fw=twon24;
280	}
281
282	/ compute PIo2[0,...,jp]q[jz,...,0] /*
283	for(i=jz;i>=`0`;i--) {
284	for(fw=`0.0`,k=`0`;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
285	fq[jz-i] = fw;
286	}
287
288	/ compress fq[] into y[] /
289	switch(prec) {
290	case `0`:
291	fw = `0.0`;
292	for (i=jz;i>=`0`;i--) fw += fq[i];
293	y[`0`] = (ih==`0`)? fw: -fw;
294	break;
295	case `1`:
296	case `2`:
297	fw = `0.0`;
298	for (i=jz;i>=`0`;i--) fw += fq[i];
299	y[`0`] = (ih==`0`)? fw: -fw;
300	fw = fq[`0`]-fw;
301	for (i=`1`;i<=jz;i++) fw += fq[i];
302	y[`1`] = (ih==`0`)? fw: -fw;
303	break;
304	case `3`: / painful /
305	for (i=jz;i>`0`;i--) {
306	fw = fq[i-`1`]+fq[i];
307	fq[i] += fq[i-`1`]-fw;
308	fq[i-`1`] = fw;
309	}
310	for (i=jz;i>`1`;i--) {
311	fw = fq[i-`1`]+fq[i];
312	fq[i] += fq[i-`1`]-fw;
313	fq[i-`1`] = fw;
314	}
315	for (fw=`0.0`,i=jz;i>=`2`;i--) fw += fq[i];
316	if(ih==`0`) {
317	y[`0`] = fq[`0`]; y[`1`] = fq[`1`]; y[`2`] = fw;
318	} else {
319	y[`0`] = -fq[`0`]; y[`1`] = -fq[`1`]; y[`2`] = -fw;
320	}
321	}
322	return n&`7`;
323	}
324
325
326	/*
327	* ====================================================
328	* Copyright (c) 1993 Oracle and/or its affiliates. All rights reserved.
329	*
330	* Developed at SunPro, a Sun Microsystems, Inc. business.
331	* Permission to use, copy, modify, and distribute this
332	* software is freely granted, provided that this notice
333	* is preserved.
334	* ====================================================
335	*
336	*/
337
/* __ieee754_rem_pio2(x,y)
 *
 * return the remainder of x rem pi/2 in y[0]+y[1]
 * use __kernel_rem_pio2()
 */
343
344	/*
345	* Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi
346	*/
/* 2/pi after the binary point, split into consecutive 24-bit integers. */
static const int two_over_pi[] = {
  0xA2F983, 0x6E4E44, 0x1529FC, 0x2757D1, 0xF534DD, 0xC0DB62,
  0x95993C, 0x439041, 0xFE5163, 0xABDEBB, 0xC561B7, 0x246E3A,
  0x424DD2, 0xE00649, 0x2EEA09, 0xD1921C, 0xFE1DEB, 0x1CB129,
  0xA73EE8, 0x8235F5, 0x2EBB44, 0x84E99C, 0x7026B4, 0x5F7E41,
  0x3991D6, 0x398353, 0x39F49C, 0x845F8B, 0xBDF928, 0x3B1FF8,
  0x97FFDE, 0x05980F, 0xEF2F11, 0x8B5A0A, 0x6D1F6D, 0x367ECF,
  0x27CB09, 0xB74F46, 0x3F669E, 0x5FEA2D, 0x7527BA, 0xC7EBE5,
  0xF17B3D, 0x0739F7, 0x8A5292, 0xEA6BFB, 0x5FB11F, 0x8D5D08,
  0x560330, 0x46FC7B, 0x6BABF0, 0xCFBC20, 0x9AF436, 0x1DA9E3,
  0x91615E, 0xE61B08, 0x659985, 0x5F14A0, 0x68408D, 0xFFD880,
  0x4D7327, 0x310606, 0x1556CA, 0x73A8C9, 0x60E27B, 0xC08C6B,
};

/*
 * High-word bit patterns compared against ix in __ieee754_rem_pio2 for
 * n = 1..32 (entry n-1) to decide whether the quick reduction suffices.
 */
static const int npio2_hw[] = {
  0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C,
  0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C,
  0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A,
  0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C,
  0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB,
  0x404858EB, 0x404921FB,
};
369
370	/*
371	* invpio2: 53 bits of 2/pi
372	* pio2_1: first 33 bit of pi/2
373	* pio2_1t: pi/2 - pio2_1
374	* pio2_2: second 33 bit of pi/2
375	* pio2_2t: pi/2 - (pio2_1+pio2_2)
376	* pio2_3: third 33 bit of pi/2
377	* pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3)
378	*/
379
static const double
zeroA   = 0.00000000000000000000e+00, /* 0x00000000, 0x00000000 */
half    = 5.00000000000000000000e-01, /* 0x3FE00000, 0x00000000 */
two24A  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
invpio2 = 6.36619772367581382433e-01, /* 0x3FE45F30, 0x6DC9C883 */
pio2_1  = 1.57079632673412561417e+00, /* 0x3FF921FB, 0x54400000 */
pio2_1t = 6.07710050650619224932e-11, /* 0x3DD0B461, 0x1A626331 */
pio2_2  = 6.07710050630396597660e-11, /* 0x3DD0B461, 0x1A600000 */
pio2_2t = 2.02226624879595063154e-21, /* 0x3BA3198A, 0x2E037073 */
pio2_3  = 2.02226624871116645580e-21, /* 0x3BA3198A, 0x2E000000 */
pio2_3t = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
391
392	static int __ieee754_rem_pio2(double x, double *y) {
393	double z,w,t,r,fn;
394	double tx[`3`];
395	int e0,i,j,nx,n,ix,hx,i0;
396
397	i0 = (((int*)&two24A)>>`30`)^`1`; /* high word index /
398	hx = (i0+(int*)&x); /* high word of x /
399	ix = hx&`0x7fffffff`;
400	if(ix<=`0x3fe921fb`) / \|x\| ~<= pi/4 , no need for reduction /
401	{y[`0`] = x; y[`1`] = `0`; return `0`;}
402	if(ix<`0x4002d97c`) { / \|x\| < 3pi/4, special case with n=+-1 /
403	if(hx>`0`) {
404	z = x - pio2_1;
405	if(ix!=`0x3ff921fb`) { / 33+53 bit pi is good enough /
406	y[`0`] = z - pio2_1t;
407	y[`1`] = (z-y[`0`])-pio2_1t;
408	} else { / near pi/2, use 33+33+53 bit pi /
409	z -= pio2_2;
410	y[`0`] = z - pio2_2t;
411	y[`1`] = (z-y[`0`])-pio2_2t;
412	}
413	return `1`;
414	} else { / negative x /
415	z = x + pio2_1;
416	if(ix!=`0x3ff921fb`) { / 33+53 bit pi is good enough /
417	y[`0`] = z + pio2_1t;
418	y[`1`] = (z-y[`0`])+pio2_1t;
419	} else { / near pi/2, use 33+33+53 bit pi /
420	z += pio2_2;
421	y[`0`] = z + pio2_2t;
422	y[`1`] = (z-y[`0`])+pio2_2t;
423	}
424	return -`1`;
425	}
426	}
427	if(ix<=`0x413921fb`) { / \|x\| ~<= 2^19(pi/2), medium size /*
428	t = fabsd(x);
429	n = (int) (t*invpio2+half);
430	fn = (double)n;
431	r = t-fn*pio2_1;
432	w = fnpio2_1t; /* 1st round good to 85 bit /
433	if(n<`32`&&ix!=npio2_hw[n-`1`]) {
434	y[`0`] = r-w; / quick check no cancellation /
435	} else {
436	j = ix>>`20`;
437	y[`0`] = r-w;
438	i = j-((((i0+(int**)&y[`0`]))>>`20`)&`0x7ff`);
439	if(i>`16`) { / 2nd iteration needed, good to 118 /
440	t = r;
441	w = fn*pio2_2;
442	r = t-w;
443	w = fn*pio2_2t-((t-r)-w);
444	y[`0`] = r-w;
445	i = j-((((i0+(int**)&y[`0`]))>>`20`)&`0x7ff`);
446	if(i>`49`) { / 3rd iteration need, 151 bits acc /
447	t = r; / will cover all possible cases /
448	w = fn*pio2_3;
449	r = t-w;
450	w = fn*pio2_3t-((t-r)-w);
451	y[`0`] = r-w;
452	}
453	}
454	}
455	y[`1`] = (r-y[`0`])-w;
456	if(hx<`0`) {y[`0`] = -y[`0`]; y[`1`] = -y[`1`]; return -n;}
457	else return n;
458	}
459	/*
460	* all other (large) arguments
461	*/
462	if(ix>=`0x7ff00000`) { / x is inf or NaN /
463	y[`0`]=y[`1`]=x-x; return `0`;
464	}
465	/ set z = scalbn(\|x\|,ilogb(x)-23) /
466	(`1`-i0+(int*)&z) = (`1`-i0+(int*)&x);
467	e0 = (ix>>`20`)-`1046`; / e0 = ilogb(z)-23; /
468	(i0+(int**)&z) = ix - (e0<<`20`);
469	for(i=`0`;i<`2`;i++) {
470	tx[i] = (double)((int)(z));
471	z = (z-tx[i])*two24A;
472	}
473	tx[`2`] = z;
474	nx = `3`;
475	while(tx[nx-`1`]==zeroA) nx--; / skip zero term /
476	n = __kernel_rem_pio2(tx,y,e0,nx,`2`,two_over_pi);
477	if(hx<`0`) {y[`0`] = -y[`0`]; y[`1`] = -y[`1`]; return -n;}
478	return n;
479	}
480
481
/* __kernel_sin( x, y, iy)
 * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
 * Input y is the tail of x.
 * Input iy indicates whether y is 0. (if iy=0, y assume to be 0).
 *
 * Algorithm
 *      1. Since sin(-x) = -sin(x), we need only to consider positive x.
 *      2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0.
 *      3. sin(x) is approximated by a polynomial of degree 13 on
 *         [0,pi/4]
 *              sin(x) ~ x + S1*x^3 + ... + S6*x^13
 *         where
 *
 *      | sin(x)/x - (1 + S1*x^2 + S2*x^4 + S3*x^6 + S4*x^8
 *                      + S5*x^10 + S6*x^12) |  <=  2^-58
 *
 *      4. sin(x+y) = sin(x) + sin'(x')*y
 *                  ~ sin(x) + (1-x*x/2)*y
 *         For better accuracy, let
 *              r = x^3*(S2+x^2*(S3+x^2*(S4+x^2*(S5+x^2*S6))))
 *         then
 *              sin(x) = x + (S1*x^3 + (x^2*(r-y/2)+y))
 */
509
/* Polynomial coefficients used by __kernel_sin. */
static const double
S1  = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
S2  =  8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
S3  = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
S4  =  2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
S5  = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
S6  =  1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
517
518	static double __kernel_sin(double x, double y, int iy)
519	{
520	double z,r,v;
521	int ix;
522	ix = high(x)&`0x7fffffff`; / high word of x /
523	if(ix<`0x3e400000`) / \|x\| < 2*-27 /*
524	{if((int)x==`0`) return x;} / generate inexact /
525	z = x*x;
526	v = z*x;
527	r = S2+z(S3+z(S4+z(S5+zS6)));
528	if(iy==`0`) return x+v(S1+zr);
529	else return x-((z(halfy-vr)-y)-vS1);
530	}
531
/*
 * __kernel_cos( x,  y )
 * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
 * Input y is the tail of x.
 *
 * Algorithm
 *      1. Since cos(-x) = cos(x), we need only to consider positive x.
 *      2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
 *      3. cos(x) is approximated by a polynomial of degree 14 on
 *         [0,pi/4]
 *              cos(x) ~ 1 - x*x/2 + C1*x^4 + ... + C6*x^14
 *         where the remez error is
 *
 *      | cos(x) - (1 - .5*x^2 + C1*x^4 + C2*x^6 + C3*x^8
 *                    + C4*x^10 + C5*x^12 + C6*x^14) |  <=  2^-58
 *
 *      4. let r = C1*x^4 + C2*x^6 + C3*x^8 + C4*x^10 + C5*x^12 + C6*x^14, then
 *              cos(x) = 1 - x*x/2 + r
 *         since cos(x+y) ~ cos(x) - sin(x)*y
 *                        ~ cos(x) - x*y,
 *         a correction term is necessary in cos(x) and hence
 *              cos(x+y) = 1 - (x*x/2 - (r - x*y))
 *         For better accuracy when x > 0.3, let qx = |x|/4 with
 *         the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125.
 *         Then
 *              cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)).
 *         Note that 1-qx and (x*x/2-qx) is EXACT here, and the
 *         magnitude of the latter is at least a quarter of x*x/2,
 *         thus, reducing the rounding error in the subtraction.
 */
566
567	static const double
568	C1 = `4.16666666666666019037e-02`, / 0x3FA55555, 0x5555554C /
569	C2 = -`1.38888888888741095749e-03`, / 0xBF56C16C, 0x16C15177 /
570	C3 = `2.48015872894767294178e-05`, / 0x3EFA01A0, 0x19CB1590 /
571	C4 = -`2.75573143513906633035e-07`, / 0xBE927E4F, 0x809C52AD /
572	C5 = `2.08757232129817482790e-09`, / 0x3E21EE9E, 0xBDB4B1C4 /
573	C6 = -`1.13596475577881948265e-11`; / 0xBDA8FAE9, 0xBE8838D4 /
574
575	static double __kernel_cos(double x, double y)
576	{
577	double a,h,z,r,qx=`0`;
578	int ix;
579	ix = high(x)&`0x7fffffff`; / ix = \|x\|'s high word/
580	if(ix<`0x3e400000`) { / if x < 2*27 /*
581	if(((int)x)==`0`) return one; / generate inexact /
582	}
583	z = x*x;
584	r = z(C1+z(C2+z(C3+z(C4+z(C5+zC6)))));
585	if(ix < `0x3FD33333`) / if \|x\| < 0.3 /
586	return one - (`0.5`z - (zr - x*y));
587	else {
588	if(ix > `0x3fe90000`) { / x > 0.78125 /
589	qx = `0.28125`;
590	} else {
591	set_high(&qx, ix-`0x00200000`); / x/4 /
592	set_low(&qx, `0`);
593	}
594	h = `0.5`*z-qx;
595	a = one-qx;
596	return a - (h - (zr-xy));
597	}
598	}
599
/* __kernel_tan( x, y, k )
 * kernel tan function on [-pi/4, pi/4], pi/4 ~ 0.7854
 * Input x is assumed to be bounded by ~pi/4 in magnitude.
 * Input y is the tail of x.
 * Input k indicates whether tan (if k=1) or
 * -1/tan (if k= -1) is returned.
 *
 * Algorithm
 *      1. Since tan(-x) = -tan(x), we need only to consider positive x.
 *      2. if x < 2^-28 (hx<0x3e300000 0), return x with inexact if x!=0.
 *      3. tan(x) is approximated by a odd polynomial of degree 27 on
 *         [0,0.67434]
 *              tan(x) ~ x + T1*x^3 + ... + T13*x^27
 *         where
 *
 *      | tan(x)/x - (1 + T1*x^2 + T2*x^4 + .... + T13*x^26) |  <=  2^-59.2
 *
 *         Note: tan(x+y) = tan(x) + tan'(x)*y
 *                        ~ tan(x) + (1+x*x)*y
 *         Therefore, for better accuracy in computing tan(x+y), let
 *              r = x^3*(T2+x^2*(T3+x^2*(...+x^2*(T12+x^2*T13))))
 *         then
 *              tan(x+y) = x + (T1*x^3 + (x^2*(r+y)+y))
 *
 *      4. For x in [0.67434,pi/4],  let y = pi/4 - x, then
 *              tan(x) = tan(pi/4-y) = (1-tan(y))/(1+tan(y))
 *                     = 1 - 2*(tan(y) - (tan(y)^2)/(1+tan(y)))
 */
633
/* pi/4 as a head/tail pair, and the polynomial coefficients used by __kernel_tan. */
static const double
pio4  =  7.85398163397448278999e-01, /* 0x3FE921FB, 0x54442D18 */
pio4lo=  3.06161699786838301793e-17, /* 0x3C81A626, 0x33145C07 */
T[] =  {
  3.33333333333334091986e-01, /* 0x3FD55555, 0x55555563 */
  1.33333333333201242699e-01, /* 0x3FC11111, 0x1110FE7A */
  5.39682539762260521377e-02, /* 0x3FABA1BA, 0x1BB341FE */
  2.18694882948595424599e-02, /* 0x3F9664F4, 0x8406D637 */
  8.86323982359930005737e-03, /* 0x3F8226E3, 0xE96E8493 */
  3.59207910759131235356e-03, /* 0x3F6D6D22, 0xC9560328 */
  1.45620945432529025516e-03, /* 0x3F57DBC8, 0xFEE08315 */
  5.88041240820264096874e-04, /* 0x3F4344D8, 0xF2F26501 */
  2.46463134818469906812e-04, /* 0x3F3026F7, 0x1A8D1068 */
  7.81794442939557092300e-05, /* 0x3F147E88, 0xA03792A6 */
  7.14072491382608190305e-05, /* 0x3F12B80F, 0x32F0A7E9 */
 -1.85586374855275456654e-05, /* 0xBEF375CB, 0xDB605373 */
  2.59073051863633712884e-05, /* 0x3EFB2A70, 0x74BF7AD4 */
};
652
653	static double __kernel_tan(double x, double y, int iy)
654	{
655	double z,r,v,w,s;
656	int ix,hx;
657	hx = high(x); / high word of x /
658	ix = hx&`0x7fffffff`; / high word of \|x\| /
659	if(ix<`0x3e300000`) { / x < 2*-28 /*
660	if((int)x==`0`) { / generate inexact /
661	if (((ix \| low(x)) \| (iy + `1`)) == `0`)
662	return one / fabsd(x);
663	else {
664	if (iy == `1`)
665	return x;
666	else { / compute -1 / (x+y) carefully /
667	double a, t;
668
669	z = w = x + y;
670	set_low(&z, `0`);
671	v = y - (z - x);
672	t = a = -one / w;
673	set_low(&t, `0`);
674	s = one + t * z;
675	return t + a * (s + t * v);
676	}
677	}
678	}
679	}
680	if(ix>=`0x3FE59428`) { / \|x\|>=0.6744 /
681	if(hx<`0`) {x = -x; y = -y;}
682	z = pio4-x;
683	w = pio4lo-y;
684	x = z+w; y = `0.0`;
685	}
686	z = x*x;
687	w = z*z;
688	/ Break x^5(T[1]+x^2T[2]+...) into*
689	* x^5(T[1]+x^4T[3]+...+x^20T[11]) +
690	* x^5(x^2(T[2]+x^4T[4]+...+x^22*[T12]))
691	*/
692	r = T[`1`]+w(T[`3`]+w(T[`5`]+w(T[`7`]+w(T[`9`]+w*T[`11`]))));
693	v = z(T[`2`]+w(T[`4`]+w(T[`6`]+w(T[`8`]+w(T[`10`]+wT[`12`])))));
694	s = z*x;
695	r = y + z(s(r+v)+y);
696	r += T[`0`]*s;
697	w = x+r;
698	if(ix>=`0x3FE59428`) {
699	v = (double)iy;
700	return (double)(`1`-((hx>>`30`)&`2`))(v-`2.0`(x-(w*w/(w+v)-r)));
701	}
702	if(iy==`1`) return w;
703	else { / if allow error up to 2 ulp,*
704	simply return -1.0/(x+r) here /*
705	/ compute -1.0/(x+r) accurately /
706	double a,t;
707	z = w;
708	set_low(&z, `0`);
709	v = r-(z - x); / z+v = r+x /
710	t = a = -`1.0`/w; / a = -1.0/w /
711	set_low(&t, `0`);
712	s = `1.0`+t*z;
713	return t+a(s+tv);
714	}
715	}
716
717
718	//----------------------------------------------------------------------
719	//
720	// Routines for new sin/cos implementation
721	//
722	//----------------------------------------------------------------------
723
/* sin(x)
 * Return sine function of x.
 *
 * kernel function:
 *      __kernel_sin            ... sine function on [-pi/4,pi/4]
 *      __kernel_cos            ... cosine function on [-pi/4,pi/4]
730	* __ieee754_rem_pio2 ... argument reduction routine
731	*
732	* Method.
733	* Let S,C and T denote the sin, cos and tan respectively on
734	* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
735	* in [-pi/4 , +pi/4], and let n = k mod 4.
736	* We have
737	*
738	* n sin(x) cos(x) tan(x)
739	* ----------------------------------------------------------
740	* 0 S C T
741	* 1 C -S -1/T
742	* 2 -S -C T
743	* 3 -C S -1/T
744	* ----------------------------------------------------------
745	*
746	* Special cases:
747	* Let trig be any of sin, cos, or tan.
748	* trig(+-INF) is NaN, with signals;
749	* trig(NaN) is that NaN;
750	*
751	* Accuracy:
752	* TRIG(x) returns trig(x) nearly rounded
753	*/
754
755	JRT_LEAF(jdouble, SharedRuntime::dsin(jdouble x))
756	double y[`2`],z=`0.0`;
757	int n, ix;
758
759	/ High word of x. /
760	ix = high(x);
761
762	/ \|x\| ~< pi/4 /
763	ix &= `0x7fffffff`;
764	if(ix <= `0x3fe921fb`) return __kernel_sin(x,z,`0`);
765
766	/ sin(Inf or NaN) is NaN /
767	else if (ix>=`0x7ff00000`) return x-x;
768
769	/ argument reduction needed /
770	else {
771	n = __ieee754_rem_pio2(x,y);
772	switch(n&`3`) {
773	case `0`: return __kernel_sin(y[`0`],y[`1`],`1`);
774	case `1`: return __kernel_cos(y[`0`],y[`1`]);
775	case `2`: return -__kernel_sin(y[`0`],y[`1`],`1`);
776	default:
777	return -__kernel_cos(y[`0`],y[`1`]);
778	}
779	}
780	JRT_END
781
/* cos(x)
783	* Return cosine function of x.
784	*
785	* kernel function:
786	* __kernel_sin ... sine function on [-pi/4,pi/4]
787	* __kernel_cos ... cosine function on [-pi/4,pi/4]
788	* __ieee754_rem_pio2 ... argument reduction routine
789	*
790	* Method.
791	* Let S,C and T denote the sin, cos and tan respectively on
792	* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
793	* in [-pi/4 , +pi/4], and let n = k mod 4.
794	* We have
795	*
796	* n sin(x) cos(x) tan(x)
797	* ----------------------------------------------------------
798	* 0 S C T
799	* 1 C -S -1/T
800	* 2 -S -C T
801	* 3 -C S -1/T
802	* ----------------------------------------------------------
803	*
804	* Special cases:
805	* Let trig be any of sin, cos, or tan.
806	* trig(+-INF) is NaN, with signals;
807	* trig(NaN) is that NaN;
808	*
809	* Accuracy:
810	* TRIG(x) returns trig(x) nearly rounded
811	*/
812
813	JRT_LEAF(jdouble, SharedRuntime::dcos(jdouble x))
814	double y[`2`],z=`0.0`;
815	int n, ix;
816
817	/ High word of x. /
818	ix = high(x);
819
820	/ \|x\| ~< pi/4 /
821	ix &= `0x7fffffff`;
822	if(ix <= `0x3fe921fb`) return __kernel_cos(x,z);
823
824	/ cos(Inf or NaN) is NaN /
825	else if (ix>=`0x7ff00000`) return x-x;
826
827	/ argument reduction needed /
828	else {
829	n = __ieee754_rem_pio2(x,y);
830	switch(n&`3`) {
831	case `0`: return __kernel_cos(y[`0`],y[`1`]);
832	case `1`: return -__kernel_sin(y[`0`],y[`1`],`1`);
833	case `2`: return -__kernel_cos(y[`0`],y[`1`]);
834	default:
835	return __kernel_sin(y[`0`],y[`1`],`1`);
836	}
837	}
838	JRT_END
839
/* tan(x)
841	* Return tangent function of x.
842	*
843	* kernel function:
844	* __kernel_tan ... tangent function on [-pi/4,pi/4]
845	* __ieee754_rem_pio2 ... argument reduction routine
846	*
847	* Method.
848	* Let S,C and T denote the sin, cos and tan respectively on
849	* [-PI/4, +PI/4]. Reduce the argument x to y1+y2 = x-k*pi/2
850	* in [-pi/4 , +pi/4], and let n = k mod 4.
851	* We have
852	*
853	* n sin(x) cos(x) tan(x)
854	* ----------------------------------------------------------
855	* 0 S C T
856	* 1 C -S -1/T
857	* 2 -S -C T
858	* 3 -C S -1/T
859	* ----------------------------------------------------------
860	*
861	* Special cases:
862	* Let trig be any of sin, cos, or tan.
863	* trig(+-INF) is NaN, with signals;
864	* trig(NaN) is that NaN;
865	*
866	* Accuracy:
867	* TRIG(x) returns trig(x) nearly rounded
868	*/
869
870	JRT_LEAF(jdouble, SharedRuntime::dtan(jdouble x))
871	double y[`2`],z=`0.0`;
872	int n, ix;
873
874	/ High word of x. /
875	ix = high(x);
876
877	/ \|x\| ~< pi/4 /
878	ix &= `0x7fffffff`;
879	if(ix <= `0x3fe921fb`) return __kernel_tan(x,z,`1`);
880
881	/ tan(Inf or NaN) is NaN /
882	else if (ix>=`0x7ff00000`) return x-x; / NaN /
883
884	/ argument reduction needed /
885	else {
886	n = __ieee754_rem_pio2(x,y);
887	return __kernel_tan(y[`0`],y[`1`],`1`-((n&`1`)<<`1`)); / 1 -- n even*
888	-1 -- n odd /*
889	}
890	JRT_END
891

Browse the source code of OpenJDK/src/hotspot/share/runtime/sharedRuntimeTrig.cpp