k_rem_pio2.c source code [OpenJDK/src/java.base/share/native/libfdlibm/k_rem_pio2.c]

1	/*
2	* Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved.
3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4	*
5	* This code is free software; you can redistribute it and/or modify it
6	* under the terms of the GNU General Public License version 2 only, as
7	* published by the Free Software Foundation. Oracle designates this
8	* particular file as subject to the "Classpath" exception as provided
9	* by Oracle in the LICENSE file that accompanied this code.
10	*
11	* This code is distributed in the hope that it will be useful, but WITHOUT
12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14	* version 2 for more details (a copy is included in the LICENSE file that
15	* accompanied this code).
16	*
17	* You should have received a copy of the GNU General Public License version
18	* 2 along with this work; if not, write to the Free Software Foundation,
19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20	*
21	* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22	* or visit www.oracle.com if you need additional information or have any
23	* questions.
24	*/
25
26	/*
27	* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
28	* double x[],y[]; int e0,nx,prec; int ipio2[];
29	*
30	* __kernel_rem_pio2 returns the last three binary digits of N
31	* (i.e. N mod 8) with y = x - N*pi/2
32	* so that |y| < pi/2.
33	*
34	* The method is to compute the integer (mod 8) and fraction parts of
35	* (2/pi)*x without doing the full multiplication. In general we
36	* skip the part of the product that are known to be a huge integer (
37	* more accurately, = 0 mod 8 ). Thus the number of operations are
38	* independent of the exponent of the input.
39	*
40	* (2/pi) is represented by an array of 24-bit integers in ipio2[].
41	*
42	* Input parameters:
43	* x[] The input value (must be positive) is broken into nx
44	* pieces of 24-bit integers in double precision format.
45	* x[i] will be the i-th 24 bits of x. The scaled exponent
46	* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
47	* matches x up to 24 bits).
48	*
49	* Example of breaking a double positive z into x[0]+x[1]+x[2]:
50	* e0 = ilogb(z)-23
51	* z = scalbn(z,-e0)
52	* for i = 0,1,2
53	* x[i] = floor(z)
54	* z = (z-x[i])*2**24
55	*
56	*
57	* y[] output result in an array of double precision numbers.
58	* The dimension of y[] is:
59	* 24-bit precision 1
60	* 53-bit precision 2
61	* 64-bit precision 2
62	* 113-bit precision 3
63	* The actual value is the sum of them. Thus for 113-bit
64	* precision, one may have to do something like:
65	*
66	* long double t,w,r_head, r_tail;
67	* t = (long double)y[2] + (long double)y[1];
68	* w = (long double)y[0];
69	* r_head = t+w;
70	* r_tail = w - (r_head - t);
71	*
72	* e0 The exponent of x[0]
73	*
74	* nx dimension of x[]
75	*
76	* prec an integer indicating the precision:
77	* 0 24 bits (single)
78	* 1 53 bits (double)
79	* 2 64 bits (extended)
80	* 3 113 bits (quad)
81	*
82	* ipio2[]
83	* integer array, contains the (24i)-th to (24i+23)-th
84	* bit of 2/pi after binary point. The corresponding
85	* floating value is
86	*
87	* ipio2[i] * 2^(-24(i+1)).
88	*
89	* External function:
90	* double scalbn(), floor();
91	*
92	*
93	* Here is the description of some local variables:
94	*
95	* jk jk+1 is the initial number of terms of ipio2[] needed
96	* in the computation. The recommended value is 2,3,4,
97	* 6 for single, double, extended,and quad.
98	*
99	* jz local integer variable indicating the number of
100	* terms of ipio2[] used.
101	*
102	* jx nx - 1
103	*
104	* jv index for pointing to the suitable ipio2[] for the
105	* computation. In general, we want
106	* ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
107	* is an integer. Thus
108	* e0-3-24*jv >= 0 or (e0-3)/24 >= jv
109	* Hence jv = max(0,(e0-3)/24).
110	*
111	* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
112	*
113	* q[] double array with integral value, representing the
114	* 24-bits chunk of the product of x and 2/pi.
115	*
116	* q0 the corresponding exponent of q[0]. Note that the
117	* exponent for q[i] would be q0-24*i.
118	*
119	* PIo2[] double precision array, obtained by cutting pi/2
120	* into 24 bits chunks.
121	*
122	* f[] ipio2[] in floating point
123	*
124	* iq[] integer array by breaking up q[] in 24-bits chunk.
125	*
126	* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
127	*
128	* ih integer. If >0 it indicates q[] is >= 0.5, hence
129	* it also indicates the sign of the result.
130	*
131	*/
132
133
134	/*
135	* Constants:
136	* The hexadecimal values are the intended ones for the following
137	* constants. The decimal values may be used, provided that the
138	* compiler will convert from decimal to binary accurately enough
139	* to produce the hexadecimal values shown.
140	*/
141
142	#include "fdlibm.h"
143
/*
 * Initial value of jk, indexed by prec (0 = single, 1 = double,
 * 2 = extended, 3 = quad).  jk+1 is the initial number of terms of
 * ipio2[] used by __kernel_rem_pio2.
 */
#ifdef __STDC__
static const int init_jk[] = {2,3,4,6}; /* initial value for jk */
#else
static int init_jk[] = {2,3,4,6};
#endif
149
/*
 * pi/2 cut into 24-bit chunks, most significant chunk first.  The
 * hexadecimal words in the trailing comments are the intended high and
 * low 32-bit words of each double; the decimal literals must convert to
 * exactly those bit patterns.
 */
#ifdef __STDC__
static const double PIo2[] = {
#else
static double PIo2[] = {
#endif
  1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
  7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
  5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
  3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
  1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
  1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
  2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
  2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
};
164
/*
 * Scalar constants used by __kernel_rem_pio2.  two24 is 2^24 and twon24
 * is 2^-24 (see the hexadecimal words in the trailing comments); they
 * are used to split values into, and rebuild them from, 24-bit chunks.
 */
#ifdef __STDC__
static const double
#else
static double
#endif
zero   = 0.0,
one    = 1.0,
two24  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
174
175	#ifdef __STDC__
176	int __kernel_rem_pio2(double x, double* y, int* e0, int nx, int prec, const int *ipio2)
177	#else
178	int __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
179	double x[], y[]; int e0,nx,prec; int ipio2[];
180	#endif
181	{
182	int jz,jx,jv,jp,jk,carry,n,iq[`20`],i,j,k,m,q0,ih;
183	double z,fw,f[`20`],fq[`20`],q[`20`];
184
185	/ initialize jk/
186	jk = init_jk[prec];
187	jp = jk;
188
189	/ determine jx,jv,q0, note that 3>q0 /
190	jx = nx-`1`;
191	jv = (e0-`3`)/`24`; if(jv<`0`) jv=`0`;
192	q0 = e0-`24`*(jv+`1`);
193
194	/ set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] /
195	j = jv-jx; m = jx+jk;
196	for(i=`0`;i<=m;i++,j++) f[i] = (j<`0`)? zero : (double) ipio2[j];
197
198	/ compute q[0],q[1],...q[jk] /
199	for (i=`0`;i<=jk;i++) {
200	for(j=`0`,fw=`0.0`;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw;
201	}
202
203	jz = jk;
204	recompute:
205	/ distill q[] into iq[] reversingly /
206	for(i=`0`,j=jz,z=q[jz];j>`0`;i++,j--) {
207	fw = (double)((int)(twon24* z));
208	iq[i] = (int)(z-two24*fw);
209	z = q[j-`1`]+fw;
210	}
211
212	/ compute n /
213	z = scalbn(z,q0); / actual value of z /
214	z -= `8.0`floor(z`0.125`); / trim off integer >= 8 /
215	n = (int) z;
216	z -= (double)n;
217	ih = `0`;
218	if(q0>`0`) { / need iq[jz-1] to determine n /
219	i = (iq[jz-`1`]>>(`24`-q0)); n += i;
220	iq[jz-`1`] -= i<<(`24`-q0);
221	ih = iq[jz-`1`]>>(`23`-q0);
222	}
223	else if(q0==`0`) ih = iq[jz-`1`]>>`23`;
224	else if(z>=`0.5`) ih=`2`;
225
226	if(ih>`0`) { / q > 0.5 /
227	n += `1`; carry = `0`;
228	for(i=`0`;i<jz ;i++) { / compute 1-q /
229	j = iq[i];
230	if(carry==`0`) {
231	if(j!=`0`) {
232	carry = `1`; iq[i] = `0x1000000`- j;
233	}
234	} else iq[i] = `0xffffff` - j;
235	}
236	if(q0>`0`) { / rare case: chance is 1 in 12 /
237	switch(q0) {
238	case `1`:
239	iq[jz-`1`] &= `0x7fffff`; break;
240	case `2`:
241	iq[jz-`1`] &= `0x3fffff`; break;
242	}
243	}
244	if(ih==`2`) {
245	z = one - z;
246	if(carry!=`0`) z -= scalbn(one,q0);
247	}
248	}
249
250	/ check if recomputation is needed /
251	if(z==zero) {
252	j = `0`;
253	for (i=jz-`1`;i>=jk;i--) j \|= iq[i];
254	if(j==`0`) { / need recomputation /
255	for(k=`1`;iq[jk-k]==`0`;k++); / k = no. of terms needed /
256
257	for(i=jz+`1`;i<=jz+k;i++) { / add q[jz+1] to q[jz+k] /
258	f[jx+i] = (double) ipio2[jv+i];
259	for(j=`0`,fw=`0.0`;j<=jx;j++) fw += x[j]*f[jx+i-j];
260	q[i] = fw;
261	}
262	jz += k;
263	goto recompute;
264	}
265	}
266
267	/ chop off zero terms /
268	if(z==`0.0`) {
269	jz -= `1`; q0 -= `24`;
270	while(iq[jz]==`0`) { jz--; q0-=`24`;}
271	} else { / break z into 24-bit if necessary /
272	z = scalbn(z,-q0);
273	if(z>=two24) {
274	fw = (double)((int)(twon24*z));
275	iq[jz] = (int)(z-two24*fw);
276	jz += `1`; q0 += `24`;
277	iq[jz] = (int) fw;
278	} else iq[jz] = (int) z ;
279	}
280
281	/ convert integer "bit" chunk to floating-point value /
282	fw = scalbn(one,q0);
283	for(i=jz;i>=`0`;i--) {
284	q[i] = fw(double)iq[i]; fw=twon24;
285	}
286
287	/ compute PIo2[0,...,jp]q[jz,...,0] /*
288	for(i=jz;i>=`0`;i--) {
289	for(fw=`0.0`,k=`0`;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
290	fq[jz-i] = fw;
291	}
292
293	/ compress fq[] into y[] /
294	switch(prec) {
295	case `0`:
296	fw = `0.0`;
297	for (i=jz;i>=`0`;i--) fw += fq[i];
298	y[`0`] = (ih==`0`)? fw: -fw;
299	break;
300	case `1`:
301	case `2`:
302	fw = `0.0`;
303	for (i=jz;i>=`0`;i--) fw += fq[i];
304	y[`0`] = (ih==`0`)? fw: -fw;
305	fw = fq[`0`]-fw;
306	for (i=`1`;i<=jz;i++) fw += fq[i];
307	y[`1`] = (ih==`0`)? fw: -fw;
308	break;
309	case `3`: / painful /
310	for (i=jz;i>`0`;i--) {
311	fw = fq[i-`1`]+fq[i];
312	fq[i] += fq[i-`1`]-fw;
313	fq[i-`1`] = fw;
314	}
315	for (i=jz;i>`1`;i--) {
316	fw = fq[i-`1`]+fq[i];
317	fq[i] += fq[i-`1`]-fw;
318	fq[i-`1`] = fw;
319	}
320	for (fw=`0.0`,i=jz;i>=`2`;i--) fw += fq[i];
321	if(ih==`0`) {
322	y[`0`] = fq[`0`]; y[`1`] = fq[`1`]; y[`2`] = fw;
323	} else {
324	y[`0`] = -fq[`0`]; y[`1`] = -fq[`1`]; y[`2`] = -fw;
325	}
326	}
327	return n&`7`;
328	}
329

Browse the source code of OpenJDK/src/java.base/share/native/libfdlibm/k_rem_pio2.c