k_rem_pio2.c source code [SDL/src/libm/k_rem_pio2.c]

1	#include "SDL_internal.h"
2	/*
3	* ====================================================
4	* Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
5	*
6	* Developed at SunPro, a Sun Microsystems, Inc. business.
7	* Permission to use, copy, modify, and distribute this
8	* software is freely granted, provided that this notice
9	* is preserved.
10	* ====================================================
11	*/
12
13	/*
14	* __kernel_rem_pio2(x,y,e0,nx,prec,ipio2)
15	* double x[],y[]; int e0,nx,prec; int ipio2[];
16	*
17	* __kernel_rem_pio2 returns the last three binary digits of N with
18	* y = x - N*pi/2
19	* so that |y| < pi/2.
20	*
21	* The method is to compute the integer (mod 8) and fraction parts of
22	* (2/pi)*x without doing the full multiplication. In general we
23	* skip the parts of the product that are known to be a huge integer (
24	* more accurately, = 0 mod 8 ). Thus the number of operations is
25	* independent of the exponent of the input.
26	*
27	* (2/pi) is represented by an array of 24-bit integers in ipio2[].
28	*
29	* Input parameters:
30	* x[] The input value (must be positive) is broken into nx
31	* pieces of 24-bit integers in double precision format.
32	* x[i] will be the i-th 24 bit of x. The scaled exponent
33	* of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
34	* matches x up to 24 bits).
35	*
36	* Example of breaking a double positive z into x[0]+x[1]+x[2]:
37	* e0 = ilogb(z)-23
38	* z = scalbn(z,-e0)
39	* for i = 0,1,2
40	* x[i] = floor(z)
41	* z = (z-x[i])*2**24
42	*
43	*
44	* y[] output result in an array of double precision numbers.
45	* The dimension of y[] is:
46	* 24-bit precision 1
47	* 53-bit precision 2
48	* 64-bit precision 2
49	* 113-bit precision 3
50	* The actual value is the sum of them. Thus for 113-bit
51	* precision, one may have to do something like:
52	*
53	* long double t,w,r_head, r_tail;
54	* t = (long double)y[2] + (long double)y[1];
55	* w = (long double)y[0];
56	* r_head = t+w;
57	* r_tail = w - (r_head - t);
58	*
59	* e0 The exponent of x[0]
60	*
61	* nx dimension of x[]
62	*
63	* prec an integer indicating the precision:
64	* 0 24 bits (single)
65	* 1 53 bits (double)
66	* 2 64 bits (extended)
67	* 3 113 bits (quad)
68	*
69	* ipio2[]
70	* integer array, contains the (24i)-th to (24i+23)-th
71	* bit of 2/pi after binary point. The corresponding
72	* floating value is
73	*
74	* ipio2[i] * 2^(-24(i+1)).
75	*
76	* External function:
77	* double scalbn(), floor();
78	*
79	*
80	* Here is the description of some local variables:
81	*
82	* jk jk+1 is the initial number of terms of ipio2[] needed
83	* in the computation. The recommended value is 2,3,4,
84	* 6 for single, double, extended,and quad.
85	*
86	* jz local integer variable indicating the number of
87	* terms of ipio2[] used.
88	*
89	* jx nx - 1
90	*
91	* jv index for pointing to the suitable ipio2[] for the
92	* computation. In general, we want
93	* ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
94	* is an integer. Thus
95	* e0-3-24*jv >= 0 or (e0-3)/24 >= jv
96	* Hence jv = max(0,(e0-3)/24).
97	*
98	* jp jp+1 is the number of terms in PIo2[] needed, jp = jk.
99	*
100	* q[] double array with integral value, representing the
101	* 24-bits chunk of the product of x and 2/pi.
102	*
103	* q0 the corresponding exponent of q[0]. Note that the
104	* exponent for q[i] would be q0-24*i.
105	*
106	* PIo2[] double precision array, obtained by cutting pi/2
107	* into 24 bits chunks.
108	*
109	* f[] ipio2[] in floating point
110	*
111	* iq[] integer array by breaking up q[] in 24-bits chunk.
112	*
113	* fq[] final product of x*(2/pi) in fq[0],..,fq[jk]
114	*
115	* ih integer. If >0 it indicates q[] is >= 0.5, hence
116	* it also indicates the sign of the result.
117	*
118	*/
119
120
121	/*
122	* Constants:
123	* The hexadecimal values are the intended ones for the following
124	* constants. The decimal values may be used, provided that the
125	* compiler will convert from decimal to binary accurately enough
126	* to produce the hexadecimal values shown.
127	*/
128
129	#include "math_libm.h"
130	#include "math_private.h"
131
132
/* Initial value of jk, indexed by prec (0 = single, 1 = double, 2 = extended, 3 = quad). */
static const int init_jk[] = {2, 3, 4, 6};
134
/*
 * pi/2 cut into successive chunks, most significant first.  The trailing
 * comments give the intended high and low 32-bit words of each double.
 */
static const double PIo2[] = {
    1.57079625129699707031e+00, /* 0x3FF921FB, 0x40000000 */
    7.54978941586159635335e-08, /* 0x3E74442D, 0x00000000 */
    5.39030252995776476554e-15, /* 0x3CF84698, 0x80000000 */
    3.28200341580791294123e-22, /* 0x3B78CC51, 0x60000000 */
    1.27065575308067607349e-29, /* 0x39F01B83, 0x80000000 */
    1.22933308981111328932e-36, /* 0x387A2520, 0x40000000 */
    2.73370053816464559624e-44, /* 0x36E38222, 0x80000000 */
    2.16741683877804819444e-51, /* 0x3569F31D, 0x00000000 */
};
145
/* Scalar constants; two24 and twon24 are 2^24 and 2^-24 (see hex words). */
static const double
zero   = 0.0,
one    = 1.0,
two24  = 1.67772160000000000000e+07, /* 0x41700000, 0x00000000 */
twon24 = 5.96046447753906250000e-08; /* 0x3E700000, 0x00000000 */
151
152	int32_t attribute_hidden __kernel_rem_pio2(const double x, double* y, int* e0, int nx, const unsigned int prec, const int32_t *ipio2)
153	{
154	int32_t jz,jx,jv,jp,jk,carry,n,iq[`20`],i,j,k,m,q0,ih;
155	double z,fw,f[`20`],fq[`20`],q[`20`];
156
157	if (nx < `1`) {
158	return `0`;
159	}
160
161	/ initialize jk/
162	SDL_assert(prec < SDL_arraysize(init_jk));
163	jk = init_jk[prec];
164	SDL_assert(jk > `0`);
165	jp = jk;
166
167	/ determine jx,jv,q0, note that 3>q0 /
168	jx = nx-`1`;
169	jv = (e0-`3`)/`24`; if(jv<`0`) jv=`0`;
170	q0 = e0-`24`*(jv+`1`);
171
172	/ set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] /
173	j = jv-jx; m = jx+jk;
174	for(i=`0`;i<=m;i++,j++) f[i] = (j<`0`)? zero : (double) ipio2[j];
175	if ((m+`1`) < SDL_arraysize(f)) {
176	SDL_memset(&f[m+`1`], `0`, sizeof (f) - ((m+`1`) * sizeof (f[`0`])));
177	}
178
179	/ compute q[0],q[1],...q[jk] /
180	for (i=`0`;i<=jk;i++) {
181	for(j=`0`,fw=`0.0`;j<=jx;j++) fw += x[j]*f[jx+i-j];
182	q[i] = fw;
183	}
184
185	jz = jk;
186	recompute:
187	/ distill q[] into iq[] reversingly /
188	for(i=`0`,j=jz,z=q[jz];j>`0`;i++,j--) {
189	fw = (double)((int32_t)(twon24* z));
190	iq[i] = (int32_t)(z-two24*fw);
191	z = q[j-`1`]+fw;
192	}
193	if (jz < SDL_arraysize(iq)) {
194	SDL_memset(&iq[jz], `0`, sizeof (iq) - (jz * sizeof (iq[`0`])));
195	}
196
197	/ compute n /
198	z = scalbn(z,q0); / actual value of z /
199	z -= `8.0`floor(z`0.125`); / trim off integer >= 8 /
200	n = (int32_t) z;
201	z -= (double)n;
202	ih = `0`;
203	if(q0>`0`) { / need iq[jz-1] to determine n /
204	i = (iq[jz-`1`]>>(`24`-q0)); n += i;
205	iq[jz-`1`] -= i<<(`24`-q0);
206	ih = iq[jz-`1`]>>(`23`-q0);
207	}
208	else if(q0==`0`) ih = iq[jz-`1`]>>`23`;
209	else if(z>=`0.5`) ih=`2`;
210
211	if(ih>`0`) { / q > 0.5 /
212	n += `1`; carry = `0`;
213	for(i=`0`;i<jz ;i++) { / compute 1-q /
214	j = iq[i];
215	if(carry==`0`) {
216	if(j!=`0`) {
217	carry = `1`; iq[i] = `0x1000000`- j;
218	}
219	} else iq[i] = `0xffffff` - j;
220	}
221	if(q0>`0`) { / rare case: chance is 1 in 12 /
222	switch(q0) {
223	case `1`:
224	iq[jz-`1`] &= `0x7fffff`; break;
225	case `2`:
226	iq[jz-`1`] &= `0x3fffff`; break;
227	}
228	}
229	if(ih==`2`) {
230	z = one - z;
231	if(carry!=`0`) z -= scalbn(one,q0);
232	}
233	}
234
235	/ check if recomputation is needed /
236	if(z==zero) {
237	j = `0`;
238	for (i=jz-`1`;i>=jk;i--) j \|= iq[i];
239	if(j==`0`) { / need recomputation /
240	for(k=`1`;iq[jk-k]==`0`;k++); / k = no. of terms needed /
241
242	for(i=jz+`1`;i<=jz+k;i++) { / add q[jz+1] to q[jz+k] /
243	f[jx+i] = (double) ipio2[jv+i];
244	for(j=`0`,fw=`0.0`;j<=jx;j++) fw += x[j]*f[jx+i-j];
245	q[i] = fw;
246	}
247	jz += k;
248	goto recompute;
249	}
250	}
251
252	/ chop off zero terms /
253	if(z==`0.0`) {
254	jz -= `1`; q0 -= `24`;
255	SDL_assert(jz >= `0`);
256	while(iq[jz]==`0`) { jz--; SDL_assert(jz >= `0`); q0-=`24`;}
257	} else { / break z into 24-bit if necessary /
258	z = scalbn(z,-q0);
259	if(z>=two24) {
260	fw = (double)((int32_t)(twon24*z));
261	iq[jz] = (int32_t)(z-two24*fw);
262	jz += `1`; q0 += `24`;
263	iq[jz] = (int32_t) fw;
264	} else iq[jz] = (int32_t) z ;
265	}
266
267	/ convert integer "bit" chunk to floating-point value /
268	fw = scalbn(one,q0);
269	for(i=jz;i>=`0`;i--) {
270	q[i] = fw(double)iq[i]; fw=twon24;
271	}
272
273	/ compute PIo2[0,...,jp]q[jz,...,0] /*
274	SDL_zero(fq);
275	for(i=jz;i>=`0`;i--) {
276	for(fw=`0.0`,k=`0`;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];
277	fq[jz-i] = fw;
278	}
279
280	/ compress fq[] into y[] /
281	switch(prec) {
282	case `0`:
283	fw = `0.0`;
284	for (i=jz;i>=`0`;i--) fw += fq[i];
285	y[`0`] = (ih==`0`)? fw: -fw;
286	break;
287	case `1`:
288	case `2`:
289	fw = `0.0`;
290	for (i=jz;i>=`0`;i--) fw += fq[i];
291	y[`0`] = (ih==`0`)? fw: -fw;
292	fw = fq[`0`]-fw;
293	for (i=`1`;i<=jz;i++) fw += fq[i];
294	y[`1`] = (ih==`0`)? fw: -fw;
295	break;
296	case `3`: / painful /
297	for (i=jz;i>`0`;i--) {
298	fw = fq[i-`1`]+fq[i];
299	fq[i] += fq[i-`1`]-fw;
300	fq[i-`1`] = fw;
301	}
302	for (i=jz;i>`1`;i--) {
303	fw = fq[i-`1`]+fq[i];
304	fq[i] += fq[i-`1`]-fw;
305	fq[i-`1`] = fw;
306	}
307	for (fw=`0.0`,i=jz;i>=`2`;i--) fw += fq[i];
308	if(ih==`0`) {
309	y[`0`] = fq[`0`]; y[`1`] = fq[`1`]; y[`2`] = fw;
310	} else {
311	y[`0`] = -fq[`0`]; y[`1`] = -fq[`1`]; y[`2`] = -fw;
312	}
313	}
314	return n&`7`;
315	}
316

Browse the source code of SDL/src/libm/k_rem_pio2.c