powl_helper.c source code [Glibc/sysdeps/x86/fpu/powl_helper.c]

/* Implement powl for x86 using extra-precision log.
   Copyright (C) 2012-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
18
19	#include <math.h>
20	#include <math_private.h>
21	#include <math-underflow.h>
22	#include <stdbool.h>
23
/* High parts and low parts of -log (k/16), for integer k from 12 to
   24.

   The table holds one (high, low) pair per value of k, in increasing
   order of k: the high part for a given k is at index 2 * k - 24 and
   the low part at index 2 * k - 23.  The pair for k == 16 is exactly
   zero, since log (1) == 0.  */

static const long double powl_log_table[] =
  {
    0x4.9a58844d36e49e1p-4L, -0x1.0522624fd558f574p-68L,   /* k = 12 */
    0x3.527da7915b3c6de4p-4L, 0x1.7d4ef4b901b99b9ep-68L,   /* k = 13 */
    0x2.22f1d044fc8f7bc8p-4L, -0x1.8e97c071a42fc388p-68L,  /* k = 14 */
    0x1.08598b59e3a0688ap-4L, 0x3.fd9bf503372c12fcp-72L,   /* k = 15 */
    -0x0p+0L, 0x0p+0L,                                     /* k = 16 */
    -0xf.85186008b15330cp-8L, 0x1.9b47488a6687672cp-72L,   /* k = 17 */
    -0x1.e27076e2af2e5e9ep-4L, -0xa.87ffe1fe9e155dcp-72L,  /* k = 18 */
    -0x2.bfe60e14f27a791p-4L, 0x1.83bebf1bdb88a032p-68L,   /* k = 19 */
    -0x3.91fef8f353443584p-4L, -0xb.b03de5ff734495cp-72L,  /* k = 20 */
    -0x4.59d72aeae98380e8p-4L, 0xc.e0aa3be4747dc1p-72L,    /* k = 21 */
    -0x5.1862f08717b09f4p-4L, -0x2.decdeccf1cd10578p-68L,  /* k = 22 */
    -0x5.ce75fdaef401a738p-4L, -0x9.314feb4fbde5aaep-72L,  /* k = 23 */
    -0x6.7cc8fb2fe612fcbp-4L, 0x2.5ca2642feb779f98p-68L,   /* k = 24 */
  };
43
/* High 32 bits of log2 (e), and remainder rounded to 64 bits.
   LOG2E_HI + LOG2E_LO together approximate log2 (e); keeping the high
   part short lets products with other short values be formed
   exactly.  */
static const long double log2e_hi = 0x1.71547652p+0L;
static const long double log2e_lo = 0xb.82fe1777d0ffda1p-36L;
47
/* Given a number with high part HI and low part LO, add the number X
   to it and store the result in *RHI and *RLO.  It is given that
   either |X| < |0.7 * HI|, or HI == LO == 0, and that the values are
   small enough that no overflow occurs.  The result does not need to
   be exact to 128 bits; 78-bit accuracy of the final accumulated
   result suffices.

   HI and LO are passed by value, so RHI and RLO may point at the very
   objects whose values were passed as HI and LO (callers use this to
   update an accumulator in place).  */

static inline void
acc_split (long double *rhi, long double *rlo, long double hi, long double lo,
           long double x)
{
  /* Rounded sum of the high part and X ...  */
  long double thi = hi + x;
  /* ... and what that rounding lost, together with the old low part.  */
  long double tlo = (hi - thi) + x + lo;
  /* Renormalize so that the new high part carries as much of the value
     as possible and the new low part holds the remainder.  */
  *rhi = thi + tlo;
  *rlo = (thi - *rhi) + tlo;
}
64
65	extern long double __powl_helper (long double x, long double y);
66	libm_hidden_proto (__powl_helper)
67
68	/ Given X a value that is finite and nonzero, or a NaN, and Y a*
69	finite nonzero value with 0x1p-79 <= \|Y\| <= 0x1p78, compute X to
70	the power Y. /*
71
72	long double
73	__powl_helper (long double x, long double y)
74	{
75	if (isnan (x))
76	return __ieee754_expl (y * __ieee754_logl (x));
77	bool negate;
78	if (x < `0`)
79	{
80	long double absy = fabsl (y);
81	if (absy >= `0x1p64L`)
82	negate = false;
83	else
84	{
85	unsigned long long yll = absy;
86	if (yll != absy)
87	return __ieee754_expl (y * __ieee754_logl (x));
88	negate = (yll & `1`) != `0`;
89	}
90	x = fabsl (x);
91	}
92	else
93	negate = false;
94
95	/ We need to compute Y * log2 (X) to at least 64 bits after the*
96	point for normal results (that is, to at least 78 bits
97	precision). /*
98	int x_int_exponent;
99	long double x_frac;
100	x_frac = __frexpl (x, &x_int_exponent);
101	if (x_frac <= `0x0.aaaaaaaaaaaaaaaap0L`) / 2.0L / 3.0L, rounded down /
102	{
103	x_frac *= `2.0`;
104	x_int_exponent--;
105	}
106
107	long double log_x_frac_hi, log_x_frac_lo;
108	/ Determine an initial approximation to log (X_FRAC) using*
109	POWL_LOG_TABLE, and multiply by a value K/16 to reduce to an
110	interval (24/25, 26/25). /*
111	int k = (int) ((`16.0L` / x_frac) + `0.5L`);
112	log_x_frac_hi = powl_log_table[`2` * k - `24`];
113	log_x_frac_lo = powl_log_table[`2` * k - `23`];
114	long double x_frac_low;
115	if (k == `16`)
116	x_frac_low = `0.0L`;
117	else
118	{
119	/ Mask off low 5 bits of X_FRAC so the multiplication by K/16*
120	is exact. These bits are small enough that they can be
121	corrected for by adding log2 (e) X_FRAC_LOW to the final*
122	result. /*
123	int32_t se;
124	uint32_t i0, i1;
125	GET_LDOUBLE_WORDS (se, i0, i1, x_frac);
126	x_frac_low = x_frac;
127	i1 &= `0xffffffe0`;
128	SET_LDOUBLE_WORDS (x_frac, se, i0, i1);
129	x_frac_low -= x_frac;
130	x_frac_low /= x_frac;
131	x_frac *= k / `16.0L`;
132	}
133
134	/ Now compute log (X_FRAC) for X_FRAC in (24/25, 26/25). Separate*
135	W = X_FRAC - 1 into high 16 bits and remaining bits, so that
136	multiplications for low-order power series terms are exact. The
137	remaining bits are small enough that adding a 64-bit value of
138	log2 (1 + W_LO / (1 + W_HI)) will be a sufficient correction for
139	them. /*
140	long double w = x_frac - `1`;
141	long double w_hi, w_lo;
142	int32_t se;
143	uint32_t i0, i1;
144	GET_LDOUBLE_WORDS (se, i0, i1, w);
145	i0 &= `0xffff0000`;
146	i1 = `0`;
147	SET_LDOUBLE_WORDS (w_hi, se, i0, i1);
148	w_lo = w - w_hi;
149	long double wp = w_hi;
150	acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo, wp);
151	wp *= -w_hi;
152	acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
153	wp / `2.0L`);
154	wp *= -w_hi;
155	acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
156	wp * `0x0.5555p0L`); / -W_HI*3 / 3, high part. /*
157	acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
158	wp * `0x0.5555555555555555p-16L`); / -W_HI*3 / 3, low part. /*
159	wp *= -w_hi;
160	acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
161	wp / `4.0L`);
162	/ Subsequent terms are small enough that they only need be computed*
163	to 64 bits. /*
164	for (int i = `5`; i <= `17`; i++)
165	{
166	wp *= -w_hi;
167	acc_split (&log_x_frac_hi, &log_x_frac_lo, log_x_frac_hi, log_x_frac_lo,
168	wp / i);
169	}
170
171	/ Convert LOG_X_FRAC_HI + LOG_X_FRAC_LO to a base-2 logarithm. /
172	long double log2_x_frac_hi, log2_x_frac_lo;
173	long double log_x_frac_hi32, log_x_frac_lo64;
174	GET_LDOUBLE_WORDS (se, i0, i1, log_x_frac_hi);
175	i1 = `0`;
176	SET_LDOUBLE_WORDS (log_x_frac_hi32, se, i0, i1);
177	log_x_frac_lo64 = (log_x_frac_hi - log_x_frac_hi32) + log_x_frac_lo;
178	long double log2_x_frac_hi1 = log_x_frac_hi32 * log2e_hi;
179	long double log2_x_frac_lo1
180	= log_x_frac_lo64 * log2e_hi + log_x_frac_hi * log2e_lo;
181	log2_x_frac_hi = log2_x_frac_hi1 + log2_x_frac_lo1;
182	log2_x_frac_lo = (log2_x_frac_hi1 - log2_x_frac_hi) + log2_x_frac_lo1;
183
184	/ Correct for the masking off of W_LO. /
185	long double log2_1p_w_lo;
186	asm ("fyl2xp1"
187	: "=t" (log2_1p_w_lo)
188	: "0" (w_lo / (`1.0L` + w_hi)), "u" (`1.0L`)
189	: "st(1)");
190	acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
191	log2_1p_w_lo);
192
193	/ Correct for the masking off of X_FRAC_LOW. /
194	acc_split (&log2_x_frac_hi, &log2_x_frac_lo, log2_x_frac_hi, log2_x_frac_lo,
195	x_frac_low * M_LOG2El);
196
197	/ Add the integer and fractional parts of the base-2 logarithm. /
198	long double log2_x_hi, log2_x_lo;
199	log2_x_hi = x_int_exponent + log2_x_frac_hi;
200	log2_x_lo = ((x_int_exponent - log2_x_hi) + log2_x_frac_hi) + log2_x_frac_lo;
201
202	/ Compute the base-2 logarithm of the result. /
203	long double log2_res_hi, log2_res_lo;
204	long double log2_x_hi32, log2_x_lo64;
205	GET_LDOUBLE_WORDS (se, i0, i1, log2_x_hi);
206	i1 = `0`;
207	SET_LDOUBLE_WORDS (log2_x_hi32, se, i0, i1);
208	log2_x_lo64 = (log2_x_hi - log2_x_hi32) + log2_x_lo;
209	long double y_hi32, y_lo32;
210	GET_LDOUBLE_WORDS (se, i0, i1, y);
211	i1 = `0`;
212	SET_LDOUBLE_WORDS (y_hi32, se, i0, i1);
213	y_lo32 = y - y_hi32;
214	log2_res_hi = log2_x_hi32 * y_hi32;
215	log2_res_lo = log2_x_hi32 * y_lo32 + log2_x_lo64 * y;
216
217	/ Split the base-2 logarithm of the result into integer and*
218	fractional parts. /*
219	long double log2_res_int = roundl (log2_res_hi);
220	long double log2_res_frac = log2_res_hi - log2_res_int + log2_res_lo;
221	/ If the integer part is very large, the computed fractional part*
222	may be outside the valid range for f2xm1. /*
223	if (fabsl (log2_res_int) > `16500`)
224	log2_res_frac = `0`;
225
226	/ Compute the final result. /
227	long double res;
228	asm ("f2xm1" : "=t" (res) : "0" (log2_res_frac));
229	res += `1.0L`;
230	if (negate)
231	res = -res;
232	asm ("fscale" : "=t" (res) : "0" (res), "u" (log2_res_int));
233	math_check_force_underflow (res);
234	return res;
235	}
236
237	libm_hidden_def (__powl_helper)
238

Browse the source code of Glibc/sysdeps/x86/fpu/powl_helper.c