op-2.h source code [Glibc/soft-fp/op-2.h]

/* Software floating-point emulation.
   Basic two-word fraction declaration and manipulation.
   Copyright (C) 1997-2020 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Richard Henderson (rth@cygnus.com),
		  Jakub Jelinek (jj@ultra.linux.cz),
		  David S. Miller (davem@redhat.com) and
		  Peter Maydell (pmaydell@chiark.greenend.org.uk).

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   In addition to the permissions in the GNU Lesser General Public
   License, the Free Software Foundation gives you unlimited
   permission to link the compiled version of this file into
   combinations with other programs, and to distribute those
   combinations without any restriction coming from the use of this
   file.  (The Lesser General Public License restrictions do apply in
   other respects; for example, they cover modification of the file,
   and distribution when not linked into a combine executable.)

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
32
33	#ifndef SOFT_FP_OP_2_H
34	#define SOFT_FP_OP_2_H 1
35
/* Declare the two words of a two-word fraction named X: X_f0 is the
   low word and X_f1 the high word (see _FP_FRAC_LOW_2 and
   _FP_FRAC_HIGH_2 below).  _FP_ZERO_INIT is supplied elsewhere and is
   pasted after each declarator; presumably it is either empty or an
   "= 0" initializer -- confirm against its definition.  */
#define _FP_FRAC_DECL_2(X) \
  _FP_W_TYPE X##_f0 _FP_ZERO_INIT, X##_f1 _FP_ZERO_INIT

/* Copy both words of fraction S into fraction D.  */
#define _FP_FRAC_COPY_2(D, S)	(D##_f0 = S##_f0, D##_f1 = S##_f1)

/* Set fraction X from I.  I is forwarded unexpanded to the
   three-argument __FP_FRAC_SET_2, so it is expected to expand to a
   "high, low" pair such as _FP_ZEROFRAC_2.  */
#define _FP_FRAC_SET_2(X, I)	__FP_FRAC_SET_2 (X, I)

/* Lvalue accessors for the high word, the low word, and word number W
   (W must be the literal token 0 or 1 because it is token-pasted).  */
#define _FP_FRAC_HIGH_2(X)	(X##_f1)
#define _FP_FRAC_LOW_2(X)	(X##_f0)
#define _FP_FRAC_WORD_2(X, w)	(X##_f##w)
43
/* Shift the two-word fraction X left by N bits, in place.

   Three paths are selected on N:
     - N is the compile-time constant 1: double the value with
       additions, carrying the top bit of the low word into the high
       word.
     - N < _FP_W_TYPE_SIZE: ordinary two-word shift.
     - otherwise: the low word moves into the high word and the low
       word becomes zero.

   NOTE: the general N < _FP_W_TYPE_SIZE path shifts X_f0 right by
   _FP_W_TYPE_SIZE - N, so N == 0 would be a full-width shift there;
   callers are presumably expected to pass N >= 1.  N is evaluated
   more than once.  */
#define _FP_FRAC_SLL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              if (__builtin_constant_p (N) && (N) == 1) \
                { \
                  X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE) (X##_f0)) < 0); \
                  X##_f0 += X##_f0; \
                } \
              else \
                { \
                  X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N)); \
                  X##_f0 <<= (N); \
                } \
              0; \
            }) \
          : ({ \
              X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE); \
              X##_f0 = 0; \
            }))
63
64
/* Logical right shift of the two-word fraction X by N bits, in place.
   Bits shifted out of the low word are discarded.

   For N < _FP_W_TYPE_SIZE the low word receives the bits that fall out
   of the high word; otherwise the high word (shifted by the excess)
   becomes the low word and the high word is cleared.

   NOTE: the N < _FP_W_TYPE_SIZE path shifts X_f1 left by
   _FP_W_TYPE_SIZE - N, so N == 0 would be a full-width shift there.
   N is evaluated more than once.  */
#define _FP_FRAC_SRL_2(X, N) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N)); \
              X##_f1 >>= (N); \
            }) \
          : ({ \
              X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE); \
              X##_f1 = 0; \
            }))
75
/* Right shift with sticky-lsb.

   Shift the two-word fraction X right by N bits, in place, and store
   in S a 0/1 flag telling whether any nonzero bit was shifted out.
   Unlike _FP_FRAC_SRS_2, the flag is NOT merged into X.

   When N is the compile-time constant 1 the flag is simply the old
   low bit.  For N >= _FP_W_TYPE_SIZE the whole low word is shifted
   out, plus the low (N - _FP_W_TYPE_SIZE) bits of the high word; the
   N == _FP_W_TYPE_SIZE case is special-cased to avoid a full-width
   shift of X_f1.

   The SZ argument is not used by this implementation.  N is evaluated
   more than once.  */
#define _FP_FRAC_SRST_2(X, S, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              S = (__builtin_constant_p (N) && (N) == 1 \
                   ? X##_f0 & 1 \
                   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0); \
              X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N)); \
              X##_f1 >>= (N); \
            }) \
          : ({ \
              S = ((((N) == _FP_W_TYPE_SIZE \
                     ? 0 \
                     : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
                    | X##_f0) != 0); \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE)); \
              X##_f1 = 0; \
            }))
94
/* Sticky right shift: shift the two-word fraction X right by N bits,
   in place, and OR a 1 into the least significant bit of the result
   if any nonzero bit was shifted out.

   The shifted-out test mirrors _FP_FRAC_SRST_2: the old low bit when N
   is the compile-time constant 1, the low N bits of X_f0 when
   N < _FP_W_TYPE_SIZE, and otherwise all of X_f0 together with the low
   (N - _FP_W_TYPE_SIZE) bits of X_f1 (N == _FP_W_TYPE_SIZE is
   special-cased to avoid a full-width shift).

   The SZ argument is not used by this implementation.  N is evaluated
   more than once.  */
#define _FP_FRAC_SRS_2(X, N, sz) \
  (void) (((N) < _FP_W_TYPE_SIZE) \
          ? ({ \
              X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) \
                        | (__builtin_constant_p (N) && (N) == 1 \
                           ? X##_f0 & 1 \
                           : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0)); \
              X##_f1 >>= (N); \
            }) \
          : ({ \
              X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) \
                        | ((((N) == _FP_W_TYPE_SIZE \
                             ? 0 \
                             : (X##_f1 << (2*_FP_W_TYPE_SIZE - (N)))) \
                            | X##_f0) != 0)); \
              X##_f1 = 0; \
            }))
112
/* Two-word arithmetic on named fractions.  Each macro splits the
   fraction names into their high (_f1) and low (_f0) words and
   forwards them, high word first, to the matching __FP_FRAC_*_2
   primitive.  */

/* X += I, where I is a single-word value.  */
#define _FP_FRAC_ADDI_2(X, I) \
  __FP_FRAC_ADDI_2 (X##_f1, X##_f0, I)

/* R = X + Y.  */
#define _FP_FRAC_ADD_2(R, X, Y) \
  __FP_FRAC_ADD_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* R = X - Y.  */
#define _FP_FRAC_SUB_2(R, X, Y) \
  __FP_FRAC_SUB_2 (R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)

/* X -= Y.  */
#define _FP_FRAC_DEC_2(X, Y) \
  __FP_FRAC_DEC_2 (X##_f1, X##_f0, Y##_f1, Y##_f0)
124
/* Store in R the number of leading zero bits of the two-word fraction
   X.  If the high word is nonzero the count comes from it alone;
   otherwise it is the low word's count plus _FP_W_TYPE_SIZE.

   The count itself is delegated to __FP_CLZ, which is defined
   elsewhere; what it yields for an all-zero X is whatever __FP_CLZ
   yields for a zero word.  */
#define _FP_FRAC_CLZ_2(R, X) \
  do \
    { \
      if (X##_f1) \
        __FP_CLZ ((R), X##_f1); \
      else \
        { \
          __FP_CLZ ((R), X##_f0); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
    } \
  while (0)
137
/* Predicates.  */

/* Nonzero if the high word of X, viewed as signed, is negative
   (i.e. its top bit is set).  */
#define _FP_FRAC_NEGP_2(X)	((_FP_WS_TYPE) X##_f1 < 0)

/* Nonzero if both words of X are zero.  */
#define _FP_FRAC_ZEROP_2(X)	((X##_f1 | X##_f0) == 0)

/* Test, respectively clear, the overflow bit of X for format FS.  The
   high-word accessor _FP_FRAC_HIGH_<fs> and the mask _FP_OVERFLOW_<fs>
   are provided by the per-format headers.  */
#define _FP_FRAC_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) & _FP_OVERFLOW_##fs)
#define _FP_FRAC_CLEAR_OVERP_2(fs, X)	(_FP_FRAC_HIGH_##fs (X) &= ~_FP_OVERFLOW_##fs)

/* Test the high bit of a double-width fraction X for format FS, using
   the per-format _FP_FRAC_HIGH_DW_<fs> and _FP_HIGHBIT_DW_<fs>.  */
#define _FP_FRAC_HIGHBIT_DW_2(fs, X) \
  (_FP_FRAC_HIGH_DW_##fs (X) & _FP_HIGHBIT_DW_##fs)

/* Unsigned two-word comparisons: X == Y, X > Y and X >= Y, comparing
   the high words first and the low words only on a tie.  */
#define _FP_FRAC_EQ_2(X, Y)	(X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
#define _FP_FRAC_GT_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))
#define _FP_FRAC_GE_2(X, Y) \
  (X##_f1 > Y##_f1 || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
150
/* Two-word fraction constants, written as a "high, low" pair so they
   can be passed as the I argument of _FP_FRAC_SET_2: all bits clear,
   only the lowest bit set, and all bits set.  */
#define _FP_ZEROFRAC_2	0, 0
#define _FP_MINFRAC_2	0, 1
#define _FP_MAXFRAC_2	(~(_FP_WS_TYPE) 0), (~(_FP_WS_TYPE) 0)
154
/* Internals.  */

/* Assign the low word I0 and the high word I1 of fraction X.  Note
   the argument order: high word first.  */
#define __FP_FRAC_SET_2(X, I1, I0)	(X##_f0 = I0, X##_f1 = I1)

/* Store in R the number of leading zero bits of the two-word value
   whose high word is XH and low word is XL.  If XH is nonzero the
   count comes from it alone; otherwise it is XL's count plus
   _FP_W_TYPE_SIZE.  The per-word count is delegated to __FP_CLZ,
   defined elsewhere.  */
#define __FP_CLZ_2(R, xh, xl) \
  do \
    { \
      if (xh) \
        __FP_CLZ ((R), xh); \
      else \
        { \
          __FP_CLZ ((R), xl); \
          (R) += _FP_W_TYPE_SIZE; \
        } \
    } \
  while (0)
171
/* Two-word add/subtract primitives.  All take the high word before
   the low word.

   The first branch holds plain C fallbacks, each guarded so that an
   earlier definition wins; it is currently compiled out by "#if 0".
   The active branch unconditionally replaces any earlier definition
   with versions built on add_ssaaaa and sub_ddmmss, which are not
   defined in this file (presumably they come from longlong.h).  */
#if 0

# ifndef __FP_FRAC_ADDI_2
/* (xh, xl) += i; the carry is detected by the low word wrapping.  */
#  define __FP_FRAC_ADDI_2(xh, xl, i) \
  (xh += ((xl += i) < i))
# endif
# ifndef __FP_FRAC_ADD_2
/* (rh, rl) = (xh, xl) + (yh, yl).  */
#  define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh + yh + ((rl = xl + yl) < xl))
# endif
# ifndef __FP_FRAC_SUB_2
/* (rh, rl) = (xh, xl) - (yh, yl).  */
#  define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
  (rh = xh - yh - ((rl = xl - yl) > xl))
# endif
# ifndef __FP_FRAC_DEC_2
/* (xh, xl) -= (yh, yl); the old low word is saved so the borrow can
   be detected after xl has been overwritten.  */
#  define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  do \
    { \
      UWtype __FP_FRAC_DEC_2_t = xl; \
      xh -= yh + ((xl -= yl) > __FP_FRAC_DEC_2_t); \
    } \
  while (0)
# endif

#else

/* (xh, xl) += i.  */
# undef __FP_FRAC_ADDI_2
# define __FP_FRAC_ADDI_2(xh, xl, i)	add_ssaaaa (xh, xl, xh, xl, 0, i)
/* (rh, rl) = (xh, xl) + (yh, yl).  */
# undef __FP_FRAC_ADD_2
# define __FP_FRAC_ADD_2	add_ssaaaa
/* (rh, rl) = (xh, xl) - (yh, yl).  */
# undef __FP_FRAC_SUB_2
# define __FP_FRAC_SUB_2	sub_ddmmss
/* (xh, xl) -= (yh, yl).  */
# undef __FP_FRAC_DEC_2
# define __FP_FRAC_DEC_2(xh, xl, yh, yl) \
  sub_ddmmss (xh, xl, xh, xl, yh, yl)

#endif
209
/* Unpack the raw bits of a native fp value.  Do not classify or
   normalize the data.  */

/* Copy VAL into a temporary union _FP_UNION_<fs> through its .flt
   member, then read the bit-fields .bits.frac0, .bits.frac1,
   .bits.exp and .bits.sign into X_f0, X_f1, X_e and X_s.  The union
   type is supplied by the per-format header.  */
#define _FP_UNPACK_RAW_2(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs _FP_UNPACK_RAW_2_flo; \
      _FP_UNPACK_RAW_2_flo.flt = (val); \
      \
      X##_f0 = _FP_UNPACK_RAW_2_flo.bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_flo.bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_flo.bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_flo.bits.sign; \
    } \
  while (0)

/* As _FP_UNPACK_RAW_2, but VAL is a pointer to the value: it is cast
   to a pointer to union _FP_UNION_<fs> and the bit-fields are read
   through that pointer, without making a copy.  */
#define _FP_UNPACK_RAW_2_P(fs, X, val) \
  do \
    { \
      union _FP_UNION_##fs *_FP_UNPACK_RAW_2_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      X##_f0 = _FP_UNPACK_RAW_2_P_flo->bits.frac0; \
      X##_f1 = _FP_UNPACK_RAW_2_P_flo->bits.frac1; \
      X##_e = _FP_UNPACK_RAW_2_P_flo->bits.exp; \
      X##_s = _FP_UNPACK_RAW_2_P_flo->bits.sign; \
    } \
  while (0)
238
239
/* Repack the raw bits of a native fp value.  */

/* Store X_f0, X_f1, X_e and X_s into the bit-fields .bits.frac0,
   .bits.frac1, .bits.exp and .bits.sign of a temporary union
   _FP_UNION_<fs>, then assign its .flt member to VAL.  The temporary
   is not otherwise initialized, so any bits of .flt not covered by
   those four bit-fields are unspecified.  */
#define _FP_PACK_RAW_2(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs _FP_PACK_RAW_2_flo; \
      \
      _FP_PACK_RAW_2_flo.bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_flo.bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_flo.bits.exp = X##_e; \
      _FP_PACK_RAW_2_flo.bits.sign = X##_s; \
      \
      (val) = _FP_PACK_RAW_2_flo.flt; \
    } \
  while (0)

/* As _FP_PACK_RAW_2, but VAL is a pointer to the destination: it is
   cast to a pointer to union _FP_UNION_<fs> and the bit-fields are
   written in place.  */
#define _FP_PACK_RAW_2_P(fs, val, X) \
  do \
    { \
      union _FP_UNION_##fs *_FP_PACK_RAW_2_P_flo \
        = (union _FP_UNION_##fs *) (val); \
      \
      _FP_PACK_RAW_2_P_flo->bits.frac0 = X##_f0; \
      _FP_PACK_RAW_2_P_flo->bits.frac1 = X##_f1; \
      _FP_PACK_RAW_2_P_flo->bits.exp = X##_e; \
      _FP_PACK_RAW_2_P_flo->bits.sign = X##_s; \
    } \
  while (0)
268
269
/* Multiplication algorithms: */

/* Given a 1W * 1W => 2W primitive, do the extended multiplication.

   Compute the full four-word product of the two-word fractions X and
   Y into the four-word fraction R (accessed with _FP_FRAC_WORD_4).
   DOIT (hi, lo, a, b) must store the double-word product a * b in
   (hi, lo).

   The low and high partial products land directly in R words 1:0 and
   3:2; the two cross products are computed into temporaries and each
   is added into R words 3:1 with __FP_FRAC_ADD_3.

   WFRACBITS is not used by this implementation.  */
#define _FP_MUL_MEAT_DW_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_c); \
      \
      doit (_FP_FRAC_WORD_4 (R, 1), _FP_FRAC_WORD_4 (R, 0), \
            X##_f0, Y##_f0); \
      doit (_FP_MUL_MEAT_DW_2_wide_b_f1, _FP_MUL_MEAT_DW_2_wide_b_f0, \
            X##_f0, Y##_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_c_f1, _FP_MUL_MEAT_DW_2_wide_c_f0, \
            X##_f1, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
            X##_f1, Y##_f1); \
      \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_2_wide_b_f1, \
                       _FP_MUL_MEAT_DW_2_wide_b_f0, \
                       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_2_wide_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_c_f0, \
                       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1)); \
    } \
  while (0)
303
/* Multiply the two-word fractions X and Y and store the normalized
   two-word result in R.

   The full four-word product is formed with _FP_MUL_MEAT_DW_2_wide
   (DOIT is its 1W * 1W => 2W primitive), shifted right by
   WFRACBITS - 1 with the sticky shift _FP_FRAC_SRS_4, and its two low
   words are then copied to R.  */
#define _FP_MUL_MEAT_2_wide(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_z); \
      \
      _FP_MUL_MEAT_DW_2_wide ((wfracbits), _FP_MUL_MEAT_2_wide_z, \
                              X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_z, 1); \
    } \
  while (0)
321
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.
   Do only 3 multiplications instead of four.  This one is for machines
   where multiplication is much more expensive than subtraction.

   Compute the full four-word product of the two-word fractions X and
   Y into the four-word fraction R.  DOIT (hi, lo, a, b) must store the
   double-word product a * b in (hi, lo).

   The three products are X_f0 * Y_f0, X_f1 * Y_f1 and
   (X_f0 + X_f1) * (Y_f0 + Y_f1); the middle term is recovered by
   subtracting the first two from the third.  The two word sums can
   each carry out of a word; the carries are kept in _c1 and _c2 and
   compensated for by adding the (masked) opposite sum, and their
   product, into the upper words.

   WFRACBITS is not used by this implementation.  */
#define _FP_MUL_MEAT_DW_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_b); \
      _FP_FRAC_DECL_2 (_FP_MUL_MEAT_DW_2_wide_3mul_c); \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_wide_3mul_d; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      int _FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      \
      /* b_f0 = X_f0 + X_f1 and b_f1 = Y_f0 + Y_f1, with carries c1, c2.  */ \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 = X##_f0 + X##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c1 \
        = _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 < X##_f0; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 = Y##_f0 + Y##_f1; \
      _FP_MUL_MEAT_DW_2_wide_3mul_c2 \
        = _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 < Y##_f0; \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_d, _FP_FRAC_WORD_4 (R, 0), \
            X##_f0, Y##_f0); \
      doit (_FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1), \
            _FP_MUL_MEAT_DW_2_wide_3mul_b_f0, \
            _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      doit (_FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
            _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, X##_f1, Y##_f1); \
      \
      /* Keep each sum only if the other sum carried.  */ \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f0 \
        &= -_FP_MUL_MEAT_DW_2_wide_3mul_c2; \
      _FP_MUL_MEAT_DW_2_wide_3mul_b_f1 \
        &= -_FP_MUL_MEAT_DW_2_wide_3mul_c1; \
      __FP_FRAC_ADD_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), \
                       (_FP_MUL_MEAT_DW_2_wide_3mul_c1 \
                        & _FP_MUL_MEAT_DW_2_wide_3mul_c2), 0, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_d, \
                       0, _FP_FRAC_WORD_4 (R, 2), _FP_FRAC_WORD_4 (R, 1)); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                        _FP_MUL_MEAT_DW_2_wide_3mul_b_f0); \
      __FP_FRAC_ADDI_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                        _FP_MUL_MEAT_DW_2_wide_3mul_b_f1); \
      /* Subtract the low and high products from the middle term.  */ \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), \
                       0, _FP_MUL_MEAT_DW_2_wide_3mul_d, \
                       _FP_FRAC_WORD_4 (R, 0)); \
      __FP_FRAC_DEC_3 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_FRAC_WORD_4 (R, 1), 0, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0); \
      /* Finally add the high product into words 3:2.  */ \
      __FP_FRAC_ADD_2 (_FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2), \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f1, \
                       _FP_MUL_MEAT_DW_2_wide_3mul_c_f0, \
                       _FP_FRAC_WORD_4 (R, 3), _FP_FRAC_WORD_4 (R, 2)); \
    } \
  while (0)
377
/* Multiply the two-word fractions X and Y and store the normalized
   two-word result in R, using the three-multiplication variant.

   The full four-word product is formed with
   _FP_MUL_MEAT_DW_2_wide_3mul (DOIT is its 1W * 1W => 2W primitive),
   shifted right by WFRACBITS - 1 with the sticky shift
   _FP_FRAC_SRS_4, and its two low words are then copied to R.  */
#define _FP_MUL_MEAT_2_wide_3mul(wfracbits, R, X, Y, doit) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_wide_3mul_z); \
      \
      _FP_MUL_MEAT_DW_2_wide_3mul ((wfracbits), \
                                   _FP_MUL_MEAT_2_wide_3mul_z, \
                                   X, Y, doit); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_wide_3mul_z, \
                      (wfracbits)-1, 2*(wfracbits)); \
      R##_f0 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 0); \
      R##_f1 = _FP_FRAC_WORD_4 (_FP_MUL_MEAT_2_wide_3mul_z, 1); \
    } \
  while (0)
396
/* Compute the full product of the two-word fractions X and Y with
   mpn_mul_n (not defined in this file; presumably GMP's).

   The words of X and Y are copied, low word first, into two-element
   arrays, and mpn_mul_n is asked to multiply them as 2-limb numbers
   into R_f, which must therefore be an array; mpn_mul_n is expected
   to write four words there.

   WFRACBITS is not used by this implementation.  */
#define _FP_MUL_MEAT_DW_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_x[2]; \
      _FP_W_TYPE _FP_MUL_MEAT_DW_2_gmp_y[2]; \
      _FP_MUL_MEAT_DW_2_gmp_x[0] = X##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_x[1] = X##_f1; \
      _FP_MUL_MEAT_DW_2_gmp_y[0] = Y##_f0; \
      _FP_MUL_MEAT_DW_2_gmp_y[1] = Y##_f1; \
      \
      mpn_mul_n (R##_f, _FP_MUL_MEAT_DW_2_gmp_x, \
                 _FP_MUL_MEAT_DW_2_gmp_y, 2); \
    } \
  while (0)
411
/* Multiply the two-word fractions X and Y and store the normalized
   two-word result in R, using the mpn_mul_n based multiplication.

   The full product is formed with _FP_MUL_MEAT_DW_2_gmp, shifted
   right by WFRACBITS - 1 with the sticky shift _FP_FRAC_SRS_4, and
   the two low words are then read directly from the temporary's _f
   array (so _FP_FRAC_DECL_4 must declare that array).  */
#define _FP_MUL_MEAT_2_gmp(wfracbits, R, X, Y) \
  do \
    { \
      _FP_FRAC_DECL_4 (_FP_MUL_MEAT_2_gmp_z); \
      \
      _FP_MUL_MEAT_DW_2_gmp ((wfracbits), _FP_MUL_MEAT_2_gmp_z, X, Y); \
      \
      /* Normalize since we know where the msb of the multiplicands \
         were (bit B), we know that the msb of the product is \
         at either 2B or 2B-1.  */ \
      _FP_FRAC_SRS_4 (_FP_MUL_MEAT_2_gmp_z, (wfracbits)-1, \
                      2*(wfracbits)); \
      R##_f0 = _FP_MUL_MEAT_2_gmp_z_f[0]; \
      R##_f1 = _FP_MUL_MEAT_2_gmp_z_f[1]; \
    } \
  while (0)
428
/* Do at most 120x120=240 bits multiplication using double floating
   point multiplication.  This is useful if floating point
   multiplication has much bigger throughput than integer multiply.
   It is supposed to work for _FP_W_TYPE_SIZE 64 and wfracbits
   between 106 and 120 only.
   Caller guarantees that X and Y have (1LL << (wfracbits - 1)) set.
   SETFETZ is a macro which will disable all FPU exceptions and set rounding
   towards zero, RESETFE should optionally reset it back.

   Outline of the code below: X is split into five 24-bit pieces
   _a240 (most significant) .. _e240, and Y into _f240 .. _j240; each
   piece is converted to double and all but the top one are scaled by
   a power of 2^-24.  The nine column sums _k240 (most significant)
   .. _s240 of the schoolbook product are formed and accumulated from
   the least significant column upwards.  Adding one of the _const
   bias values to a column leaves that column's 24-bit digit in the
   low mantissa bits, from where it is read through the union's
   integer member.  The four lowest columns only contribute to the
   sticky bit _y240.  */

#define _FP_MUL_MEAT_2_120_240_double(wfracbits, R, X, Y, setfetz, resetfe) \
  do \
    { \
      static const double _const[] = \
        { \
          /* 2^-24 */ 5.9604644775390625e-08, \
          /* 2^-48 */ 3.5527136788005009e-15, \
          /* 2^-72 */ 2.1175823681357508e-22, \
          /* 2^-96 */ 1.2621774483536189e-29, \
          /* 2^28 */ 2.68435456e+08, \
          /* 2^4 */ 1.600000e+01, \
          /* 2^-20 */ 9.5367431640625e-07, \
          /* 2^-44 */ 5.6843418860808015e-14, \
          /* 2^-68 */ 3.3881317890172014e-21, \
          /* 2^-92 */ 2.0194839173657902e-28, \
          /* 2^-116 */ 1.2037062152420224e-35 \
        }; \
      double _a240, _b240, _c240, _d240, _e240, _f240, \
             _g240, _h240, _i240, _j240, _k240; \
      union { double d; UDItype i; } _l240, _m240, _n240, _o240, \
                                     _p240, _q240, _r240, _s240; \
      UDItype _t240, _u240, _v240, _w240, _x240, _y240 = 0; \
      \
      _FP_STATIC_ASSERT ((wfracbits) >= 106 && (wfracbits) <= 120, \
                         "wfracbits out of range"); \
      \
      setfetz; \
      \
      /* Split X and Y into 24-bit pieces.  */ \
      _e240 = (double) (long) (X##_f0 & 0xffffff); \
      _j240 = (double) (long) (Y##_f0 & 0xffffff); \
      _d240 = (double) (long) ((X##_f0 >> 24) & 0xffffff); \
      _i240 = (double) (long) ((Y##_f0 >> 24) & 0xffffff); \
      _c240 = (double) (long) (((X##_f1 << 16) & 0xffffff) | (X##_f0 >> 48)); \
      _h240 = (double) (long) (((Y##_f1 << 16) & 0xffffff) | (Y##_f0 >> 48)); \
      _b240 = (double) (long) ((X##_f1 >> 8) & 0xffffff); \
      _g240 = (double) (long) ((Y##_f1 >> 8) & 0xffffff); \
      _a240 = (double) (long) (X##_f1 >> 32); \
      _f240 = (double) (long) (Y##_f1 >> 32); \
      /* Scale the pieces to their relative weights.  */ \
      _e240 *= _const[3]; \
      _j240 *= _const[3]; \
      _d240 *= _const[2]; \
      _i240 *= _const[2]; \
      _c240 *= _const[1]; \
      _h240 *= _const[1]; \
      _b240 *= _const[0]; \
      _g240 *= _const[0]; \
      /* Column sums of the schoolbook product.  */ \
      _s240.d = _e240*_j240; \
      _r240.d = _d240*_j240 + _e240*_i240; \
      _q240.d = _c240*_j240 + _d240*_i240 + _e240*_h240; \
      _p240.d = _b240*_j240 + _c240*_i240 + _d240*_h240 + _e240*_g240; \
      _o240.d = _a240*_j240 + _b240*_i240 + _c240*_h240 + _d240*_g240 + _e240*_f240; \
      _n240.d = _a240*_i240 + _b240*_h240 + _c240*_g240 + _d240*_f240; \
      _m240.d = _a240*_h240 + _b240*_g240 + _c240*_f240; \
      _l240.d = _a240*_g240 + _b240*_f240; \
      _k240 = _a240*_f240; \
      /* Accumulate from the least significant column upwards.  */ \
      _r240.d += _s240.d; \
      _q240.d += _r240.d; \
      _p240.d += _q240.d; \
      _o240.d += _p240.d; \
      _n240.d += _o240.d; \
      _m240.d += _n240.d; \
      _l240.d += _m240.d; \
      _k240 += _l240.d; \
      /* Reduce the low columns to what lies below each bias.  */ \
      _s240.d -= ((_const[10]+_s240.d)-_const[10]); \
      _r240.d -= ((_const[9]+_r240.d)-_const[9]); \
      _q240.d -= ((_const[8]+_q240.d)-_const[8]); \
      _p240.d -= ((_const[7]+_p240.d)-_const[7]); \
      /* Bias the upper columns so their digits sit in the low \
         mantissa bits.  */ \
      _o240.d += _const[7]; \
      _n240.d += _const[6]; \
      _m240.d += _const[5]; \
      _l240.d += _const[4]; \
      /* Anything left in the low columns is sticky.  */ \
      if (_s240.d != 0.0) \
        _y240 = 1; \
      if (_r240.d != 0.0) \
        _y240 = 1; \
      if (_q240.d != 0.0) \
        _y240 = 1; \
      if (_p240.d != 0.0) \
        _y240 = 1; \
      _t240 = (DItype) _k240; \
      _u240 = _l240.i; \
      _v240 = _m240.i; \
      _w240 = _n240.i; \
      _x240 = _o240.i; \
      /* Assemble the result, already shifted right by wfracbits - 1.  */ \
      R##_f1 = ((_t240 << (128 - (wfracbits - 1))) \
                | ((_u240 & 0xffffff) >> ((wfracbits - 1) - 104))); \
      R##_f0 = (((_u240 & 0xffffff) << (168 - (wfracbits - 1))) \
                | ((_v240 & 0xffffff) << (144 - (wfracbits - 1))) \
                | ((_w240 & 0xffffff) << (120 - (wfracbits - 1))) \
                | ((_x240 & 0xffffff) >> ((wfracbits - 1) - 96)) \
                | _y240); \
      resetfe; \
    } \
  while (0)
532
/* Division algorithms: */

/* Divide the two-word fraction X by the two-word fraction Y, storing
   the two-word quotient in R, using udiv_qrnnd (2W / 1W division) and
   umul_ppmm (1W * 1W => 2W multiplication), neither of which is
   defined in this file.

   Steps, as written below:
     - Build a three-word numerator n from X.  If X >= Y it is X
       shifted right by one bit; otherwise it is X itself and R's
       exponent R_e is decremented.
     - Shift Y left by _FP_WFRACXBITS_<fs> so its top bit is set.
       Note that this modifies Y.
     - Produce the high quotient word R_f1 by dividing by Y_f1 and
       correcting the estimate (at most twice) against Y_f0.
     - Produce the low quotient word R_f0 the same way from the
       remainder, and OR in _FP_WORK_STICKY if the final remainder is
       nonzero.  */
#define _FP_DIV_MEAT_2_udiv(fs, R, X, Y) \
  do \
    { \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f2; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_n_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_r_f0; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f1; \
      _FP_W_TYPE _FP_DIV_MEAT_2_udiv_m_f0; \
      if (_FP_FRAC_GE_2 (X, Y)) \
        { \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f1 \
            = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1; \
          _FP_DIV_MEAT_2_udiv_n_f0 \
            = X##_f0 << (_FP_W_TYPE_SIZE - 1); \
        } \
      else \
        { \
          R##_e--; \
          _FP_DIV_MEAT_2_udiv_n_f2 = X##_f1; \
          _FP_DIV_MEAT_2_udiv_n_f1 = X##_f0; \
          _FP_DIV_MEAT_2_udiv_n_f0 = 0; \
        } \
      \
      /* Normalize, i.e. make the most significant bit of the \
         denominator set.  */ \
      _FP_FRAC_SLL_2 (Y, _FP_WFRACXBITS_##fs); \
      \
      udiv_qrnnd (R##_f1, _FP_DIV_MEAT_2_udiv_r_f1, \
                  _FP_DIV_MEAT_2_udiv_n_f2, _FP_DIV_MEAT_2_udiv_n_f1, \
                  Y##_f1); \
      umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, _FP_DIV_MEAT_2_udiv_m_f0, \
                 R##_f1, Y##_f0); \
      _FP_DIV_MEAT_2_udiv_r_f0 = _FP_DIV_MEAT_2_udiv_n_f0; \
      if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, _FP_DIV_MEAT_2_udiv_r)) \
        { \
          R##_f1--; \
          _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                          _FP_DIV_MEAT_2_udiv_r); \
          if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
              && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f1--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
            } \
        } \
      _FP_FRAC_DEC_2 (_FP_DIV_MEAT_2_udiv_r, _FP_DIV_MEAT_2_udiv_m); \
      \
      if (_FP_DIV_MEAT_2_udiv_r_f1 == Y##_f1) \
        { \
          /* This is a special case, not an optimization \
             (_FP_DIV_MEAT_2_udiv_r/Y##_f1 would not fit into UWtype). \
             As _FP_DIV_MEAT_2_udiv_r is guaranteed to be < Y, \
             R##_f0 can be either (UWtype)-1 or (UWtype)-2.  But as we \
             know what kind of bits it is (sticky, guard, round), \
             we don't care.  We also don't care what the remainder is, \
             because the guard bit will be set anyway.  -jj */ \
          R##_f0 = -1; \
        } \
      else \
        { \
          udiv_qrnnd (R##_f0, _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f1, \
                      _FP_DIV_MEAT_2_udiv_r_f0, Y##_f1); \
          umul_ppmm (_FP_DIV_MEAT_2_udiv_m_f1, \
                     _FP_DIV_MEAT_2_udiv_m_f0, R##_f0, Y##_f0); \
          _FP_DIV_MEAT_2_udiv_r_f0 = 0; \
          if (_FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                             _FP_DIV_MEAT_2_udiv_r)) \
            { \
              R##_f0--; \
              _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                              _FP_DIV_MEAT_2_udiv_r); \
              if (_FP_FRAC_GE_2 (_FP_DIV_MEAT_2_udiv_r, Y) \
                  && _FP_FRAC_GT_2 (_FP_DIV_MEAT_2_udiv_m, \
                                    _FP_DIV_MEAT_2_udiv_r)) \
                { \
                  R##_f0--; \
                  _FP_FRAC_ADD_2 (_FP_DIV_MEAT_2_udiv_r, Y, \
                                  _FP_DIV_MEAT_2_udiv_r); \
                } \
            } \
          if (!_FP_FRAC_EQ_2 (_FP_DIV_MEAT_2_udiv_r, \
                              _FP_DIV_MEAT_2_udiv_m)) \
            R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
627
628
/* Square root algorithms:
   We have just one right now, maybe Newton approximation
   should be added for those machines where division is fast.  */

/* Bit-by-bit square root of the two-word fraction X.

   R accumulates the result, S is the running trial base and T a
   scratch fraction; Q is a single-bit mask that is shifted right once
   per iteration and is modified by this macro.  X is consumed: it is
   reduced and shifted left by one bit on every iteration.  The
   initial values of R, S and Q are set up by the caller.

   The first loop produces the bits of the high result word, running
   until Q has been shifted down to zero.  Q is then reset to the top
   bit of a word and the second loop produces the bits of the low
   result word until Q reaches _FP_WORK_ROUND.  Finally, if any
   remainder is left, _FP_WORK_STICKY is set in R_f0, together with
   _FP_WORK_ROUND when the remainder exceeds S.  */
#define _FP_SQRT_MEAT_2(R, S, T, X, q) \
  do \
    { \
      while (q) \
        { \
          T##_f1 = S##_f1 + (q); \
          if (T##_f1 <= X##_f1) \
            { \
              S##_f1 = T##_f1 + (q); \
              X##_f1 -= T##_f1; \
              R##_f1 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      (q) = (_FP_W_TYPE) 1 << (_FP_W_TYPE_SIZE - 1); \
      while ((q) != _FP_WORK_ROUND) \
        { \
          T##_f0 = S##_f0 + (q); \
          T##_f1 = S##_f1; \
          if (T##_f1 < X##_f1 \
              || (T##_f1 == X##_f1 && T##_f0 <= X##_f0)) \
            { \
              S##_f0 = T##_f0 + (q); \
              /* Carry from the low word of S into its high word.  */ \
              S##_f1 += (T##_f0 > S##_f0); \
              _FP_FRAC_DEC_2 (X, T); \
              R##_f0 += (q); \
            } \
          _FP_FRAC_SLL_2 (X, 1); \
          (q) >>= 1; \
        } \
      if (X##_f0 | X##_f1) \
        { \
          if (S##_f1 < X##_f1 \
              || (S##_f1 == X##_f1 && S##_f0 < X##_f0)) \
            R##_f0 |= _FP_WORK_ROUND; \
          R##_f0 |= _FP_WORK_STICKY; \
        } \
    } \
  while (0)
673
674
/* Assembly/disassembly for converting to/from integral types.
   No shifting or overflow handled here.  */

/* Store the two-word fraction X into the integer lvalue R, whose
   width in bits is RSIZE.  If RSIZE fits in one word only the low
   word is used; otherwise R is built as (X_f1 << _FP_W_TYPE_SIZE)
   + X_f0.  R is assigned more than once in the wide case.  */
#define _FP_FRAC_ASSEMBLE_2(r, X, rsize) \
  (void) (((rsize) <= _FP_W_TYPE_SIZE) \
          ? ({ (r) = X##_f0; }) \
          : ({ \
              (r) = X##_f1; \
              (r) <<= _FP_W_TYPE_SIZE; \
              (r) += X##_f0; \
            }))

/* Split the integer R, whose width in bits is RSIZE, into the
   two-word fraction X: the low word receives R (truncated by the
   assignment) and the high word receives R >> _FP_W_TYPE_SIZE, or
   zero when RSIZE fits in one word.  R is evaluated more than
   once.  */
#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize) \
  do \
    { \
      X##_f0 = (r); \
      X##_f1 = ((rsize) <= _FP_W_TYPE_SIZE \
                ? 0 \
                : (r) >> _FP_W_TYPE_SIZE); \
    } \
  while (0)
696
/* Convert FP values between word sizes.  */

/* Two-word source to one-word destination: keep only the low word.  */
#define _FP_FRAC_COPY_1_2(D, S)		(D##_f = S##_f0)

/* One-word source to two-word destination: the high word is zeroed.  */
#define _FP_FRAC_COPY_2_1(D, S)		((D##_f0 = S##_f), (D##_f1 = 0))

/* Two-word to two-word: a plain copy.  */
#define _FP_FRAC_COPY_2_2(D, S)		_FP_FRAC_COPY_2 (D, S)
704
705	#endif /* !SOFT_FP_OP_2_H */
706

Browse the source code of Glibc/soft-fp/op-2.h