softfloat.c source code [qemu/fpu/softfloat.c]

1	/*
2	* QEMU float support
3	*
4	* The code in this source file is derived from release 2a of the SoftFloat
5	* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6	* some later contributions) are provided under that license, as detailed below.
7	* It has subsequently been modified by contributors to the QEMU Project,
8	* so some portions are provided under:
9	* the SoftFloat-2a license
10	* the BSD license
11	* GPL-v2-or-later
12	*
13	* Any future contributions to this file after December 1st 2014 will be
14	* taken to be licensed under the Softfloat-2a license unless specifically
15	* indicated otherwise.
16	*/
17
18	/*
19	===============================================================================
20	This C source file is part of the SoftFloat IEC/IEEE Floating-point
21	Arithmetic Package, Release 2a.
22
23	Written by John R. Hauser. This work was made possible in part by the
24	International Computer Science Institute, located at Suite 600, 1947 Center
25	Street, Berkeley, California 94704. Funding was partially provided by the
26	National Science Foundation under grant MIP-9311980. The original version
27	of this code was written as part of a project to build a fixed-point vector
28	processor in collaboration with the University of California at Berkeley,
29	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30	is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31	arithmetic/SoftFloat.html'.
32
33	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39	Derivative works are acceptable, even for commercial purposes, so long as
40	(1) they include prominent notice that the work is derivative, and (2) they
41	include prominent notice akin to these four paragraphs for those parts of
42	this code that are retained.
43
44	===============================================================================
45	*/
46
/* BSD licensing:
 * Copyright (c) 2006, Fabrice Bellard
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its contributors
 * may be used to endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */
77
/* Portions of this work are licensed under the terms of the GNU GPL,
 * version 2 or later. See the COPYING file in the top-level directory.
 */
81
/* softfloat (and in particular the code in softfloat-specialize.h) is
 * target-dependent and needs the TARGET_* macros.
 */
85	#include "qemu/osdep.h"
86	#include <math.h>
87	#include "qemu/bitops.h"
88	#include "fpu/softfloat.h"
89
/* We only need stdlib for abort() */
91
/*----------------------------------------------------------------------------
| Primitive arithmetic functions, including multi-word arithmetic, and
| division and square root approximations.  (Can be specialized to target if
| desired.)
*----------------------------------------------------------------------------*/
97	#include "fpu/softfloat-macros.h"
98
99	/*
100	* Hardfloat
101	*
102	* Fast emulation of guest FP instructions is challenging for two reasons.
103	* First, FP instruction semantics are similar but not identical, particularly
104	* when handling NaNs. Second, emulating at reasonable speed the guest FP
105	* exception flags is not trivial: reading the host's flags register with a
106	* feclearexcept & fetestexcept pair is slow [slightly slower than soft-fp],
107	* and trapping on every FP exception is not fast nor pleasant to work with.
108	*
109	* We address these challenges by leveraging the host FPU for a subset of the
110	* operations. To do this we expand on the idea presented in this paper:
111	*
112	* Guo, Yu-Chuan, et al. "Translating the ARM Neon and VFP instructions in a
113	* binary translator." Software: Practice and Experience 46.12 (2016):1591-1615.
114	*
115	* The idea is thus to leverage the host FPU to (1) compute FP operations
116	* and (2) identify whether FP exceptions occurred while avoiding
117	* expensive exception flag register accesses.
118	*
119	* An important optimization shown in the paper is that given that exception
120	* flags are rarely cleared by the guest, we can avoid recomputing some flags.
121	* This is particularly useful for the inexact flag, which is very frequently
122	* raised in floating-point workloads.
123	*
124	* We optimize the code further by deferring to soft-fp whenever FP exception
125	* detection might get hairy. Two examples: (1) when at least one operand is
126	* denormal/inf/NaN; (2) when operands are not guaranteed to lead to a 0 result
127	* and the result is < the minimum normal.
128	*/
129	#define GEN_INPUT_FLUSH__NOCHECK(name, soft_t) \
130	static inline void name(soft_t a, float_status s) \
131	{ \
132	if (unlikely(soft_t ## _is_denormal(*a))) { \
133	*a = soft_t ## _set_sign(soft_t ## _zero, \
134	soft_t ## _is_neg(*a)); \
135	s->float_exception_flags \|= float_flag_input_denormal; \
136	} \
137	}
138
139	GEN_INPUT_FLUSH__NOCHECK(float32_input_flush__nocheck, float32)
140	GEN_INPUT_FLUSH__NOCHECK(float64_input_flush__nocheck, float64)
141	#undef GEN_INPUT_FLUSH__NOCHECK
142
143	#define GEN_INPUT_FLUSH1(name, soft_t) \
144	static inline void name(soft_t a, float_status s) \
145	{ \
146	if (likely(!s->flush_inputs_to_zero)) { \
147	return; \
148	} \
149	soft_t ## _input_flush__nocheck(a, s); \
150	}
151
152	GEN_INPUT_FLUSH1(float32_input_flush1, float32)
153	GEN_INPUT_FLUSH1(float64_input_flush1, float64)
154	#undef GEN_INPUT_FLUSH1
155
156	#define GEN_INPUT_FLUSH2(name, soft_t) \
157	static inline void name(soft_t a, soft_t b, float_status *s) \
158	{ \
159	if (likely(!s->flush_inputs_to_zero)) { \
160	return; \
161	} \
162	soft_t ## _input_flush__nocheck(a, s); \
163	soft_t ## _input_flush__nocheck(b, s); \
164	}
165
166	GEN_INPUT_FLUSH2(float32_input_flush2, float32)
167	GEN_INPUT_FLUSH2(float64_input_flush2, float64)
168	#undef GEN_INPUT_FLUSH2
169
170	#define GEN_INPUT_FLUSH3(name, soft_t) \
171	static inline void name(soft_t a, soft_t b, soft_t c, float_status s) \
172	{ \
173	if (likely(!s->flush_inputs_to_zero)) { \
174	return; \
175	} \
176	soft_t ## _input_flush__nocheck(a, s); \
177	soft_t ## _input_flush__nocheck(b, s); \
178	soft_t ## _input_flush__nocheck(c, s); \
179	}
180
181	GEN_INPUT_FLUSH3(float32_input_flush3, float32)
182	GEN_INPUT_FLUSH3(float64_input_flush3, float64)
183	#undef GEN_INPUT_FLUSH3
184
/*
 * Per-variant switch between fpclassify() (value 1) and the
 * float32/float64_* softfloat predicates (value 0) inside the generated
 * hardfloat helpers.  There is one knob for every (number of inputs,
 * operand width) pair.
 *
 * The single-precision knobs are 0 on every host; only the
 * double-precision ones depend on the host architecture.
 */
#define QEMU_HARDFLOAT_1F32_USE_FP 0
#define QEMU_HARDFLOAT_2F32_USE_FP 0
#define QEMU_HARDFLOAT_3F32_USE_FP 0

#ifdef __x86_64__
# define QEMU_HARDFLOAT_1F64_USE_FP 1
# define QEMU_HARDFLOAT_2F64_USE_FP 1
# define QEMU_HARDFLOAT_3F64_USE_FP 1
#else
# define QEMU_HARDFLOAT_1F64_USE_FP 0
# define QEMU_HARDFLOAT_2F64_USE_FP 0
# define QEMU_HARDFLOAT_3F64_USE_FP 0
#endif
205
/*
 * QEMU_HARDFLOAT_USE_ISINF chooses whether to use isinf() over
 * float{32,64}_is_infinity when !USE_FP.
 * On x86_64/aarch64, using the former over the latter can yield a ~6% speedup.
 * On power64 however, using isinf() reduces fp-bench performance by up to 50%.
 */
#if defined(__x86_64__) || defined(__aarch64__)
# define QEMU_HARDFLOAT_USE_ISINF   1
#else
# define QEMU_HARDFLOAT_USE_ISINF   0
#endif
217
/*
 * Some targets clear the FP flags before most FP operations. This prevents
 * the use of hardfloat, since hardfloat relies on the inexact flag being
 * already set.
 *
 * QEMU_NO_HARDFLOAT is 1 when hardfloat must not be used (TARGET_PPC, or a
 * build with -ffast-math) and 0 otherwise.  QEMU_SOFTFLOAT_ATTR additionally
 * carries noinline only in the configuration where hardfloat is enabled.
 */
#if defined(TARGET_PPC) || defined(__FAST_MATH__)
# if defined(__FAST_MATH__)
#  warning disabling hardfloat due to -ffast-math: hardfloat requires an exact \
    IEEE implementation
# endif
# define QEMU_NO_HARDFLOAT 1
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN
#else
# define QEMU_NO_HARDFLOAT 0
# define QEMU_SOFTFLOAT_ATTR QEMU_FLATTEN __attribute__((noinline))
#endif
234
235	static inline bool can_use_fpu(const float_status *s)
236	{
237	if (QEMU_NO_HARDFLOAT) {
238	return false;
239	}
240	return likely(s->float_exception_flags & float_flag_inexact &&
241	s->float_rounding_mode == float_round_nearest_even);
242	}
243
244	/*
245	* Hardfloat generation functions. Each operation can have two flavors:
246	* either using softfloat primitives (e.g. float32_is_zero_or_normal) for
247	* most condition checks, or native ones (e.g. fpclassify).
248	*
249	* The flavor is chosen by the callers. Instead of using macros, we rely on the
250	* compiler to propagate constants and inline everything into the callers.
251	*
252	* We only generate functions for operations with two inputs, since only
253	* these are common enough to justify consolidating them into common code.
254	*/
255
256	typedef union {
257	float32 s;
258	float h;
259	} union_float32;
260
261	typedef union {
262	float64 s;
263	double h;
264	} union_float64;
265
266	typedef bool (*f32_check_fn)(union_float32 a, union_float32 b);
267	typedef bool (*f64_check_fn)(union_float64 a, union_float64 b);
268
269	typedef float32 (soft_f32_op2_fn)(float32 a, float32 b, float_status s);
270	typedef float64 (soft_f64_op2_fn)(float64 a, float64 b, float_status s);
271	typedef float (hard_f32_op2_fn)(float* a, float b);
272	typedef double (hard_f64_op2_fn)(double* a, double b);
273
274	/ 2-input is-zero-or-normal /
275	static inline bool f32_is_zon2(union_float32 a, union_float32 b)
276	{
277	if (QEMU_HARDFLOAT_2F32_USE_FP) {
278	/*
279	* Not using a temp variable for consecutive fpclassify calls ends up
280	* generating faster code.
281	*/
282	return (fpclassify(a.h) == FP_NORMAL \|\| fpclassify(a.h) == FP_ZERO) &&
283	(fpclassify(b.h) == FP_NORMAL \|\| fpclassify(b.h) == FP_ZERO);
284	}
285	return float32_is_zero_or_normal(a.s) &&
286	float32_is_zero_or_normal(b.s);
287	}
288
289	static inline bool f64_is_zon2(union_float64 a, union_float64 b)
290	{
291	if (QEMU_HARDFLOAT_2F64_USE_FP) {
292	return (fpclassify(a.h) == FP_NORMAL \|\| fpclassify(a.h) == FP_ZERO) &&
293	(fpclassify(b.h) == FP_NORMAL \|\| fpclassify(b.h) == FP_ZERO);
294	}
295	return float64_is_zero_or_normal(a.s) &&
296	float64_is_zero_or_normal(b.s);
297	}
298
299	/ 3-input is-zero-or-normal /
300	static inline
301	bool f32_is_zon3(union_float32 a, union_float32 b, union_float32 c)
302	{
303	if (QEMU_HARDFLOAT_3F32_USE_FP) {
304	return (fpclassify(a.h) == FP_NORMAL \|\| fpclassify(a.h) == FP_ZERO) &&
305	(fpclassify(b.h) == FP_NORMAL \|\| fpclassify(b.h) == FP_ZERO) &&
306	(fpclassify(c.h) == FP_NORMAL \|\| fpclassify(c.h) == FP_ZERO);
307	}
308	return float32_is_zero_or_normal(a.s) &&
309	float32_is_zero_or_normal(b.s) &&
310	float32_is_zero_or_normal(c.s);
311	}
312
313	static inline
314	bool f64_is_zon3(union_float64 a, union_float64 b, union_float64 c)
315	{
316	if (QEMU_HARDFLOAT_3F64_USE_FP) {
317	return (fpclassify(a.h) == FP_NORMAL \|\| fpclassify(a.h) == FP_ZERO) &&
318	(fpclassify(b.h) == FP_NORMAL \|\| fpclassify(b.h) == FP_ZERO) &&
319	(fpclassify(c.h) == FP_NORMAL \|\| fpclassify(c.h) == FP_ZERO);
320	}
321	return float64_is_zero_or_normal(a.s) &&
322	float64_is_zero_or_normal(b.s) &&
323	float64_is_zero_or_normal(c.s);
324	}
325
326	static inline bool f32_is_inf(union_float32 a)
327	{
328	if (QEMU_HARDFLOAT_USE_ISINF) {
329	return isinf(a.h);
330	}
331	return float32_is_infinity(a.s);
332	}
333
334	static inline bool f64_is_inf(union_float64 a)
335	{
336	if (QEMU_HARDFLOAT_USE_ISINF) {
337	return isinf(a.h);
338	}
339	return float64_is_infinity(a.s);
340	}
341
342	/ Note: @fast_test and @post can be NULL /
343	static inline float32
344	float32_gen2(float32 xa, float32 xb, float_status *s,
345	hard_f32_op2_fn hard, soft_f32_op2_fn soft,
346	f32_check_fn pre, f32_check_fn post,
347	f32_check_fn fast_test, soft_f32_op2_fn fast_op)
348	{
349	union_float32 ua, ub, ur;
350
351	ua.s = xa;
352	ub.s = xb;
353
354	if (unlikely(!can_use_fpu(s))) {
355	goto soft;
356	}
357
358	float32_input_flush2(&ua.s, &ub.s, s);
359	if (unlikely(!pre(ua, ub))) {
360	goto soft;
361	}
362	if (fast_test && fast_test(ua, ub)) {
363	return fast_op(ua.s, ub.s, s);
364	}
365
366	ur.h = hard(ua.h, ub.h);
367	if (unlikely(f32_is_inf(ur))) {
368	s->float_exception_flags \|= float_flag_overflow;
369	} else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
370	if (post == NULL \|\| post(ua, ub)) {
371	goto soft;
372	}
373	}
374	return ur.s;
375
376	soft:
377	return soft(ua.s, ub.s, s);
378	}
379
380	static inline float64
381	float64_gen2(float64 xa, float64 xb, float_status *s,
382	hard_f64_op2_fn hard, soft_f64_op2_fn soft,
383	f64_check_fn pre, f64_check_fn post,
384	f64_check_fn fast_test, soft_f64_op2_fn fast_op)
385	{
386	union_float64 ua, ub, ur;
387
388	ua.s = xa;
389	ub.s = xb;
390
391	if (unlikely(!can_use_fpu(s))) {
392	goto soft;
393	}
394
395	float64_input_flush2(&ua.s, &ub.s, s);
396	if (unlikely(!pre(ua, ub))) {
397	goto soft;
398	}
399	if (fast_test && fast_test(ua, ub)) {
400	return fast_op(ua.s, ub.s, s);
401	}
402
403	ur.h = hard(ua.h, ub.h);
404	if (unlikely(f64_is_inf(ur))) {
405	s->float_exception_flags \|= float_flag_overflow;
406	} else if (unlikely(fabs(ur.h) <= DBL_MIN)) {
407	if (post == NULL \|\| post(ua, ub)) {
408	goto soft;
409	}
410	}
411	return ur.s;
412
413	soft:
414	return soft(ua.s, ub.s, s);
415	}
416
417	/----------------------------------------------------------------------------*
418	\| Returns the fraction bits of the single-precision floating-point value `a'.
419	----------------------------------------------------------------------------/
420
421	static inline uint32_t extractFloat32Frac(float32 a)
422	{
423	return float32_val(a) & `0x007FFFFF`;
424	}
425
426	/----------------------------------------------------------------------------*
427	\| Returns the exponent bits of the single-precision floating-point value `a'.
428	----------------------------------------------------------------------------/
429
430	static inline int extractFloat32Exp(float32 a)
431	{
432	return (float32_val(a) >> `23`) & `0xFF`;
433	}
434
435	/----------------------------------------------------------------------------*
436	\| Returns the sign bit of the single-precision floating-point value `a'.
437	----------------------------------------------------------------------------/
438
439	static inline flag extractFloat32Sign(float32 a)
440	{
441	return float32_val(a) >> `31`;
442	}
443
444	/----------------------------------------------------------------------------*
445	\| Returns the fraction bits of the double-precision floating-point value `a'.
446	----------------------------------------------------------------------------/
447
448	static inline uint64_t extractFloat64Frac(float64 a)
449	{
450	return float64_val(a) & UINT64_C(`0x000FFFFFFFFFFFFF`);
451	}
452
453	/----------------------------------------------------------------------------*
454	\| Returns the exponent bits of the double-precision floating-point value `a'.
455	----------------------------------------------------------------------------/
456
457	static inline int extractFloat64Exp(float64 a)
458	{
459	return (float64_val(a) >> `52`) & `0x7FF`;
460	}
461
462	/----------------------------------------------------------------------------*
463	\| Returns the sign bit of the double-precision floating-point value `a'.
464	----------------------------------------------------------------------------/
465
466	static inline flag extractFloat64Sign(float64 a)
467	{
468	return float64_val(a) >> `63`;
469	}
470
/*
 * Classify a floating point number. Everything above float_class_qnan
 * is a NaN so cls >= float_class_qnan is any NaN.
 *
 * The enumerator order is therefore significant: do not insert non-NaN
 * classes after float_class_qnan.
 */

typedef enum __attribute__ ((__packed__)) {
    float_class_unclassified,
    float_class_zero,
    float_class_normal,
    float_class_inf,
    float_class_qnan,  /* all NaNs from here */
    float_class_snan,
} FloatClass;
484
485	/ Simple helpers for checking if, or what kind of, NaN we have /
486	static inline __attribute__((unused)) bool is_nan(FloatClass c)
487	{
488	return unlikely(c >= float_class_qnan);
489	}
490
491	static inline __attribute__((unused)) bool is_snan(FloatClass c)
492	{
493	return c == float_class_snan;
494	}
495
496	static inline __attribute__((unused)) bool is_qnan(FloatClass c)
497	{
498	return c == float_class_qnan;
499	}
500
501	/*
502	* Structure holding all of the decomposed parts of a float. The
503	* exponent is unbiased and the fraction is normalized. All
504	* calculations are done with a 64 bit fraction and then rounded as
505	* appropriate for the final format.
506	*
507	* Thanks to the packed FloatClass a decent compiler should be able to
508	* fit the whole structure into registers and avoid using the stack
509	* for parameter passing.
510	*/
511
512	typedef struct {
513	uint64_t frac;
514	int32_t exp;
515	FloatClass cls;
516	bool sign;
517	} FloatParts;
518
/* Bit position of the binary point inside a 64-bit decomposed fraction. */
#define DECOMPOSED_BINARY_POINT    (64 - 2)
/* The bit at the binary point. */
#define DECOMPOSED_IMPLICIT_BIT    (1ull << DECOMPOSED_BINARY_POINT)
/* The bit directly above DECOMPOSED_IMPLICIT_BIT. */
#define DECOMPOSED_OVERFLOW_BIT    (1ull << (DECOMPOSED_BINARY_POINT + 1))
522
/* Structure holding all of the relevant parameters for a format.
 *   exp_size: the size of the exponent field
 *   exp_bias: the offset applied to the exponent field
 *   exp_max: the maximum normalised exponent
 *   frac_size: the size of the fraction field
 *   frac_shift: shift to normalise the fraction with DECOMPOSED_BINARY_POINT
 * The following are computed based on the size of the fraction
 *   frac_lsb: least significant bit of fraction
 *   frac_lsbm1: the bit below the least significant bit (for rounding)
 *   round_mask/roundeven_mask: masks used for rounding
 * The following optional modifiers are available:
 *   arm_althp: handle ARM Alternative Half Precision
 */
typedef struct {
    int exp_size;
    int exp_bias;
    int exp_max;
    int frac_size;
    int frac_shift;
    uint64_t frac_lsb;
    uint64_t frac_lsbm1;
    uint64_t round_mask;
    uint64_t roundeven_mask;
    bool arm_althp;
} FloatFmt;
548
/*
 * Expand fields based on the size of exponent and fraction.
 *
 * E is the exponent field width and F the fraction field width, both in
 * bits.  The expansion is a list of designated initializers for a FloatFmt
 * (without a trailing comma).  Arguments are parenthesized so that
 * expressions can be passed safely.
 */
#define FLOAT_PARAMS(E, F)                                           \
    .exp_size       = (E),                                           \
    .exp_bias       = ((1 << (E)) - 1) >> 1,                         \
    .exp_max        = (1 << (E)) - 1,                                \
    .frac_size      = (F),                                           \
    .frac_shift     = DECOMPOSED_BINARY_POINT - (F),                 \
    .frac_lsb       = 1ull << (DECOMPOSED_BINARY_POINT - (F)),       \
    .frac_lsbm1     = 1ull << ((DECOMPOSED_BINARY_POINT - (F)) - 1), \
    .round_mask     = (1ull << (DECOMPOSED_BINARY_POINT - (F))) - 1, \
    .roundeven_mask = (2ull << (DECOMPOSED_BINARY_POINT - (F))) - 1
560
561	static const FloatFmt float16_params = {
562	FLOAT_PARAMS(`5`, `10`)
563	};
564
565	static const FloatFmt float16_params_ahp = {
566	FLOAT_PARAMS(`5`, `10`),
567	.arm_althp = true
568	};
569
570	static const FloatFmt float32_params = {
571	FLOAT_PARAMS(`8`, `23`)
572	};
573
574	static const FloatFmt float64_params = {
575	FLOAT_PARAMS(`11`, `52`)
576	};
577
578	/ Unpack a float to parts, but do not canonicalize. /
579	static inline FloatParts unpack_raw(FloatFmt fmt, uint64_t raw)
580	{
581	const int sign_pos = fmt.frac_size + fmt.exp_size;
582
583	return (FloatParts) {
584	.cls = float_class_unclassified,
585	.sign = extract64(raw, sign_pos, `1`),
586	.exp = extract64(raw, fmt.frac_size, fmt.exp_size),
587	.frac = extract64(raw, `0`, fmt.frac_size),
588	};
589	}
590
591	static inline FloatParts float16_unpack_raw(float16 f)
592	{
593	return unpack_raw(float16_params, f);
594	}
595
596	static inline FloatParts float32_unpack_raw(float32 f)
597	{
598	return unpack_raw(float32_params, f);
599	}
600
601	static inline FloatParts float64_unpack_raw(float64 f)
602	{
603	return unpack_raw(float64_params, f);
604	}
605
606	/ Pack a float from parts, but do not canonicalize. /
607	static inline uint64_t pack_raw(FloatFmt fmt, FloatParts p)
608	{
609	const int sign_pos = fmt.frac_size + fmt.exp_size;
610	uint64_t ret = deposit64(p.frac, fmt.frac_size, fmt.exp_size, p.exp);
611	return deposit64(ret, sign_pos, `1`, p.sign);
612	}
613
614	static inline float16 float16_pack_raw(FloatParts p)
615	{
616	return make_float16(pack_raw(float16_params, p));
617	}
618
619	static inline float32 float32_pack_raw(FloatParts p)
620	{
621	return make_float32(pack_raw(float32_params, p));
622	}
623
624	static inline float64 float64_pack_raw(FloatParts p)
625	{
626	return make_float64(pack_raw(float64_params, p));
627	}
628
/*----------------------------------------------------------------------------
| Functions and definitions to determine:  (1) whether tininess for underflow
| is detected before or after rounding by default, (2) what (if anything)
| happens when exceptions are raised, (3) how signaling NaNs are distinguished
| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
| are propagated from function inputs to output.  These details are target-
| specific.
*----------------------------------------------------------------------------*/
637	#include "softfloat-specialize.inc.c"
638
639	/ Canonicalize EXP and FRAC, setting CLS. /
640	static FloatParts sf_canonicalize(FloatParts part, const FloatFmt *parm,
641	float_status *status)
642	{
643	if (part.exp == parm->exp_max && !parm->arm_althp) {
644	if (part.frac == `0`) {
645	part.cls = float_class_inf;
646	} else {
647	part.frac <<= parm->frac_shift;
648	part.cls = (parts_is_snan_frac(part.frac, status)
649	? float_class_snan : float_class_qnan);
650	}
651	} else if (part.exp == `0`) {
652	if (likely(part.frac == `0`)) {
653	part.cls = float_class_zero;
654	} else if (status->flush_inputs_to_zero) {
655	float_raise(float_flag_input_denormal, status);
656	part.cls = float_class_zero;
657	part.frac = `0`;
658	} else {
659	int shift = clz64(part.frac) - `1`;
660	part.cls = float_class_normal;
661	part.exp = parm->frac_shift - parm->exp_bias - shift + `1`;
662	part.frac <<= shift;
663	}
664	} else {
665	part.cls = float_class_normal;
666	part.exp -= parm->exp_bias;
667	part.frac = DECOMPOSED_IMPLICIT_BIT + (part.frac << parm->frac_shift);
668	}
669	return part;
670	}
671
672	/ Round and uncanonicalize a floating-point number by parts. There*
673	* are FRAC_SHIFT bits that may require rounding at the bottom of the
674	* fraction; these bits will be removed. The exponent will be biased
675	* by EXP_BIAS and must be bounded by [EXP_MAX-1, 0].
676	*/
677
678	static FloatParts round_canonical(FloatParts p, float_status *s,
679	const FloatFmt *parm)
680	{
681	const uint64_t frac_lsb = parm->frac_lsb;
682	const uint64_t frac_lsbm1 = parm->frac_lsbm1;
683	const uint64_t round_mask = parm->round_mask;
684	const uint64_t roundeven_mask = parm->roundeven_mask;
685	const int exp_max = parm->exp_max;
686	const int frac_shift = parm->frac_shift;
687	uint64_t frac, inc;
688	int exp, flags = `0`;
689	bool overflow_norm;
690
691	frac = p.frac;
692	exp = p.exp;
693
694	switch (p.cls) {
695	case float_class_normal:
696	switch (s->float_rounding_mode) {
697	case float_round_nearest_even:
698	overflow_norm = false;
699	inc = ((frac & roundeven_mask) != frac_lsbm1 ? frac_lsbm1 : `0`);
700	break;
701	case float_round_ties_away:
702	overflow_norm = false;
703	inc = frac_lsbm1;
704	break;
705	case float_round_to_zero:
706	overflow_norm = true;
707	inc = `0`;
708	break;
709	case float_round_up:
710	inc = p.sign ? `0` : round_mask;
711	overflow_norm = p.sign;
712	break;
713	case float_round_down:
714	inc = p.sign ? round_mask : `0`;
715	overflow_norm = !p.sign;
716	break;
717	case float_round_to_odd:
718	overflow_norm = true;
719	inc = frac & frac_lsb ? `0` : round_mask;
720	break;
721	default:
722	g_assert_not_reached();
723	}
724
725	exp += parm->exp_bias;
726	if (likely(exp > `0`)) {
727	if (frac & round_mask) {
728	flags \|= float_flag_inexact;
729	frac += inc;
730	if (frac & DECOMPOSED_OVERFLOW_BIT) {
731	frac >>= `1`;
732	exp++;
733	}
734	}
735	frac >>= frac_shift;
736
737	if (parm->arm_althp) {
738	/ ARM Alt HP eschews Inf and NaN for a wider exponent. /
739	if (unlikely(exp > exp_max)) {
740	/ Overflow. Return the maximum normal. /
741	flags = float_flag_invalid;
742	exp = exp_max;
743	frac = -`1`;
744	}
745	} else if (unlikely(exp >= exp_max)) {
746	flags \|= float_flag_overflow \| float_flag_inexact;
747	if (overflow_norm) {
748	exp = exp_max - `1`;
749	frac = -`1`;
750	} else {
751	p.cls = float_class_inf;
752	goto do_inf;
753	}
754	}
755	} else if (s->flush_to_zero) {
756	flags \|= float_flag_output_denormal;
757	p.cls = float_class_zero;
758	goto do_zero;
759	} else {
760	bool is_tiny = (s->float_detect_tininess
761	== float_tininess_before_rounding)
762	\|\| (exp < `0`)
763	\|\| !((frac + inc) & DECOMPOSED_OVERFLOW_BIT);
764
765	shift64RightJamming(frac, `1` - exp, &frac);
766	if (frac & round_mask) {
767	/ Need to recompute round-to-even. /
768	switch (s->float_rounding_mode) {
769	case float_round_nearest_even:
770	inc = ((frac & roundeven_mask) != frac_lsbm1
771	? frac_lsbm1 : `0`);
772	break;
773	case float_round_to_odd:
774	inc = frac & frac_lsb ? `0` : round_mask;
775	break;
776	}
777	flags \|= float_flag_inexact;
778	frac += inc;
779	}
780
781	exp = (frac & DECOMPOSED_IMPLICIT_BIT ? `1` : `0`);
782	frac >>= frac_shift;
783
784	if (is_tiny && (flags & float_flag_inexact)) {
785	flags \|= float_flag_underflow;
786	}
787	if (exp == `0` && frac == `0`) {
788	p.cls = float_class_zero;
789	}
790	}
791	break;
792
793	case float_class_zero:
794	do_zero:
795	exp = `0`;
796	frac = `0`;
797	break;
798
799	case float_class_inf:
800	do_inf:
801	assert(!parm->arm_althp);
802	exp = exp_max;
803	frac = `0`;
804	break;
805
806	case float_class_qnan:
807	case float_class_snan:
808	assert(!parm->arm_althp);
809	exp = exp_max;
810	frac >>= parm->frac_shift;
811	break;
812
813	default:
814	g_assert_not_reached();
815	}
816
817	float_raise(flags, s);
818	p.exp = exp;
819	p.frac = frac;
820	return p;
821	}
822
823	/ Explicit FloatFmt version /
824	static FloatParts float16a_unpack_canonical(float16 f, float_status *s,
825	const FloatFmt *params)
826	{
827	return sf_canonicalize(float16_unpack_raw(f), params, s);
828	}
829
830	static FloatParts float16_unpack_canonical(float16 f, float_status *s)
831	{
832	return float16a_unpack_canonical(f, s, &float16_params);
833	}
834
835	static float16 float16a_round_pack_canonical(FloatParts p, float_status *s,
836	const FloatFmt *params)
837	{
838	return float16_pack_raw(round_canonical(p, s, params));
839	}
840
841	static float16 float16_round_pack_canonical(FloatParts p, float_status *s)
842	{
843	return float16a_round_pack_canonical(p, s, &float16_params);
844	}
845
846	static FloatParts float32_unpack_canonical(float32 f, float_status *s)
847	{
848	return sf_canonicalize(float32_unpack_raw(f), &float32_params, s);
849	}
850
851	static float32 float32_round_pack_canonical(FloatParts p, float_status *s)
852	{
853	return float32_pack_raw(round_canonical(p, s, &float32_params));
854	}
855
856	static FloatParts float64_unpack_canonical(float64 f, float_status *s)
857	{
858	return sf_canonicalize(float64_unpack_raw(f), &float64_params, s);
859	}
860
861	static float64 float64_round_pack_canonical(FloatParts p, float_status *s)
862	{
863	return float64_pack_raw(round_canonical(p, s, &float64_params));
864	}
865
866	static FloatParts return_nan(FloatParts a, float_status *s)
867	{
868	switch (a.cls) {
869	case float_class_snan:
870	s->float_exception_flags \|= float_flag_invalid;
871	a = parts_silence_nan(a, s);
872	/ fall through /
873	case float_class_qnan:
874	if (s->default_nan_mode) {
875	return parts_default_nan(s);
876	}
877	break;
878
879	default:
880	g_assert_not_reached();
881	}
882	return a;
883	}
884
885	static FloatParts pick_nan(FloatParts a, FloatParts b, float_status *s)
886	{
887	if (is_snan(a.cls) \|\| is_snan(b.cls)) {
888	s->float_exception_flags \|= float_flag_invalid;
889	}
890
891	if (s->default_nan_mode) {
892	return parts_default_nan(s);
893	} else {
894	if (pickNaN(a.cls, b.cls,
895	a.frac > b.frac \|\|
896	(a.frac == b.frac && a.sign < b.sign))) {
897	a = b;
898	}
899	if (is_snan(a.cls)) {
900	return parts_silence_nan(a, s);
901	}
902	}
903	return a;
904	}
905
906	static FloatParts pick_nan_muladd(FloatParts a, FloatParts b, FloatParts c,
907	bool inf_zero, float_status *s)
908	{
909	int which;
910
911	if (is_snan(a.cls) \|\| is_snan(b.cls) \|\| is_snan(c.cls)) {
912	s->float_exception_flags \|= float_flag_invalid;
913	}
914
915	which = pickNaNMulAdd(a.cls, b.cls, c.cls, inf_zero, s);
916
917	if (s->default_nan_mode) {
918	/ Note that this check is after pickNaNMulAdd so that function*
919	* has an opportunity to set the Invalid flag.
920	*/
921	which = `3`;
922	}
923
924	switch (which) {
925	case `0`:
926	break;
927	case `1`:
928	a = b;
929	break;
930	case `2`:
931	a = c;
932	break;
933	case `3`:
934	return parts_default_nan(s);
935	default:
936	g_assert_not_reached();
937	}
938
939	if (is_snan(a.cls)) {
940	return parts_silence_nan(a, s);
941	}
942	return a;
943	}
944
945	/*
946	* Returns the result of adding or subtracting the values of the
947	* floating-point values `a' and `b'. The operation is performed
948	* according to the IEC/IEEE Standard for Binary Floating-Point
949	* Arithmetic.
950	*/
951
952	static FloatParts addsub_floats(FloatParts a, FloatParts b, bool subtract,
953	float_status *s)
954	{
955	bool a_sign = a.sign;
956	bool b_sign = b.sign ^ subtract;
957
958	if (a_sign != b_sign) {
959	/ Subtraction /
960
961	if (a.cls == float_class_normal && b.cls == float_class_normal) {
962	if (a.exp > b.exp \|\| (a.exp == b.exp && a.frac >= b.frac)) {
963	shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
964	a.frac = a.frac - b.frac;
965	} else {
966	shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
967	a.frac = b.frac - a.frac;
968	a.exp = b.exp;
969	a_sign ^= `1`;
970	}
971
972	if (a.frac == `0`) {
973	a.cls = float_class_zero;
974	a.sign = s->float_rounding_mode == float_round_down;
975	} else {
976	int shift = clz64(a.frac) - `1`;
977	a.frac = a.frac << shift;
978	a.exp = a.exp - shift;
979	a.sign = a_sign;
980	}
981	return a;
982	}
983	if (is_nan(a.cls) \|\| is_nan(b.cls)) {
984	return pick_nan(a, b, s);
985	}
986	if (a.cls == float_class_inf) {
987	if (b.cls == float_class_inf) {
988	float_raise(float_flag_invalid, s);
989	return parts_default_nan(s);
990	}
991	return a;
992	}
993	if (a.cls == float_class_zero && b.cls == float_class_zero) {
994	a.sign = s->float_rounding_mode == float_round_down;
995	return a;
996	}
997	if (a.cls == float_class_zero \|\| b.cls == float_class_inf) {
998	b.sign = a_sign ^ `1`;
999	return b;
1000	}
1001	if (b.cls == float_class_zero) {
1002	return a;
1003	}
1004	} else {
1005	/ Addition /
1006	if (a.cls == float_class_normal && b.cls == float_class_normal) {
1007	if (a.exp > b.exp) {
1008	shift64RightJamming(b.frac, a.exp - b.exp, &b.frac);
1009	} else if (a.exp < b.exp) {
1010	shift64RightJamming(a.frac, b.exp - a.exp, &a.frac);
1011	a.exp = b.exp;
1012	}
1013	a.frac += b.frac;
1014	if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
1015	shift64RightJamming(a.frac, `1`, &a.frac);
1016	a.exp += `1`;
1017	}
1018	return a;
1019	}
1020	if (is_nan(a.cls) \|\| is_nan(b.cls)) {
1021	return pick_nan(a, b, s);
1022	}
1023	if (a.cls == float_class_inf \|\| b.cls == float_class_zero) {
1024	return a;
1025	}
1026	if (b.cls == float_class_inf \|\| a.cls == float_class_zero) {
1027	b.sign = b_sign;
1028	return b;
1029	}
1030	}
1031	g_assert_not_reached();
1032	}
1033
1034	/*
1035	* Returns the result of adding or subtracting the floating-point
1036	* values `a' and `b'. The operation is performed according to the
1037	* IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1038	*/
1039
1040	float16 QEMU_FLATTEN float16_add(float16 a, float16 b, float_status *status)
1041	{
1042	FloatParts pa = float16_unpack_canonical(a, status);
1043	FloatParts pb = float16_unpack_canonical(b, status);
1044	FloatParts pr = addsub_floats(pa, pb, false, status);
1045
1046	return float16_round_pack_canonical(pr, status);
1047	}
1048
1049	float16 QEMU_FLATTEN float16_sub(float16 a, float16 b, float_status *status)
1050	{
1051	FloatParts pa = float16_unpack_canonical(a, status);
1052	FloatParts pb = float16_unpack_canonical(b, status);
1053	FloatParts pr = addsub_floats(pa, pb, true, status);
1054
1055	return float16_round_pack_canonical(pr, status);
1056	}
1057
1058	static float32 QEMU_SOFTFLOAT_ATTR
1059	soft_f32_addsub(float32 a, float32 b, bool subtract, float_status *status)
1060	{
1061	FloatParts pa = float32_unpack_canonical(a, status);
1062	FloatParts pb = float32_unpack_canonical(b, status);
1063	FloatParts pr = addsub_floats(pa, pb, subtract, status);
1064
1065	return float32_round_pack_canonical(pr, status);
1066	}
1067
1068	static inline float32 soft_f32_add(float32 a, float32 b, float_status *status)
1069	{
1070	return soft_f32_addsub(a, b, false, status);
1071	}
1072
1073	static inline float32 soft_f32_sub(float32 a, float32 b, float_status *status)
1074	{
1075	return soft_f32_addsub(a, b, true, status);
1076	}
1077
1078	static float64 QEMU_SOFTFLOAT_ATTR
1079	soft_f64_addsub(float64 a, float64 b, bool subtract, float_status *status)
1080	{
1081	FloatParts pa = float64_unpack_canonical(a, status);
1082	FloatParts pb = float64_unpack_canonical(b, status);
1083	FloatParts pr = addsub_floats(pa, pb, subtract, status);
1084
1085	return float64_round_pack_canonical(pr, status);
1086	}
1087
1088	static inline float64 soft_f64_add(float64 a, float64 b, float_status *status)
1089	{
1090	return soft_f64_addsub(a, b, false, status);
1091	}
1092
1093	static inline float64 soft_f64_sub(float64 a, float64 b, float_status *status)
1094	{
1095	return soft_f64_addsub(a, b, true, status);
1096	}
1097
/* Host-FPU single-precision addition: returns a + b using native `float'. */
static float hard_f32_add(float a, float b)
{
    return a + b;
}
1102
/* Host-FPU single-precision subtraction: returns a - b using native `float'. */
static float hard_f32_sub(float a, float b)
{
    return a - b;
}
1107
/* Host-FPU double-precision addition: returns a + b using native `double'. */
static double hard_f64_add(double a, double b)
{
    return a + b;
}
1112
/* Host-FPU double-precision subtraction: returns a - b using native `double'. */
static double hard_f64_sub(double a, double b)
{
    return a - b;
}
1117
1118	static bool f32_addsub_post(union_float32 a, union_float32 b)
1119	{
1120	if (QEMU_HARDFLOAT_2F32_USE_FP) {
1121	return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1122	}
1123	return !(float32_is_zero(a.s) && float32_is_zero(b.s));
1124	}
1125
1126	static bool f64_addsub_post(union_float64 a, union_float64 b)
1127	{
1128	if (QEMU_HARDFLOAT_2F64_USE_FP) {
1129	return !(fpclassify(a.h) == FP_ZERO && fpclassify(b.h) == FP_ZERO);
1130	} else {
1131	return !(float64_is_zero(a.s) && float64_is_zero(b.s));
1132	}
1133	}
1134
1135	static float32 float32_addsub(float32 a, float32 b, float_status *s,
1136	hard_f32_op2_fn hard, soft_f32_op2_fn soft)
1137	{
1138	return float32_gen2(a, b, s, hard, soft,
1139	f32_is_zon2, f32_addsub_post, NULL, NULL);
1140	}
1141
1142	static float64 float64_addsub(float64 a, float64 b, float_status *s,
1143	hard_f64_op2_fn hard, soft_f64_op2_fn soft)
1144	{
1145	return float64_gen2(a, b, s, hard, soft,
1146	f64_is_zon2, f64_addsub_post, NULL, NULL);
1147	}
1148
1149	float32 QEMU_FLATTEN
1150	float32_add(float32 a, float32 b, float_status *s)
1151	{
1152	return float32_addsub(a, b, s, hard_f32_add, soft_f32_add);
1153	}
1154
1155	float32 QEMU_FLATTEN
1156	float32_sub(float32 a, float32 b, float_status *s)
1157	{
1158	return float32_addsub(a, b, s, hard_f32_sub, soft_f32_sub);
1159	}
1160
1161	float64 QEMU_FLATTEN
1162	float64_add(float64 a, float64 b, float_status *s)
1163	{
1164	return float64_addsub(a, b, s, hard_f64_add, soft_f64_add);
1165	}
1166
1167	float64 QEMU_FLATTEN
1168	float64_sub(float64 a, float64 b, float_status *s)
1169	{
1170	return float64_addsub(a, b, s, hard_f64_sub, soft_f64_sub);
1171	}
1172
1173	/*
1174	* Returns the result of multiplying the floating-point values `a' and
1175	* `b'. The operation is performed according to the IEC/IEEE Standard
1176	* for Binary Floating-Point Arithmetic.
1177	*/
1178
1179	static FloatParts mul_floats(FloatParts a, FloatParts b, float_status *s)
1180	{
1181	bool sign = a.sign ^ b.sign;
1182
1183	if (a.cls == float_class_normal && b.cls == float_class_normal) {
1184	uint64_t hi, lo;
1185	int exp = a.exp + b.exp;
1186
1187	mul64To128(a.frac, b.frac, &hi, &lo);
1188	shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1189	if (lo & DECOMPOSED_OVERFLOW_BIT) {
1190	shift64RightJamming(lo, `1`, &lo);
1191	exp += `1`;
1192	}
1193
1194	/ Re-use a /
1195	a.exp = exp;
1196	a.sign = sign;
1197	a.frac = lo;
1198	return a;
1199	}
1200	/ handle all the NaN cases /
1201	if (is_nan(a.cls) \|\| is_nan(b.cls)) {
1202	return pick_nan(a, b, s);
1203	}
1204	/ Inf * Zero == NaN /
1205	if ((a.cls == float_class_inf && b.cls == float_class_zero) \|\|
1206	(a.cls == float_class_zero && b.cls == float_class_inf)) {
1207	s->float_exception_flags \|= float_flag_invalid;
1208	return parts_default_nan(s);
1209	}
1210	/ Multiply by 0 or Inf /
1211	if (a.cls == float_class_inf \|\| a.cls == float_class_zero) {
1212	a.sign = sign;
1213	return a;
1214	}
1215	if (b.cls == float_class_inf \|\| b.cls == float_class_zero) {
1216	b.sign = sign;
1217	return b;
1218	}
1219	g_assert_not_reached();
1220	}
1221
1222	float16 QEMU_FLATTEN float16_mul(float16 a, float16 b, float_status *status)
1223	{
1224	FloatParts pa = float16_unpack_canonical(a, status);
1225	FloatParts pb = float16_unpack_canonical(b, status);
1226	FloatParts pr = mul_floats(pa, pb, status);
1227
1228	return float16_round_pack_canonical(pr, status);
1229	}
1230
1231	static float32 QEMU_SOFTFLOAT_ATTR
1232	soft_f32_mul(float32 a, float32 b, float_status *status)
1233	{
1234	FloatParts pa = float32_unpack_canonical(a, status);
1235	FloatParts pb = float32_unpack_canonical(b, status);
1236	FloatParts pr = mul_floats(pa, pb, status);
1237
1238	return float32_round_pack_canonical(pr, status);
1239	}
1240
1241	static float64 QEMU_SOFTFLOAT_ATTR
1242	soft_f64_mul(float64 a, float64 b, float_status *status)
1243	{
1244	FloatParts pa = float64_unpack_canonical(a, status);
1245	FloatParts pb = float64_unpack_canonical(b, status);
1246	FloatParts pr = mul_floats(pa, pb, status);
1247
1248	return float64_round_pack_canonical(pr, status);
1249	}
1250
/* Host-FPU single-precision multiplication: returns a * b using native `float'. */
static float hard_f32_mul(float a, float b)
{
    return a * b;
}
1255
/* Host-FPU double-precision multiplication: returns a * b using native `double'. */
static double hard_f64_mul(double a, double b)
{
    return a * b;
}
1260
1261	static bool f32_mul_fast_test(union_float32 a, union_float32 b)
1262	{
1263	return float32_is_zero(a.s) \|\| float32_is_zero(b.s);
1264	}
1265
1266	static bool f64_mul_fast_test(union_float64 a, union_float64 b)
1267	{
1268	return float64_is_zero(a.s) \|\| float64_is_zero(b.s);
1269	}
1270
1271	static float32 f32_mul_fast_op(float32 a, float32 b, float_status *s)
1272	{
1273	bool signbit = float32_is_neg(a) ^ float32_is_neg(b);
1274
1275	return float32_set_sign(float32_zero, signbit);
1276	}
1277
1278	static float64 f64_mul_fast_op(float64 a, float64 b, float_status *s)
1279	{
1280	bool signbit = float64_is_neg(a) ^ float64_is_neg(b);
1281
1282	return float64_set_sign(float64_zero, signbit);
1283	}
1284
1285	float32 QEMU_FLATTEN
1286	float32_mul(float32 a, float32 b, float_status *s)
1287	{
1288	return float32_gen2(a, b, s, hard_f32_mul, soft_f32_mul,
1289	f32_is_zon2, NULL, f32_mul_fast_test, f32_mul_fast_op);
1290	}
1291
1292	float64 QEMU_FLATTEN
1293	float64_mul(float64 a, float64 b, float_status *s)
1294	{
1295	return float64_gen2(a, b, s, hard_f64_mul, soft_f64_mul,
1296	f64_is_zon2, NULL, f64_mul_fast_test, f64_mul_fast_op);
1297	}
1298
1299	/*
1300	* Returns the result of multiplying the floating-point values `a' and
1301	* `b' then adding 'c', with no intermediate rounding step after the
1302	* multiplication. The operation is performed according to the
1303	* IEC/IEEE Standard for Binary Floating-Point Arithmetic 754-2008.
1304	* The flags argument allows the caller to select negation of the
1305	* addend, the intermediate product, or the final result. (The
1306	* difference between this and having the caller do a separate
1307	* negation is that negating externally will flip the sign bit on
1308	* NaNs.)
1309	*/
1310
1311	static FloatParts muladd_floats(FloatParts a, FloatParts b, FloatParts c,
1312	int flags, float_status *s)
1313	{
1314	bool inf_zero = ((`1` << a.cls) \| (`1` << b.cls)) ==
1315	((`1` << float_class_inf) \| (`1` << float_class_zero));
1316	bool p_sign;
1317	bool sign_flip = flags & float_muladd_negate_result;
1318	FloatClass p_class;
1319	uint64_t hi, lo;
1320	int p_exp;
1321
1322	/ It is implementation-defined whether the cases of (0,inf,qnan)*
1323	* and (inf,0,qnan) raise InvalidOperation or not (and what QNaN
1324	* they return if they do), so we have to hand this information
1325	* off to the target-specific pick-a-NaN routine.
1326	*/
1327	if (is_nan(a.cls) \|\| is_nan(b.cls) \|\| is_nan(c.cls)) {
1328	return pick_nan_muladd(a, b, c, inf_zero, s);
1329	}
1330
1331	if (inf_zero) {
1332	s->float_exception_flags \|= float_flag_invalid;
1333	return parts_default_nan(s);
1334	}
1335
1336	if (flags & float_muladd_negate_c) {
1337	c.sign ^= `1`;
1338	}
1339
1340	p_sign = a.sign ^ b.sign;
1341
1342	if (flags & float_muladd_negate_product) {
1343	p_sign ^= `1`;
1344	}
1345
1346	if (a.cls == float_class_inf \|\| b.cls == float_class_inf) {
1347	p_class = float_class_inf;
1348	} else if (a.cls == float_class_zero \|\| b.cls == float_class_zero) {
1349	p_class = float_class_zero;
1350	} else {
1351	p_class = float_class_normal;
1352	}
1353
1354	if (c.cls == float_class_inf) {
1355	if (p_class == float_class_inf && p_sign != c.sign) {
1356	s->float_exception_flags \|= float_flag_invalid;
1357	return parts_default_nan(s);
1358	} else {
1359	a.cls = float_class_inf;
1360	a.sign = c.sign ^ sign_flip;
1361	return a;
1362	}
1363	}
1364
1365	if (p_class == float_class_inf) {
1366	a.cls = float_class_inf;
1367	a.sign = p_sign ^ sign_flip;
1368	return a;
1369	}
1370
1371	if (p_class == float_class_zero) {
1372	if (c.cls == float_class_zero) {
1373	if (p_sign != c.sign) {
1374	p_sign = s->float_rounding_mode == float_round_down;
1375	}
1376	c.sign = p_sign;
1377	} else if (flags & float_muladd_halve_result) {
1378	c.exp -= `1`;
1379	}
1380	c.sign ^= sign_flip;
1381	return c;
1382	}
1383
1384	/ a & b should be normals now... /
1385	assert(a.cls == float_class_normal &&
1386	b.cls == float_class_normal);
1387
1388	p_exp = a.exp + b.exp;
1389
1390	/ Multiply of 2 62-bit numbers produces a (262) == 124-bit
1391	* result.
1392	*/
1393	mul64To128(a.frac, b.frac, &hi, &lo);
1394	/ binary point now at bit 124 /
1395
1396	/ check for overflow /
1397	if (hi & (`1ULL` << (DECOMPOSED_BINARY_POINT * `2` + `1` - `64`))) {
1398	shift128RightJamming(hi, lo, `1`, &hi, &lo);
1399	p_exp += `1`;
1400	}
1401
1402	/ + add/sub /
1403	if (c.cls == float_class_zero) {
1404	/ move binary point back to 62 /
1405	shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1406	} else {
1407	int exp_diff = p_exp - c.exp;
1408	if (p_sign == c.sign) {
1409	/ Addition /
1410	if (exp_diff <= `0`) {
1411	shift128RightJamming(hi, lo,
1412	DECOMPOSED_BINARY_POINT - exp_diff,
1413	&hi, &lo);
1414	lo += c.frac;
1415	p_exp = c.exp;
1416	} else {
1417	uint64_t c_hi, c_lo;
1418	/ shift c to the same binary point as the product (124) /
1419	c_hi = c.frac >> `2`;
1420	c_lo = `0`;
1421	shift128RightJamming(c_hi, c_lo,
1422	exp_diff,
1423	&c_hi, &c_lo);
1424	add128(hi, lo, c_hi, c_lo, &hi, &lo);
1425	/ move binary point back to 62 /
1426	shift128RightJamming(hi, lo, DECOMPOSED_BINARY_POINT, &hi, &lo);
1427	}
1428
1429	if (lo & DECOMPOSED_OVERFLOW_BIT) {
1430	shift64RightJamming(lo, `1`, &lo);
1431	p_exp += `1`;
1432	}
1433
1434	} else {
1435	/ Subtraction /
1436	uint64_t c_hi, c_lo;
1437	/ make C binary point match product at bit 124 /
1438	c_hi = c.frac >> `2`;
1439	c_lo = `0`;
1440
1441	if (exp_diff <= `0`) {
1442	shift128RightJamming(hi, lo, -exp_diff, &hi, &lo);
1443	if (exp_diff == `0`
1444	&&
1445	(hi > c_hi \|\| (hi == c_hi && lo >= c_lo))) {
1446	sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1447	} else {
1448	sub128(c_hi, c_lo, hi, lo, &hi, &lo);
1449	p_sign ^= `1`;
1450	p_exp = c.exp;
1451	}
1452	} else {
1453	shift128RightJamming(c_hi, c_lo,
1454	exp_diff,
1455	&c_hi, &c_lo);
1456	sub128(hi, lo, c_hi, c_lo, &hi, &lo);
1457	}
1458
1459	if (hi == `0` && lo == `0`) {
1460	a.cls = float_class_zero;
1461	a.sign = s->float_rounding_mode == float_round_down;
1462	a.sign ^= sign_flip;
1463	return a;
1464	} else {
1465	int shift;
1466	if (hi != `0`) {
1467	shift = clz64(hi);
1468	} else {
1469	shift = clz64(lo) + `64`;
1470	}
1471	/ Normalizing to a binary point of 124 is the*
1472	correct adjust for the exponent. However since we're
1473	shifting, we might as well put the binary point back
1474	at 62 where we really want it. Therefore shift as
1475	if we're leaving 1 bit at the top of the word, but
1476	adjust the exponent as if we're leaving 3 bits. /*
1477	shift -= `1`;
1478	if (shift >= `64`) {
1479	lo = lo << (shift - `64`);
1480	} else {
1481	hi = (hi << shift) \| (lo >> (`64` - shift));
1482	lo = hi \| ((lo << shift) != `0`);
1483	}
1484	p_exp -= shift - `2`;
1485	}
1486	}
1487	}
1488
1489	if (flags & float_muladd_halve_result) {
1490	p_exp -= `1`;
1491	}
1492
1493	/ finally prepare our result /
1494	a.cls = float_class_normal;
1495	a.sign = p_sign ^ sign_flip;
1496	a.exp = p_exp;
1497	a.frac = lo;
1498
1499	return a;
1500	}
1501
1502	float16 QEMU_FLATTEN float16_muladd(float16 a, float16 b, float16 c,
1503	int flags, float_status *status)
1504	{
1505	FloatParts pa = float16_unpack_canonical(a, status);
1506	FloatParts pb = float16_unpack_canonical(b, status);
1507	FloatParts pc = float16_unpack_canonical(c, status);
1508	FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1509
1510	return float16_round_pack_canonical(pr, status);
1511	}
1512
1513	static float32 QEMU_SOFTFLOAT_ATTR
1514	soft_f32_muladd(float32 a, float32 b, float32 c, int flags,
1515	float_status *status)
1516	{
1517	FloatParts pa = float32_unpack_canonical(a, status);
1518	FloatParts pb = float32_unpack_canonical(b, status);
1519	FloatParts pc = float32_unpack_canonical(c, status);
1520	FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1521
1522	return float32_round_pack_canonical(pr, status);
1523	}
1524
1525	static float64 QEMU_SOFTFLOAT_ATTR
1526	soft_f64_muladd(float64 a, float64 b, float64 c, int flags,
1527	float_status *status)
1528	{
1529	FloatParts pa = float64_unpack_canonical(a, status);
1530	FloatParts pb = float64_unpack_canonical(b, status);
1531	FloatParts pc = float64_unpack_canonical(c, status);
1532	FloatParts pr = muladd_floats(pa, pb, pc, flags, status);
1533
1534	return float64_round_pack_canonical(pr, status);
1535	}
1536
1537	static bool force_soft_fma;
1538
1539	float32 QEMU_FLATTEN
1540	float32_muladd(float32 xa, float32 xb, float32 xc, int flags, float_status *s)
1541	{
1542	union_float32 ua, ub, uc, ur;
1543
1544	ua.s = xa;
1545	ub.s = xb;
1546	uc.s = xc;
1547
1548	if (unlikely(!can_use_fpu(s))) {
1549	goto soft;
1550	}
1551	if (unlikely(flags & float_muladd_halve_result)) {
1552	goto soft;
1553	}
1554
1555	float32_input_flush3(&ua.s, &ub.s, &uc.s, s);
1556	if (unlikely(!f32_is_zon3(ua, ub, uc))) {
1557	goto soft;
1558	}
1559
1560	if (unlikely(force_soft_fma)) {
1561	goto soft;
1562	}
1563
1564	/*
1565	* When (a \|\| b) == 0, there's no need to check for under/over flow,
1566	* since we know the addend is (normal \|\| 0) and the product is 0.
1567	*/
1568	if (float32_is_zero(ua.s) \|\| float32_is_zero(ub.s)) {
1569	union_float32 up;
1570	bool prod_sign;
1571
1572	prod_sign = float32_is_neg(ua.s) ^ float32_is_neg(ub.s);
1573	prod_sign ^= !!(flags & float_muladd_negate_product);
1574	up.s = float32_set_sign(float32_zero, prod_sign);
1575
1576	if (flags & float_muladd_negate_c) {
1577	uc.h = -uc.h;
1578	}
1579	ur.h = up.h + uc.h;
1580	} else {
1581	union_float32 ua_orig = ua;
1582	union_float32 uc_orig = uc;
1583
1584	if (flags & float_muladd_negate_product) {
1585	ua.h = -ua.h;
1586	}
1587	if (flags & float_muladd_negate_c) {
1588	uc.h = -uc.h;
1589	}
1590
1591	ur.h = fmaf(ua.h, ub.h, uc.h);
1592
1593	if (unlikely(f32_is_inf(ur))) {
1594	s->float_exception_flags \|= float_flag_overflow;
1595	} else if (unlikely(fabsf(ur.h) <= FLT_MIN)) {
1596	ua = ua_orig;
1597	uc = uc_orig;
1598	goto soft;
1599	}
1600	}
1601	if (flags & float_muladd_negate_result) {
1602	return float32_chs(ur.s);
1603	}
1604	return ur.s;
1605
1606	soft:
1607	return soft_f32_muladd(ua.s, ub.s, uc.s, flags, s);
1608	}
1609
1610	float64 QEMU_FLATTEN
1611	float64_muladd(float64 xa, float64 xb, float64 xc, int flags, float_status *s)
1612	{
1613	union_float64 ua, ub, uc, ur;
1614
1615	ua.s = xa;
1616	ub.s = xb;
1617	uc.s = xc;
1618
1619	if (unlikely(!can_use_fpu(s))) {
1620	goto soft;
1621	}
1622	if (unlikely(flags & float_muladd_halve_result)) {
1623	goto soft;
1624	}
1625
1626	float64_input_flush3(&ua.s, &ub.s, &uc.s, s);
1627	if (unlikely(!f64_is_zon3(ua, ub, uc))) {
1628	goto soft;
1629	}
1630
1631	if (unlikely(force_soft_fma)) {
1632	goto soft;
1633	}
1634
1635	/*
1636	* When (a \|\| b) == 0, there's no need to check for under/over flow,
1637	* since we know the addend is (normal \|\| 0) and the product is 0.
1638	*/
1639	if (float64_is_zero(ua.s) \|\| float64_is_zero(ub.s)) {
1640	union_float64 up;
1641	bool prod_sign;
1642
1643	prod_sign = float64_is_neg(ua.s) ^ float64_is_neg(ub.s);
1644	prod_sign ^= !!(flags & float_muladd_negate_product);
1645	up.s = float64_set_sign(float64_zero, prod_sign);
1646
1647	if (flags & float_muladd_negate_c) {
1648	uc.h = -uc.h;
1649	}
1650	ur.h = up.h + uc.h;
1651	} else {
1652	union_float64 ua_orig = ua;
1653	union_float64 uc_orig = uc;
1654
1655	if (flags & float_muladd_negate_product) {
1656	ua.h = -ua.h;
1657	}
1658	if (flags & float_muladd_negate_c) {
1659	uc.h = -uc.h;
1660	}
1661
1662	ur.h = fma(ua.h, ub.h, uc.h);
1663
1664	if (unlikely(f64_is_inf(ur))) {
1665	s->float_exception_flags \|= float_flag_overflow;
1666	} else if (unlikely(fabs(ur.h) <= FLT_MIN)) {
1667	ua = ua_orig;
1668	uc = uc_orig;
1669	goto soft;
1670	}
1671	}
1672	if (flags & float_muladd_negate_result) {
1673	return float64_chs(ur.s);
1674	}
1675	return ur.s;
1676
1677	soft:
1678	return soft_f64_muladd(ua.s, ub.s, uc.s, flags, s);
1679	}
1680
1681	/*
1682	* Returns the result of dividing the floating-point value `a' by the
1683	* corresponding value `b'. The operation is performed according to
1684	* the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1685	*/
1686
1687	static FloatParts div_floats(FloatParts a, FloatParts b, float_status *s)
1688	{
1689	bool sign = a.sign ^ b.sign;
1690
1691	if (a.cls == float_class_normal && b.cls == float_class_normal) {
1692	uint64_t n0, n1, q, r;
1693	int exp = a.exp - b.exp;
1694
1695	/*
1696	* We want a 2*N / N-bit division to produce exactly an N-bit
1697	* result, so that we do not lose any precision and so that we
1698	* do not have to renormalize afterward. If A.frac < B.frac,
1699	* then division would produce an (N-1)-bit result; shift A left
1700	* by one to produce the an N-bit result, and decrement the
1701	* exponent to match.
1702	*
1703	* The udiv_qrnnd algorithm that we're using requires normalization,
1704	* i.e. the msb of the denominator must be set. Since we know that
1705	* DECOMPOSED_BINARY_POINT is msb-1, the inputs must be shifted left
1706	* by one (more), and the remainder must be shifted right by one.
1707	*/
1708	if (a.frac < b.frac) {
1709	exp -= `1`;
1710	shift128Left(`0`, a.frac, DECOMPOSED_BINARY_POINT + `2`, &n1, &n0);
1711	} else {
1712	shift128Left(`0`, a.frac, DECOMPOSED_BINARY_POINT + `1`, &n1, &n0);
1713	}
1714	q = udiv_qrnnd(&r, n1, n0, b.frac << `1`);
1715
1716	/*
1717	* Set lsb if there is a remainder, to set inexact.
1718	* As mentioned above, to find the actual value of the remainder we
1719	* would need to shift right, but (1) we are only concerned about
1720	* non-zero-ness, and (2) the remainder will always be even because
1721	* both inputs to the division primitive are even.
1722	*/
1723	a.frac = q \| (r != `0`);
1724	a.sign = sign;
1725	a.exp = exp;
1726	return a;
1727	}
1728	/ handle all the NaN cases /
1729	if (is_nan(a.cls) \|\| is_nan(b.cls)) {
1730	return pick_nan(a, b, s);
1731	}
1732	/ 0/0 or Inf/Inf /
1733	if (a.cls == b.cls
1734	&&
1735	(a.cls == float_class_inf \|\| a.cls == float_class_zero)) {
1736	s->float_exception_flags \|= float_flag_invalid;
1737	return parts_default_nan(s);
1738	}
1739	/ Inf / x or 0 / x /
1740	if (a.cls == float_class_inf \|\| a.cls == float_class_zero) {
1741	a.sign = sign;
1742	return a;
1743	}
1744	/ Div 0 => Inf /
1745	if (b.cls == float_class_zero) {
1746	s->float_exception_flags \|= float_flag_divbyzero;
1747	a.cls = float_class_inf;
1748	a.sign = sign;
1749	return a;
1750	}
1751	/ Div by Inf /
1752	if (b.cls == float_class_inf) {
1753	a.cls = float_class_zero;
1754	a.sign = sign;
1755	return a;
1756	}
1757	g_assert_not_reached();
1758	}
1759
1760	float16 float16_div(float16 a, float16 b, float_status *status)
1761	{
1762	FloatParts pa = float16_unpack_canonical(a, status);
1763	FloatParts pb = float16_unpack_canonical(b, status);
1764	FloatParts pr = div_floats(pa, pb, status);
1765
1766	return float16_round_pack_canonical(pr, status);
1767	}
1768
1769	static float32 QEMU_SOFTFLOAT_ATTR
1770	soft_f32_div(float32 a, float32 b, float_status *status)
1771	{
1772	FloatParts pa = float32_unpack_canonical(a, status);
1773	FloatParts pb = float32_unpack_canonical(b, status);
1774	FloatParts pr = div_floats(pa, pb, status);
1775
1776	return float32_round_pack_canonical(pr, status);
1777	}
1778
1779	static float64 QEMU_SOFTFLOAT_ATTR
1780	soft_f64_div(float64 a, float64 b, float_status *status)
1781	{
1782	FloatParts pa = float64_unpack_canonical(a, status);
1783	FloatParts pb = float64_unpack_canonical(b, status);
1784	FloatParts pr = div_floats(pa, pb, status);
1785
1786	return float64_round_pack_canonical(pr, status);
1787	}
1788
/* Host-FPU single-precision division: returns a / b using native `float'. */
static float hard_f32_div(float a, float b)
{
    return a / b;
}
1793
/* Host-FPU double-precision division: returns a / b using native `double'. */
static double hard_f64_div(double a, double b)
{
    return a / b;
}
1798
1799	static bool f32_div_pre(union_float32 a, union_float32 b)
1800	{
1801	if (QEMU_HARDFLOAT_2F32_USE_FP) {
1802	return (fpclassify(a.h) == FP_NORMAL \|\| fpclassify(a.h) == FP_ZERO) &&
1803	fpclassify(b.h) == FP_NORMAL;
1804	}
1805	return float32_is_zero_or_normal(a.s) && float32_is_normal(b.s);
1806	}
1807
1808	static bool f64_div_pre(union_float64 a, union_float64 b)
1809	{
1810	if (QEMU_HARDFLOAT_2F64_USE_FP) {
1811	return (fpclassify(a.h) == FP_NORMAL \|\| fpclassify(a.h) == FP_ZERO) &&
1812	fpclassify(b.h) == FP_NORMAL;
1813	}
1814	return float64_is_zero_or_normal(a.s) && float64_is_normal(b.s);
1815	}
1816
1817	static bool f32_div_post(union_float32 a, union_float32 b)
1818	{
1819	if (QEMU_HARDFLOAT_2F32_USE_FP) {
1820	return fpclassify(a.h) != FP_ZERO;
1821	}
1822	return !float32_is_zero(a.s);
1823	}
1824
1825	static bool f64_div_post(union_float64 a, union_float64 b)
1826	{
1827	if (QEMU_HARDFLOAT_2F64_USE_FP) {
1828	return fpclassify(a.h) != FP_ZERO;
1829	}
1830	return !float64_is_zero(a.s);
1831	}
1832
1833	float32 QEMU_FLATTEN
1834	float32_div(float32 a, float32 b, float_status *s)
1835	{
1836	return float32_gen2(a, b, s, hard_f32_div, soft_f32_div,
1837	f32_div_pre, f32_div_post, NULL, NULL);
1838	}
1839
1840	float64 QEMU_FLATTEN
1841	float64_div(float64 a, float64 b, float_status *s)
1842	{
1843	return float64_gen2(a, b, s, hard_f64_div, soft_f64_div,
1844	f64_div_pre, f64_div_post, NULL, NULL);
1845	}
1846
1847	/*
1848	* Float to Float conversions
1849	*
1850	* Returns the result of converting one float format to another. The
1851	* conversion is performed according to the IEC/IEEE Standard for
1852	* Binary Floating-Point Arithmetic.
1853	*
1854	* The float_to_float helper only needs to take care of raising
1855	* invalid exceptions and handling the conversion on NaNs.
1856	*/
1857
1858	static FloatParts float_to_float(FloatParts a, const FloatFmt *dstf,
1859	float_status *s)
1860	{
1861	if (dstf->arm_althp) {
1862	switch (a.cls) {
1863	case float_class_qnan:
1864	case float_class_snan:
1865	/ There is no NaN in the destination format. Raise Invalid*
1866	* and return a zero with the sign of the input NaN.
1867	*/
1868	s->float_exception_flags \|= float_flag_invalid;
1869	a.cls = float_class_zero;
1870	a.frac = `0`;
1871	a.exp = `0`;
1872	break;
1873
1874	case float_class_inf:
1875	/ There is no Inf in the destination format. Raise Invalid*
1876	* and return the maximum normal with the correct sign.
1877	*/
1878	s->float_exception_flags \|= float_flag_invalid;
1879	a.cls = float_class_normal;
1880	a.exp = dstf->exp_max;
1881	a.frac = ((`1ull` << dstf->frac_size) - `1`) << dstf->frac_shift;
1882	break;
1883
1884	default:
1885	break;
1886	}
1887	} else if (is_nan(a.cls)) {
1888	if (is_snan(a.cls)) {
1889	s->float_exception_flags \|= float_flag_invalid;
1890	a = parts_silence_nan(a, s);
1891	}
1892	if (s->default_nan_mode) {
1893	return parts_default_nan(s);
1894	}
1895	}
1896	return a;
1897	}
1898
1899	float32 float16_to_float32(float16 a, bool ieee, float_status *s)
1900	{
1901	const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1902	FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1903	FloatParts pr = float_to_float(p, &float32_params, s);
1904	return float32_round_pack_canonical(pr, s);
1905	}
1906
1907	float64 float16_to_float64(float16 a, bool ieee, float_status *s)
1908	{
1909	const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1910	FloatParts p = float16a_unpack_canonical(a, s, fmt16);
1911	FloatParts pr = float_to_float(p, &float64_params, s);
1912	return float64_round_pack_canonical(pr, s);
1913	}
1914
1915	float16 float32_to_float16(float32 a, bool ieee, float_status *s)
1916	{
1917	const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1918	FloatParts p = float32_unpack_canonical(a, s);
1919	FloatParts pr = float_to_float(p, fmt16, s);
1920	return float16a_round_pack_canonical(pr, s, fmt16);
1921	}
1922
1923	float64 float32_to_float64(float32 a, float_status *s)
1924	{
1925	FloatParts p = float32_unpack_canonical(a, s);
1926	FloatParts pr = float_to_float(p, &float64_params, s);
1927	return float64_round_pack_canonical(pr, s);
1928	}
1929
1930	float16 float64_to_float16(float64 a, bool ieee, float_status *s)
1931	{
1932	const FloatFmt *fmt16 = ieee ? &float16_params : &float16_params_ahp;
1933	FloatParts p = float64_unpack_canonical(a, s);
1934	FloatParts pr = float_to_float(p, fmt16, s);
1935	return float16a_round_pack_canonical(pr, s, fmt16);
1936	}
1937
1938	float32 float64_to_float32(float64 a, float_status *s)
1939	{
1940	FloatParts p = float64_unpack_canonical(a, s);
1941	FloatParts pr = float_to_float(p, &float32_params, s);
1942	return float32_round_pack_canonical(pr, s);
1943	}
1944
1945	/*
1946	* Rounds the floating-point value `a' to an integer, and returns the
1947	* result as a floating-point value. The operation is performed
1948	* according to the IEC/IEEE Standard for Binary Floating-Point
1949	* Arithmetic.
1950	*/
1951
1952	static FloatParts round_to_int(FloatParts a, int rmode,
1953	int scale, float_status *s)
1954	{
1955	switch (a.cls) {
1956	case float_class_qnan:
1957	case float_class_snan:
1958	return return_nan(a, s);
1959
1960	case float_class_zero:
1961	case float_class_inf:
1962	/ already "integral" /
1963	break;
1964
1965	case float_class_normal:
1966	scale = MIN(MAX(scale, -`0x10000`), `0x10000`);
1967	a.exp += scale;
1968
1969	if (a.exp >= DECOMPOSED_BINARY_POINT) {
1970	/ already integral /
1971	break;
1972	}
1973	if (a.exp < `0`) {
1974	bool one;
1975	/ all fractional /
1976	s->float_exception_flags \|= float_flag_inexact;
1977	switch (rmode) {
1978	case float_round_nearest_even:
1979	one = a.exp == -`1` && a.frac > DECOMPOSED_IMPLICIT_BIT;
1980	break;
1981	case float_round_ties_away:
1982	one = a.exp == -`1` && a.frac >= DECOMPOSED_IMPLICIT_BIT;
1983	break;
1984	case float_round_to_zero:
1985	one = false;
1986	break;
1987	case float_round_up:
1988	one = !a.sign;
1989	break;
1990	case float_round_down:
1991	one = a.sign;
1992	break;
1993	case float_round_to_odd:
1994	one = true;
1995	break;
1996	default:
1997	g_assert_not_reached();
1998	}
1999
2000	if (one) {
2001	a.frac = DECOMPOSED_IMPLICIT_BIT;
2002	a.exp = `0`;
2003	} else {
2004	a.cls = float_class_zero;
2005	}
2006	} else {
2007	uint64_t frac_lsb = DECOMPOSED_IMPLICIT_BIT >> a.exp;
2008	uint64_t frac_lsbm1 = frac_lsb >> `1`;
2009	uint64_t rnd_even_mask = (frac_lsb - `1`) \| frac_lsb;
2010	uint64_t rnd_mask = rnd_even_mask >> `1`;
2011	uint64_t inc;
2012
2013	switch (rmode) {
2014	case float_round_nearest_even:
2015	inc = ((a.frac & rnd_even_mask) != frac_lsbm1 ? frac_lsbm1 : `0`);
2016	break;
2017	case float_round_ties_away:
2018	inc = frac_lsbm1;
2019	break;
2020	case float_round_to_zero:
2021	inc = `0`;
2022	break;
2023	case float_round_up:
2024	inc = a.sign ? `0` : rnd_mask;
2025	break;
2026	case float_round_down:
2027	inc = a.sign ? rnd_mask : `0`;
2028	break;
2029	case float_round_to_odd:
2030	inc = a.frac & frac_lsb ? `0` : rnd_mask;
2031	break;
2032	default:
2033	g_assert_not_reached();
2034	}
2035
2036	if (a.frac & rnd_mask) {
2037	s->float_exception_flags \|= float_flag_inexact;
2038	a.frac += inc;
2039	a.frac &= ~rnd_mask;
2040	if (a.frac & DECOMPOSED_OVERFLOW_BIT) {
2041	a.frac >>= `1`;
2042	a.exp++;
2043	}
2044	}
2045	}
2046	break;
2047	default:
2048	g_assert_not_reached();
2049	}
2050	return a;
2051	}
2052
2053	float16 float16_round_to_int(float16 a, float_status *s)
2054	{
2055	FloatParts pa = float16_unpack_canonical(a, s);
2056	FloatParts pr = round_to_int(pa, s->float_rounding_mode, `0`, s);
2057	return float16_round_pack_canonical(pr, s);
2058	}
2059
2060	float32 float32_round_to_int(float32 a, float_status *s)
2061	{
2062	FloatParts pa = float32_unpack_canonical(a, s);
2063	FloatParts pr = round_to_int(pa, s->float_rounding_mode, `0`, s);
2064	return float32_round_pack_canonical(pr, s);
2065	}
2066
2067	float64 float64_round_to_int(float64 a, float_status *s)
2068	{
2069	FloatParts pa = float64_unpack_canonical(a, s);
2070	FloatParts pr = round_to_int(pa, s->float_rounding_mode, `0`, s);
2071	return float64_round_pack_canonical(pr, s);
2072	}
2073
2074	/*
2075	* Returns the result of converting the floating-point value `a' to
2076	* the two's complement integer format. The conversion is performed
2077	* according to the IEC/IEEE Standard for Binary Floating-Point
2078	* Arithmetic---which means in particular that the conversion is
2079	* rounded according to the current rounding mode. If `a' is a NaN,
2080	* the largest positive integer is returned. Otherwise, if the
2081	* conversion overflows, the largest integer with the same sign as `a'
2082	* is returned.
2083	*/
2084
2085	static int64_t round_to_int_and_pack(FloatParts in, int rmode, int scale,
2086	int64_t min, int64_t max,
2087	float_status *s)
2088	{
2089	uint64_t r;
2090	int orig_flags = get_float_exception_flags(s);
2091	FloatParts p = round_to_int(in, rmode, scale, s);
2092
2093	switch (p.cls) {
2094	case float_class_snan:
2095	case float_class_qnan:
2096	s->float_exception_flags = orig_flags \| float_flag_invalid;
2097	return max;
2098	case float_class_inf:
2099	s->float_exception_flags = orig_flags \| float_flag_invalid;
2100	return p.sign ? min : max;
2101	case float_class_zero:
2102	return `0`;
2103	case float_class_normal:
2104	if (p.exp < DECOMPOSED_BINARY_POINT) {
2105	r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2106	} else if (p.exp - DECOMPOSED_BINARY_POINT < `2`) {
2107	r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2108	} else {
2109	r = UINT64_MAX;
2110	}
2111	if (p.sign) {
2112	if (r <= -(uint64_t) min) {
2113	return -r;
2114	} else {
2115	s->float_exception_flags = orig_flags \| float_flag_invalid;
2116	return min;
2117	}
2118	} else {
2119	if (r <= max) {
2120	return r;
2121	} else {
2122	s->float_exception_flags = orig_flags \| float_flag_invalid;
2123	return max;
2124	}
2125	}
2126	default:
2127	g_assert_not_reached();
2128	}
2129	}
2130
2131	int16_t float16_to_int16_scalbn(float16 a, int rmode, int scale,
2132	float_status *s)
2133	{
2134	return round_to_int_and_pack(float16_unpack_canonical(a, s),
2135	rmode, scale, INT16_MIN, INT16_MAX, s);
2136	}
2137
2138	int32_t float16_to_int32_scalbn(float16 a, int rmode, int scale,
2139	float_status *s)
2140	{
2141	return round_to_int_and_pack(float16_unpack_canonical(a, s),
2142	rmode, scale, INT32_MIN, INT32_MAX, s);
2143	}
2144
2145	int64_t float16_to_int64_scalbn(float16 a, int rmode, int scale,
2146	float_status *s)
2147	{
2148	return round_to_int_and_pack(float16_unpack_canonical(a, s),
2149	rmode, scale, INT64_MIN, INT64_MAX, s);
2150	}
2151
2152	int16_t float32_to_int16_scalbn(float32 a, int rmode, int scale,
2153	float_status *s)
2154	{
2155	return round_to_int_and_pack(float32_unpack_canonical(a, s),
2156	rmode, scale, INT16_MIN, INT16_MAX, s);
2157	}
2158
2159	int32_t float32_to_int32_scalbn(float32 a, int rmode, int scale,
2160	float_status *s)
2161	{
2162	return round_to_int_and_pack(float32_unpack_canonical(a, s),
2163	rmode, scale, INT32_MIN, INT32_MAX, s);
2164	}
2165
2166	int64_t float32_to_int64_scalbn(float32 a, int rmode, int scale,
2167	float_status *s)
2168	{
2169	return round_to_int_and_pack(float32_unpack_canonical(a, s),
2170	rmode, scale, INT64_MIN, INT64_MAX, s);
2171	}
2172
2173	int16_t float64_to_int16_scalbn(float64 a, int rmode, int scale,
2174	float_status *s)
2175	{
2176	return round_to_int_and_pack(float64_unpack_canonical(a, s),
2177	rmode, scale, INT16_MIN, INT16_MAX, s);
2178	}
2179
2180	int32_t float64_to_int32_scalbn(float64 a, int rmode, int scale,
2181	float_status *s)
2182	{
2183	return round_to_int_and_pack(float64_unpack_canonical(a, s),
2184	rmode, scale, INT32_MIN, INT32_MAX, s);
2185	}
2186
2187	int64_t float64_to_int64_scalbn(float64 a, int rmode, int scale,
2188	float_status *s)
2189	{
2190	return round_to_int_and_pack(float64_unpack_canonical(a, s),
2191	rmode, scale, INT64_MIN, INT64_MAX, s);
2192	}
2193
2194	int16_t float16_to_int16(float16 a, float_status *s)
2195	{
2196	return float16_to_int16_scalbn(a, s->float_rounding_mode, `0`, s);
2197	}
2198
2199	int32_t float16_to_int32(float16 a, float_status *s)
2200	{
2201	return float16_to_int32_scalbn(a, s->float_rounding_mode, `0`, s);
2202	}
2203
2204	int64_t float16_to_int64(float16 a, float_status *s)
2205	{
2206	return float16_to_int64_scalbn(a, s->float_rounding_mode, `0`, s);
2207	}
2208
2209	int16_t float32_to_int16(float32 a, float_status *s)
2210	{
2211	return float32_to_int16_scalbn(a, s->float_rounding_mode, `0`, s);
2212	}
2213
2214	int32_t float32_to_int32(float32 a, float_status *s)
2215	{
2216	return float32_to_int32_scalbn(a, s->float_rounding_mode, `0`, s);
2217	}
2218
2219	int64_t float32_to_int64(float32 a, float_status *s)
2220	{
2221	return float32_to_int64_scalbn(a, s->float_rounding_mode, `0`, s);
2222	}
2223
2224	int16_t float64_to_int16(float64 a, float_status *s)
2225	{
2226	return float64_to_int16_scalbn(a, s->float_rounding_mode, `0`, s);
2227	}
2228
2229	int32_t float64_to_int32(float64 a, float_status *s)
2230	{
2231	return float64_to_int32_scalbn(a, s->float_rounding_mode, `0`, s);
2232	}
2233
2234	int64_t float64_to_int64(float64 a, float_status *s)
2235	{
2236	return float64_to_int64_scalbn(a, s->float_rounding_mode, `0`, s);
2237	}
2238
2239	int16_t float16_to_int16_round_to_zero(float16 a, float_status *s)
2240	{
2241	return float16_to_int16_scalbn(a, float_round_to_zero, `0`, s);
2242	}
2243
2244	int32_t float16_to_int32_round_to_zero(float16 a, float_status *s)
2245	{
2246	return float16_to_int32_scalbn(a, float_round_to_zero, `0`, s);
2247	}
2248
2249	int64_t float16_to_int64_round_to_zero(float16 a, float_status *s)
2250	{
2251	return float16_to_int64_scalbn(a, float_round_to_zero, `0`, s);
2252	}
2253
2254	int16_t float32_to_int16_round_to_zero(float32 a, float_status *s)
2255	{
2256	return float32_to_int16_scalbn(a, float_round_to_zero, `0`, s);
2257	}
2258
2259	int32_t float32_to_int32_round_to_zero(float32 a, float_status *s)
2260	{
2261	return float32_to_int32_scalbn(a, float_round_to_zero, `0`, s);
2262	}
2263
2264	int64_t float32_to_int64_round_to_zero(float32 a, float_status *s)
2265	{
2266	return float32_to_int64_scalbn(a, float_round_to_zero, `0`, s);
2267	}
2268
2269	int16_t float64_to_int16_round_to_zero(float64 a, float_status *s)
2270	{
2271	return float64_to_int16_scalbn(a, float_round_to_zero, `0`, s);
2272	}
2273
2274	int32_t float64_to_int32_round_to_zero(float64 a, float_status *s)
2275	{
2276	return float64_to_int32_scalbn(a, float_round_to_zero, `0`, s);
2277	}
2278
2279	int64_t float64_to_int64_round_to_zero(float64 a, float_status *s)
2280	{
2281	return float64_to_int64_scalbn(a, float_round_to_zero, `0`, s);
2282	}
2283
/*
 * Returns the result of converting the floating-point value `a' to
 * the unsigned integer format. The conversion is performed according
 * to the IEC/IEEE Standard for Binary Floating-Point
 * Arithmetic---which means in particular that the conversion is
 * rounded according to the current rounding mode. If `a' is a NaN,
 * the largest unsigned integer is returned. Otherwise, if the
 * conversion overflows, the largest unsigned integer is returned. If
 * `a' is negative, the result is rounded and zero is returned;
 * negative values that do not round to zero raise the invalid
 * exception flag.
 */
2296
2297	static uint64_t round_to_uint_and_pack(FloatParts in, int rmode, int scale,
2298	uint64_t max, float_status *s)
2299	{
2300	int orig_flags = get_float_exception_flags(s);
2301	FloatParts p = round_to_int(in, rmode, scale, s);
2302	uint64_t r;
2303
2304	switch (p.cls) {
2305	case float_class_snan:
2306	case float_class_qnan:
2307	s->float_exception_flags = orig_flags \| float_flag_invalid;
2308	return max;
2309	case float_class_inf:
2310	s->float_exception_flags = orig_flags \| float_flag_invalid;
2311	return p.sign ? `0` : max;
2312	case float_class_zero:
2313	return `0`;
2314	case float_class_normal:
2315	if (p.sign) {
2316	s->float_exception_flags = orig_flags \| float_flag_invalid;
2317	return `0`;
2318	}
2319
2320	if (p.exp < DECOMPOSED_BINARY_POINT) {
2321	r = p.frac >> (DECOMPOSED_BINARY_POINT - p.exp);
2322	} else if (p.exp - DECOMPOSED_BINARY_POINT < `2`) {
2323	r = p.frac << (p.exp - DECOMPOSED_BINARY_POINT);
2324	} else {
2325	s->float_exception_flags = orig_flags \| float_flag_invalid;
2326	return max;
2327	}
2328
2329	/ For uint64 this will never trip, but if p.exp is too large*
2330	* to shift a decomposed fraction we shall have exited via the
2331	* 3rd leg above.
2332	*/
2333	if (r > max) {
2334	s->float_exception_flags = orig_flags \| float_flag_invalid;
2335	return max;
2336	}
2337	return r;
2338	default:
2339	g_assert_not_reached();
2340	}
2341	}
2342
2343	uint16_t float16_to_uint16_scalbn(float16 a, int rmode, int scale,
2344	float_status *s)
2345	{
2346	return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2347	rmode, scale, UINT16_MAX, s);
2348	}
2349
2350	uint32_t float16_to_uint32_scalbn(float16 a, int rmode, int scale,
2351	float_status *s)
2352	{
2353	return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2354	rmode, scale, UINT32_MAX, s);
2355	}
2356
2357	uint64_t float16_to_uint64_scalbn(float16 a, int rmode, int scale,
2358	float_status *s)
2359	{
2360	return round_to_uint_and_pack(float16_unpack_canonical(a, s),
2361	rmode, scale, UINT64_MAX, s);
2362	}
2363
2364	uint16_t float32_to_uint16_scalbn(float32 a, int rmode, int scale,
2365	float_status *s)
2366	{
2367	return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2368	rmode, scale, UINT16_MAX, s);
2369	}
2370
2371	uint32_t float32_to_uint32_scalbn(float32 a, int rmode, int scale,
2372	float_status *s)
2373	{
2374	return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2375	rmode, scale, UINT32_MAX, s);
2376	}
2377
2378	uint64_t float32_to_uint64_scalbn(float32 a, int rmode, int scale,
2379	float_status *s)
2380	{
2381	return round_to_uint_and_pack(float32_unpack_canonical(a, s),
2382	rmode, scale, UINT64_MAX, s);
2383	}
2384
2385	uint16_t float64_to_uint16_scalbn(float64 a, int rmode, int scale,
2386	float_status *s)
2387	{
2388	return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2389	rmode, scale, UINT16_MAX, s);
2390	}
2391
2392	uint32_t float64_to_uint32_scalbn(float64 a, int rmode, int scale,
2393	float_status *s)
2394	{
2395	return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2396	rmode, scale, UINT32_MAX, s);
2397	}
2398
2399	uint64_t float64_to_uint64_scalbn(float64 a, int rmode, int scale,
2400	float_status *s)
2401	{
2402	return round_to_uint_and_pack(float64_unpack_canonical(a, s),
2403	rmode, scale, UINT64_MAX, s);
2404	}
2405
2406	uint16_t float16_to_uint16(float16 a, float_status *s)
2407	{
2408	return float16_to_uint16_scalbn(a, s->float_rounding_mode, `0`, s);
2409	}
2410
2411	uint32_t float16_to_uint32(float16 a, float_status *s)
2412	{
2413	return float16_to_uint32_scalbn(a, s->float_rounding_mode, `0`, s);
2414	}
2415
2416	uint64_t float16_to_uint64(float16 a, float_status *s)
2417	{
2418	return float16_to_uint64_scalbn(a, s->float_rounding_mode, `0`, s);
2419	}
2420
2421	uint16_t float32_to_uint16(float32 a, float_status *s)
2422	{
2423	return float32_to_uint16_scalbn(a, s->float_rounding_mode, `0`, s);
2424	}
2425
2426	uint32_t float32_to_uint32(float32 a, float_status *s)
2427	{
2428	return float32_to_uint32_scalbn(a, s->float_rounding_mode, `0`, s);
2429	}
2430
2431	uint64_t float32_to_uint64(float32 a, float_status *s)
2432	{
2433	return float32_to_uint64_scalbn(a, s->float_rounding_mode, `0`, s);
2434	}
2435
2436	uint16_t float64_to_uint16(float64 a, float_status *s)
2437	{
2438	return float64_to_uint16_scalbn(a, s->float_rounding_mode, `0`, s);
2439	}
2440
2441	uint32_t float64_to_uint32(float64 a, float_status *s)
2442	{
2443	return float64_to_uint32_scalbn(a, s->float_rounding_mode, `0`, s);
2444	}
2445
2446	uint64_t float64_to_uint64(float64 a, float_status *s)
2447	{
2448	return float64_to_uint64_scalbn(a, s->float_rounding_mode, `0`, s);
2449	}
2450
2451	uint16_t float16_to_uint16_round_to_zero(float16 a, float_status *s)
2452	{
2453	return float16_to_uint16_scalbn(a, float_round_to_zero, `0`, s);
2454	}
2455
2456	uint32_t float16_to_uint32_round_to_zero(float16 a, float_status *s)
2457	{
2458	return float16_to_uint32_scalbn(a, float_round_to_zero, `0`, s);
2459	}
2460
2461	uint64_t float16_to_uint64_round_to_zero(float16 a, float_status *s)
2462	{
2463	return float16_to_uint64_scalbn(a, float_round_to_zero, `0`, s);
2464	}
2465
2466	uint16_t float32_to_uint16_round_to_zero(float32 a, float_status *s)
2467	{
2468	return float32_to_uint16_scalbn(a, float_round_to_zero, `0`, s);
2469	}
2470
2471	uint32_t float32_to_uint32_round_to_zero(float32 a, float_status *s)
2472	{
2473	return float32_to_uint32_scalbn(a, float_round_to_zero, `0`, s);
2474	}
2475
2476	uint64_t float32_to_uint64_round_to_zero(float32 a, float_status *s)
2477	{
2478	return float32_to_uint64_scalbn(a, float_round_to_zero, `0`, s);
2479	}
2480
2481	uint16_t float64_to_uint16_round_to_zero(float64 a, float_status *s)
2482	{
2483	return float64_to_uint16_scalbn(a, float_round_to_zero, `0`, s);
2484	}
2485
2486	uint32_t float64_to_uint32_round_to_zero(float64 a, float_status *s)
2487	{
2488	return float64_to_uint32_scalbn(a, float_round_to_zero, `0`, s);
2489	}
2490
2491	uint64_t float64_to_uint64_round_to_zero(float64 a, float_status *s)
2492	{
2493	return float64_to_uint64_scalbn(a, float_round_to_zero, `0`, s);
2494	}
2495
2496	/*
2497	* Integer to float conversions
2498	*
2499	* Returns the result of converting the two's complement integer `a'
2500	* to the floating-point format. The conversion is performed according
2501	* to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2502	*/
2503
2504	static FloatParts int_to_float(int64_t a, int scale, float_status *status)
2505	{
2506	FloatParts r = { .sign = false };
2507
2508	if (a == `0`) {
2509	r.cls = float_class_zero;
2510	} else {
2511	uint64_t f = a;
2512	int shift;
2513
2514	r.cls = float_class_normal;
2515	if (a < `0`) {
2516	f = -f;
2517	r.sign = true;
2518	}
2519	shift = clz64(f) - `1`;
2520	scale = MIN(MAX(scale, -`0x10000`), `0x10000`);
2521
2522	r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2523	r.frac = (shift < `0` ? DECOMPOSED_IMPLICIT_BIT : f << shift);
2524	}
2525
2526	return r;
2527	}
2528
2529	float16 int64_to_float16_scalbn(int64_t a, int scale, float_status *status)
2530	{
2531	FloatParts pa = int_to_float(a, scale, status);
2532	return float16_round_pack_canonical(pa, status);
2533	}
2534
2535	float16 int32_to_float16_scalbn(int32_t a, int scale, float_status *status)
2536	{
2537	return int64_to_float16_scalbn(a, scale, status);
2538	}
2539
2540	float16 int16_to_float16_scalbn(int16_t a, int scale, float_status *status)
2541	{
2542	return int64_to_float16_scalbn(a, scale, status);
2543	}
2544
2545	float16 int64_to_float16(int64_t a, float_status *status)
2546	{
2547	return int64_to_float16_scalbn(a, `0`, status);
2548	}
2549
2550	float16 int32_to_float16(int32_t a, float_status *status)
2551	{
2552	return int64_to_float16_scalbn(a, `0`, status);
2553	}
2554
2555	float16 int16_to_float16(int16_t a, float_status *status)
2556	{
2557	return int64_to_float16_scalbn(a, `0`, status);
2558	}
2559
2560	float32 int64_to_float32_scalbn(int64_t a, int scale, float_status *status)
2561	{
2562	FloatParts pa = int_to_float(a, scale, status);
2563	return float32_round_pack_canonical(pa, status);
2564	}
2565
2566	float32 int32_to_float32_scalbn(int32_t a, int scale, float_status *status)
2567	{
2568	return int64_to_float32_scalbn(a, scale, status);
2569	}
2570
2571	float32 int16_to_float32_scalbn(int16_t a, int scale, float_status *status)
2572	{
2573	return int64_to_float32_scalbn(a, scale, status);
2574	}
2575
2576	float32 int64_to_float32(int64_t a, float_status *status)
2577	{
2578	return int64_to_float32_scalbn(a, `0`, status);
2579	}
2580
2581	float32 int32_to_float32(int32_t a, float_status *status)
2582	{
2583	return int64_to_float32_scalbn(a, `0`, status);
2584	}
2585
2586	float32 int16_to_float32(int16_t a, float_status *status)
2587	{
2588	return int64_to_float32_scalbn(a, `0`, status);
2589	}
2590
2591	float64 int64_to_float64_scalbn(int64_t a, int scale, float_status *status)
2592	{
2593	FloatParts pa = int_to_float(a, scale, status);
2594	return float64_round_pack_canonical(pa, status);
2595	}
2596
2597	float64 int32_to_float64_scalbn(int32_t a, int scale, float_status *status)
2598	{
2599	return int64_to_float64_scalbn(a, scale, status);
2600	}
2601
2602	float64 int16_to_float64_scalbn(int16_t a, int scale, float_status *status)
2603	{
2604	return int64_to_float64_scalbn(a, scale, status);
2605	}
2606
2607	float64 int64_to_float64(int64_t a, float_status *status)
2608	{
2609	return int64_to_float64_scalbn(a, `0`, status);
2610	}
2611
2612	float64 int32_to_float64(int32_t a, float_status *status)
2613	{
2614	return int64_to_float64_scalbn(a, `0`, status);
2615	}
2616
2617	float64 int16_to_float64(int16_t a, float_status *status)
2618	{
2619	return int64_to_float64_scalbn(a, `0`, status);
2620	}
2621
2622
2623	/*
2624	* Unsigned Integer to float conversions
2625	*
2626	* Returns the result of converting the unsigned integer `a' to the
2627	* floating-point format. The conversion is performed according to the
2628	* IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2629	*/
2630
2631	static FloatParts uint_to_float(uint64_t a, int scale, float_status *status)
2632	{
2633	FloatParts r = { .sign = false };
2634
2635	if (a == `0`) {
2636	r.cls = float_class_zero;
2637	} else {
2638	scale = MIN(MAX(scale, -`0x10000`), `0x10000`);
2639	r.cls = float_class_normal;
2640	if ((int64_t)a < `0`) {
2641	r.exp = DECOMPOSED_BINARY_POINT + `1` + scale;
2642	shift64RightJamming(a, `1`, &a);
2643	r.frac = a;
2644	} else {
2645	int shift = clz64(a) - `1`;
2646	r.exp = DECOMPOSED_BINARY_POINT - shift + scale;
2647	r.frac = a << shift;
2648	}
2649	}
2650
2651	return r;
2652	}
2653
2654	float16 uint64_to_float16_scalbn(uint64_t a, int scale, float_status *status)
2655	{
2656	FloatParts pa = uint_to_float(a, scale, status);
2657	return float16_round_pack_canonical(pa, status);
2658	}
2659
2660	float16 uint32_to_float16_scalbn(uint32_t a, int scale, float_status *status)
2661	{
2662	return uint64_to_float16_scalbn(a, scale, status);
2663	}
2664
2665	float16 uint16_to_float16_scalbn(uint16_t a, int scale, float_status *status)
2666	{
2667	return uint64_to_float16_scalbn(a, scale, status);
2668	}
2669
2670	float16 uint64_to_float16(uint64_t a, float_status *status)
2671	{
2672	return uint64_to_float16_scalbn(a, `0`, status);
2673	}
2674
2675	float16 uint32_to_float16(uint32_t a, float_status *status)
2676	{
2677	return uint64_to_float16_scalbn(a, `0`, status);
2678	}
2679
2680	float16 uint16_to_float16(uint16_t a, float_status *status)
2681	{
2682	return uint64_to_float16_scalbn(a, `0`, status);
2683	}
2684
2685	float32 uint64_to_float32_scalbn(uint64_t a, int scale, float_status *status)
2686	{
2687	FloatParts pa = uint_to_float(a, scale, status);
2688	return float32_round_pack_canonical(pa, status);
2689	}
2690
2691	float32 uint32_to_float32_scalbn(uint32_t a, int scale, float_status *status)
2692	{
2693	return uint64_to_float32_scalbn(a, scale, status);
2694	}
2695
2696	float32 uint16_to_float32_scalbn(uint16_t a, int scale, float_status *status)
2697	{
2698	return uint64_to_float32_scalbn(a, scale, status);
2699	}
2700
2701	float32 uint64_to_float32(uint64_t a, float_status *status)
2702	{
2703	return uint64_to_float32_scalbn(a, `0`, status);
2704	}
2705
2706	float32 uint32_to_float32(uint32_t a, float_status *status)
2707	{
2708	return uint64_to_float32_scalbn(a, `0`, status);
2709	}
2710
2711	float32 uint16_to_float32(uint16_t a, float_status *status)
2712	{
2713	return uint64_to_float32_scalbn(a, `0`, status);
2714	}
2715
2716	float64 uint64_to_float64_scalbn(uint64_t a, int scale, float_status *status)
2717	{
2718	FloatParts pa = uint_to_float(a, scale, status);
2719	return float64_round_pack_canonical(pa, status);
2720	}
2721
2722	float64 uint32_to_float64_scalbn(uint32_t a, int scale, float_status *status)
2723	{
2724	return uint64_to_float64_scalbn(a, scale, status);
2725	}
2726
2727	float64 uint16_to_float64_scalbn(uint16_t a, int scale, float_status *status)
2728	{
2729	return uint64_to_float64_scalbn(a, scale, status);
2730	}
2731
2732	float64 uint64_to_float64(uint64_t a, float_status *status)
2733	{
2734	return uint64_to_float64_scalbn(a, `0`, status);
2735	}
2736
2737	float64 uint32_to_float64(uint32_t a, float_status *status)
2738	{
2739	return uint64_to_float64_scalbn(a, `0`, status);
2740	}
2741
2742	float64 uint16_to_float64(uint16_t a, float_status *status)
2743	{
2744	return uint64_to_float64_scalbn(a, `0`, status);
2745	}
2746
/* Float Min/Max */
/* min() and max() functions. These can't be implemented as
 * 'compare and pick one input' because that would mishandle
 * NaNs and +0 vs -0.
 *
 * minnum() and maxnum() functions. These are similar to the min()
 * and max() functions but if one of the arguments is a QNaN and
 * the other is numerical then the numerical argument is returned.
 * SNaNs will get quietened before being returned.
 * minnum() and maxnum() correspond to the IEEE 754-2008 minNum()
 * and maxNum() operations. min() and max() are the typical min/max
 * semantics provided by many CPUs which predate that specification.
 *
 * minnummag() and maxnummag() functions correspond to minNumMag()
 * and maxNumMag() from the IEEE-754 2008.
 */
2763	static FloatParts minmax_floats(FloatParts a, FloatParts b, bool ismin,
2764	bool ieee, bool ismag, float_status *s)
2765	{
2766	if (unlikely(is_nan(a.cls) \|\| is_nan(b.cls))) {
2767	if (ieee) {
2768	/ Takes two floating-point values `a' and `b', one of*
2769	* which is a NaN, and returns the appropriate NaN
2770	* result. If either `a' or `b' is a signaling NaN,
2771	* the invalid exception is raised.
2772	*/
2773	if (is_snan(a.cls) \|\| is_snan(b.cls)) {
2774	return pick_nan(a, b, s);
2775	} else if (is_nan(a.cls) && !is_nan(b.cls)) {
2776	return b;
2777	} else if (is_nan(b.cls) && !is_nan(a.cls)) {
2778	return a;
2779	}
2780	}
2781	return pick_nan(a, b, s);
2782	} else {
2783	int a_exp, b_exp;
2784
2785	switch (a.cls) {
2786	case float_class_normal:
2787	a_exp = a.exp;
2788	break;
2789	case float_class_inf:
2790	a_exp = INT_MAX;
2791	break;
2792	case float_class_zero:
2793	a_exp = INT_MIN;
2794	break;
2795	default:
2796	g_assert_not_reached();
2797	break;
2798	}
2799	switch (b.cls) {
2800	case float_class_normal:
2801	b_exp = b.exp;
2802	break;
2803	case float_class_inf:
2804	b_exp = INT_MAX;
2805	break;
2806	case float_class_zero:
2807	b_exp = INT_MIN;
2808	break;
2809	default:
2810	g_assert_not_reached();
2811	break;
2812	}
2813
2814	if (ismag && (a_exp != b_exp \|\| a.frac != b.frac)) {
2815	bool a_less = a_exp < b_exp;
2816	if (a_exp == b_exp) {
2817	a_less = a.frac < b.frac;
2818	}
2819	return a_less ^ ismin ? b : a;
2820	}
2821
2822	if (a.sign == b.sign) {
2823	bool a_less = a_exp < b_exp;
2824	if (a_exp == b_exp) {
2825	a_less = a.frac < b.frac;
2826	}
2827	return a.sign ^ a_less ^ ismin ? b : a;
2828	} else {
2829	return a.sign ^ ismin ? b : a;
2830	}
2831	}
2832	}
2833
2834	#define MINMAX(sz, name, ismin, isiee, ismag) \
2835	float ## sz float ## sz ## _ ## name(float ## sz a, float ## sz b, \
2836	float_status *s) \
2837	{ \
2838	FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2839	FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2840	FloatParts pr = minmax_floats(pa, pb, ismin, isiee, ismag, s); \
2841	\
2842	return float ## sz ## _round_pack_canonical(pr, s); \
2843	}
2844
2845	MINMAX(`16`, min, true, false, false)
2846	MINMAX(`16`, minnum, true, true, false)
2847	MINMAX(`16`, minnummag, true, true, true)
2848	MINMAX(`16`, max, false, false, false)
2849	MINMAX(`16`, maxnum, false, true, false)
2850	MINMAX(`16`, maxnummag, false, true, true)
2851
2852	MINMAX(`32`, min, true, false, false)
2853	MINMAX(`32`, minnum, true, true, false)
2854	MINMAX(`32`, minnummag, true, true, true)
2855	MINMAX(`32`, max, false, false, false)
2856	MINMAX(`32`, maxnum, false, true, false)
2857	MINMAX(`32`, maxnummag, false, true, true)
2858
2859	MINMAX(`64`, min, true, false, false)
2860	MINMAX(`64`, minnum, true, true, false)
2861	MINMAX(`64`, minnummag, true, true, true)
2862	MINMAX(`64`, max, false, false, false)
2863	MINMAX(`64`, maxnum, false, true, false)
2864	MINMAX(`64`, maxnummag, false, true, true)
2865
2866	#undef MINMAX
2867
2868	/ Floating point compare /
2869	static int compare_floats(FloatParts a, FloatParts b, bool is_quiet,
2870	float_status *s)
2871	{
2872	if (is_nan(a.cls) \|\| is_nan(b.cls)) {
2873	if (!is_quiet \|\|
2874	a.cls == float_class_snan \|\|
2875	b.cls == float_class_snan) {
2876	s->float_exception_flags \|= float_flag_invalid;
2877	}
2878	return float_relation_unordered;
2879	}
2880
2881	if (a.cls == float_class_zero) {
2882	if (b.cls == float_class_zero) {
2883	return float_relation_equal;
2884	}
2885	return b.sign ? float_relation_greater : float_relation_less;
2886	} else if (b.cls == float_class_zero) {
2887	return a.sign ? float_relation_less : float_relation_greater;
2888	}
2889
2890	/ The only really important thing about infinity is its sign. If*
2891	* both are infinities the sign marks the smallest of the two.
2892	*/
2893	if (a.cls == float_class_inf) {
2894	if ((b.cls == float_class_inf) && (a.sign == b.sign)) {
2895	return float_relation_equal;
2896	}
2897	return a.sign ? float_relation_less : float_relation_greater;
2898	} else if (b.cls == float_class_inf) {
2899	return b.sign ? float_relation_greater : float_relation_less;
2900	}
2901
2902	if (a.sign != b.sign) {
2903	return a.sign ? float_relation_less : float_relation_greater;
2904	}
2905
2906	if (a.exp == b.exp) {
2907	if (a.frac == b.frac) {
2908	return float_relation_equal;
2909	}
2910	if (a.sign) {
2911	return a.frac > b.frac ?
2912	float_relation_less : float_relation_greater;
2913	} else {
2914	return a.frac > b.frac ?
2915	float_relation_greater : float_relation_less;
2916	}
2917	} else {
2918	if (a.sign) {
2919	return a.exp > b.exp ? float_relation_less : float_relation_greater;
2920	} else {
2921	return a.exp > b.exp ? float_relation_greater : float_relation_less;
2922	}
2923	}
2924	}
2925
2926	#define COMPARE(name, attr, sz) \
2927	static int attr \
2928	name(float ## sz a, float ## sz b, bool is_quiet, float_status *s) \
2929	{ \
2930	FloatParts pa = float ## sz ## _unpack_canonical(a, s); \
2931	FloatParts pb = float ## sz ## _unpack_canonical(b, s); \
2932	return compare_floats(pa, pb, is_quiet, s); \
2933	}
2934
2935	COMPARE(soft_f16_compare, QEMU_FLATTEN, `16`)
2936	COMPARE(soft_f32_compare, QEMU_SOFTFLOAT_ATTR, `32`)
2937	COMPARE(soft_f64_compare, QEMU_SOFTFLOAT_ATTR, `64`)
2938
2939	#undef COMPARE
2940
2941	int float16_compare(float16 a, float16 b, float_status *s)
2942	{
2943	return soft_f16_compare(a, b, false, s);
2944	}
2945
2946	int float16_compare_quiet(float16 a, float16 b, float_status *s)
2947	{
2948	return soft_f16_compare(a, b, true, s);
2949	}
2950
2951	static int QEMU_FLATTEN
2952	f32_compare(float32 xa, float32 xb, bool is_quiet, float_status *s)
2953	{
2954	union_float32 ua, ub;
2955
2956	ua.s = xa;
2957	ub.s = xb;
2958
2959	if (QEMU_NO_HARDFLOAT) {
2960	goto soft;
2961	}
2962
2963	float32_input_flush2(&ua.s, &ub.s, s);
2964	if (isgreaterequal(ua.h, ub.h)) {
2965	if (isgreater(ua.h, ub.h)) {
2966	return float_relation_greater;
2967	}
2968	return float_relation_equal;
2969	}
2970	if (likely(isless(ua.h, ub.h))) {
2971	return float_relation_less;
2972	}
2973	/ The only condition remaining is unordered.*
2974	* Fall through to set flags.
2975	*/
2976	soft:
2977	return soft_f32_compare(ua.s, ub.s, is_quiet, s);
2978	}
2979
2980	int float32_compare(float32 a, float32 b, float_status *s)
2981	{
2982	return f32_compare(a, b, false, s);
2983	}
2984
2985	int float32_compare_quiet(float32 a, float32 b, float_status *s)
2986	{
2987	return f32_compare(a, b, true, s);
2988	}
2989
2990	static int QEMU_FLATTEN
2991	f64_compare(float64 xa, float64 xb, bool is_quiet, float_status *s)
2992	{
2993	union_float64 ua, ub;
2994
2995	ua.s = xa;
2996	ub.s = xb;
2997
2998	if (QEMU_NO_HARDFLOAT) {
2999	goto soft;
3000	}
3001
3002	float64_input_flush2(&ua.s, &ub.s, s);
3003	if (isgreaterequal(ua.h, ub.h)) {
3004	if (isgreater(ua.h, ub.h)) {
3005	return float_relation_greater;
3006	}
3007	return float_relation_equal;
3008	}
3009	if (likely(isless(ua.h, ub.h))) {
3010	return float_relation_less;
3011	}
3012	/ The only condition remaining is unordered.*
3013	* Fall through to set flags.
3014	*/
3015	soft:
3016	return soft_f64_compare(ua.s, ub.s, is_quiet, s);
3017	}
3018
3019	int float64_compare(float64 a, float64 b, float_status *s)
3020	{
3021	return f64_compare(a, b, false, s);
3022	}
3023
3024	int float64_compare_quiet(float64 a, float64 b, float_status *s)
3025	{
3026	return f64_compare(a, b, true, s);
3027	}
3028
3029	/ Multiply A by 2 raised to the power N. /
3030	static FloatParts scalbn_decomposed(FloatParts a, int n, float_status *s)
3031	{
3032	if (unlikely(is_nan(a.cls))) {
3033	return return_nan(a, s);
3034	}
3035	if (a.cls == float_class_normal) {
3036	/ The largest float type (even though not supported by FloatParts)*
3037	* is float128, which has a 15 bit exponent. Bounding N to 16 bits
3038	* still allows rounding to infinity, without allowing overflow
3039	* within the int32_t that backs FloatParts.exp.
3040	*/
3041	n = MIN(MAX(n, -`0x10000`), `0x10000`);
3042	a.exp += n;
3043	}
3044	return a;
3045	}
3046
3047	float16 float16_scalbn(float16 a, int n, float_status *status)
3048	{
3049	FloatParts pa = float16_unpack_canonical(a, status);
3050	FloatParts pr = scalbn_decomposed(pa, n, status);
3051	return float16_round_pack_canonical(pr, status);
3052	}
3053
3054	float32 float32_scalbn(float32 a, int n, float_status *status)
3055	{
3056	FloatParts pa = float32_unpack_canonical(a, status);
3057	FloatParts pr = scalbn_decomposed(pa, n, status);
3058	return float32_round_pack_canonical(pr, status);
3059	}
3060
3061	float64 float64_scalbn(float64 a, int n, float_status *status)
3062	{
3063	FloatParts pa = float64_unpack_canonical(a, status);
3064	FloatParts pr = scalbn_decomposed(pa, n, status);
3065	return float64_round_pack_canonical(pr, status);
3066	}
3067
3068	/*
3069	* Square Root
3070	*
3071	* The old softfloat code did an approximation step before zeroing in
3072	* on the final result. However for simpleness we just compute the
3073	* square root by iterating down from the implicit bit to enough extra
3074	* bits to ensure we get a correctly rounded result.
3075	*
3076	* This does mean however the calculation is slower than before,
3077	* especially for 64 bit floats.
3078	*/
3079
3080	static FloatParts sqrt_float(FloatParts a, float_status s, const* FloatFmt *p)
3081	{
3082	uint64_t a_frac, r_frac, s_frac;
3083	int bit, last_bit;
3084
3085	if (is_nan(a.cls)) {
3086	return return_nan(a, s);
3087	}
3088	if (a.cls == float_class_zero) {
3089	return a; / sqrt(+-0) = +-0 /
3090	}
3091	if (a.sign) {
3092	s->float_exception_flags \|= float_flag_invalid;
3093	return parts_default_nan(s);
3094	}
3095	if (a.cls == float_class_inf) {
3096	return a; / sqrt(+inf) = +inf /
3097	}
3098
3099	assert(a.cls == float_class_normal);
3100
3101	/ We need two overflow bits at the top. Adding room for that is a*
3102	* right shift. If the exponent is odd, we can discard the low bit
3103	* by multiplying the fraction by 2; that's a left shift. Combine
3104	* those and we shift right if the exponent is even.
3105	*/
3106	a_frac = a.frac;
3107	if (!(a.exp & `1`)) {
3108	a_frac >>= `1`;
3109	}
3110	a.exp >>= `1`;
3111
3112	/ Bit-by-bit computation of sqrt. /
3113	r_frac = `0`;
3114	s_frac = `0`;
3115
3116	/ Iterate from implicit bit down to the 3 extra bits to compute a*
3117	* properly rounded result. Remember we've inserted one more bit
3118	* at the top, so these positions are one less.
3119	*/
3120	bit = DECOMPOSED_BINARY_POINT - `1`;
3121	last_bit = MAX(p->frac_shift - `4`, `0`);
3122	do {
3123	uint64_t q = `1ULL` << bit;
3124	uint64_t t_frac = s_frac + q;
3125	if (t_frac <= a_frac) {
3126	s_frac = t_frac + q;
3127	a_frac -= t_frac;
3128	r_frac += q;
3129	}
3130	a_frac <<= `1`;
3131	} while (--bit >= last_bit);
3132
3133	/ Undo the right shift done above. If there is any remaining*
3134	* fraction, the result is inexact. Set the sticky bit.
3135	*/
3136	a.frac = (r_frac << `1`) + (a_frac != `0`);
3137
3138	return a;
3139	}
3140
3141	float16 QEMU_FLATTEN float16_sqrt(float16 a, float_status *status)
3142	{
3143	FloatParts pa = float16_unpack_canonical(a, status);
3144	FloatParts pr = sqrt_float(pa, status, &float16_params);
3145	return float16_round_pack_canonical(pr, status);
3146	}
3147
3148	static float32 QEMU_SOFTFLOAT_ATTR
3149	soft_f32_sqrt(float32 a, float_status *status)
3150	{
3151	FloatParts pa = float32_unpack_canonical(a, status);
3152	FloatParts pr = sqrt_float(pa, status, &float32_params);
3153	return float32_round_pack_canonical(pr, status);
3154	}
3155
3156	static float64 QEMU_SOFTFLOAT_ATTR
3157	soft_f64_sqrt(float64 a, float_status *status)
3158	{
3159	FloatParts pa = float64_unpack_canonical(a, status);
3160	FloatParts pr = sqrt_float(pa, status, &float64_params);
3161	return float64_round_pack_canonical(pr, status);
3162	}
3163
3164	float32 QEMU_FLATTEN float32_sqrt(float32 xa, float_status *s)
3165	{
3166	union_float32 ua, ur;
3167
3168	ua.s = xa;
3169	if (unlikely(!can_use_fpu(s))) {
3170	goto soft;
3171	}
3172
3173	float32_input_flush1(&ua.s, s);
3174	if (QEMU_HARDFLOAT_1F32_USE_FP) {
3175	if (unlikely(!(fpclassify(ua.h) == FP_NORMAL \|\|
3176	fpclassify(ua.h) == FP_ZERO) \|\|
3177	signbit(ua.h))) {
3178	goto soft;
3179	}
3180	} else if (unlikely(!float32_is_zero_or_normal(ua.s) \|\|
3181	float32_is_neg(ua.s))) {
3182	goto soft;
3183	}
3184	ur.h = sqrtf(ua.h);
3185	return ur.s;
3186
3187	soft:
3188	return soft_f32_sqrt(ua.s, s);
3189	}
3190
3191	float64 QEMU_FLATTEN float64_sqrt(float64 xa, float_status *s)
3192	{
3193	union_float64 ua, ur;
3194
3195	ua.s = xa;
3196	if (unlikely(!can_use_fpu(s))) {
3197	goto soft;
3198	}
3199
3200	float64_input_flush1(&ua.s, s);
3201	if (QEMU_HARDFLOAT_1F64_USE_FP) {
3202	if (unlikely(!(fpclassify(ua.h) == FP_NORMAL \|\|
3203	fpclassify(ua.h) == FP_ZERO) \|\|
3204	signbit(ua.h))) {
3205	goto soft;
3206	}
3207	} else if (unlikely(!float64_is_zero_or_normal(ua.s) \|\|
3208	float64_is_neg(ua.s))) {
3209	goto soft;
3210	}
3211	ur.h = sqrt(ua.h);
3212	return ur.s;
3213
3214	soft:
3215	return soft_f64_sqrt(ua.s, s);
3216	}
3217
/*----------------------------------------------------------------------------
| The pattern for a default generated NaN.
*----------------------------------------------------------------------------*/
3221
3222	float16 float16_default_nan(float_status *status)
3223	{
3224	FloatParts p = parts_default_nan(status);
3225	p.frac >>= float16_params.frac_shift;
3226	return float16_pack_raw(p);
3227	}
3228
3229	float32 float32_default_nan(float_status *status)
3230	{
3231	FloatParts p = parts_default_nan(status);
3232	p.frac >>= float32_params.frac_shift;
3233	return float32_pack_raw(p);
3234	}
3235
3236	float64 float64_default_nan(float_status *status)
3237	{
3238	FloatParts p = parts_default_nan(status);
3239	p.frac >>= float64_params.frac_shift;
3240	return float64_pack_raw(p);
3241	}
3242
3243	float128 float128_default_nan(float_status *status)
3244	{
3245	FloatParts p = parts_default_nan(status);
3246	float128 r;
3247
3248	/ Extrapolate from the choices made by parts_default_nan to fill*
3249	* in the quad-floating format. If the low bit is set, assume we
3250	* want to set all non-snan bits.
3251	*/
3252	r.low = -(p.frac & `1`);
3253	r.high = p.frac >> (DECOMPOSED_BINARY_POINT - `48`);
3254	r.high \|= UINT64_C(`0x7FFF000000000000`);
3255	r.high \|= (uint64_t)p.sign << `63`;
3256
3257	return r;
3258	}
3259
/*----------------------------------------------------------------------------
| Returns a quiet NaN from a signalling NaN for the floating point value `a'.
*----------------------------------------------------------------------------*/
3263
3264	float16 float16_silence_nan(float16 a, float_status *status)
3265	{
3266	FloatParts p = float16_unpack_raw(a);
3267	p.frac <<= float16_params.frac_shift;
3268	p = parts_silence_nan(p, status);
3269	p.frac >>= float16_params.frac_shift;
3270	return float16_pack_raw(p);
3271	}
3272
3273	float32 float32_silence_nan(float32 a, float_status *status)
3274	{
3275	FloatParts p = float32_unpack_raw(a);
3276	p.frac <<= float32_params.frac_shift;
3277	p = parts_silence_nan(p, status);
3278	p.frac >>= float32_params.frac_shift;
3279	return float32_pack_raw(p);
3280	}
3281
3282	float64 float64_silence_nan(float64 a, float_status *status)
3283	{
3284	FloatParts p = float64_unpack_raw(a);
3285	p.frac <<= float64_params.frac_shift;
3286	p = parts_silence_nan(p, status);
3287	p.frac >>= float64_params.frac_shift;
3288	return float64_pack_raw(p);
3289	}
3290
3291
/*----------------------------------------------------------------------------
| If `a' is denormal and we are in flush-to-zero mode then set the
| input-denormal exception and return zero. Otherwise just return the value.
*----------------------------------------------------------------------------*/
3296
3297	static bool parts_squash_denormal(FloatParts p, float_status *status)
3298	{
3299	if (p.exp == `0` && p.frac != `0`) {
3300	float_raise(float_flag_input_denormal, status);
3301	return true;
3302	}
3303
3304	return false;
3305	}
3306
3307	float16 float16_squash_input_denormal(float16 a, float_status *status)
3308	{
3309	if (status->flush_inputs_to_zero) {
3310	FloatParts p = float16_unpack_raw(a);
3311	if (parts_squash_denormal(p, status)) {
3312	return float16_set_sign(float16_zero, p.sign);
3313	}
3314	}
3315	return a;
3316	}
3317
3318	float32 float32_squash_input_denormal(float32 a, float_status *status)
3319	{
3320	if (status->flush_inputs_to_zero) {
3321	FloatParts p = float32_unpack_raw(a);
3322	if (parts_squash_denormal(p, status)) {
3323	return float32_set_sign(float32_zero, p.sign);
3324	}
3325	}
3326	return a;
3327	}
3328
3329	float64 float64_squash_input_denormal(float64 a, float_status *status)
3330	{
3331	if (status->flush_inputs_to_zero) {
3332	FloatParts p = float64_unpack_raw(a);
3333	if (parts_squash_denormal(p, status)) {
3334	return float64_set_sign(float64_zero, p.sign);
3335	}
3336	}
3337	return a;
3338	}
3339
3340	/----------------------------------------------------------------------------*
3341	\| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
3342	\| and 7, and returns the properly rounded 32-bit integer corresponding to the
3343	\| input. If `zSign' is 1, the input is negated before being converted to an
3344	\| integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
3345	\| is simply rounded to an integer, with the inexact exception raised if the
3346	\| input cannot be represented exactly as an integer. However, if the fixed-
3347	\| point input is too large, the invalid exception is raised and the largest
3348	\| positive or negative integer is returned.
3349	----------------------------------------------------------------------------/
3350
3351	static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
3352	{
3353	int8_t roundingMode;
3354	flag roundNearestEven;
3355	int8_t roundIncrement, roundBits;
3356	int32_t z;
3357
3358	roundingMode = status->float_rounding_mode;
3359	roundNearestEven = ( roundingMode == float_round_nearest_even );
3360	switch (roundingMode) {
3361	case float_round_nearest_even:
3362	case float_round_ties_away:
3363	roundIncrement = `0x40`;
3364	break;
3365	case float_round_to_zero:
3366	roundIncrement = `0`;
3367	break;
3368	case float_round_up:
3369	roundIncrement = zSign ? `0` : `0x7f`;
3370	break;
3371	case float_round_down:
3372	roundIncrement = zSign ? `0x7f` : `0`;
3373	break;
3374	case float_round_to_odd:
3375	roundIncrement = absZ & `0x80` ? `0` : `0x7f`;
3376	break;
3377	default:
3378	abort();
3379	}
3380	roundBits = absZ & `0x7F`;
3381	absZ = ( absZ + roundIncrement )>>`7`;
3382	absZ &= ~ ( ( ( roundBits ^ `0x40` ) == `0` ) & roundNearestEven );
3383	z = absZ;
3384	if ( zSign ) z = - z;
3385	if ( ( absZ>>`32` ) \|\| ( z && ( ( z < `0` ) ^ zSign ) ) ) {
3386	float_raise(float_flag_invalid, status);
3387	return zSign ? INT32_MIN : INT32_MAX;
3388	}
3389	if (roundBits) {
3390	status->float_exception_flags \|= float_flag_inexact;
3391	}
3392	return z;
3393
3394	}
3395
3396	/----------------------------------------------------------------------------*
3397	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3398	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
3399	\| and returns the properly rounded 64-bit integer corresponding to the input.
3400	\| If `zSign' is 1, the input is negated before being converted to an integer.
3401	\| Ordinarily, the fixed-point input is simply rounded to an integer, with
3402	\| the inexact exception raised if the input cannot be represented exactly as
3403	\| an integer. However, if the fixed-point input is too large, the invalid
3404	\| exception is raised and the largest positive or negative integer is
3405	\| returned.
3406	----------------------------------------------------------------------------/
3407
3408	static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
3409	float_status *status)
3410	{
3411	int8_t roundingMode;
3412	flag roundNearestEven, increment;
3413	int64_t z;
3414
3415	roundingMode = status->float_rounding_mode;
3416	roundNearestEven = ( roundingMode == float_round_nearest_even );
3417	switch (roundingMode) {
3418	case float_round_nearest_even:
3419	case float_round_ties_away:
3420	increment = ((int64_t) absZ1 < `0`);
3421	break;
3422	case float_round_to_zero:
3423	increment = `0`;
3424	break;
3425	case float_round_up:
3426	increment = !zSign && absZ1;
3427	break;
3428	case float_round_down:
3429	increment = zSign && absZ1;
3430	break;
3431	case float_round_to_odd:
3432	increment = !(absZ0 & `1`) && absZ1;
3433	break;
3434	default:
3435	abort();
3436	}
3437	if ( increment ) {
3438	++absZ0;
3439	if ( absZ0 == `0` ) goto overflow;
3440	absZ0 &= ~ ( ( (uint64_t) ( absZ1<<`1` ) == `0` ) & roundNearestEven );
3441	}
3442	z = absZ0;
3443	if ( zSign ) z = - z;
3444	if ( z && ( ( z < `0` ) ^ zSign ) ) {
3445	overflow:
3446	float_raise(float_flag_invalid, status);
3447	return zSign ? INT64_MIN : INT64_MAX;
3448	}
3449	if (absZ1) {
3450	status->float_exception_flags \|= float_flag_inexact;
3451	}
3452	return z;
3453
3454	}
3455
3456	/----------------------------------------------------------------------------*
3457	\| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
3458	\| `absZ1', with binary point between bits 63 and 64 (between the input words),
3459	\| and returns the properly rounded 64-bit unsigned integer corresponding to the
3460	\| input. Ordinarily, the fixed-point input is simply rounded to an integer,
3461	\| with the inexact exception raised if the input cannot be represented exactly
3462	\| as an integer. However, if the fixed-point input is too large, the invalid
3463	\| exception is raised and the largest unsigned integer is returned.
3464	----------------------------------------------------------------------------/
3465
3466	static int64_t roundAndPackUint64(flag zSign, uint64_t absZ0,
3467	uint64_t absZ1, float_status *status)
3468	{
3469	int8_t roundingMode;
3470	flag roundNearestEven, increment;
3471
3472	roundingMode = status->float_rounding_mode;
3473	roundNearestEven = (roundingMode == float_round_nearest_even);
3474	switch (roundingMode) {
3475	case float_round_nearest_even:
3476	case float_round_ties_away:
3477	increment = ((int64_t)absZ1 < `0`);
3478	break;
3479	case float_round_to_zero:
3480	increment = `0`;
3481	break;
3482	case float_round_up:
3483	increment = !zSign && absZ1;
3484	break;
3485	case float_round_down:
3486	increment = zSign && absZ1;
3487	break;
3488	case float_round_to_odd:
3489	increment = !(absZ0 & `1`) && absZ1;
3490	break;
3491	default:
3492	abort();
3493	}
3494	if (increment) {
3495	++absZ0;
3496	if (absZ0 == `0`) {
3497	float_raise(float_flag_invalid, status);
3498	return UINT64_MAX;
3499	}
3500	absZ0 &= ~(((uint64_t)(absZ1<<`1`) == `0`) & roundNearestEven);
3501	}
3502
3503	if (zSign && absZ0) {
3504	float_raise(float_flag_invalid, status);
3505	return `0`;
3506	}
3507
3508	if (absZ1) {
3509	status->float_exception_flags \|= float_flag_inexact;
3510	}
3511	return absZ0;
3512	}
3513
/*----------------------------------------------------------------------------
| Normalizes the subnormal single-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift so the leading set bit of aSig lands on bit 23 (8 zero bits
     * remain above it), and lower the exponent by the same amount.
     */
    shiftCount = clz32(aSig) - 8;
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;

}
3531
3532	/----------------------------------------------------------------------------*
3533	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3534	\| and significand `zSig', and returns the proper single-precision floating-
3535	\| point value corresponding to the abstract input. Ordinarily, the abstract
3536	\| value is simply rounded and packed into the single-precision format, with
3537	\| the inexact exception raised if the abstract input cannot be represented
3538	\| exactly. However, if the abstract value is too large, the overflow and
3539	\| inexact exceptions are raised and an infinity or maximal finite value is
3540	\| returned. If the abstract value is too small, the input value is rounded to
3541	\| a subnormal number, and the underflow and inexact exceptions are raised if
3542	\| the abstract input cannot be represented exactly as a subnormal single-
3543	\| precision floating-point number.
3544	\| The input significand `zSig' has its binary point between bits 30
3545	\| and 29, which is 7 bits to the left of the usual location. This shifted
3546	\| significand must be normalized or smaller. If `zSig' is not normalized,
3547	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
3548	\| and it must not require rounding. In the usual case that `zSig' is
3549	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3550	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
3551	\| Binary Floating-Point Arithmetic.
3552	----------------------------------------------------------------------------/
3553
3554	static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
3555	float_status *status)
3556	{
3557	int8_t roundingMode;
3558	flag roundNearestEven;
3559	int8_t roundIncrement, roundBits;
3560	flag isTiny;
3561
3562	roundingMode = status->float_rounding_mode;
3563	roundNearestEven = ( roundingMode == float_round_nearest_even );
3564	switch (roundingMode) {
3565	case float_round_nearest_even:
3566	case float_round_ties_away:
3567	roundIncrement = `0x40`;
3568	break;
3569	case float_round_to_zero:
3570	roundIncrement = `0`;
3571	break;
3572	case float_round_up:
3573	roundIncrement = zSign ? `0` : `0x7f`;
3574	break;
3575	case float_round_down:
3576	roundIncrement = zSign ? `0x7f` : `0`;
3577	break;
3578	case float_round_to_odd:
3579	roundIncrement = zSig & `0x80` ? `0` : `0x7f`;
3580	break;
3581	default:
3582	abort();
3583	break;
3584	}
3585	roundBits = zSig & `0x7F`;
3586	if ( `0xFD` <= (uint16_t) zExp ) {
3587	if ( ( `0xFD` < zExp )
3588	\|\| ( ( zExp == `0xFD` )
3589	&& ( (int32_t) ( zSig + roundIncrement ) < `0` ) )
3590	) {
3591	bool overflow_to_inf = roundingMode != float_round_to_odd &&
3592	roundIncrement != `0`;
3593	float_raise(float_flag_overflow \| float_flag_inexact, status);
3594	return packFloat32(zSign, `0xFF`, -!overflow_to_inf);
3595	}
3596	if ( zExp < `0` ) {
3597	if (status->flush_to_zero) {
3598	float_raise(float_flag_output_denormal, status);
3599	return packFloat32(zSign, `0`, `0`);
3600	}
3601	isTiny =
3602	(status->float_detect_tininess
3603	== float_tininess_before_rounding)
3604	\|\| ( zExp < -`1` )
3605	\|\| ( zSig + roundIncrement < `0x80000000` );
3606	shift32RightJamming( zSig, - zExp, &zSig );
3607	zExp = `0`;
3608	roundBits = zSig & `0x7F`;
3609	if (isTiny && roundBits) {
3610	float_raise(float_flag_underflow, status);
3611	}
3612	if (roundingMode == float_round_to_odd) {
3613	/*
3614	* For round-to-odd case, the roundIncrement depends on
3615	* zSig which just changed.
3616	*/
3617	roundIncrement = zSig & `0x80` ? `0` : `0x7f`;
3618	}
3619	}
3620	}
3621	if (roundBits) {
3622	status->float_exception_flags \|= float_flag_inexact;
3623	}
3624	zSig = ( zSig + roundIncrement )>>`7`;
3625	zSig &= ~ ( ( ( roundBits ^ `0x40` ) == `0` ) & roundNearestEven );
3626	if ( zSig == `0` ) zExp = `0`;
3627	return packFloat32( zSign, zExp, zSig );
3628
3629	}
3630
3631	/----------------------------------------------------------------------------*
3632	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3633	\| and significand `zSig', and returns the proper single-precision floating-
3634	\| point value corresponding to the abstract input. This routine is just like
3635	\| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
3636	\| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3637	\| floating-point exponent.
3638	----------------------------------------------------------------------------/
3639
3640	static float32
3641	normalizeRoundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
3642	float_status *status)
3643	{
3644	int8_t shiftCount;
3645
3646	shiftCount = clz32(zSig) - `1`;
3647	return roundAndPackFloat32(zSign, zExp - shiftCount, zSig<<shiftCount,
3648	status);
3649
3650	}
3651
/*----------------------------------------------------------------------------
| Normalizes the subnormal double-precision floating-point value represented
| by the denormalized significand `aSig'.  The normalized exponent and
| significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

static void
 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift so the leading set bit of aSig lands on bit 52 (11 zero bits
     * remain above it), and lower the exponent by the same amount.
     */
    shiftCount = clz64(aSig) - 11;
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;

}
3669
3670	/----------------------------------------------------------------------------*
3671	\| Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
3672	\| double-precision floating-point value, returning the result. After being
3673	\| shifted into the proper positions, the three fields are simply added
3674	\| together to form the result. This means that any integer portion of `zSig'
3675	\| will be added into the exponent. Since a properly normalized significand
3676	\| will have an integer portion equal to 1, the `zExp' input should be 1 less
3677	\| than the desired result exponent whenever `zSig' is a complete, normalized
3678	\| significand.
3679	----------------------------------------------------------------------------/
3680
3681	static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
3682	{
3683
3684	return make_float64(
3685	( ( (uint64_t) zSign )<<`63` ) + ( ( (uint64_t) zExp )<<`52` ) + zSig);
3686
3687	}
3688
3689	/----------------------------------------------------------------------------*
3690	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3691	\| and significand `zSig', and returns the proper double-precision floating-
3692	\| point value corresponding to the abstract input. Ordinarily, the abstract
3693	\| value is simply rounded and packed into the double-precision format, with
3694	\| the inexact exception raised if the abstract input cannot be represented
3695	\| exactly. However, if the abstract value is too large, the overflow and
3696	\| inexact exceptions are raised and an infinity or maximal finite value is
3697	\| returned. If the abstract value is too small, the input value is rounded to
3698	\| a subnormal number, and the underflow and inexact exceptions are raised if
3699	\| the abstract input cannot be represented exactly as a subnormal double-
3700	\| precision floating-point number.
3701	\| The input significand `zSig' has its binary point between bits 62
3702	\| and 61, which is 10 bits to the left of the usual location. This shifted
3703	\| significand must be normalized or smaller. If `zSig' is not normalized,
3704	\| `zExp' must be 0; in that case, the result returned is a subnormal number,
3705	\| and it must not require rounding. In the usual case that `zSig' is
3706	\| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
3707	\| The handling of underflow and overflow follows the IEC/IEEE Standard for
3708	\| Binary Floating-Point Arithmetic.
3709	----------------------------------------------------------------------------/
3710
3711	static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
3712	float_status *status)
3713	{
3714	int8_t roundingMode;
3715	flag roundNearestEven;
3716	int roundIncrement, roundBits;
3717	flag isTiny;
3718
3719	roundingMode = status->float_rounding_mode;
3720	roundNearestEven = ( roundingMode == float_round_nearest_even );
3721	switch (roundingMode) {
3722	case float_round_nearest_even:
3723	case float_round_ties_away:
3724	roundIncrement = `0x200`;
3725	break;
3726	case float_round_to_zero:
3727	roundIncrement = `0`;
3728	break;
3729	case float_round_up:
3730	roundIncrement = zSign ? `0` : `0x3ff`;
3731	break;
3732	case float_round_down:
3733	roundIncrement = zSign ? `0x3ff` : `0`;
3734	break;
3735	case float_round_to_odd:
3736	roundIncrement = (zSig & `0x400`) ? `0` : `0x3ff`;
3737	break;
3738	default:
3739	abort();
3740	}
3741	roundBits = zSig & `0x3FF`;
3742	if ( `0x7FD` <= (uint16_t) zExp ) {
3743	if ( ( `0x7FD` < zExp )
3744	\|\| ( ( zExp == `0x7FD` )
3745	&& ( (int64_t) ( zSig + roundIncrement ) < `0` ) )
3746	) {
3747	bool overflow_to_inf = roundingMode != float_round_to_odd &&
3748	roundIncrement != `0`;
3749	float_raise(float_flag_overflow \| float_flag_inexact, status);
3750	return packFloat64(zSign, `0x7FF`, -(!overflow_to_inf));
3751	}
3752	if ( zExp < `0` ) {
3753	if (status->flush_to_zero) {
3754	float_raise(float_flag_output_denormal, status);
3755	return packFloat64(zSign, `0`, `0`);
3756	}
3757	isTiny =
3758	(status->float_detect_tininess
3759	== float_tininess_before_rounding)
3760	\|\| ( zExp < -`1` )
3761	\|\| ( zSig + roundIncrement < UINT64_C(`0x8000000000000000`) );
3762	shift64RightJamming( zSig, - zExp, &zSig );
3763	zExp = `0`;
3764	roundBits = zSig & `0x3FF`;
3765	if (isTiny && roundBits) {
3766	float_raise(float_flag_underflow, status);
3767	}
3768	if (roundingMode == float_round_to_odd) {
3769	/*
3770	* For round-to-odd case, the roundIncrement depends on
3771	* zSig which just changed.
3772	*/
3773	roundIncrement = (zSig & `0x400`) ? `0` : `0x3ff`;
3774	}
3775	}
3776	}
3777	if (roundBits) {
3778	status->float_exception_flags \|= float_flag_inexact;
3779	}
3780	zSig = ( zSig + roundIncrement )>>`10`;
3781	zSig &= ~ ( ( ( roundBits ^ `0x200` ) == `0` ) & roundNearestEven );
3782	if ( zSig == `0` ) zExp = `0`;
3783	return packFloat64( zSign, zExp, zSig );
3784
3785	}
3786
3787	/----------------------------------------------------------------------------*
3788	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3789	\| and significand `zSig', and returns the proper double-precision floating-
3790	\| point value corresponding to the abstract input. This routine is just like
3791	\| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
3792	\| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
3793	\| floating-point exponent.
3794	----------------------------------------------------------------------------/
3795
3796	static float64
3797	normalizeRoundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
3798	float_status *status)
3799	{
3800	int8_t shiftCount;
3801
3802	shiftCount = clz64(zSig) - `1`;
3803	return roundAndPackFloat64(zSign, zExp - shiftCount, zSig<<shiftCount,
3804	status);
3805
3806	}
3807
/*----------------------------------------------------------------------------
| Normalizes the subnormal extended double-precision floating-point value
| represented by the denormalized significand `aSig'.  The normalized exponent
| and significand are stored at the locations pointed to by `zExpPtr' and
| `zSigPtr', respectively.
*----------------------------------------------------------------------------*/

void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr,
                                uint64_t *zSigPtr)
{
    int8_t shiftCount;

    /* Shift the leading set bit of aSig all the way up to bit 63 and
     * lower the exponent by the same amount.
     */
    shiftCount = clz64(aSig);
    *zSigPtr = aSig<<shiftCount;
    *zExpPtr = 1 - shiftCount;
}
3824
3825	/----------------------------------------------------------------------------*
3826	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
3827	\| and extended significand formed by the concatenation of `zSig0' and `zSig1',
3828	\| and returns the proper extended double-precision floating-point value
3829	\| corresponding to the abstract input. Ordinarily, the abstract value is
3830	\| rounded and packed into the extended double-precision format, with the
3831	\| inexact exception raised if the abstract input cannot be represented
3832	\| exactly. However, if the abstract value is too large, the overflow and
3833	\| inexact exceptions are raised and an infinity or maximal finite value is
3834	\| returned. If the abstract value is too small, the input value is rounded to
3835	\| a subnormal number, and the underflow and inexact exceptions are raised if
3836	\| the abstract input cannot be represented exactly as a subnormal extended
3837	\| double-precision floating-point number.
3838	\| If `roundingPrecision' is 32 or 64, the result is rounded to the same
3839	\| number of bits as single or double precision, respectively. Otherwise, the
3840	\| result is rounded to the full precision of the extended double-precision
3841	\| format.
3842	\| The input significand must be normalized or smaller. If the input
3843	\| significand is not normalized, `zExp' must be 0; in that case, the result
3844	\| returned is a subnormal number, and it must not require rounding. The
3845	\| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
3846	\| Floating-Point Arithmetic.
3847	----------------------------------------------------------------------------/
3848
3849	floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
3850	int32_t zExp, uint64_t zSig0, uint64_t zSig1,
3851	float_status *status)
3852	{
3853	int8_t roundingMode;
3854	flag roundNearestEven, increment, isTiny;
3855	int64_t roundIncrement, roundMask, roundBits;
3856
3857	roundingMode = status->float_rounding_mode;
3858	roundNearestEven = ( roundingMode == float_round_nearest_even );
3859	if ( roundingPrecision == `80` ) goto precision80;
3860	if ( roundingPrecision == `64` ) {
3861	roundIncrement = UINT64_C(`0x0000000000000400`);
3862	roundMask = UINT64_C(`0x00000000000007FF`);
3863	}
3864	else if ( roundingPrecision == `32` ) {
3865	roundIncrement = UINT64_C(`0x0000008000000000`);
3866	roundMask = UINT64_C(`0x000000FFFFFFFFFF`);
3867	}
3868	else {
3869	goto precision80;
3870	}
3871	zSig0 \|= ( zSig1 != `0` );
3872	switch (roundingMode) {
3873	case float_round_nearest_even:
3874	case float_round_ties_away:
3875	break;
3876	case float_round_to_zero:
3877	roundIncrement = `0`;
3878	break;
3879	case float_round_up:
3880	roundIncrement = zSign ? `0` : roundMask;
3881	break;
3882	case float_round_down:
3883	roundIncrement = zSign ? roundMask : `0`;
3884	break;
3885	default:
3886	abort();
3887	}
3888	roundBits = zSig0 & roundMask;
3889	if ( `0x7FFD` <= (uint32_t) ( zExp - `1` ) ) {
3890	if ( ( `0x7FFE` < zExp )
3891	\|\| ( ( zExp == `0x7FFE` ) && ( zSig0 + roundIncrement < zSig0 ) )
3892	) {
3893	goto overflow;
3894	}
3895	if ( zExp <= `0` ) {
3896	if (status->flush_to_zero) {
3897	float_raise(float_flag_output_denormal, status);
3898	return packFloatx80(zSign, `0`, `0`);
3899	}
3900	isTiny =
3901	(status->float_detect_tininess
3902	== float_tininess_before_rounding)
3903	\|\| ( zExp < `0` )
3904	\|\| ( zSig0 <= zSig0 + roundIncrement );
3905	shift64RightJamming( zSig0, `1` - zExp, &zSig0 );
3906	zExp = `0`;
3907	roundBits = zSig0 & roundMask;
3908	if (isTiny && roundBits) {
3909	float_raise(float_flag_underflow, status);
3910	}
3911	if (roundBits) {
3912	status->float_exception_flags \|= float_flag_inexact;
3913	}
3914	zSig0 += roundIncrement;
3915	if ( (int64_t) zSig0 < `0` ) zExp = `1`;
3916	roundIncrement = roundMask + `1`;
3917	if ( roundNearestEven && ( roundBits<<`1` == roundIncrement ) ) {
3918	roundMask \|= roundIncrement;
3919	}
3920	zSig0 &= ~ roundMask;
3921	return packFloatx80( zSign, zExp, zSig0 );
3922	}
3923	}
3924	if (roundBits) {
3925	status->float_exception_flags \|= float_flag_inexact;
3926	}
3927	zSig0 += roundIncrement;
3928	if ( zSig0 < roundIncrement ) {
3929	++zExp;
3930	zSig0 = UINT64_C(`0x8000000000000000`);
3931	}
3932	roundIncrement = roundMask + `1`;
3933	if ( roundNearestEven && ( roundBits<<`1` == roundIncrement ) ) {
3934	roundMask \|= roundIncrement;
3935	}
3936	zSig0 &= ~ roundMask;
3937	if ( zSig0 == `0` ) zExp = `0`;
3938	return packFloatx80( zSign, zExp, zSig0 );
3939	precision80:
3940	switch (roundingMode) {
3941	case float_round_nearest_even:
3942	case float_round_ties_away:
3943	increment = ((int64_t)zSig1 < `0`);
3944	break;
3945	case float_round_to_zero:
3946	increment = `0`;
3947	break;
3948	case float_round_up:
3949	increment = !zSign && zSig1;
3950	break;
3951	case float_round_down:
3952	increment = zSign && zSig1;
3953	break;
3954	default:
3955	abort();
3956	}
3957	if ( `0x7FFD` <= (uint32_t) ( zExp - `1` ) ) {
3958	if ( ( `0x7FFE` < zExp )
3959	\|\| ( ( zExp == `0x7FFE` )
3960	&& ( zSig0 == UINT64_C(`0xFFFFFFFFFFFFFFFF`) )
3961	&& increment
3962	)
3963	) {
3964	roundMask = `0`;
3965	overflow:
3966	float_raise(float_flag_overflow \| float_flag_inexact, status);
3967	if ( ( roundingMode == float_round_to_zero )
3968	\|\| ( zSign && ( roundingMode == float_round_up ) )
3969	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
3970	) {
3971	return packFloatx80( zSign, `0x7FFE`, ~ roundMask );
3972	}
3973	return packFloatx80(zSign,
3974	floatx80_infinity_high,
3975	floatx80_infinity_low);
3976	}
3977	if ( zExp <= `0` ) {
3978	isTiny =
3979	(status->float_detect_tininess
3980	== float_tininess_before_rounding)
3981	\|\| ( zExp < `0` )
3982	\|\| ! increment
3983	\|\| ( zSig0 < UINT64_C(`0xFFFFFFFFFFFFFFFF`) );
3984	shift64ExtraRightJamming( zSig0, zSig1, `1` - zExp, &zSig0, &zSig1 );
3985	zExp = `0`;
3986	if (isTiny && zSig1) {
3987	float_raise(float_flag_underflow, status);
3988	}
3989	if (zSig1) {
3990	status->float_exception_flags \|= float_flag_inexact;
3991	}
3992	switch (roundingMode) {
3993	case float_round_nearest_even:
3994	case float_round_ties_away:
3995	increment = ((int64_t)zSig1 < `0`);
3996	break;
3997	case float_round_to_zero:
3998	increment = `0`;
3999	break;
4000	case float_round_up:
4001	increment = !zSign && zSig1;
4002	break;
4003	case float_round_down:
4004	increment = zSign && zSig1;
4005	break;
4006	default:
4007	abort();
4008	}
4009	if ( increment ) {
4010	++zSig0;
4011	zSig0 &=
4012	~ ( ( (uint64_t) ( zSig1<<`1` ) == `0` ) & roundNearestEven );
4013	if ( (int64_t) zSig0 < `0` ) zExp = `1`;
4014	}
4015	return packFloatx80( zSign, zExp, zSig0 );
4016	}
4017	}
4018	if (zSig1) {
4019	status->float_exception_flags \|= float_flag_inexact;
4020	}
4021	if ( increment ) {
4022	++zSig0;
4023	if ( zSig0 == `0` ) {
4024	++zExp;
4025	zSig0 = UINT64_C(`0x8000000000000000`);
4026	}
4027	else {
4028	zSig0 &= ~ ( ( (uint64_t) ( zSig1<<`1` ) == `0` ) & roundNearestEven );
4029	}
4030	}
4031	else {
4032	if ( zSig0 == `0` ) zExp = `0`;
4033	}
4034	return packFloatx80( zSign, zExp, zSig0 );
4035
4036	}
4037
4038	/----------------------------------------------------------------------------*
4039	\| Takes an abstract floating-point value having sign `zSign', exponent
4040	\| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
4041	\| and returns the proper extended double-precision floating-point value
4042	\| corresponding to the abstract input. This routine is just like
4043	\| `roundAndPackFloatx80' except that the input significand does not have to be
4044	\| normalized.
4045	----------------------------------------------------------------------------/
4046
4047	floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
4048	flag zSign, int32_t zExp,
4049	uint64_t zSig0, uint64_t zSig1,
4050	float_status *status)
4051	{
4052	int8_t shiftCount;
4053
4054	if ( zSig0 == `0` ) {
4055	zSig0 = zSig1;
4056	zSig1 = `0`;
4057	zExp -= `64`;
4058	}
4059	shiftCount = clz64(zSig0);
4060	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4061	zExp -= shiftCount;
4062	return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
4063	zSig0, zSig1, status);
4064
4065	}
4066
4067	/----------------------------------------------------------------------------*
4068	\| Returns the least-significant 64 fraction bits of the quadruple-precision
4069	\| floating-point value `a'.
4070	----------------------------------------------------------------------------/
4071
4072	static inline uint64_t extractFloat128Frac1( float128 a )
4073	{
4074
4075	return a.low;
4076
4077	}
4078
4079	/----------------------------------------------------------------------------*
4080	\| Returns the most-significant 48 fraction bits of the quadruple-precision
4081	\| floating-point value `a'.
4082	----------------------------------------------------------------------------/
4083
4084	static inline uint64_t extractFloat128Frac0( float128 a )
4085	{
4086
4087	return a.high & UINT64_C(`0x0000FFFFFFFFFFFF`);
4088
4089	}
4090
4091	/----------------------------------------------------------------------------*
4092	\| Returns the exponent bits of the quadruple-precision floating-point value
4093	\| `a'.
4094	----------------------------------------------------------------------------/
4095
4096	static inline int32_t extractFloat128Exp( float128 a )
4097	{
4098
4099	return ( a.high>>`48` ) & `0x7FFF`;
4100
4101	}
4102
4103	/----------------------------------------------------------------------------*
4104	\| Returns the sign bit of the quadruple-precision floating-point value `a'.
4105	----------------------------------------------------------------------------/
4106
4107	static inline flag extractFloat128Sign( float128 a )
4108	{
4109
4110	return a.high>>`63`;
4111
4112	}
4113
/*----------------------------------------------------------------------------
| Normalizes the subnormal quadruple-precision floating-point value whose
| denormalized significand is the concatenation of `aSig0' and `aSig1'.
| The significand is shifted so that its leading one lands in bit 48 of the
| high word.  The normalized exponent is stored at the location pointed to
| by `zExpPtr', the most significant 49 bits of the normalized significand
| at `zSig0Ptr', and the least significant 64 bits at `zSig1Ptr'.
*----------------------------------------------------------------------------*/

static void
normalizeFloat128Subnormal(
    uint64_t aSig0,
    uint64_t aSig1,
    int32_t *zExpPtr,
    uint64_t *zSig0Ptr,
    uint64_t *zSig1Ptr
)
{
    int8_t shift;

    /* Common case: the high word already holds the leading one. */
    if (aSig0 != 0) {
        shift = clz64(aSig0) - 15;
        shortShift128Left(aSig0, aSig1, shift, zSig0Ptr, zSig1Ptr);
        *zExpPtr = 1 - shift;
        return;
    }

    /*
     * Only the low word is populated.  `shift' is where its leading one has
     * to move relative to bit 48; a negative value means the low word has
     * to be split across both output words.
     */
    shift = clz64(aSig1) - 15;
    if (shift >= 0) {
        *zSig0Ptr = aSig1 << shift;
        *zSig1Ptr = 0;
    } else {
        *zSig0Ptr = aSig1 >> (-shift);
        *zSig1Ptr = aSig1 << (shift & 63);
    }
    *zExpPtr = -shift - 63;
}
4154
4155	/----------------------------------------------------------------------------*
4156	\| Packs the sign `zSign', the exponent `zExp', and the significand formed
4157	\| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
4158	\| floating-point value, returning the result. After being shifted into the
4159	\| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
4160	\| added together to form the most significant 32 bits of the result. This
4161	\| means that any integer portion of `zSig0' will be added into the exponent.
4162	\| Since a properly normalized significand will have an integer portion equal
4163	\| to 1, the `zExp' input should be 1 less than the desired result exponent
4164	\| whenever `zSig0' and `zSig1' concatenated form a complete, normalized
4165	\| significand.
4166	----------------------------------------------------------------------------/
4167
4168	static inline float128
4169	packFloat128( flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1 )
4170	{
4171	float128 z;
4172
4173	z.low = zSig1;
4174	z.high = ( ( (uint64_t) zSign )<<`63` ) + ( ( (uint64_t) zExp )<<`48` ) + zSig0;
4175	return z;
4176
4177	}
4178
4179	/----------------------------------------------------------------------------*
4180	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4181	\| and extended significand formed by the concatenation of `zSig0', `zSig1',
4182	\| and `zSig2', and returns the proper quadruple-precision floating-point value
4183	\| corresponding to the abstract input. Ordinarily, the abstract value is
4184	\| simply rounded and packed into the quadruple-precision format, with the
4185	\| inexact exception raised if the abstract input cannot be represented
4186	\| exactly. However, if the abstract value is too large, the overflow and
4187	\| inexact exceptions are raised and an infinity or maximal finite value is
4188	\| returned. If the abstract value is too small, the input value is rounded to
4189	\| a subnormal number, and the underflow and inexact exceptions are raised if
4190	\| the abstract input cannot be represented exactly as a subnormal quadruple-
4191	\| precision floating-point number.
4192	\| The input significand must be normalized or smaller. If the input
4193	\| significand is not normalized, `zExp' must be 0; in that case, the result
4194	\| returned is a subnormal number, and it must not require rounding. In the
4195	\| usual case that the input significand is normalized, `zExp' must be 1 less
4196	\| than the ``true'' floating-point exponent. The handling of underflow and
4197	\| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4198	----------------------------------------------------------------------------/
4199
4200	static float128 roundAndPackFloat128(flag zSign, int32_t zExp,
4201	uint64_t zSig0, uint64_t zSig1,
4202	uint64_t zSig2, float_status *status)
4203	{
4204	int8_t roundingMode;
4205	flag roundNearestEven, increment, isTiny;
4206
4207	roundingMode = status->float_rounding_mode;
4208	roundNearestEven = ( roundingMode == float_round_nearest_even );
4209	switch (roundingMode) {
4210	case float_round_nearest_even:
4211	case float_round_ties_away:
4212	increment = ((int64_t)zSig2 < `0`);
4213	break;
4214	case float_round_to_zero:
4215	increment = `0`;
4216	break;
4217	case float_round_up:
4218	increment = !zSign && zSig2;
4219	break;
4220	case float_round_down:
4221	increment = zSign && zSig2;
4222	break;
4223	case float_round_to_odd:
4224	increment = !(zSig1 & `0x1`) && zSig2;
4225	break;
4226	default:
4227	abort();
4228	}
4229	if ( `0x7FFD` <= (uint32_t) zExp ) {
4230	if ( ( `0x7FFD` < zExp )
4231	\|\| ( ( zExp == `0x7FFD` )
4232	&& eq128(
4233	UINT64_C(`0x0001FFFFFFFFFFFF`),
4234	UINT64_C(`0xFFFFFFFFFFFFFFFF`),
4235	zSig0,
4236	zSig1
4237	)
4238	&& increment
4239	)
4240	) {
4241	float_raise(float_flag_overflow \| float_flag_inexact, status);
4242	if ( ( roundingMode == float_round_to_zero )
4243	\|\| ( zSign && ( roundingMode == float_round_up ) )
4244	\|\| ( ! zSign && ( roundingMode == float_round_down ) )
4245	\|\| (roundingMode == float_round_to_odd)
4246	) {
4247	return
4248	packFloat128(
4249	zSign,
4250	`0x7FFE`,
4251	UINT64_C(`0x0000FFFFFFFFFFFF`),
4252	UINT64_C(`0xFFFFFFFFFFFFFFFF`)
4253	);
4254	}
4255	return packFloat128( zSign, `0x7FFF`, `0`, `0` );
4256	}
4257	if ( zExp < `0` ) {
4258	if (status->flush_to_zero) {
4259	float_raise(float_flag_output_denormal, status);
4260	return packFloat128(zSign, `0`, `0`, `0`);
4261	}
4262	isTiny =
4263	(status->float_detect_tininess
4264	== float_tininess_before_rounding)
4265	\|\| ( zExp < -`1` )
4266	\|\| ! increment
4267	\|\| lt128(
4268	zSig0,
4269	zSig1,
4270	UINT64_C(`0x0001FFFFFFFFFFFF`),
4271	UINT64_C(`0xFFFFFFFFFFFFFFFF`)
4272	);
4273	shift128ExtraRightJamming(
4274	zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
4275	zExp = `0`;
4276	if (isTiny && zSig2) {
4277	float_raise(float_flag_underflow, status);
4278	}
4279	switch (roundingMode) {
4280	case float_round_nearest_even:
4281	case float_round_ties_away:
4282	increment = ((int64_t)zSig2 < `0`);
4283	break;
4284	case float_round_to_zero:
4285	increment = `0`;
4286	break;
4287	case float_round_up:
4288	increment = !zSign && zSig2;
4289	break;
4290	case float_round_down:
4291	increment = zSign && zSig2;
4292	break;
4293	case float_round_to_odd:
4294	increment = !(zSig1 & `0x1`) && zSig2;
4295	break;
4296	default:
4297	abort();
4298	}
4299	}
4300	}
4301	if (zSig2) {
4302	status->float_exception_flags \|= float_flag_inexact;
4303	}
4304	if ( increment ) {
4305	add128( zSig0, zSig1, `0`, `1`, &zSig0, &zSig1 );
4306	zSig1 &= ~ ( ( zSig2 + zSig2 == `0` ) & roundNearestEven );
4307	}
4308	else {
4309	if ( ( zSig0 \| zSig1 ) == `0` ) zExp = `0`;
4310	}
4311	return packFloat128( zSign, zExp, zSig0, zSig1 );
4312
4313	}
4314
4315	/----------------------------------------------------------------------------*
4316	\| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
4317	\| and significand formed by the concatenation of `zSig0' and `zSig1', and
4318	\| returns the proper quadruple-precision floating-point value corresponding
4319	\| to the abstract input. This routine is just like `roundAndPackFloat128'
4320	\| except that the input significand has fewer bits and does not have to be
4321	\| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
4322	\| point exponent.
4323	----------------------------------------------------------------------------/
4324
4325	static float128 normalizeRoundAndPackFloat128(flag zSign, int32_t zExp,
4326	uint64_t zSig0, uint64_t zSig1,
4327	float_status *status)
4328	{
4329	int8_t shiftCount;
4330	uint64_t zSig2;
4331
4332	if ( zSig0 == `0` ) {
4333	zSig0 = zSig1;
4334	zSig1 = `0`;
4335	zExp -= `64`;
4336	}
4337	shiftCount = clz64(zSig0) - `15`;
4338	if ( `0` <= shiftCount ) {
4339	zSig2 = `0`;
4340	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4341	}
4342	else {
4343	shift128ExtraRightJamming(
4344	zSig0, zSig1, `0`, - shiftCount, &zSig0, &zSig1, &zSig2 );
4345	}
4346	zExp -= shiftCount;
4347	return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
4348
4349	}
4350
4351
4352	/----------------------------------------------------------------------------*
4353	\| Returns the result of converting the 32-bit two's complement integer `a'
4354	\| to the extended double-precision floating-point format. The conversion
4355	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4356	\| Arithmetic.
4357	----------------------------------------------------------------------------/
4358
4359	floatx80 int32_to_floatx80(int32_t a, float_status *status)
4360	{
4361	flag zSign;
4362	uint32_t absA;
4363	int8_t shiftCount;
4364	uint64_t zSig;
4365
4366	if ( a == `0` ) return packFloatx80( `0`, `0`, `0` );
4367	zSign = ( a < `0` );
4368	absA = zSign ? - a : a;
4369	shiftCount = clz32(absA) + `32`;
4370	zSig = absA;
4371	return packFloatx80( zSign, `0x403E` - shiftCount, zSig<<shiftCount );
4372
4373	}
4374
4375	/----------------------------------------------------------------------------*
4376	\| Returns the result of converting the 32-bit two's complement integer `a' to
4377	\| the quadruple-precision floating-point format. The conversion is performed
4378	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4379	----------------------------------------------------------------------------/
4380
4381	float128 int32_to_float128(int32_t a, float_status *status)
4382	{
4383	flag zSign;
4384	uint32_t absA;
4385	int8_t shiftCount;
4386	uint64_t zSig0;
4387
4388	if ( a == `0` ) return packFloat128( `0`, `0`, `0`, `0` );
4389	zSign = ( a < `0` );
4390	absA = zSign ? - a : a;
4391	shiftCount = clz32(absA) + `17`;
4392	zSig0 = absA;
4393	return packFloat128( zSign, `0x402E` - shiftCount, zSig0<<shiftCount, `0` );
4394
4395	}
4396
4397	/----------------------------------------------------------------------------*
4398	\| Returns the result of converting the 64-bit two's complement integer `a'
4399	\| to the extended double-precision floating-point format. The conversion
4400	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4401	\| Arithmetic.
4402	----------------------------------------------------------------------------/
4403
4404	floatx80 int64_to_floatx80(int64_t a, float_status *status)
4405	{
4406	flag zSign;
4407	uint64_t absA;
4408	int8_t shiftCount;
4409
4410	if ( a == `0` ) return packFloatx80( `0`, `0`, `0` );
4411	zSign = ( a < `0` );
4412	absA = zSign ? - a : a;
4413	shiftCount = clz64(absA);
4414	return packFloatx80( zSign, `0x403E` - shiftCount, absA<<shiftCount );
4415
4416	}
4417
4418	/----------------------------------------------------------------------------*
4419	\| Returns the result of converting the 64-bit two's complement integer `a' to
4420	\| the quadruple-precision floating-point format. The conversion is performed
4421	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4422	----------------------------------------------------------------------------/
4423
4424	float128 int64_to_float128(int64_t a, float_status *status)
4425	{
4426	flag zSign;
4427	uint64_t absA;
4428	int8_t shiftCount;
4429	int32_t zExp;
4430	uint64_t zSig0, zSig1;
4431
4432	if ( a == `0` ) return packFloat128( `0`, `0`, `0`, `0` );
4433	zSign = ( a < `0` );
4434	absA = zSign ? - a : a;
4435	shiftCount = clz64(absA) + `49`;
4436	zExp = `0x406E` - shiftCount;
4437	if ( `64` <= shiftCount ) {
4438	zSig1 = `0`;
4439	zSig0 = absA;
4440	shiftCount -= `64`;
4441	}
4442	else {
4443	zSig1 = absA;
4444	zSig0 = `0`;
4445	}
4446	shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
4447	return packFloat128( zSign, zExp, zSig0, zSig1 );
4448
4449	}
4450
4451	/----------------------------------------------------------------------------*
4452	\| Returns the result of converting the 64-bit unsigned integer `a'
4453	\| to the quadruple-precision floating-point format. The conversion is performed
4454	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4455	----------------------------------------------------------------------------/
4456
4457	float128 uint64_to_float128(uint64_t a, float_status *status)
4458	{
4459	if (a == `0`) {
4460	return float128_zero;
4461	}
4462	return normalizeRoundAndPackFloat128(`0`, `0x406E`, `0`, a, status);
4463	}
4464
4465	/----------------------------------------------------------------------------*
4466	\| Returns the result of converting the single-precision floating-point value
4467	\| `a' to the extended double-precision floating-point format. The conversion
4468	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4469	\| Arithmetic.
4470	----------------------------------------------------------------------------/
4471
4472	floatx80 float32_to_floatx80(float32 a, float_status *status)
4473	{
4474	flag aSign;
4475	int aExp;
4476	uint32_t aSig;
4477
4478	a = float32_squash_input_denormal(a, status);
4479	aSig = extractFloat32Frac( a );
4480	aExp = extractFloat32Exp( a );
4481	aSign = extractFloat32Sign( a );
4482	if ( aExp == `0xFF` ) {
4483	if (aSig) {
4484	return commonNaNToFloatx80(float32ToCommonNaN(a, status), status);
4485	}
4486	return packFloatx80(aSign,
4487	floatx80_infinity_high,
4488	floatx80_infinity_low);
4489	}
4490	if ( aExp == `0` ) {
4491	if ( aSig == `0` ) return packFloatx80( aSign, `0`, `0` );
4492	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4493	}
4494	aSig \|= `0x00800000`;
4495	return packFloatx80( aSign, aExp + `0x3F80`, ( (uint64_t) aSig )<<`40` );
4496
4497	}
4498
4499	/----------------------------------------------------------------------------*
4500	\| Returns the result of converting the single-precision floating-point value
4501	\| `a' to the double-precision floating-point format. The conversion is
4502	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
4503	\| Arithmetic.
4504	----------------------------------------------------------------------------/
4505
4506	float128 float32_to_float128(float32 a, float_status *status)
4507	{
4508	flag aSign;
4509	int aExp;
4510	uint32_t aSig;
4511
4512	a = float32_squash_input_denormal(a, status);
4513	aSig = extractFloat32Frac( a );
4514	aExp = extractFloat32Exp( a );
4515	aSign = extractFloat32Sign( a );
4516	if ( aExp == `0xFF` ) {
4517	if (aSig) {
4518	return commonNaNToFloat128(float32ToCommonNaN(a, status), status);
4519	}
4520	return packFloat128( aSign, `0x7FFF`, `0`, `0` );
4521	}
4522	if ( aExp == `0` ) {
4523	if ( aSig == `0` ) return packFloat128( aSign, `0`, `0`, `0` );
4524	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4525	--aExp;
4526	}
4527	return packFloat128( aSign, aExp + `0x3F80`, ( (uint64_t) aSig )<<`25`, `0` );
4528
4529	}
4530
4531	/----------------------------------------------------------------------------*
4532	\| Returns the remainder of the single-precision floating-point value `a'
4533	\| with respect to the corresponding value `b'. The operation is performed
4534	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4535	----------------------------------------------------------------------------/
4536
4537	float32 float32_rem(float32 a, float32 b, float_status *status)
4538	{
4539	flag aSign, zSign;
4540	int aExp, bExp, expDiff;
4541	uint32_t aSig, bSig;
4542	uint32_t q;
4543	uint64_t aSig64, bSig64, q64;
4544	uint32_t alternateASig;
4545	int32_t sigMean;
4546	a = float32_squash_input_denormal(a, status);
4547	b = float32_squash_input_denormal(b, status);
4548
4549	aSig = extractFloat32Frac( a );
4550	aExp = extractFloat32Exp( a );
4551	aSign = extractFloat32Sign( a );
4552	bSig = extractFloat32Frac( b );
4553	bExp = extractFloat32Exp( b );
4554	if ( aExp == `0xFF` ) {
4555	if ( aSig \|\| ( ( bExp == `0xFF` ) && bSig ) ) {
4556	return propagateFloat32NaN(a, b, status);
4557	}
4558	float_raise(float_flag_invalid, status);
4559	return float32_default_nan(status);
4560	}
4561	if ( bExp == `0xFF` ) {
4562	if (bSig) {
4563	return propagateFloat32NaN(a, b, status);
4564	}
4565	return a;
4566	}
4567	if ( bExp == `0` ) {
4568	if ( bSig == `0` ) {
4569	float_raise(float_flag_invalid, status);
4570	return float32_default_nan(status);
4571	}
4572	normalizeFloat32Subnormal( bSig, &bExp, &bSig );
4573	}
4574	if ( aExp == `0` ) {
4575	if ( aSig == `0` ) return a;
4576	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4577	}
4578	expDiff = aExp - bExp;
4579	aSig \|= `0x00800000`;
4580	bSig \|= `0x00800000`;
4581	if ( expDiff < `32` ) {
4582	aSig <<= `8`;
4583	bSig <<= `8`;
4584	if ( expDiff < `0` ) {
4585	if ( expDiff < -`1` ) return a;
4586	aSig >>= `1`;
4587	}
4588	q = ( bSig <= aSig );
4589	if ( q ) aSig -= bSig;
4590	if ( `0` < expDiff ) {
4591	q = ( ( (uint64_t) aSig )<<`32` ) / bSig;
4592	q >>= `32` - expDiff;
4593	bSig >>= `2`;
4594	aSig = ( ( aSig>>`1` )<<( expDiff - `1` ) ) - bSig * q;
4595	}
4596	else {
4597	aSig >>= `2`;
4598	bSig >>= `2`;
4599	}
4600	}
4601	else {
4602	if ( bSig <= aSig ) aSig -= bSig;
4603	aSig64 = ( (uint64_t) aSig )<<`40`;
4604	bSig64 = ( (uint64_t) bSig )<<`40`;
4605	expDiff -= `64`;
4606	while ( `0` < expDiff ) {
4607	q64 = estimateDiv128To64( aSig64, `0`, bSig64 );
4608	q64 = ( `2` < q64 ) ? q64 - `2` : `0`;
4609	aSig64 = - ( ( bSig * q64 )<<`38` );
4610	expDiff -= `62`;
4611	}
4612	expDiff += `64`;
4613	q64 = estimateDiv128To64( aSig64, `0`, bSig64 );
4614	q64 = ( `2` < q64 ) ? q64 - `2` : `0`;
4615	q = q64>>( `64` - expDiff );
4616	bSig <<= `6`;
4617	aSig = ( ( aSig64>>`33` )<<( expDiff - `1` ) ) - bSig * q;
4618	}
4619	do {
4620	alternateASig = aSig;
4621	++q;
4622	aSig -= bSig;
4623	} while ( `0` <= (int32_t) aSig );
4624	sigMean = aSig + alternateASig;
4625	if ( ( sigMean < `0` ) \|\| ( ( sigMean == `0` ) && ( q & `1` ) ) ) {
4626	aSig = alternateASig;
4627	}
4628	zSign = ( (int32_t) aSig < `0` );
4629	if ( zSign ) aSig = - aSig;
4630	return normalizeRoundAndPackFloat32(aSign ^ zSign, bExp, aSig, status);
4631	}
4632
4633
4634
4635	/----------------------------------------------------------------------------*
4636	\| Returns the binary exponential of the single-precision floating-point value
4637	\| `a'. The operation is performed according to the IEC/IEEE Standard for
4638	\| Binary Floating-Point Arithmetic.
4639	\|
4640	\| Uses the following identities:
4641	\|
4642	\| 1. -------------------------------------------------------------------------
4643	\| x xln(2)*
4644	\| 2 = e
4645	\|
4646	\| 2. -------------------------------------------------------------------------
4647	\| 2 3 4 5 n
4648	\| x x x x x x x
4649	\| e = 1 + --- + --- + --- + --- + --- + ... + --- + ...
4650	\| 1! 2! 3! 4! 5! n!
4651	----------------------------------------------------------------------------/
4652
4653	static const float64 float32_exp2_coefficients[`15`] =
4654	{
4655	const_float64( `0x3ff0000000000000ll` ), / 1 /
4656	const_float64( `0x3fe0000000000000ll` ), / 2 /
4657	const_float64( `0x3fc5555555555555ll` ), / 3 /
4658	const_float64( `0x3fa5555555555555ll` ), / 4 /
4659	const_float64( `0x3f81111111111111ll` ), / 5 /
4660	const_float64( `0x3f56c16c16c16c17ll` ), / 6 /
4661	const_float64( `0x3f2a01a01a01a01all` ), / 7 /
4662	const_float64( `0x3efa01a01a01a01all` ), / 8 /
4663	const_float64( `0x3ec71de3a556c734ll` ), / 9 /
4664	const_float64( `0x3e927e4fb7789f5cll` ), / 10 /
4665	const_float64( `0x3e5ae64567f544e4ll` ), / 11 /
4666	const_float64( `0x3e21eed8eff8d898ll` ), / 12 /
4667	const_float64( `0x3de6124613a86d09ll` ), / 13 /
4668	const_float64( `0x3da93974a8c07c9dll` ), / 14 /
4669	const_float64( `0x3d6ae7f3e733b81fll` ), / 15 /
4670	};
4671
4672	float32 float32_exp2(float32 a, float_status *status)
4673	{
4674	flag aSign;
4675	int aExp;
4676	uint32_t aSig;
4677	float64 r, x, xn;
4678	int i;
4679	a = float32_squash_input_denormal(a, status);
4680
4681	aSig = extractFloat32Frac( a );
4682	aExp = extractFloat32Exp( a );
4683	aSign = extractFloat32Sign( a );
4684
4685	if ( aExp == `0xFF`) {
4686	if (aSig) {
4687	return propagateFloat32NaN(a, float32_zero, status);
4688	}
4689	return (aSign) ? float32_zero : a;
4690	}
4691	if (aExp == `0`) {
4692	if (aSig == `0`) return float32_one;
4693	}
4694
4695	float_raise(float_flag_inexact, status);
4696
4697	/ ******************************* /
4698	/ using float64 for approximation /
4699	/ ******************************* /
4700	x = float32_to_float64(a, status);
4701	x = float64_mul(x, float64_ln2, status);
4702
4703	xn = x;
4704	r = float64_one;
4705	for (i = `0` ; i < `15` ; i++) {
4706	float64 f;
4707
4708	f = float64_mul(xn, float32_exp2_coefficients[i], status);
4709	r = float64_add(r, f, status);
4710
4711	xn = float64_mul(xn, x, status);
4712	}
4713
4714	return float64_to_float32(r, status);
4715	}
4716
4717	/----------------------------------------------------------------------------*
4718	\| Returns the binary log of the single-precision floating-point value `a'.
4719	\| The operation is performed according to the IEC/IEEE Standard for Binary
4720	\| Floating-Point Arithmetic.
4721	----------------------------------------------------------------------------/
4722	float32 float32_log2(float32 a, float_status *status)
4723	{
4724	flag aSign, zSign;
4725	int aExp;
4726	uint32_t aSig, zSig, i;
4727
4728	a = float32_squash_input_denormal(a, status);
4729	aSig = extractFloat32Frac( a );
4730	aExp = extractFloat32Exp( a );
4731	aSign = extractFloat32Sign( a );
4732
4733	if ( aExp == `0` ) {
4734	if ( aSig == `0` ) return packFloat32( `1`, `0xFF`, `0` );
4735	normalizeFloat32Subnormal( aSig, &aExp, &aSig );
4736	}
4737	if ( aSign ) {
4738	float_raise(float_flag_invalid, status);
4739	return float32_default_nan(status);
4740	}
4741	if ( aExp == `0xFF` ) {
4742	if (aSig) {
4743	return propagateFloat32NaN(a, float32_zero, status);
4744	}
4745	return a;
4746	}
4747
4748	aExp -= `0x7F`;
4749	aSig \|= `0x00800000`;
4750	zSign = aExp < `0`;
4751	zSig = aExp << `23`;
4752
4753	for (i = `1` << `22`; i > `0`; i >>= `1`) {
4754	aSig = ( (uint64_t)aSig * aSig ) >> `23`;
4755	if ( aSig & `0x01000000` ) {
4756	aSig >>= `1`;
4757	zSig \|= i;
4758	}
4759	}
4760
4761	if ( zSign )
4762	zSig = -zSig;
4763
4764	return normalizeRoundAndPackFloat32(zSign, `0x85`, zSig, status);
4765	}
4766
4767	/----------------------------------------------------------------------------*
4768	\| Returns 1 if the single-precision floating-point value `a' is equal to
4769	\| the corresponding value `b', and 0 otherwise. The invalid exception is
4770	\| raised if either operand is a NaN. Otherwise, the comparison is performed
4771	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4772	----------------------------------------------------------------------------/
4773
4774	int float32_eq(float32 a, float32 b, float_status *status)
4775	{
4776	uint32_t av, bv;
4777	a = float32_squash_input_denormal(a, status);
4778	b = float32_squash_input_denormal(b, status);
4779
4780	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4781	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4782	) {
4783	float_raise(float_flag_invalid, status);
4784	return `0`;
4785	}
4786	av = float32_val(a);
4787	bv = float32_val(b);
4788	return ( av == bv ) \|\| ( (uint32_t) ( ( av \| bv )<<`1` ) == `0` );
4789	}
4790
4791	/----------------------------------------------------------------------------*
4792	\| Returns 1 if the single-precision floating-point value `a' is less than
4793	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
4794	\| exception is raised if either operand is a NaN. The comparison is performed
4795	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4796	----------------------------------------------------------------------------/
4797
4798	int float32_le(float32 a, float32 b, float_status *status)
4799	{
4800	flag aSign, bSign;
4801	uint32_t av, bv;
4802	a = float32_squash_input_denormal(a, status);
4803	b = float32_squash_input_denormal(b, status);
4804
4805	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4806	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4807	) {
4808	float_raise(float_flag_invalid, status);
4809	return `0`;
4810	}
4811	aSign = extractFloat32Sign( a );
4812	bSign = extractFloat32Sign( b );
4813	av = float32_val(a);
4814	bv = float32_val(b);
4815	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<`1` ) == `0` );
4816	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
4817
4818	}
4819
4820	/----------------------------------------------------------------------------*
4821	\| Returns 1 if the single-precision floating-point value `a' is less than
4822	\| the corresponding value `b', and 0 otherwise. The invalid exception is
4823	\| raised if either operand is a NaN. The comparison is performed according
4824	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4825	----------------------------------------------------------------------------/
4826
4827	int float32_lt(float32 a, float32 b, float_status *status)
4828	{
4829	flag aSign, bSign;
4830	uint32_t av, bv;
4831	a = float32_squash_input_denormal(a, status);
4832	b = float32_squash_input_denormal(b, status);
4833
4834	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4835	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4836	) {
4837	float_raise(float_flag_invalid, status);
4838	return `0`;
4839	}
4840	aSign = extractFloat32Sign( a );
4841	bSign = extractFloat32Sign( b );
4842	av = float32_val(a);
4843	bv = float32_val(b);
4844	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<`1` ) != `0` );
4845	return ( av != bv ) && ( aSign ^ ( av < bv ) );
4846
4847	}
4848
4849	/----------------------------------------------------------------------------*
4850	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4851	\| be compared, and 0 otherwise. The invalid exception is raised if either
4852	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
4853	\| Standard for Binary Floating-Point Arithmetic.
4854	----------------------------------------------------------------------------/
4855
4856	int float32_unordered(float32 a, float32 b, float_status *status)
4857	{
4858	a = float32_squash_input_denormal(a, status);
4859	b = float32_squash_input_denormal(b, status);
4860
4861	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4862	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4863	) {
4864	float_raise(float_flag_invalid, status);
4865	return `1`;
4866	}
4867	return `0`;
4868	}
4869
4870	/----------------------------------------------------------------------------*
4871	\| Returns 1 if the single-precision floating-point value `a' is equal to
4872	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4873	\| exception. The comparison is performed according to the IEC/IEEE Standard
4874	\| for Binary Floating-Point Arithmetic.
4875	----------------------------------------------------------------------------/
4876
4877	int float32_eq_quiet(float32 a, float32 b, float_status *status)
4878	{
4879	a = float32_squash_input_denormal(a, status);
4880	b = float32_squash_input_denormal(b, status);
4881
4882	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4883	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4884	) {
4885	if (float32_is_signaling_nan(a, status)
4886	\|\| float32_is_signaling_nan(b, status)) {
4887	float_raise(float_flag_invalid, status);
4888	}
4889	return `0`;
4890	}
4891	return ( float32_val(a) == float32_val(b) ) \|\|
4892	( (uint32_t) ( ( float32_val(a) \| float32_val(b) )<<`1` ) == `0` );
4893	}
4894
4895	/----------------------------------------------------------------------------*
4896	\| Returns 1 if the single-precision floating-point value `a' is less than or
4897	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
4898	\| cause an exception. Otherwise, the comparison is performed according to the
4899	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4900	----------------------------------------------------------------------------/
4901
4902	int float32_le_quiet(float32 a, float32 b, float_status *status)
4903	{
4904	flag aSign, bSign;
4905	uint32_t av, bv;
4906	a = float32_squash_input_denormal(a, status);
4907	b = float32_squash_input_denormal(b, status);
4908
4909	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4910	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4911	) {
4912	if (float32_is_signaling_nan(a, status)
4913	\|\| float32_is_signaling_nan(b, status)) {
4914	float_raise(float_flag_invalid, status);
4915	}
4916	return `0`;
4917	}
4918	aSign = extractFloat32Sign( a );
4919	bSign = extractFloat32Sign( b );
4920	av = float32_val(a);
4921	bv = float32_val(b);
4922	if ( aSign != bSign ) return aSign \|\| ( (uint32_t) ( ( av \| bv )<<`1` ) == `0` );
4923	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
4924
4925	}
4926
4927	/----------------------------------------------------------------------------*
4928	\| Returns 1 if the single-precision floating-point value `a' is less than
4929	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
4930	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
4931	\| Standard for Binary Floating-Point Arithmetic.
4932	----------------------------------------------------------------------------/
4933
4934	int float32_lt_quiet(float32 a, float32 b, float_status *status)
4935	{
4936	flag aSign, bSign;
4937	uint32_t av, bv;
4938	a = float32_squash_input_denormal(a, status);
4939	b = float32_squash_input_denormal(b, status);
4940
4941	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4942	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4943	) {
4944	if (float32_is_signaling_nan(a, status)
4945	\|\| float32_is_signaling_nan(b, status)) {
4946	float_raise(float_flag_invalid, status);
4947	}
4948	return `0`;
4949	}
4950	aSign = extractFloat32Sign( a );
4951	bSign = extractFloat32Sign( b );
4952	av = float32_val(a);
4953	bv = float32_val(b);
4954	if ( aSign != bSign ) return aSign && ( (uint32_t) ( ( av \| bv )<<`1` ) != `0` );
4955	return ( av != bv ) && ( aSign ^ ( av < bv ) );
4956
4957	}
4958
4959	/----------------------------------------------------------------------------*
4960	\| Returns 1 if the single-precision floating-point values `a' and `b' cannot
4961	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
4962	\| comparison is performed according to the IEC/IEEE Standard for Binary
4963	\| Floating-Point Arithmetic.
4964	----------------------------------------------------------------------------/
4965
4966	int float32_unordered_quiet(float32 a, float32 b, float_status *status)
4967	{
4968	a = float32_squash_input_denormal(a, status);
4969	b = float32_squash_input_denormal(b, status);
4970
4971	if ( ( ( extractFloat32Exp( a ) == `0xFF` ) && extractFloat32Frac( a ) )
4972	\|\| ( ( extractFloat32Exp( b ) == `0xFF` ) && extractFloat32Frac( b ) )
4973	) {
4974	if (float32_is_signaling_nan(a, status)
4975	\|\| float32_is_signaling_nan(b, status)) {
4976	float_raise(float_flag_invalid, status);
4977	}
4978	return `1`;
4979	}
4980	return `0`;
4981	}
4982
4983	/----------------------------------------------------------------------------*
4984	\| Returns the result of converting the double-precision floating-point value
4985	\| `a' to the extended double-precision floating-point format. The conversion
4986	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
4987	\| Arithmetic.
4988	----------------------------------------------------------------------------/
4989
4990	floatx80 float64_to_floatx80(float64 a, float_status *status)
4991	{
4992	flag aSign;
4993	int aExp;
4994	uint64_t aSig;
4995
4996	a = float64_squash_input_denormal(a, status);
4997	aSig = extractFloat64Frac( a );
4998	aExp = extractFloat64Exp( a );
4999	aSign = extractFloat64Sign( a );
5000	if ( aExp == `0x7FF` ) {
5001	if (aSig) {
5002	return commonNaNToFloatx80(float64ToCommonNaN(a, status), status);
5003	}
5004	return packFloatx80(aSign,
5005	floatx80_infinity_high,
5006	floatx80_infinity_low);
5007	}
5008	if ( aExp == `0` ) {
5009	if ( aSig == `0` ) return packFloatx80( aSign, `0`, `0` );
5010	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5011	}
5012	return
5013	packFloatx80(
5014	aSign, aExp + `0x3C00`, (aSig \| UINT64_C(`0x0010000000000000`)) << `11`);
5015
5016	}
5017
5018	/----------------------------------------------------------------------------*
5019	\| Returns the result of converting the double-precision floating-point value
5020	\| `a' to the quadruple-precision floating-point format. The conversion is
5021	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
5022	\| Arithmetic.
5023	----------------------------------------------------------------------------/
5024
5025	float128 float64_to_float128(float64 a, float_status *status)
5026	{
5027	flag aSign;
5028	int aExp;
5029	uint64_t aSig, zSig0, zSig1;
5030
5031	a = float64_squash_input_denormal(a, status);
5032	aSig = extractFloat64Frac( a );
5033	aExp = extractFloat64Exp( a );
5034	aSign = extractFloat64Sign( a );
5035	if ( aExp == `0x7FF` ) {
5036	if (aSig) {
5037	return commonNaNToFloat128(float64ToCommonNaN(a, status), status);
5038	}
5039	return packFloat128( aSign, `0x7FFF`, `0`, `0` );
5040	}
5041	if ( aExp == `0` ) {
5042	if ( aSig == `0` ) return packFloat128( aSign, `0`, `0`, `0` );
5043	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5044	--aExp;
5045	}
5046	shift128Right( aSig, `0`, `4`, &zSig0, &zSig1 );
5047	return packFloat128( aSign, aExp + `0x3C00`, zSig0, zSig1 );
5048
5049	}
5050
5051
5052	/----------------------------------------------------------------------------*
5053	\| Returns the remainder of the double-precision floating-point value `a'
5054	\| with respect to the corresponding value `b'. The operation is performed
5055	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5056	----------------------------------------------------------------------------/
5057
5058	float64 float64_rem(float64 a, float64 b, float_status *status)
5059	{
5060	flag aSign, zSign;
5061	int aExp, bExp, expDiff;
5062	uint64_t aSig, bSig;
5063	uint64_t q, alternateASig;
5064	int64_t sigMean;
5065
5066	a = float64_squash_input_denormal(a, status);
5067	b = float64_squash_input_denormal(b, status);
5068	aSig = extractFloat64Frac( a );
5069	aExp = extractFloat64Exp( a );
5070	aSign = extractFloat64Sign( a );
5071	bSig = extractFloat64Frac( b );
5072	bExp = extractFloat64Exp( b );
5073	if ( aExp == `0x7FF` ) {
5074	if ( aSig \|\| ( ( bExp == `0x7FF` ) && bSig ) ) {
5075	return propagateFloat64NaN(a, b, status);
5076	}
5077	float_raise(float_flag_invalid, status);
5078	return float64_default_nan(status);
5079	}
5080	if ( bExp == `0x7FF` ) {
5081	if (bSig) {
5082	return propagateFloat64NaN(a, b, status);
5083	}
5084	return a;
5085	}
5086	if ( bExp == `0` ) {
5087	if ( bSig == `0` ) {
5088	float_raise(float_flag_invalid, status);
5089	return float64_default_nan(status);
5090	}
5091	normalizeFloat64Subnormal( bSig, &bExp, &bSig );
5092	}
5093	if ( aExp == `0` ) {
5094	if ( aSig == `0` ) return a;
5095	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5096	}
5097	expDiff = aExp - bExp;
5098	aSig = (aSig \| UINT64_C(`0x0010000000000000`)) << `11`;
5099	bSig = (bSig \| UINT64_C(`0x0010000000000000`)) << `11`;
5100	if ( expDiff < `0` ) {
5101	if ( expDiff < -`1` ) return a;
5102	aSig >>= `1`;
5103	}
5104	q = ( bSig <= aSig );
5105	if ( q ) aSig -= bSig;
5106	expDiff -= `64`;
5107	while ( `0` < expDiff ) {
5108	q = estimateDiv128To64( aSig, `0`, bSig );
5109	q = ( `2` < q ) ? q - `2` : `0`;
5110	aSig = - ( ( bSig>>`2` ) * q );
5111	expDiff -= `62`;
5112	}
5113	expDiff += `64`;
5114	if ( `0` < expDiff ) {
5115	q = estimateDiv128To64( aSig, `0`, bSig );
5116	q = ( `2` < q ) ? q - `2` : `0`;
5117	q >>= `64` - expDiff;
5118	bSig >>= `2`;
5119	aSig = ( ( aSig>>`1` )<<( expDiff - `1` ) ) - bSig * q;
5120	}
5121	else {
5122	aSig >>= `2`;
5123	bSig >>= `2`;
5124	}
5125	do {
5126	alternateASig = aSig;
5127	++q;
5128	aSig -= bSig;
5129	} while ( `0` <= (int64_t) aSig );
5130	sigMean = aSig + alternateASig;
5131	if ( ( sigMean < `0` ) \|\| ( ( sigMean == `0` ) && ( q & `1` ) ) ) {
5132	aSig = alternateASig;
5133	}
5134	zSign = ( (int64_t) aSig < `0` );
5135	if ( zSign ) aSig = - aSig;
5136	return normalizeRoundAndPackFloat64(aSign ^ zSign, bExp, aSig, status);
5137
5138	}
5139
5140	/----------------------------------------------------------------------------*
5141	\| Returns the binary log of the double-precision floating-point value `a'.
5142	\| The operation is performed according to the IEC/IEEE Standard for Binary
5143	\| Floating-Point Arithmetic.
5144	----------------------------------------------------------------------------/
5145	float64 float64_log2(float64 a, float_status *status)
5146	{
5147	flag aSign, zSign;
5148	int aExp;
5149	uint64_t aSig, aSig0, aSig1, zSig, i;
5150	a = float64_squash_input_denormal(a, status);
5151
5152	aSig = extractFloat64Frac( a );
5153	aExp = extractFloat64Exp( a );
5154	aSign = extractFloat64Sign( a );
5155
5156	if ( aExp == `0` ) {
5157	if ( aSig == `0` ) return packFloat64( `1`, `0x7FF`, `0` );
5158	normalizeFloat64Subnormal( aSig, &aExp, &aSig );
5159	}
5160	if ( aSign ) {
5161	float_raise(float_flag_invalid, status);
5162	return float64_default_nan(status);
5163	}
5164	if ( aExp == `0x7FF` ) {
5165	if (aSig) {
5166	return propagateFloat64NaN(a, float64_zero, status);
5167	}
5168	return a;
5169	}
5170
5171	aExp -= `0x3FF`;
5172	aSig \|= UINT64_C(`0x0010000000000000`);
5173	zSign = aExp < `0`;
5174	zSig = (uint64_t)aExp << `52`;
5175	for (i = `1LL` << `51`; i > `0`; i >>= `1`) {
5176	mul64To128( aSig, aSig, &aSig0, &aSig1 );
5177	aSig = ( aSig0 << `12` ) \| ( aSig1 >> `52` );
5178	if ( aSig & UINT64_C(`0x0020000000000000`) ) {
5179	aSig >>= `1`;
5180	zSig \|= i;
5181	}
5182	}
5183
5184	if ( zSign )
5185	zSig = -zSig;
5186	return normalizeRoundAndPackFloat64(zSign, `0x408`, zSig, status);
5187	}
5188
5189	/----------------------------------------------------------------------------*
5190	\| Returns 1 if the double-precision floating-point value `a' is equal to the
5191	\| corresponding value `b', and 0 otherwise. The invalid exception is raised
5192	\| if either operand is a NaN. Otherwise, the comparison is performed
5193	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5194	----------------------------------------------------------------------------/
5195
5196	int float64_eq(float64 a, float64 b, float_status *status)
5197	{
5198	uint64_t av, bv;
5199	a = float64_squash_input_denormal(a, status);
5200	b = float64_squash_input_denormal(b, status);
5201
5202	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5203	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5204	) {
5205	float_raise(float_flag_invalid, status);
5206	return `0`;
5207	}
5208	av = float64_val(a);
5209	bv = float64_val(b);
5210	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<`1` ) == `0` );
5211
5212	}
5213
5214	/----------------------------------------------------------------------------*
5215	\| Returns 1 if the double-precision floating-point value `a' is less than or
5216	\| equal to the corresponding value `b', and 0 otherwise. The invalid
5217	\| exception is raised if either operand is a NaN. The comparison is performed
5218	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5219	----------------------------------------------------------------------------/
5220
5221	int float64_le(float64 a, float64 b, float_status *status)
5222	{
5223	flag aSign, bSign;
5224	uint64_t av, bv;
5225	a = float64_squash_input_denormal(a, status);
5226	b = float64_squash_input_denormal(b, status);
5227
5228	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5229	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5230	) {
5231	float_raise(float_flag_invalid, status);
5232	return `0`;
5233	}
5234	aSign = extractFloat64Sign( a );
5235	bSign = extractFloat64Sign( b );
5236	av = float64_val(a);
5237	bv = float64_val(b);
5238	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<`1` ) == `0` );
5239	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
5240
5241	}
5242
5243	/----------------------------------------------------------------------------*
5244	\| Returns 1 if the double-precision floating-point value `a' is less than
5245	\| the corresponding value `b', and 0 otherwise. The invalid exception is
5246	\| raised if either operand is a NaN. The comparison is performed according
5247	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5248	----------------------------------------------------------------------------/
5249
5250	int float64_lt(float64 a, float64 b, float_status *status)
5251	{
5252	flag aSign, bSign;
5253	uint64_t av, bv;
5254
5255	a = float64_squash_input_denormal(a, status);
5256	b = float64_squash_input_denormal(b, status);
5257	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5258	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5259	) {
5260	float_raise(float_flag_invalid, status);
5261	return `0`;
5262	}
5263	aSign = extractFloat64Sign( a );
5264	bSign = extractFloat64Sign( b );
5265	av = float64_val(a);
5266	bv = float64_val(b);
5267	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<`1` ) != `0` );
5268	return ( av != bv ) && ( aSign ^ ( av < bv ) );
5269
5270	}
5271
5272	/----------------------------------------------------------------------------*
5273	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5274	\| be compared, and 0 otherwise. The invalid exception is raised if either
5275	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
5276	\| Standard for Binary Floating-Point Arithmetic.
5277	----------------------------------------------------------------------------/
5278
5279	int float64_unordered(float64 a, float64 b, float_status *status)
5280	{
5281	a = float64_squash_input_denormal(a, status);
5282	b = float64_squash_input_denormal(b, status);
5283
5284	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5285	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5286	) {
5287	float_raise(float_flag_invalid, status);
5288	return `1`;
5289	}
5290	return `0`;
5291	}
5292
5293	/----------------------------------------------------------------------------*
5294	\| Returns 1 if the double-precision floating-point value `a' is equal to the
5295	\| corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5296	\| exception.The comparison is performed according to the IEC/IEEE Standard
5297	\| for Binary Floating-Point Arithmetic.
5298	----------------------------------------------------------------------------/
5299
5300	int float64_eq_quiet(float64 a, float64 b, float_status *status)
5301	{
5302	uint64_t av, bv;
5303	a = float64_squash_input_denormal(a, status);
5304	b = float64_squash_input_denormal(b, status);
5305
5306	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5307	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5308	) {
5309	if (float64_is_signaling_nan(a, status)
5310	\|\| float64_is_signaling_nan(b, status)) {
5311	float_raise(float_flag_invalid, status);
5312	}
5313	return `0`;
5314	}
5315	av = float64_val(a);
5316	bv = float64_val(b);
5317	return ( av == bv ) \|\| ( (uint64_t) ( ( av \| bv )<<`1` ) == `0` );
5318
5319	}
5320
5321	/----------------------------------------------------------------------------*
5322	\| Returns 1 if the double-precision floating-point value `a' is less than or
5323	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
5324	\| cause an exception. Otherwise, the comparison is performed according to the
5325	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5326	----------------------------------------------------------------------------/
5327
5328	int float64_le_quiet(float64 a, float64 b, float_status *status)
5329	{
5330	flag aSign, bSign;
5331	uint64_t av, bv;
5332	a = float64_squash_input_denormal(a, status);
5333	b = float64_squash_input_denormal(b, status);
5334
5335	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5336	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5337	) {
5338	if (float64_is_signaling_nan(a, status)
5339	\|\| float64_is_signaling_nan(b, status)) {
5340	float_raise(float_flag_invalid, status);
5341	}
5342	return `0`;
5343	}
5344	aSign = extractFloat64Sign( a );
5345	bSign = extractFloat64Sign( b );
5346	av = float64_val(a);
5347	bv = float64_val(b);
5348	if ( aSign != bSign ) return aSign \|\| ( (uint64_t) ( ( av \| bv )<<`1` ) == `0` );
5349	return ( av == bv ) \|\| ( aSign ^ ( av < bv ) );
5350
5351	}
5352
5353	/----------------------------------------------------------------------------*
5354	\| Returns 1 if the double-precision floating-point value `a' is less than
5355	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
5356	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
5357	\| Standard for Binary Floating-Point Arithmetic.
5358	----------------------------------------------------------------------------/
5359
5360	int float64_lt_quiet(float64 a, float64 b, float_status *status)
5361	{
5362	flag aSign, bSign;
5363	uint64_t av, bv;
5364	a = float64_squash_input_denormal(a, status);
5365	b = float64_squash_input_denormal(b, status);
5366
5367	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5368	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5369	) {
5370	if (float64_is_signaling_nan(a, status)
5371	\|\| float64_is_signaling_nan(b, status)) {
5372	float_raise(float_flag_invalid, status);
5373	}
5374	return `0`;
5375	}
5376	aSign = extractFloat64Sign( a );
5377	bSign = extractFloat64Sign( b );
5378	av = float64_val(a);
5379	bv = float64_val(b);
5380	if ( aSign != bSign ) return aSign && ( (uint64_t) ( ( av \| bv )<<`1` ) != `0` );
5381	return ( av != bv ) && ( aSign ^ ( av < bv ) );
5382
5383	}
5384
5385	/----------------------------------------------------------------------------*
5386	\| Returns 1 if the double-precision floating-point values `a' and `b' cannot
5387	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
5388	\| comparison is performed according to the IEC/IEEE Standard for Binary
5389	\| Floating-Point Arithmetic.
5390	----------------------------------------------------------------------------/
5391
5392	int float64_unordered_quiet(float64 a, float64 b, float_status *status)
5393	{
5394	a = float64_squash_input_denormal(a, status);
5395	b = float64_squash_input_denormal(b, status);
5396
5397	if ( ( ( extractFloat64Exp( a ) == `0x7FF` ) && extractFloat64Frac( a ) )
5398	\|\| ( ( extractFloat64Exp( b ) == `0x7FF` ) && extractFloat64Frac( b ) )
5399	) {
5400	if (float64_is_signaling_nan(a, status)
5401	\|\| float64_is_signaling_nan(b, status)) {
5402	float_raise(float_flag_invalid, status);
5403	}
5404	return `1`;
5405	}
5406	return `0`;
5407	}
5408
5409	/----------------------------------------------------------------------------*
5410	\| Returns the result of converting the extended double-precision floating-
5411	\| point value `a' to the 32-bit two's complement integer format. The
5412	\| conversion is performed according to the IEC/IEEE Standard for Binary
5413	\| Floating-Point Arithmetic---which means in particular that the conversion
5414	\| is rounded according to the current rounding mode. If `a' is a NaN, the
5415	\| largest positive integer is returned. Otherwise, if the conversion
5416	\| overflows, the largest integer with the same sign as `a' is returned.
5417	----------------------------------------------------------------------------/
5418
5419	int32_t floatx80_to_int32(floatx80 a, float_status *status)
5420	{
5421	flag aSign;
5422	int32_t aExp, shiftCount;
5423	uint64_t aSig;
5424
5425	if (floatx80_invalid_encoding(a)) {
5426	float_raise(float_flag_invalid, status);
5427	return `1` << `31`;
5428	}
5429	aSig = extractFloatx80Frac( a );
5430	aExp = extractFloatx80Exp( a );
5431	aSign = extractFloatx80Sign( a );
5432	if ( ( aExp == `0x7FFF` ) && (uint64_t) ( aSig<<`1` ) ) aSign = `0`;
5433	shiftCount = `0x4037` - aExp;
5434	if ( shiftCount <= `0` ) shiftCount = `1`;
5435	shift64RightJamming( aSig, shiftCount, &aSig );
5436	return roundAndPackInt32(aSign, aSig, status);
5437
5438	}
5439
5440	/----------------------------------------------------------------------------*
5441	\| Returns the result of converting the extended double-precision floating-
5442	\| point value `a' to the 32-bit two's complement integer format. The
5443	\| conversion is performed according to the IEC/IEEE Standard for Binary
5444	\| Floating-Point Arithmetic, except that the conversion is always rounded
5445	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
5446	\| Otherwise, if the conversion overflows, the largest integer with the same
5447	\| sign as `a' is returned.
5448	----------------------------------------------------------------------------/
5449
5450	int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
5451	{
5452	flag aSign;
5453	int32_t aExp, shiftCount;
5454	uint64_t aSig, savedASig;
5455	int32_t z;
5456
5457	if (floatx80_invalid_encoding(a)) {
5458	float_raise(float_flag_invalid, status);
5459	return `1` << `31`;
5460	}
5461	aSig = extractFloatx80Frac( a );
5462	aExp = extractFloatx80Exp( a );
5463	aSign = extractFloatx80Sign( a );
5464	if ( `0x401E` < aExp ) {
5465	if ( ( aExp == `0x7FFF` ) && (uint64_t) ( aSig<<`1` ) ) aSign = `0`;
5466	goto invalid;
5467	}
5468	else if ( aExp < `0x3FFF` ) {
5469	if (aExp \|\| aSig) {
5470	status->float_exception_flags \|= float_flag_inexact;
5471	}
5472	return `0`;
5473	}
5474	shiftCount = `0x403E` - aExp;
5475	savedASig = aSig;
5476	aSig >>= shiftCount;
5477	z = aSig;
5478	if ( aSign ) z = - z;
5479	if ( ( z < `0` ) ^ aSign ) {
5480	invalid:
5481	float_raise(float_flag_invalid, status);
5482	return aSign ? (int32_t) `0x80000000` : `0x7FFFFFFF`;
5483	}
5484	if ( ( aSig<<shiftCount ) != savedASig ) {
5485	status->float_exception_flags \|= float_flag_inexact;
5486	}
5487	return z;
5488
5489	}
5490
5491	/----------------------------------------------------------------------------*
5492	\| Returns the result of converting the extended double-precision floating-
5493	\| point value `a' to the 64-bit two's complement integer format. The
5494	\| conversion is performed according to the IEC/IEEE Standard for Binary
5495	\| Floating-Point Arithmetic---which means in particular that the conversion
5496	\| is rounded according to the current rounding mode. If `a' is a NaN,
5497	\| the largest positive integer is returned. Otherwise, if the conversion
5498	\| overflows, the largest integer with the same sign as `a' is returned.
5499	----------------------------------------------------------------------------/
5500
5501	int64_t floatx80_to_int64(floatx80 a, float_status *status)
5502	{
5503	flag aSign;
5504	int32_t aExp, shiftCount;
5505	uint64_t aSig, aSigExtra;
5506
5507	if (floatx80_invalid_encoding(a)) {
5508	float_raise(float_flag_invalid, status);
5509	return `1ULL` << `63`;
5510	}
5511	aSig = extractFloatx80Frac( a );
5512	aExp = extractFloatx80Exp( a );
5513	aSign = extractFloatx80Sign( a );
5514	shiftCount = `0x403E` - aExp;
5515	if ( shiftCount <= `0` ) {
5516	if ( shiftCount ) {
5517	float_raise(float_flag_invalid, status);
5518	if (!aSign \|\| floatx80_is_any_nan(a)) {
5519	return INT64_MAX;
5520	}
5521	return INT64_MIN;
5522	}
5523	aSigExtra = `0`;
5524	}
5525	else {
5526	shift64ExtraRightJamming( aSig, `0`, shiftCount, &aSig, &aSigExtra );
5527	}
5528	return roundAndPackInt64(aSign, aSig, aSigExtra, status);
5529
5530	}
5531
5532	/----------------------------------------------------------------------------*
5533	\| Returns the result of converting the extended double-precision floating-
5534	\| point value `a' to the 64-bit two's complement integer format. The
5535	\| conversion is performed according to the IEC/IEEE Standard for Binary
5536	\| Floating-Point Arithmetic, except that the conversion is always rounded
5537	\| toward zero. If `a' is a NaN, the largest positive integer is returned.
5538	\| Otherwise, if the conversion overflows, the largest integer with the same
5539	\| sign as `a' is returned.
5540	----------------------------------------------------------------------------/
5541
5542	int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status *status)
5543	{
5544	flag aSign;
5545	int32_t aExp, shiftCount;
5546	uint64_t aSig;
5547	int64_t z;
5548
5549	if (floatx80_invalid_encoding(a)) {
5550	float_raise(float_flag_invalid, status);
5551	return `1ULL` << `63`;
5552	}
5553	aSig = extractFloatx80Frac( a );
5554	aExp = extractFloatx80Exp( a );
5555	aSign = extractFloatx80Sign( a );
5556	shiftCount = aExp - `0x403E`;
5557	if ( `0` <= shiftCount ) {
5558	aSig &= UINT64_C(`0x7FFFFFFFFFFFFFFF`);
5559	if ( ( a.high != `0xC03E` ) \|\| aSig ) {
5560	float_raise(float_flag_invalid, status);
5561	if ( ! aSign \|\| ( ( aExp == `0x7FFF` ) && aSig ) ) {
5562	return INT64_MAX;
5563	}
5564	}
5565	return INT64_MIN;
5566	}
5567	else if ( aExp < `0x3FFF` ) {
5568	if (aExp \| aSig) {
5569	status->float_exception_flags \|= float_flag_inexact;
5570	}
5571	return `0`;
5572	}
5573	z = aSig>>( - shiftCount );
5574	if ( (uint64_t) ( aSig<<( shiftCount & `63` ) ) ) {
5575	status->float_exception_flags \|= float_flag_inexact;
5576	}
5577	if ( aSign ) z = - z;
5578	return z;
5579
5580	}
5581
5582	/----------------------------------------------------------------------------*
5583	\| Returns the result of converting the extended double-precision floating-
5584	\| point value `a' to the single-precision floating-point format. The
5585	\| conversion is performed according to the IEC/IEEE Standard for Binary
5586	\| Floating-Point Arithmetic.
5587	----------------------------------------------------------------------------/
5588
5589	float32 floatx80_to_float32(floatx80 a, float_status *status)
5590	{
5591	flag aSign;
5592	int32_t aExp;
5593	uint64_t aSig;
5594
5595	if (floatx80_invalid_encoding(a)) {
5596	float_raise(float_flag_invalid, status);
5597	return float32_default_nan(status);
5598	}
5599	aSig = extractFloatx80Frac( a );
5600	aExp = extractFloatx80Exp( a );
5601	aSign = extractFloatx80Sign( a );
5602	if ( aExp == `0x7FFF` ) {
5603	if ( (uint64_t) ( aSig<<`1` ) ) {
5604	return commonNaNToFloat32(floatx80ToCommonNaN(a, status), status);
5605	}
5606	return packFloat32( aSign, `0xFF`, `0` );
5607	}
5608	shift64RightJamming( aSig, `33`, &aSig );
5609	if ( aExp \|\| aSig ) aExp -= `0x3F81`;
5610	return roundAndPackFloat32(aSign, aExp, aSig, status);
5611
5612	}
5613
5614	/----------------------------------------------------------------------------*
5615	\| Returns the result of converting the extended double-precision floating-
5616	\| point value `a' to the double-precision floating-point format. The
5617	\| conversion is performed according to the IEC/IEEE Standard for Binary
5618	\| Floating-Point Arithmetic.
5619	----------------------------------------------------------------------------/
5620
5621	float64 floatx80_to_float64(floatx80 a, float_status *status)
5622	{
5623	flag aSign;
5624	int32_t aExp;
5625	uint64_t aSig, zSig;
5626
5627	if (floatx80_invalid_encoding(a)) {
5628	float_raise(float_flag_invalid, status);
5629	return float64_default_nan(status);
5630	}
5631	aSig = extractFloatx80Frac( a );
5632	aExp = extractFloatx80Exp( a );
5633	aSign = extractFloatx80Sign( a );
5634	if ( aExp == `0x7FFF` ) {
5635	if ( (uint64_t) ( aSig<<`1` ) ) {
5636	return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
5637	}
5638	return packFloat64( aSign, `0x7FF`, `0` );
5639	}
5640	shift64RightJamming( aSig, `1`, &zSig );
5641	if ( aExp \|\| aSig ) aExp -= `0x3C01`;
5642	return roundAndPackFloat64(aSign, aExp, zSig, status);
5643
5644	}
5645
5646	/----------------------------------------------------------------------------*
5647	\| Returns the result of converting the extended double-precision floating-
5648	\| point value `a' to the quadruple-precision floating-point format. The
5649	\| conversion is performed according to the IEC/IEEE Standard for Binary
5650	\| Floating-Point Arithmetic.
5651	----------------------------------------------------------------------------/
5652
5653	float128 floatx80_to_float128(floatx80 a, float_status *status)
5654	{
5655	flag aSign;
5656	int aExp;
5657	uint64_t aSig, zSig0, zSig1;
5658
5659	if (floatx80_invalid_encoding(a)) {
5660	float_raise(float_flag_invalid, status);
5661	return float128_default_nan(status);
5662	}
5663	aSig = extractFloatx80Frac( a );
5664	aExp = extractFloatx80Exp( a );
5665	aSign = extractFloatx80Sign( a );
5666	if ( ( aExp == `0x7FFF` ) && (uint64_t) ( aSig<<`1` ) ) {
5667	return commonNaNToFloat128(floatx80ToCommonNaN(a, status), status);
5668	}
5669	shift128Right( aSig<<`1`, `0`, `16`, &zSig0, &zSig1 );
5670	return packFloat128( aSign, aExp, zSig0, zSig1 );
5671
5672	}
5673
5674	/----------------------------------------------------------------------------*
5675	\| Rounds the extended double-precision floating-point value `a'
5676	\| to the precision provided by floatx80_rounding_precision and returns the
5677	\| result as an extended double-precision floating-point value.
5678	\| The operation is performed according to the IEC/IEEE Standard for Binary
5679	\| Floating-Point Arithmetic.
5680	----------------------------------------------------------------------------/
5681
5682	floatx80 floatx80_round(floatx80 a, float_status *status)
5683	{
5684	return roundAndPackFloatx80(status->floatx80_rounding_precision,
5685	extractFloatx80Sign(a),
5686	extractFloatx80Exp(a),
5687	extractFloatx80Frac(a), `0`, status);
5688	}
5689
5690	/----------------------------------------------------------------------------*
5691	\| Rounds the extended double-precision floating-point value `a' to an integer,
5692	\| and returns the result as an extended quadruple-precision floating-point
5693	\| value. The operation is performed according to the IEC/IEEE Standard for
5694	\| Binary Floating-Point Arithmetic.
5695	----------------------------------------------------------------------------/
5696
5697	floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
5698	{
5699	flag aSign;
5700	int32_t aExp;
5701	uint64_t lastBitMask, roundBitsMask;
5702	floatx80 z;
5703
5704	if (floatx80_invalid_encoding(a)) {
5705	float_raise(float_flag_invalid, status);
5706	return floatx80_default_nan(status);
5707	}
5708	aExp = extractFloatx80Exp( a );
5709	if ( `0x403E` <= aExp ) {
5710	if ( ( aExp == `0x7FFF` ) && (uint64_t) ( extractFloatx80Frac( a )<<`1` ) ) {
5711	return propagateFloatx80NaN(a, a, status);
5712	}
5713	return a;
5714	}
5715	if ( aExp < `0x3FFF` ) {
5716	if ( ( aExp == `0` )
5717	&& ( (uint64_t) ( extractFloatx80Frac( a )<<`1` ) == `0` ) ) {
5718	return a;
5719	}
5720	status->float_exception_flags \|= float_flag_inexact;
5721	aSign = extractFloatx80Sign( a );
5722	switch (status->float_rounding_mode) {
5723	case float_round_nearest_even:
5724	if ( ( aExp == `0x3FFE` ) && (uint64_t) ( extractFloatx80Frac( a )<<`1` )
5725	) {
5726	return
5727	packFloatx80( aSign, `0x3FFF`, UINT64_C(`0x8000000000000000`));
5728	}
5729	break;
5730	case float_round_ties_away:
5731	if (aExp == `0x3FFE`) {
5732	return packFloatx80(aSign, `0x3FFF`, UINT64_C(`0x8000000000000000`));
5733	}
5734	break;
5735	case float_round_down:
5736	return
5737	aSign ?
5738	packFloatx80( `1`, `0x3FFF`, UINT64_C(`0x8000000000000000`))
5739	: packFloatx80( `0`, `0`, `0` );
5740	case float_round_up:
5741	return
5742	aSign ? packFloatx80( `1`, `0`, `0` )
5743	: packFloatx80( `0`, `0x3FFF`, UINT64_C(`0x8000000000000000`));
5744	}
5745	return packFloatx80( aSign, `0`, `0` );
5746	}
5747	lastBitMask = `1`;
5748	lastBitMask <<= `0x403E` - aExp;
5749	roundBitsMask = lastBitMask - `1`;
5750	z = a;
5751	switch (status->float_rounding_mode) {
5752	case float_round_nearest_even:
5753	z.low += lastBitMask>>`1`;
5754	if ((z.low & roundBitsMask) == `0`) {
5755	z.low &= ~lastBitMask;
5756	}
5757	break;
5758	case float_round_ties_away:
5759	z.low += lastBitMask >> `1`;
5760	break;
5761	case float_round_to_zero:
5762	break;
5763	case float_round_up:
5764	if (!extractFloatx80Sign(z)) {
5765	z.low += roundBitsMask;
5766	}
5767	break;
5768	case float_round_down:
5769	if (extractFloatx80Sign(z)) {
5770	z.low += roundBitsMask;
5771	}
5772	break;
5773	default:
5774	abort();
5775	}
5776	z.low &= ~ roundBitsMask;
5777	if ( z.low == `0` ) {
5778	++z.high;
5779	z.low = UINT64_C(`0x8000000000000000`);
5780	}
5781	if (z.low != a.low) {
5782	status->float_exception_flags \|= float_flag_inexact;
5783	}
5784	return z;
5785
5786	}
5787
5788	/----------------------------------------------------------------------------*
5789	\| Returns the result of adding the absolute values of the extended double-
5790	\| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
5791	\| negated before being returned. `zSign' is ignored if the result is a NaN.
5792	\| The addition is performed according to the IEC/IEEE Standard for Binary
5793	\| Floating-Point Arithmetic.
5794	----------------------------------------------------------------------------/
5795
5796	static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5797	float_status *status)
5798	{
5799	int32_t aExp, bExp, zExp;
5800	uint64_t aSig, bSig, zSig0, zSig1;
5801	int32_t expDiff;
5802
5803	aSig = extractFloatx80Frac( a );
5804	aExp = extractFloatx80Exp( a );
5805	bSig = extractFloatx80Frac( b );
5806	bExp = extractFloatx80Exp( b );
5807	expDiff = aExp - bExp;
5808	if ( `0` < expDiff ) {
5809	if ( aExp == `0x7FFF` ) {
5810	if ((uint64_t)(aSig << `1`)) {
5811	return propagateFloatx80NaN(a, b, status);
5812	}
5813	return a;
5814	}
5815	if ( bExp == `0` ) --expDiff;
5816	shift64ExtraRightJamming( bSig, `0`, expDiff, &bSig, &zSig1 );
5817	zExp = aExp;
5818	}
5819	else if ( expDiff < `0` ) {
5820	if ( bExp == `0x7FFF` ) {
5821	if ((uint64_t)(bSig << `1`)) {
5822	return propagateFloatx80NaN(a, b, status);
5823	}
5824	return packFloatx80(zSign,
5825	floatx80_infinity_high,
5826	floatx80_infinity_low);
5827	}
5828	if ( aExp == `0` ) ++expDiff;
5829	shift64ExtraRightJamming( aSig, `0`, - expDiff, &aSig, &zSig1 );
5830	zExp = bExp;
5831	}
5832	else {
5833	if ( aExp == `0x7FFF` ) {
5834	if ( (uint64_t) ( ( aSig \| bSig )<<`1` ) ) {
5835	return propagateFloatx80NaN(a, b, status);
5836	}
5837	return a;
5838	}
5839	zSig1 = `0`;
5840	zSig0 = aSig + bSig;
5841	if ( aExp == `0` ) {
5842	normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
5843	goto roundAndPack;
5844	}
5845	zExp = aExp;
5846	goto shiftRight1;
5847	}
5848	zSig0 = aSig + bSig;
5849	if ( (int64_t) zSig0 < `0` ) goto roundAndPack;
5850	shiftRight1:
5851	shift64ExtraRightJamming( zSig0, zSig1, `1`, &zSig0, &zSig1 );
5852	zSig0 \|= UINT64_C(`0x8000000000000000`);
5853	++zExp;
5854	roundAndPack:
5855	return roundAndPackFloatx80(status->floatx80_rounding_precision,
5856	zSign, zExp, zSig0, zSig1, status);
5857	}
5858
5859	/----------------------------------------------------------------------------*
5860	\| Returns the result of subtracting the absolute values of the extended
5861	\| double-precision floating-point values `a' and `b'. If `zSign' is 1, the
5862	\| difference is negated before being returned. `zSign' is ignored if the
5863	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
5864	\| Standard for Binary Floating-Point Arithmetic.
5865	----------------------------------------------------------------------------/
5866
5867	static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
5868	float_status *status)
5869	{
5870	int32_t aExp, bExp, zExp;
5871	uint64_t aSig, bSig, zSig0, zSig1;
5872	int32_t expDiff;
5873
5874	aSig = extractFloatx80Frac( a );
5875	aExp = extractFloatx80Exp( a );
5876	bSig = extractFloatx80Frac( b );
5877	bExp = extractFloatx80Exp( b );
5878	expDiff = aExp - bExp;
5879	if ( `0` < expDiff ) goto aExpBigger;
5880	if ( expDiff < `0` ) goto bExpBigger;
5881	if ( aExp == `0x7FFF` ) {
5882	if ( (uint64_t) ( ( aSig \| bSig )<<`1` ) ) {
5883	return propagateFloatx80NaN(a, b, status);
5884	}
5885	float_raise(float_flag_invalid, status);
5886	return floatx80_default_nan(status);
5887	}
5888	if ( aExp == `0` ) {
5889	aExp = `1`;
5890	bExp = `1`;
5891	}
5892	zSig1 = `0`;
5893	if ( bSig < aSig ) goto aBigger;
5894	if ( aSig < bSig ) goto bBigger;
5895	return packFloatx80(status->float_rounding_mode == float_round_down, `0`, `0`);
5896	bExpBigger:
5897	if ( bExp == `0x7FFF` ) {
5898	if ((uint64_t)(bSig << `1`)) {
5899	return propagateFloatx80NaN(a, b, status);
5900	}
5901	return packFloatx80(zSign ^ `1`, floatx80_infinity_high,
5902	floatx80_infinity_low);
5903	}
5904	if ( aExp == `0` ) ++expDiff;
5905	shift128RightJamming( aSig, `0`, - expDiff, &aSig, &zSig1 );
5906	bBigger:
5907	sub128( bSig, `0`, aSig, zSig1, &zSig0, &zSig1 );
5908	zExp = bExp;
5909	zSign ^= `1`;
5910	goto normalizeRoundAndPack;
5911	aExpBigger:
5912	if ( aExp == `0x7FFF` ) {
5913	if ((uint64_t)(aSig << `1`)) {
5914	return propagateFloatx80NaN(a, b, status);
5915	}
5916	return a;
5917	}
5918	if ( bExp == `0` ) --expDiff;
5919	shift128RightJamming( bSig, `0`, expDiff, &bSig, &zSig1 );
5920	aBigger:
5921	sub128( aSig, `0`, bSig, zSig1, &zSig0, &zSig1 );
5922	zExp = aExp;
5923	normalizeRoundAndPack:
5924	return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
5925	zSign, zExp, zSig0, zSig1, status);
5926	}
5927
5928	/----------------------------------------------------------------------------*
5929	\| Returns the result of adding the extended double-precision floating-point
5930	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
5931	\| Standard for Binary Floating-Point Arithmetic.
5932	----------------------------------------------------------------------------/
5933
5934	floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
5935	{
5936	flag aSign, bSign;
5937
5938	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
5939	float_raise(float_flag_invalid, status);
5940	return floatx80_default_nan(status);
5941	}
5942	aSign = extractFloatx80Sign( a );
5943	bSign = extractFloatx80Sign( b );
5944	if ( aSign == bSign ) {
5945	return addFloatx80Sigs(a, b, aSign, status);
5946	}
5947	else {
5948	return subFloatx80Sigs(a, b, aSign, status);
5949	}
5950
5951	}
5952
5953	/----------------------------------------------------------------------------*
5954	\| Returns the result of subtracting the extended double-precision floating-
5955	\| point values `a' and `b'. The operation is performed according to the
5956	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5957	----------------------------------------------------------------------------/
5958
5959	floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
5960	{
5961	flag aSign, bSign;
5962
5963	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
5964	float_raise(float_flag_invalid, status);
5965	return floatx80_default_nan(status);
5966	}
5967	aSign = extractFloatx80Sign( a );
5968	bSign = extractFloatx80Sign( b );
5969	if ( aSign == bSign ) {
5970	return subFloatx80Sigs(a, b, aSign, status);
5971	}
5972	else {
5973	return addFloatx80Sigs(a, b, aSign, status);
5974	}
5975
5976	}
5977
5978	/----------------------------------------------------------------------------*
5979	\| Returns the result of multiplying the extended double-precision floating-
5980	\| point values `a' and `b'. The operation is performed according to the
5981	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
5982	----------------------------------------------------------------------------/
5983
5984	floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
5985	{
5986	flag aSign, bSign, zSign;
5987	int32_t aExp, bExp, zExp;
5988	uint64_t aSig, bSig, zSig0, zSig1;
5989
5990	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
5991	float_raise(float_flag_invalid, status);
5992	return floatx80_default_nan(status);
5993	}
5994	aSig = extractFloatx80Frac( a );
5995	aExp = extractFloatx80Exp( a );
5996	aSign = extractFloatx80Sign( a );
5997	bSig = extractFloatx80Frac( b );
5998	bExp = extractFloatx80Exp( b );
5999	bSign = extractFloatx80Sign( b );
6000	zSign = aSign ^ bSign;
6001	if ( aExp == `0x7FFF` ) {
6002	if ( (uint64_t) ( aSig<<`1` )
6003	\|\| ( ( bExp == `0x7FFF` ) && (uint64_t) ( bSig<<`1` ) ) ) {
6004	return propagateFloatx80NaN(a, b, status);
6005	}
6006	if ( ( bExp \| bSig ) == `0` ) goto invalid;
6007	return packFloatx80(zSign, floatx80_infinity_high,
6008	floatx80_infinity_low);
6009	}
6010	if ( bExp == `0x7FFF` ) {
6011	if ((uint64_t)(bSig << `1`)) {
6012	return propagateFloatx80NaN(a, b, status);
6013	}
6014	if ( ( aExp \| aSig ) == `0` ) {
6015	invalid:
6016	float_raise(float_flag_invalid, status);
6017	return floatx80_default_nan(status);
6018	}
6019	return packFloatx80(zSign, floatx80_infinity_high,
6020	floatx80_infinity_low);
6021	}
6022	if ( aExp == `0` ) {
6023	if ( aSig == `0` ) return packFloatx80( zSign, `0`, `0` );
6024	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6025	}
6026	if ( bExp == `0` ) {
6027	if ( bSig == `0` ) return packFloatx80( zSign, `0`, `0` );
6028	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6029	}
6030	zExp = aExp + bExp - `0x3FFE`;
6031	mul64To128( aSig, bSig, &zSig0, &zSig1 );
6032	if ( `0` < (int64_t) zSig0 ) {
6033	shortShift128Left( zSig0, zSig1, `1`, &zSig0, &zSig1 );
6034	--zExp;
6035	}
6036	return roundAndPackFloatx80(status->floatx80_rounding_precision,
6037	zSign, zExp, zSig0, zSig1, status);
6038	}
6039
6040	/----------------------------------------------------------------------------*
6041	\| Returns the result of dividing the extended double-precision floating-point
6042	\| value `a' by the corresponding value `b'. The operation is performed
6043	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6044	----------------------------------------------------------------------------/
6045
6046	floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
6047	{
6048	flag aSign, bSign, zSign;
6049	int32_t aExp, bExp, zExp;
6050	uint64_t aSig, bSig, zSig0, zSig1;
6051	uint64_t rem0, rem1, rem2, term0, term1, term2;
6052
6053	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
6054	float_raise(float_flag_invalid, status);
6055	return floatx80_default_nan(status);
6056	}
6057	aSig = extractFloatx80Frac( a );
6058	aExp = extractFloatx80Exp( a );
6059	aSign = extractFloatx80Sign( a );
6060	bSig = extractFloatx80Frac( b );
6061	bExp = extractFloatx80Exp( b );
6062	bSign = extractFloatx80Sign( b );
6063	zSign = aSign ^ bSign;
6064	if ( aExp == `0x7FFF` ) {
6065	if ((uint64_t)(aSig << `1`)) {
6066	return propagateFloatx80NaN(a, b, status);
6067	}
6068	if ( bExp == `0x7FFF` ) {
6069	if ((uint64_t)(bSig << `1`)) {
6070	return propagateFloatx80NaN(a, b, status);
6071	}
6072	goto invalid;
6073	}
6074	return packFloatx80(zSign, floatx80_infinity_high,
6075	floatx80_infinity_low);
6076	}
6077	if ( bExp == `0x7FFF` ) {
6078	if ((uint64_t)(bSig << `1`)) {
6079	return propagateFloatx80NaN(a, b, status);
6080	}
6081	return packFloatx80( zSign, `0`, `0` );
6082	}
6083	if ( bExp == `0` ) {
6084	if ( bSig == `0` ) {
6085	if ( ( aExp \| aSig ) == `0` ) {
6086	invalid:
6087	float_raise(float_flag_invalid, status);
6088	return floatx80_default_nan(status);
6089	}
6090	float_raise(float_flag_divbyzero, status);
6091	return packFloatx80(zSign, floatx80_infinity_high,
6092	floatx80_infinity_low);
6093	}
6094	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6095	}
6096	if ( aExp == `0` ) {
6097	if ( aSig == `0` ) return packFloatx80( zSign, `0`, `0` );
6098	normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
6099	}
6100	zExp = aExp - bExp + `0x3FFE`;
6101	rem1 = `0`;
6102	if ( bSig <= aSig ) {
6103	shift128Right( aSig, `0`, `1`, &aSig, &rem1 );
6104	++zExp;
6105	}
6106	zSig0 = estimateDiv128To64( aSig, rem1, bSig );
6107	mul64To128( bSig, zSig0, &term0, &term1 );
6108	sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
6109	while ( (int64_t) rem0 < `0` ) {
6110	--zSig0;
6111	add128( rem0, rem1, `0`, bSig, &rem0, &rem1 );
6112	}
6113	zSig1 = estimateDiv128To64( rem1, `0`, bSig );
6114	if ( (uint64_t) ( zSig1<<`1` ) <= `8` ) {
6115	mul64To128( bSig, zSig1, &term1, &term2 );
6116	sub128( rem1, `0`, term1, term2, &rem1, &rem2 );
6117	while ( (int64_t) rem1 < `0` ) {
6118	--zSig1;
6119	add128( rem1, rem2, `0`, bSig, &rem1, &rem2 );
6120	}
6121	zSig1 \|= ( ( rem1 \| rem2 ) != `0` );
6122	}
6123	return roundAndPackFloatx80(status->floatx80_rounding_precision,
6124	zSign, zExp, zSig0, zSig1, status);
6125	}
6126
6127	/----------------------------------------------------------------------------*
6128	\| Returns the remainder of the extended double-precision floating-point value
6129	\| `a' with respect to the corresponding value `b'. The operation is performed
6130	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6131	----------------------------------------------------------------------------/
6132
6133	floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
6134	{
6135	flag aSign, zSign;
6136	int32_t aExp, bExp, expDiff;
6137	uint64_t aSig0, aSig1, bSig;
6138	uint64_t q, term0, term1, alternateASig0, alternateASig1;
6139
6140	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
6141	float_raise(float_flag_invalid, status);
6142	return floatx80_default_nan(status);
6143	}
6144	aSig0 = extractFloatx80Frac( a );
6145	aExp = extractFloatx80Exp( a );
6146	aSign = extractFloatx80Sign( a );
6147	bSig = extractFloatx80Frac( b );
6148	bExp = extractFloatx80Exp( b );
6149	if ( aExp == `0x7FFF` ) {
6150	if ( (uint64_t) ( aSig0<<`1` )
6151	\|\| ( ( bExp == `0x7FFF` ) && (uint64_t) ( bSig<<`1` ) ) ) {
6152	return propagateFloatx80NaN(a, b, status);
6153	}
6154	goto invalid;
6155	}
6156	if ( bExp == `0x7FFF` ) {
6157	if ((uint64_t)(bSig << `1`)) {
6158	return propagateFloatx80NaN(a, b, status);
6159	}
6160	return a;
6161	}
6162	if ( bExp == `0` ) {
6163	if ( bSig == `0` ) {
6164	invalid:
6165	float_raise(float_flag_invalid, status);
6166	return floatx80_default_nan(status);
6167	}
6168	normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
6169	}
6170	if ( aExp == `0` ) {
6171	if ( (uint64_t) ( aSig0<<`1` ) == `0` ) return a;
6172	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6173	}
6174	bSig \|= UINT64_C(`0x8000000000000000`);
6175	zSign = aSign;
6176	expDiff = aExp - bExp;
6177	aSig1 = `0`;
6178	if ( expDiff < `0` ) {
6179	if ( expDiff < -`1` ) return a;
6180	shift128Right( aSig0, `0`, `1`, &aSig0, &aSig1 );
6181	expDiff = `0`;
6182	}
6183	q = ( bSig <= aSig0 );
6184	if ( q ) aSig0 -= bSig;
6185	expDiff -= `64`;
6186	while ( `0` < expDiff ) {
6187	q = estimateDiv128To64( aSig0, aSig1, bSig );
6188	q = ( `2` < q ) ? q - `2` : `0`;
6189	mul64To128( bSig, q, &term0, &term1 );
6190	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6191	shortShift128Left( aSig0, aSig1, `62`, &aSig0, &aSig1 );
6192	expDiff -= `62`;
6193	}
6194	expDiff += `64`;
6195	if ( `0` < expDiff ) {
6196	q = estimateDiv128To64( aSig0, aSig1, bSig );
6197	q = ( `2` < q ) ? q - `2` : `0`;
6198	q >>= `64` - expDiff;
6199	mul64To128( bSig, q<<( `64` - expDiff ), &term0, &term1 );
6200	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6201	shortShift128Left( `0`, bSig, `64` - expDiff, &term0, &term1 );
6202	while ( le128( term0, term1, aSig0, aSig1 ) ) {
6203	++q;
6204	sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
6205	}
6206	}
6207	else {
6208	term1 = `0`;
6209	term0 = bSig;
6210	}
6211	sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
6212	if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
6213	\|\| ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
6214	&& ( q & `1` ) )
6215	) {
6216	aSig0 = alternateASig0;
6217	aSig1 = alternateASig1;
6218	zSign = ! zSign;
6219	}
6220	return
6221	normalizeRoundAndPackFloatx80(
6222	`80`, zSign, bExp + expDiff, aSig0, aSig1, status);
6223
6224	}
6225
6226	/----------------------------------------------------------------------------*
6227	\| Returns the square root of the extended double-precision floating-point
6228	\| value `a'. The operation is performed according to the IEC/IEEE Standard
6229	\| for Binary Floating-Point Arithmetic.
6230	----------------------------------------------------------------------------/
6231
6232	floatx80 floatx80_sqrt(floatx80 a, float_status *status)
6233	{
6234	flag aSign;
6235	int32_t aExp, zExp;
6236	uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
6237	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
6238
6239	if (floatx80_invalid_encoding(a)) {
6240	float_raise(float_flag_invalid, status);
6241	return floatx80_default_nan(status);
6242	}
6243	aSig0 = extractFloatx80Frac( a );
6244	aExp = extractFloatx80Exp( a );
6245	aSign = extractFloatx80Sign( a );
6246	if ( aExp == `0x7FFF` ) {
6247	if ((uint64_t)(aSig0 << `1`)) {
6248	return propagateFloatx80NaN(a, a, status);
6249	}
6250	if ( ! aSign ) return a;
6251	goto invalid;
6252	}
6253	if ( aSign ) {
6254	if ( ( aExp \| aSig0 ) == `0` ) return a;
6255	invalid:
6256	float_raise(float_flag_invalid, status);
6257	return floatx80_default_nan(status);
6258	}
6259	if ( aExp == `0` ) {
6260	if ( aSig0 == `0` ) return packFloatx80( `0`, `0`, `0` );
6261	normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
6262	}
6263	zExp = ( ( aExp - `0x3FFF` )>>`1` ) + `0x3FFF`;
6264	zSig0 = estimateSqrt32( aExp, aSig0>>`32` );
6265	shift128Right( aSig0, `0`, `2` + ( aExp & `1` ), &aSig0, &aSig1 );
6266	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<`32` ) + ( zSig0<<`30` );
6267	doubleZSig0 = zSig0<<`1`;
6268	mul64To128( zSig0, zSig0, &term0, &term1 );
6269	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
6270	while ( (int64_t) rem0 < `0` ) {
6271	--zSig0;
6272	doubleZSig0 -= `2`;
6273	add128( rem0, rem1, zSig0>>`63`, doubleZSig0 \| `1`, &rem0, &rem1 );
6274	}
6275	zSig1 = estimateDiv128To64( rem1, `0`, doubleZSig0 );
6276	if ( ( zSig1 & UINT64_C(`0x3FFFFFFFFFFFFFFF`) ) <= `5` ) {
6277	if ( zSig1 == `0` ) zSig1 = `1`;
6278	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
6279	sub128( rem1, `0`, term1, term2, &rem1, &rem2 );
6280	mul64To128( zSig1, zSig1, &term2, &term3 );
6281	sub192( rem1, rem2, `0`, `0`, term2, term3, &rem1, &rem2, &rem3 );
6282	while ( (int64_t) rem1 < `0` ) {
6283	--zSig1;
6284	shortShift128Left( `0`, zSig1, `1`, &term2, &term3 );
6285	term3 \|= `1`;
6286	term2 \|= doubleZSig0;
6287	add192( rem1, rem2, rem3, `0`, term2, term3, &rem1, &rem2, &rem3 );
6288	}
6289	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != `0` );
6290	}
6291	shortShift128Left( `0`, zSig1, `1`, &zSig0, &zSig1 );
6292	zSig0 \|= doubleZSig0;
6293	return roundAndPackFloatx80(status->floatx80_rounding_precision,
6294	`0`, zExp, zSig0, zSig1, status);
6295	}
6296
6297	/----------------------------------------------------------------------------*
6298	\| Returns 1 if the extended double-precision floating-point value `a' is equal
6299	\| to the corresponding value `b', and 0 otherwise. The invalid exception is
6300	\| raised if either operand is a NaN. Otherwise, the comparison is performed
6301	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6302	----------------------------------------------------------------------------/
6303
6304	int floatx80_eq(floatx80 a, floatx80 b, float_status *status)
6305	{
6306
6307	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)
6308	\|\| (extractFloatx80Exp(a) == `0x7FFF`
6309	&& (uint64_t) (extractFloatx80Frac(a) << `1`))
6310	\|\| (extractFloatx80Exp(b) == `0x7FFF`
6311	&& (uint64_t) (extractFloatx80Frac(b) << `1`))
6312	) {
6313	float_raise(float_flag_invalid, status);
6314	return `0`;
6315	}
6316	return
6317	( a.low == b.low )
6318	&& ( ( a.high == b.high )
6319	\|\| ( ( a.low == `0` )
6320	&& ( (uint16_t) ( ( a.high \| b.high )<<`1` ) == `0` ) )
6321	);
6322
6323	}
6324
6325	/----------------------------------------------------------------------------*
6326	\| Returns 1 if the extended double-precision floating-point value `a' is
6327	\| less than or equal to the corresponding value `b', and 0 otherwise. The
6328	\| invalid exception is raised if either operand is a NaN. The comparison is
6329	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
6330	\| Arithmetic.
6331	----------------------------------------------------------------------------/
6332
6333	int floatx80_le(floatx80 a, floatx80 b, float_status *status)
6334	{
6335	flag aSign, bSign;
6336
6337	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)
6338	\|\| (extractFloatx80Exp(a) == `0x7FFF`
6339	&& (uint64_t) (extractFloatx80Frac(a) << `1`))
6340	\|\| (extractFloatx80Exp(b) == `0x7FFF`
6341	&& (uint64_t) (extractFloatx80Frac(b) << `1`))
6342	) {
6343	float_raise(float_flag_invalid, status);
6344	return `0`;
6345	}
6346	aSign = extractFloatx80Sign( a );
6347	bSign = extractFloatx80Sign( b );
6348	if ( aSign != bSign ) {
6349	return
6350	aSign
6351	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
6352	== `0` );
6353	}
6354	return
6355	aSign ? le128( b.high, b.low, a.high, a.low )
6356	: le128( a.high, a.low, b.high, b.low );
6357
6358	}
6359
6360	/----------------------------------------------------------------------------*
6361	\| Returns 1 if the extended double-precision floating-point value `a' is
6362	\| less than the corresponding value `b', and 0 otherwise. The invalid
6363	\| exception is raised if either operand is a NaN. The comparison is performed
6364	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6365	----------------------------------------------------------------------------/
6366
6367	int floatx80_lt(floatx80 a, floatx80 b, float_status *status)
6368	{
6369	flag aSign, bSign;
6370
6371	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)
6372	\|\| (extractFloatx80Exp(a) == `0x7FFF`
6373	&& (uint64_t) (extractFloatx80Frac(a) << `1`))
6374	\|\| (extractFloatx80Exp(b) == `0x7FFF`
6375	&& (uint64_t) (extractFloatx80Frac(b) << `1`))
6376	) {
6377	float_raise(float_flag_invalid, status);
6378	return `0`;
6379	}
6380	aSign = extractFloatx80Sign( a );
6381	bSign = extractFloatx80Sign( b );
6382	if ( aSign != bSign ) {
6383	return
6384	aSign
6385	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
6386	!= `0` );
6387	}
6388	return
6389	aSign ? lt128( b.high, b.low, a.high, a.low )
6390	: lt128( a.high, a.low, b.high, b.low );
6391
6392	}
6393
6394	/----------------------------------------------------------------------------*
6395	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
6396	\| cannot be compared, and 0 otherwise. The invalid exception is raised if
6397	\| either operand is a NaN. The comparison is performed according to the
6398	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6399	----------------------------------------------------------------------------/
6400	int floatx80_unordered(floatx80 a, floatx80 b, float_status *status)
6401	{
6402	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)
6403	\|\| (extractFloatx80Exp(a) == `0x7FFF`
6404	&& (uint64_t) (extractFloatx80Frac(a) << `1`))
6405	\|\| (extractFloatx80Exp(b) == `0x7FFF`
6406	&& (uint64_t) (extractFloatx80Frac(b) << `1`))
6407	) {
6408	float_raise(float_flag_invalid, status);
6409	return `1`;
6410	}
6411	return `0`;
6412	}
6413
6414	/----------------------------------------------------------------------------*
6415	\| Returns 1 if the extended double-precision floating-point value `a' is
6416	\| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
6417	\| cause an exception. The comparison is performed according to the IEC/IEEE
6418	\| Standard for Binary Floating-Point Arithmetic.
6419	----------------------------------------------------------------------------/
6420
6421	int floatx80_eq_quiet(floatx80 a, floatx80 b, float_status *status)
6422	{
6423
6424	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
6425	float_raise(float_flag_invalid, status);
6426	return `0`;
6427	}
6428	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
6429	&& (uint64_t) ( extractFloatx80Frac( a )<<`1` ) )
6430	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
6431	&& (uint64_t) ( extractFloatx80Frac( b )<<`1` ) )
6432	) {
6433	if (floatx80_is_signaling_nan(a, status)
6434	\|\| floatx80_is_signaling_nan(b, status)) {
6435	float_raise(float_flag_invalid, status);
6436	}
6437	return `0`;
6438	}
6439	return
6440	( a.low == b.low )
6441	&& ( ( a.high == b.high )
6442	\|\| ( ( a.low == `0` )
6443	&& ( (uint16_t) ( ( a.high \| b.high )<<`1` ) == `0` ) )
6444	);
6445
6446	}
6447
6448	/----------------------------------------------------------------------------*
6449	\| Returns 1 if the extended double-precision floating-point value `a' is less
6450	\| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
6451	\| do not cause an exception. Otherwise, the comparison is performed according
6452	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6453	----------------------------------------------------------------------------/
6454
6455	int floatx80_le_quiet(floatx80 a, floatx80 b, float_status *status)
6456	{
6457	flag aSign, bSign;
6458
6459	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
6460	float_raise(float_flag_invalid, status);
6461	return `0`;
6462	}
6463	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
6464	&& (uint64_t) ( extractFloatx80Frac( a )<<`1` ) )
6465	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
6466	&& (uint64_t) ( extractFloatx80Frac( b )<<`1` ) )
6467	) {
6468	if (floatx80_is_signaling_nan(a, status)
6469	\|\| floatx80_is_signaling_nan(b, status)) {
6470	float_raise(float_flag_invalid, status);
6471	}
6472	return `0`;
6473	}
6474	aSign = extractFloatx80Sign( a );
6475	bSign = extractFloatx80Sign( b );
6476	if ( aSign != bSign ) {
6477	return
6478	aSign
6479	\|\| ( ( ( (uint16_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
6480	== `0` );
6481	}
6482	return
6483	aSign ? le128( b.high, b.low, a.high, a.low )
6484	: le128( a.high, a.low, b.high, b.low );
6485
6486	}
6487
6488	/----------------------------------------------------------------------------*
6489	\| Returns 1 if the extended double-precision floating-point value `a' is less
6490	\| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
6491	\| an exception. Otherwise, the comparison is performed according to the
6492	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
6493	----------------------------------------------------------------------------/
6494
6495	int floatx80_lt_quiet(floatx80 a, floatx80 b, float_status *status)
6496	{
6497	flag aSign, bSign;
6498
6499	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
6500	float_raise(float_flag_invalid, status);
6501	return `0`;
6502	}
6503	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
6504	&& (uint64_t) ( extractFloatx80Frac( a )<<`1` ) )
6505	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
6506	&& (uint64_t) ( extractFloatx80Frac( b )<<`1` ) )
6507	) {
6508	if (floatx80_is_signaling_nan(a, status)
6509	\|\| floatx80_is_signaling_nan(b, status)) {
6510	float_raise(float_flag_invalid, status);
6511	}
6512	return `0`;
6513	}
6514	aSign = extractFloatx80Sign( a );
6515	bSign = extractFloatx80Sign( b );
6516	if ( aSign != bSign ) {
6517	return
6518	aSign
6519	&& ( ( ( (uint16_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
6520	!= `0` );
6521	}
6522	return
6523	aSign ? lt128( b.high, b.low, a.high, a.low )
6524	: lt128( a.high, a.low, b.high, b.low );
6525
6526	}
6527
6528	/----------------------------------------------------------------------------*
6529	\| Returns 1 if the extended double-precision floating-point values `a' and `b'
6530	\| cannot be compared, and 0 otherwise. Quiet NaNs do not cause an exception.
6531	\| The comparison is performed according to the IEC/IEEE Standard for Binary
6532	\| Floating-Point Arithmetic.
6533	----------------------------------------------------------------------------/
6534	int floatx80_unordered_quiet(floatx80 a, floatx80 b, float_status *status)
6535	{
6536	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
6537	float_raise(float_flag_invalid, status);
6538	return `1`;
6539	}
6540	if ( ( ( extractFloatx80Exp( a ) == `0x7FFF` )
6541	&& (uint64_t) ( extractFloatx80Frac( a )<<`1` ) )
6542	\|\| ( ( extractFloatx80Exp( b ) == `0x7FFF` )
6543	&& (uint64_t) ( extractFloatx80Frac( b )<<`1` ) )
6544	) {
6545	if (floatx80_is_signaling_nan(a, status)
6546	\|\| floatx80_is_signaling_nan(b, status)) {
6547	float_raise(float_flag_invalid, status);
6548	}
6549	return `1`;
6550	}
6551	return `0`;
6552	}
6553
6554	/----------------------------------------------------------------------------*
6555	\| Returns the result of converting the quadruple-precision floating-point
6556	\| value `a' to the 32-bit two's complement integer format. The conversion
6557	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6558	\| Arithmetic---which means in particular that the conversion is rounded
6559	\| according to the current rounding mode. If `a' is a NaN, the largest
6560	\| positive integer is returned. Otherwise, if the conversion overflows, the
6561	\| largest integer with the same sign as `a' is returned.
6562	----------------------------------------------------------------------------/
6563
6564	int32_t float128_to_int32(float128 a, float_status *status)
6565	{
6566	flag aSign;
6567	int32_t aExp, shiftCount;
6568	uint64_t aSig0, aSig1;
6569
6570	aSig1 = extractFloat128Frac1( a );
6571	aSig0 = extractFloat128Frac0( a );
6572	aExp = extractFloat128Exp( a );
6573	aSign = extractFloat128Sign( a );
6574	if ( ( aExp == `0x7FFF` ) && ( aSig0 \| aSig1 ) ) aSign = `0`;
6575	if ( aExp ) aSig0 \|= UINT64_C(`0x0001000000000000`);
6576	aSig0 \|= ( aSig1 != `0` );
6577	shiftCount = `0x4028` - aExp;
6578	if ( `0` < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
6579	return roundAndPackInt32(aSign, aSig0, status);
6580
6581	}
6582
6583	/----------------------------------------------------------------------------*
6584	\| Returns the result of converting the quadruple-precision floating-point
6585	\| value `a' to the 32-bit two's complement integer format. The conversion
6586	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6587	\| Arithmetic, except that the conversion is always rounded toward zero. If
6588	\| `a' is a NaN, the largest positive integer is returned. Otherwise, if the
6589	\| conversion overflows, the largest integer with the same sign as `a' is
6590	\| returned.
6591	----------------------------------------------------------------------------/
6592
6593	int32_t float128_to_int32_round_to_zero(float128 a, float_status *status)
6594	{
6595	flag aSign;
6596	int32_t aExp, shiftCount;
6597	uint64_t aSig0, aSig1, savedASig;
6598	int32_t z;
6599
6600	aSig1 = extractFloat128Frac1( a );
6601	aSig0 = extractFloat128Frac0( a );
6602	aExp = extractFloat128Exp( a );
6603	aSign = extractFloat128Sign( a );
6604	aSig0 \|= ( aSig1 != `0` );
6605	if ( `0x401E` < aExp ) {
6606	if ( ( aExp == `0x7FFF` ) && aSig0 ) aSign = `0`;
6607	goto invalid;
6608	}
6609	else if ( aExp < `0x3FFF` ) {
6610	if (aExp \|\| aSig0) {
6611	status->float_exception_flags \|= float_flag_inexact;
6612	}
6613	return `0`;
6614	}
6615	aSig0 \|= UINT64_C(`0x0001000000000000`);
6616	shiftCount = `0x402F` - aExp;
6617	savedASig = aSig0;
6618	aSig0 >>= shiftCount;
6619	z = aSig0;
6620	if ( aSign ) z = - z;
6621	if ( ( z < `0` ) ^ aSign ) {
6622	invalid:
6623	float_raise(float_flag_invalid, status);
6624	return aSign ? INT32_MIN : INT32_MAX;
6625	}
6626	if ( ( aSig0<<shiftCount ) != savedASig ) {
6627	status->float_exception_flags \|= float_flag_inexact;
6628	}
6629	return z;
6630
6631	}
6632
6633	/----------------------------------------------------------------------------*
6634	\| Returns the result of converting the quadruple-precision floating-point
6635	\| value `a' to the 64-bit two's complement integer format. The conversion
6636	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6637	\| Arithmetic---which means in particular that the conversion is rounded
6638	\| according to the current rounding mode. If `a' is a NaN, the largest
6639	\| positive integer is returned. Otherwise, if the conversion overflows, the
6640	\| largest integer with the same sign as `a' is returned.
6641	----------------------------------------------------------------------------/
6642
6643	int64_t float128_to_int64(float128 a, float_status *status)
6644	{
6645	flag aSign;
6646	int32_t aExp, shiftCount;
6647	uint64_t aSig0, aSig1;
6648
6649	aSig1 = extractFloat128Frac1( a );
6650	aSig0 = extractFloat128Frac0( a );
6651	aExp = extractFloat128Exp( a );
6652	aSign = extractFloat128Sign( a );
6653	if ( aExp ) aSig0 \|= UINT64_C(`0x0001000000000000`);
6654	shiftCount = `0x402F` - aExp;
6655	if ( shiftCount <= `0` ) {
6656	if ( `0x403E` < aExp ) {
6657	float_raise(float_flag_invalid, status);
6658	if ( ! aSign
6659	\|\| ( ( aExp == `0x7FFF` )
6660	&& ( aSig1 \|\| ( aSig0 != UINT64_C(`0x0001000000000000`) ) )
6661	)
6662	) {
6663	return INT64_MAX;
6664	}
6665	return INT64_MIN;
6666	}
6667	shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
6668	}
6669	else {
6670	shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
6671	}
6672	return roundAndPackInt64(aSign, aSig0, aSig1, status);
6673
6674	}
6675
6676	/----------------------------------------------------------------------------*
6677	\| Returns the result of converting the quadruple-precision floating-point
6678	\| value `a' to the 64-bit two's complement integer format. The conversion
6679	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6680	\| Arithmetic, except that the conversion is always rounded toward zero.
6681	\| If `a' is a NaN, the largest positive integer is returned. Otherwise, if
6682	\| the conversion overflows, the largest integer with the same sign as `a' is
6683	\| returned.
6684	----------------------------------------------------------------------------/
6685
6686	int64_t float128_to_int64_round_to_zero(float128 a, float_status *status)
6687	{
6688	flag aSign;
6689	int32_t aExp, shiftCount;
6690	uint64_t aSig0, aSig1;
6691	int64_t z;
6692
6693	aSig1 = extractFloat128Frac1( a );
6694	aSig0 = extractFloat128Frac0( a );
6695	aExp = extractFloat128Exp( a );
6696	aSign = extractFloat128Sign( a );
6697	if ( aExp ) aSig0 \|= UINT64_C(`0x0001000000000000`);
6698	shiftCount = aExp - `0x402F`;
6699	if ( `0` < shiftCount ) {
6700	if ( `0x403E` <= aExp ) {
6701	aSig0 &= UINT64_C(`0x0000FFFFFFFFFFFF`);
6702	if ( ( a.high == UINT64_C(`0xC03E000000000000`) )
6703	&& ( aSig1 < UINT64_C(`0x0002000000000000`) ) ) {
6704	if (aSig1) {
6705	status->float_exception_flags \|= float_flag_inexact;
6706	}
6707	}
6708	else {
6709	float_raise(float_flag_invalid, status);
6710	if ( ! aSign \|\| ( ( aExp == `0x7FFF` ) && ( aSig0 \| aSig1 ) ) ) {
6711	return INT64_MAX;
6712	}
6713	}
6714	return INT64_MIN;
6715	}
6716	z = ( aSig0<<shiftCount ) \| ( aSig1>>( ( - shiftCount ) & `63` ) );
6717	if ( (uint64_t) ( aSig1<<shiftCount ) ) {
6718	status->float_exception_flags \|= float_flag_inexact;
6719	}
6720	}
6721	else {
6722	if ( aExp < `0x3FFF` ) {
6723	if ( aExp \| aSig0 \| aSig1 ) {
6724	status->float_exception_flags \|= float_flag_inexact;
6725	}
6726	return `0`;
6727	}
6728	z = aSig0>>( - shiftCount );
6729	if ( aSig1
6730	\|\| ( shiftCount && (uint64_t) ( aSig0<<( shiftCount & `63` ) ) ) ) {
6731	status->float_exception_flags \|= float_flag_inexact;
6732	}
6733	}
6734	if ( aSign ) z = - z;
6735	return z;
6736
6737	}
6738
6739	/----------------------------------------------------------------------------*
6740	\| Returns the result of converting the quadruple-precision floating-point value
6741	\| `a' to the 64-bit unsigned integer format. The conversion is
6742	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
6743	\| Arithmetic---which means in particular that the conversion is rounded
6744	\| according to the current rounding mode. If `a' is a NaN, the largest
6745	\| positive integer is returned. If the conversion overflows, the
6746	\| largest unsigned integer is returned. If 'a' is negative, the value is
6747	\| rounded and zero is returned; negative values that do not round to zero
6748	\| will raise the inexact exception.
6749	----------------------------------------------------------------------------/
6750
6751	uint64_t float128_to_uint64(float128 a, float_status *status)
6752	{
6753	flag aSign;
6754	int aExp;
6755	int shiftCount;
6756	uint64_t aSig0, aSig1;
6757
6758	aSig0 = extractFloat128Frac0(a);
6759	aSig1 = extractFloat128Frac1(a);
6760	aExp = extractFloat128Exp(a);
6761	aSign = extractFloat128Sign(a);
6762	if (aSign && (aExp > `0x3FFE`)) {
6763	float_raise(float_flag_invalid, status);
6764	if (float128_is_any_nan(a)) {
6765	return UINT64_MAX;
6766	} else {
6767	return `0`;
6768	}
6769	}
6770	if (aExp) {
6771	aSig0 \|= UINT64_C(`0x0001000000000000`);
6772	}
6773	shiftCount = `0x402F` - aExp;
6774	if (shiftCount <= `0`) {
6775	if (`0x403E` < aExp) {
6776	float_raise(float_flag_invalid, status);
6777	return UINT64_MAX;
6778	}
6779	shortShift128Left(aSig0, aSig1, -shiftCount, &aSig0, &aSig1);
6780	} else {
6781	shift64ExtraRightJamming(aSig0, aSig1, shiftCount, &aSig0, &aSig1);
6782	}
6783	return roundAndPackUint64(aSign, aSig0, aSig1, status);
6784	}
6785
6786	uint64_t float128_to_uint64_round_to_zero(float128 a, float_status *status)
6787	{
6788	uint64_t v;
6789	signed char current_rounding_mode = status->float_rounding_mode;
6790
6791	set_float_rounding_mode(float_round_to_zero, status);
6792	v = float128_to_uint64(a, status);
6793	set_float_rounding_mode(current_rounding_mode, status);
6794
6795	return v;
6796	}
6797
6798	/----------------------------------------------------------------------------*
6799	\| Returns the result of converting the quadruple-precision floating-point
6800	\| value `a' to the 32-bit unsigned integer format. The conversion
6801	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6802	\| Arithmetic except that the conversion is always rounded toward zero.
6803	\| If `a' is a NaN, the largest positive integer is returned. Otherwise,
6804	\| if the conversion overflows, the largest unsigned integer is returned.
6805	\| If 'a' is negative, the value is rounded and zero is returned; negative
6806	\| values that do not round to zero will raise the inexact exception.
6807	----------------------------------------------------------------------------/
6808
6809	uint32_t float128_to_uint32_round_to_zero(float128 a, float_status *status)
6810	{
6811	uint64_t v;
6812	uint32_t res;
6813	int old_exc_flags = get_float_exception_flags(status);
6814
6815	v = float128_to_uint64_round_to_zero(a, status);
6816	if (v > `0xffffffff`) {
6817	res = `0xffffffff`;
6818	} else {
6819	return v;
6820	}
6821	set_float_exception_flags(old_exc_flags, status);
6822	float_raise(float_flag_invalid, status);
6823	return res;
6824	}
6825
6826	/----------------------------------------------------------------------------*
6827	\| Returns the result of converting the quadruple-precision floating-point value
6828	\| `a' to the 32-bit unsigned integer format. The conversion is
6829	\| performed according to the IEC/IEEE Standard for Binary Floating-Point
6830	\| Arithmetic---which means in particular that the conversion is rounded
6831	\| according to the current rounding mode. If `a' is a NaN, the largest
6832	\| positive integer is returned. If the conversion overflows, the
6833	\| largest unsigned integer is returned. If 'a' is negative, the value is
6834	\| rounded and zero is returned; negative values that do not round to zero
6835	\| will raise the inexact exception.
6836	----------------------------------------------------------------------------/
6837
6838	uint32_t float128_to_uint32(float128 a, float_status *status)
6839	{
6840	uint64_t v;
6841	uint32_t res;
6842	int old_exc_flags = get_float_exception_flags(status);
6843
6844	v = float128_to_uint64(a, status);
6845	if (v > `0xffffffff`) {
6846	res = `0xffffffff`;
6847	} else {
6848	return v;
6849	}
6850	set_float_exception_flags(old_exc_flags, status);
6851	float_raise(float_flag_invalid, status);
6852	return res;
6853	}
6854
6855	/----------------------------------------------------------------------------*
6856	\| Returns the result of converting the quadruple-precision floating-point
6857	\| value `a' to the single-precision floating-point format. The conversion
6858	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6859	\| Arithmetic.
6860	----------------------------------------------------------------------------/
6861
6862	float32 float128_to_float32(float128 a, float_status *status)
6863	{
6864	flag aSign;
6865	int32_t aExp;
6866	uint64_t aSig0, aSig1;
6867	uint32_t zSig;
6868
6869	aSig1 = extractFloat128Frac1( a );
6870	aSig0 = extractFloat128Frac0( a );
6871	aExp = extractFloat128Exp( a );
6872	aSign = extractFloat128Sign( a );
6873	if ( aExp == `0x7FFF` ) {
6874	if ( aSig0 \| aSig1 ) {
6875	return commonNaNToFloat32(float128ToCommonNaN(a, status), status);
6876	}
6877	return packFloat32( aSign, `0xFF`, `0` );
6878	}
6879	aSig0 \|= ( aSig1 != `0` );
6880	shift64RightJamming( aSig0, `18`, &aSig0 );
6881	zSig = aSig0;
6882	if ( aExp \|\| zSig ) {
6883	zSig \|= `0x40000000`;
6884	aExp -= `0x3F81`;
6885	}
6886	return roundAndPackFloat32(aSign, aExp, zSig, status);
6887
6888	}
6889
6890	/----------------------------------------------------------------------------*
6891	\| Returns the result of converting the quadruple-precision floating-point
6892	\| value `a' to the double-precision floating-point format. The conversion
6893	\| is performed according to the IEC/IEEE Standard for Binary Floating-Point
6894	\| Arithmetic.
6895	----------------------------------------------------------------------------/
6896
6897	float64 float128_to_float64(float128 a, float_status *status)
6898	{
6899	flag aSign;
6900	int32_t aExp;
6901	uint64_t aSig0, aSig1;
6902
6903	aSig1 = extractFloat128Frac1( a );
6904	aSig0 = extractFloat128Frac0( a );
6905	aExp = extractFloat128Exp( a );
6906	aSign = extractFloat128Sign( a );
6907	if ( aExp == `0x7FFF` ) {
6908	if ( aSig0 \| aSig1 ) {
6909	return commonNaNToFloat64(float128ToCommonNaN(a, status), status);
6910	}
6911	return packFloat64( aSign, `0x7FF`, `0` );
6912	}
6913	shortShift128Left( aSig0, aSig1, `14`, &aSig0, &aSig1 );
6914	aSig0 \|= ( aSig1 != `0` );
6915	if ( aExp \|\| aSig0 ) {
6916	aSig0 \|= UINT64_C(`0x4000000000000000`);
6917	aExp -= `0x3C01`;
6918	}
6919	return roundAndPackFloat64(aSign, aExp, aSig0, status);
6920
6921	}
6922
6923	/----------------------------------------------------------------------------*
6924	\| Returns the result of converting the quadruple-precision floating-point
6925	\| value `a' to the extended double-precision floating-point format. The
6926	\| conversion is performed according to the IEC/IEEE Standard for Binary
6927	\| Floating-Point Arithmetic.
6928	----------------------------------------------------------------------------/
6929
6930	floatx80 float128_to_floatx80(float128 a, float_status *status)
6931	{
6932	flag aSign;
6933	int32_t aExp;
6934	uint64_t aSig0, aSig1;
6935
6936	aSig1 = extractFloat128Frac1( a );
6937	aSig0 = extractFloat128Frac0( a );
6938	aExp = extractFloat128Exp( a );
6939	aSign = extractFloat128Sign( a );
6940	if ( aExp == `0x7FFF` ) {
6941	if ( aSig0 \| aSig1 ) {
6942	return commonNaNToFloatx80(float128ToCommonNaN(a, status), status);
6943	}
6944	return packFloatx80(aSign, floatx80_infinity_high,
6945	floatx80_infinity_low);
6946	}
6947	if ( aExp == `0` ) {
6948	if ( ( aSig0 \| aSig1 ) == `0` ) return packFloatx80( aSign, `0`, `0` );
6949	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
6950	}
6951	else {
6952	aSig0 \|= UINT64_C(`0x0001000000000000`);
6953	}
6954	shortShift128Left( aSig0, aSig1, `15`, &aSig0, &aSig1 );
6955	return roundAndPackFloatx80(`80`, aSign, aExp, aSig0, aSig1, status);
6956
6957	}
6958
6959	/----------------------------------------------------------------------------*
6960	\| Rounds the quadruple-precision floating-point value `a' to an integer, and
6961	\| returns the result as a quadruple-precision floating-point value. The
6962	\| operation is performed according to the IEC/IEEE Standard for Binary
6963	\| Floating-Point Arithmetic.
6964	----------------------------------------------------------------------------/
6965
6966	float128 float128_round_to_int(float128 a, float_status *status)
6967	{
6968	flag aSign;
6969	int32_t aExp;
6970	uint64_t lastBitMask, roundBitsMask;
6971	float128 z;
6972
6973	aExp = extractFloat128Exp( a );
6974	if ( `0x402F` <= aExp ) {
6975	if ( `0x406F` <= aExp ) {
6976	if ( ( aExp == `0x7FFF` )
6977	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) )
6978	) {
6979	return propagateFloat128NaN(a, a, status);
6980	}
6981	return a;
6982	}
6983	lastBitMask = `1`;
6984	lastBitMask = ( lastBitMask<<( `0x406E` - aExp ) )<<`1`;
6985	roundBitsMask = lastBitMask - `1`;
6986	z = a;
6987	switch (status->float_rounding_mode) {
6988	case float_round_nearest_even:
6989	if ( lastBitMask ) {
6990	add128( z.high, z.low, `0`, lastBitMask>>`1`, &z.high, &z.low );
6991	if ( ( z.low & roundBitsMask ) == `0` ) z.low &= ~ lastBitMask;
6992	}
6993	else {
6994	if ( (int64_t) z.low < `0` ) {
6995	++z.high;
6996	if ( (uint64_t) ( z.low<<`1` ) == `0` ) z.high &= ~`1`;
6997	}
6998	}
6999	break;
7000	case float_round_ties_away:
7001	if (lastBitMask) {
7002	add128(z.high, z.low, `0`, lastBitMask >> `1`, &z.high, &z.low);
7003	} else {
7004	if ((int64_t) z.low < `0`) {
7005	++z.high;
7006	}
7007	}
7008	break;
7009	case float_round_to_zero:
7010	break;
7011	case float_round_up:
7012	if (!extractFloat128Sign(z)) {
7013	add128(z.high, z.low, `0`, roundBitsMask, &z.high, &z.low);
7014	}
7015	break;
7016	case float_round_down:
7017	if (extractFloat128Sign(z)) {
7018	add128(z.high, z.low, `0`, roundBitsMask, &z.high, &z.low);
7019	}
7020	break;
7021	case float_round_to_odd:
7022	/*
7023	* Note that if lastBitMask == 0, the last bit is the lsb
7024	* of high, and roundBitsMask == -1.
7025	*/
7026	if ((lastBitMask ? z.low & lastBitMask : z.high & `1`) == `0`) {
7027	add128(z.high, z.low, `0`, roundBitsMask, &z.high, &z.low);
7028	}
7029	break;
7030	default:
7031	abort();
7032	}
7033	z.low &= ~ roundBitsMask;
7034	}
7035	else {
7036	if ( aExp < `0x3FFF` ) {
7037	if ( ( ( (uint64_t) ( a.high<<`1` ) ) \| a.low ) == `0` ) return a;
7038	status->float_exception_flags \|= float_flag_inexact;
7039	aSign = extractFloat128Sign( a );
7040	switch (status->float_rounding_mode) {
7041	case float_round_nearest_even:
7042	if ( ( aExp == `0x3FFE` )
7043	&& ( extractFloat128Frac0( a )
7044	\| extractFloat128Frac1( a ) )
7045	) {
7046	return packFloat128( aSign, `0x3FFF`, `0`, `0` );
7047	}
7048	break;
7049	case float_round_ties_away:
7050	if (aExp == `0x3FFE`) {
7051	return packFloat128(aSign, `0x3FFF`, `0`, `0`);
7052	}
7053	break;
7054	case float_round_down:
7055	return
7056	aSign ? packFloat128( `1`, `0x3FFF`, `0`, `0` )
7057	: packFloat128( `0`, `0`, `0`, `0` );
7058	case float_round_up:
7059	return
7060	aSign ? packFloat128( `1`, `0`, `0`, `0` )
7061	: packFloat128( `0`, `0x3FFF`, `0`, `0` );
7062
7063	case float_round_to_odd:
7064	return packFloat128(aSign, `0x3FFF`, `0`, `0`);
7065	}
7066	return packFloat128( aSign, `0`, `0`, `0` );
7067	}
7068	lastBitMask = `1`;
7069	lastBitMask <<= `0x402F` - aExp;
7070	roundBitsMask = lastBitMask - `1`;
7071	z.low = `0`;
7072	z.high = a.high;
7073	switch (status->float_rounding_mode) {
7074	case float_round_nearest_even:
7075	z.high += lastBitMask>>`1`;
7076	if ( ( ( z.high & roundBitsMask ) \| a.low ) == `0` ) {
7077	z.high &= ~ lastBitMask;
7078	}
7079	break;
7080	case float_round_ties_away:
7081	z.high += lastBitMask>>`1`;
7082	break;
7083	case float_round_to_zero:
7084	break;
7085	case float_round_up:
7086	if (!extractFloat128Sign(z)) {
7087	z.high \|= ( a.low != `0` );
7088	z.high += roundBitsMask;
7089	}
7090	break;
7091	case float_round_down:
7092	if (extractFloat128Sign(z)) {
7093	z.high \|= (a.low != `0`);
7094	z.high += roundBitsMask;
7095	}
7096	break;
7097	case float_round_to_odd:
7098	if ((z.high & lastBitMask) == `0`) {
7099	z.high \|= (a.low != `0`);
7100	z.high += roundBitsMask;
7101	}
7102	break;
7103	default:
7104	abort();
7105	}
7106	z.high &= ~ roundBitsMask;
7107	}
7108	if ( ( z.low != a.low ) \|\| ( z.high != a.high ) ) {
7109	status->float_exception_flags \|= float_flag_inexact;
7110	}
7111	return z;
7112
7113	}
7114
7115	/----------------------------------------------------------------------------*
7116	\| Returns the result of adding the absolute values of the quadruple-precision
7117	\| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
7118	\| before being returned. `zSign' is ignored if the result is a NaN.
7119	\| The addition is performed according to the IEC/IEEE Standard for Binary
7120	\| Floating-Point Arithmetic.
7121	----------------------------------------------------------------------------/
7122
7123	static float128 addFloat128Sigs(float128 a, float128 b, flag zSign,
7124	float_status *status)
7125	{
7126	int32_t aExp, bExp, zExp;
7127	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7128	int32_t expDiff;
7129
7130	aSig1 = extractFloat128Frac1( a );
7131	aSig0 = extractFloat128Frac0( a );
7132	aExp = extractFloat128Exp( a );
7133	bSig1 = extractFloat128Frac1( b );
7134	bSig0 = extractFloat128Frac0( b );
7135	bExp = extractFloat128Exp( b );
7136	expDiff = aExp - bExp;
7137	if ( `0` < expDiff ) {
7138	if ( aExp == `0x7FFF` ) {
7139	if (aSig0 \| aSig1) {
7140	return propagateFloat128NaN(a, b, status);
7141	}
7142	return a;
7143	}
7144	if ( bExp == `0` ) {
7145	--expDiff;
7146	}
7147	else {
7148	bSig0 \|= UINT64_C(`0x0001000000000000`);
7149	}
7150	shift128ExtraRightJamming(
7151	bSig0, bSig1, `0`, expDiff, &bSig0, &bSig1, &zSig2 );
7152	zExp = aExp;
7153	}
7154	else if ( expDiff < `0` ) {
7155	if ( bExp == `0x7FFF` ) {
7156	if (bSig0 \| bSig1) {
7157	return propagateFloat128NaN(a, b, status);
7158	}
7159	return packFloat128( zSign, `0x7FFF`, `0`, `0` );
7160	}
7161	if ( aExp == `0` ) {
7162	++expDiff;
7163	}
7164	else {
7165	aSig0 \|= UINT64_C(`0x0001000000000000`);
7166	}
7167	shift128ExtraRightJamming(
7168	aSig0, aSig1, `0`, - expDiff, &aSig0, &aSig1, &zSig2 );
7169	zExp = bExp;
7170	}
7171	else {
7172	if ( aExp == `0x7FFF` ) {
7173	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
7174	return propagateFloat128NaN(a, b, status);
7175	}
7176	return a;
7177	}
7178	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7179	if ( aExp == `0` ) {
7180	if (status->flush_to_zero) {
7181	if (zSig0 \| zSig1) {
7182	float_raise(float_flag_output_denormal, status);
7183	}
7184	return packFloat128(zSign, `0`, `0`, `0`);
7185	}
7186	return packFloat128( zSign, `0`, zSig0, zSig1 );
7187	}
7188	zSig2 = `0`;
7189	zSig0 \|= UINT64_C(`0x0002000000000000`);
7190	zExp = aExp;
7191	goto shiftRight1;
7192	}
7193	aSig0 \|= UINT64_C(`0x0001000000000000`);
7194	add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7195	--zExp;
7196	if ( zSig0 < UINT64_C(`0x0002000000000000`) ) goto roundAndPack;
7197	++zExp;
7198	shiftRight1:
7199	shift128ExtraRightJamming(
7200	zSig0, zSig1, zSig2, `1`, &zSig0, &zSig1, &zSig2 );
7201	roundAndPack:
7202	return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
7203
7204	}
7205
7206	/----------------------------------------------------------------------------*
7207	\| Returns the result of subtracting the absolute values of the quadruple-
7208	\| precision floating-point values `a' and `b'. If `zSign' is 1, the
7209	\| difference is negated before being returned. `zSign' is ignored if the
7210	\| result is a NaN. The subtraction is performed according to the IEC/IEEE
7211	\| Standard for Binary Floating-Point Arithmetic.
7212	----------------------------------------------------------------------------/
7213
7214	static float128 subFloat128Sigs(float128 a, float128 b, flag zSign,
7215	float_status *status)
7216	{
7217	int32_t aExp, bExp, zExp;
7218	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
7219	int32_t expDiff;
7220
7221	aSig1 = extractFloat128Frac1( a );
7222	aSig0 = extractFloat128Frac0( a );
7223	aExp = extractFloat128Exp( a );
7224	bSig1 = extractFloat128Frac1( b );
7225	bSig0 = extractFloat128Frac0( b );
7226	bExp = extractFloat128Exp( b );
7227	expDiff = aExp - bExp;
7228	shortShift128Left( aSig0, aSig1, `14`, &aSig0, &aSig1 );
7229	shortShift128Left( bSig0, bSig1, `14`, &bSig0, &bSig1 );
7230	if ( `0` < expDiff ) goto aExpBigger;
7231	if ( expDiff < `0` ) goto bExpBigger;
7232	if ( aExp == `0x7FFF` ) {
7233	if ( aSig0 \| aSig1 \| bSig0 \| bSig1 ) {
7234	return propagateFloat128NaN(a, b, status);
7235	}
7236	float_raise(float_flag_invalid, status);
7237	return float128_default_nan(status);
7238	}
7239	if ( aExp == `0` ) {
7240	aExp = `1`;
7241	bExp = `1`;
7242	}
7243	if ( bSig0 < aSig0 ) goto aBigger;
7244	if ( aSig0 < bSig0 ) goto bBigger;
7245	if ( bSig1 < aSig1 ) goto aBigger;
7246	if ( aSig1 < bSig1 ) goto bBigger;
7247	return packFloat128(status->float_rounding_mode == float_round_down,
7248	`0`, `0`, `0`);
7249	bExpBigger:
7250	if ( bExp == `0x7FFF` ) {
7251	if (bSig0 \| bSig1) {
7252	return propagateFloat128NaN(a, b, status);
7253	}
7254	return packFloat128( zSign ^ `1`, `0x7FFF`, `0`, `0` );
7255	}
7256	if ( aExp == `0` ) {
7257	++expDiff;
7258	}
7259	else {
7260	aSig0 \|= UINT64_C(`0x4000000000000000`);
7261	}
7262	shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7263	bSig0 \|= UINT64_C(`0x4000000000000000`);
7264	bBigger:
7265	sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
7266	zExp = bExp;
7267	zSign ^= `1`;
7268	goto normalizeRoundAndPack;
7269	aExpBigger:
7270	if ( aExp == `0x7FFF` ) {
7271	if (aSig0 \| aSig1) {
7272	return propagateFloat128NaN(a, b, status);
7273	}
7274	return a;
7275	}
7276	if ( bExp == `0` ) {
7277	--expDiff;
7278	}
7279	else {
7280	bSig0 \|= UINT64_C(`0x4000000000000000`);
7281	}
7282	shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
7283	aSig0 \|= UINT64_C(`0x4000000000000000`);
7284	aBigger:
7285	sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
7286	zExp = aExp;
7287	normalizeRoundAndPack:
7288	--zExp;
7289	return normalizeRoundAndPackFloat128(zSign, zExp - `14`, zSig0, zSig1,
7290	status);
7291
7292	}
7293
7294	/----------------------------------------------------------------------------*
7295	\| Returns the result of adding the quadruple-precision floating-point values
7296	\| `a' and `b'. The operation is performed according to the IEC/IEEE Standard
7297	\| for Binary Floating-Point Arithmetic.
7298	----------------------------------------------------------------------------/
7299
7300	float128 float128_add(float128 a, float128 b, float_status *status)
7301	{
7302	flag aSign, bSign;
7303
7304	aSign = extractFloat128Sign( a );
7305	bSign = extractFloat128Sign( b );
7306	if ( aSign == bSign ) {
7307	return addFloat128Sigs(a, b, aSign, status);
7308	}
7309	else {
7310	return subFloat128Sigs(a, b, aSign, status);
7311	}
7312
7313	}
7314
7315	/----------------------------------------------------------------------------*
7316	\| Returns the result of subtracting the quadruple-precision floating-point
7317	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
7318	\| Standard for Binary Floating-Point Arithmetic.
7319	----------------------------------------------------------------------------/
7320
7321	float128 float128_sub(float128 a, float128 b, float_status *status)
7322	{
7323	flag aSign, bSign;
7324
7325	aSign = extractFloat128Sign( a );
7326	bSign = extractFloat128Sign( b );
7327	if ( aSign == bSign ) {
7328	return subFloat128Sigs(a, b, aSign, status);
7329	}
7330	else {
7331	return addFloat128Sigs(a, b, aSign, status);
7332	}
7333
7334	}
7335
7336	/----------------------------------------------------------------------------*
7337	\| Returns the result of multiplying the quadruple-precision floating-point
7338	\| values `a' and `b'. The operation is performed according to the IEC/IEEE
7339	\| Standard for Binary Floating-Point Arithmetic.
7340	----------------------------------------------------------------------------/
7341
7342	float128 float128_mul(float128 a, float128 b, float_status *status)
7343	{
7344	flag aSign, bSign, zSign;
7345	int32_t aExp, bExp, zExp;
7346	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3;
7347
7348	aSig1 = extractFloat128Frac1( a );
7349	aSig0 = extractFloat128Frac0( a );
7350	aExp = extractFloat128Exp( a );
7351	aSign = extractFloat128Sign( a );
7352	bSig1 = extractFloat128Frac1( b );
7353	bSig0 = extractFloat128Frac0( b );
7354	bExp = extractFloat128Exp( b );
7355	bSign = extractFloat128Sign( b );
7356	zSign = aSign ^ bSign;
7357	if ( aExp == `0x7FFF` ) {
7358	if ( ( aSig0 \| aSig1 )
7359	\|\| ( ( bExp == `0x7FFF` ) && ( bSig0 \| bSig1 ) ) ) {
7360	return propagateFloat128NaN(a, b, status);
7361	}
7362	if ( ( bExp \| bSig0 \| bSig1 ) == `0` ) goto invalid;
7363	return packFloat128( zSign, `0x7FFF`, `0`, `0` );
7364	}
7365	if ( bExp == `0x7FFF` ) {
7366	if (bSig0 \| bSig1) {
7367	return propagateFloat128NaN(a, b, status);
7368	}
7369	if ( ( aExp \| aSig0 \| aSig1 ) == `0` ) {
7370	invalid:
7371	float_raise(float_flag_invalid, status);
7372	return float128_default_nan(status);
7373	}
7374	return packFloat128( zSign, `0x7FFF`, `0`, `0` );
7375	}
7376	if ( aExp == `0` ) {
7377	if ( ( aSig0 \| aSig1 ) == `0` ) return packFloat128( zSign, `0`, `0`, `0` );
7378	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7379	}
7380	if ( bExp == `0` ) {
7381	if ( ( bSig0 \| bSig1 ) == `0` ) return packFloat128( zSign, `0`, `0`, `0` );
7382	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7383	}
7384	zExp = aExp + bExp - `0x4000`;
7385	aSig0 \|= UINT64_C(`0x0001000000000000`);
7386	shortShift128Left( bSig0, bSig1, `16`, &bSig0, &bSig1 );
7387	mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 );
7388	add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 );
7389	zSig2 \|= ( zSig3 != `0` );
7390	if (UINT64_C( `0x0002000000000000`) <= zSig0 ) {
7391	shift128ExtraRightJamming(
7392	zSig0, zSig1, zSig2, `1`, &zSig0, &zSig1, &zSig2 );
7393	++zExp;
7394	}
7395	return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
7396
7397	}
7398
7399	/----------------------------------------------------------------------------*
7400	\| Returns the result of dividing the quadruple-precision floating-point value
7401	\| `a' by the corresponding value `b'. The operation is performed according to
7402	\| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7403	----------------------------------------------------------------------------/
7404
7405	float128 float128_div(float128 a, float128 b, float_status *status)
7406	{
7407	flag aSign, bSign, zSign;
7408	int32_t aExp, bExp, zExp;
7409	uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
7410	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
7411
7412	aSig1 = extractFloat128Frac1( a );
7413	aSig0 = extractFloat128Frac0( a );
7414	aExp = extractFloat128Exp( a );
7415	aSign = extractFloat128Sign( a );
7416	bSig1 = extractFloat128Frac1( b );
7417	bSig0 = extractFloat128Frac0( b );
7418	bExp = extractFloat128Exp( b );
7419	bSign = extractFloat128Sign( b );
7420	zSign = aSign ^ bSign;
7421	if ( aExp == `0x7FFF` ) {
7422	if (aSig0 \| aSig1) {
7423	return propagateFloat128NaN(a, b, status);
7424	}
7425	if ( bExp == `0x7FFF` ) {
7426	if (bSig0 \| bSig1) {
7427	return propagateFloat128NaN(a, b, status);
7428	}
7429	goto invalid;
7430	}
7431	return packFloat128( zSign, `0x7FFF`, `0`, `0` );
7432	}
7433	if ( bExp == `0x7FFF` ) {
7434	if (bSig0 \| bSig1) {
7435	return propagateFloat128NaN(a, b, status);
7436	}
7437	return packFloat128( zSign, `0`, `0`, `0` );
7438	}
7439	if ( bExp == `0` ) {
7440	if ( ( bSig0 \| bSig1 ) == `0` ) {
7441	if ( ( aExp \| aSig0 \| aSig1 ) == `0` ) {
7442	invalid:
7443	float_raise(float_flag_invalid, status);
7444	return float128_default_nan(status);
7445	}
7446	float_raise(float_flag_divbyzero, status);
7447	return packFloat128( zSign, `0x7FFF`, `0`, `0` );
7448	}
7449	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7450	}
7451	if ( aExp == `0` ) {
7452	if ( ( aSig0 \| aSig1 ) == `0` ) return packFloat128( zSign, `0`, `0`, `0` );
7453	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7454	}
7455	zExp = aExp - bExp + `0x3FFD`;
7456	shortShift128Left(
7457	aSig0 \| UINT64_C(`0x0001000000000000`), aSig1, `15`, &aSig0, &aSig1 );
7458	shortShift128Left(
7459	bSig0 \| UINT64_C(`0x0001000000000000`), bSig1, `15`, &bSig0, &bSig1 );
7460	if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) {
7461	shift128Right( aSig0, aSig1, `1`, &aSig0, &aSig1 );
7462	++zExp;
7463	}
7464	zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 );
7465	mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 );
7466	sub192( aSig0, aSig1, `0`, term0, term1, term2, &rem0, &rem1, &rem2 );
7467	while ( (int64_t) rem0 < `0` ) {
7468	--zSig0;
7469	add192( rem0, rem1, rem2, `0`, bSig0, bSig1, &rem0, &rem1, &rem2 );
7470	}
7471	zSig1 = estimateDiv128To64( rem1, rem2, bSig0 );
7472	if ( ( zSig1 & `0x3FFF` ) <= `4` ) {
7473	mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 );
7474	sub192( rem1, rem2, `0`, term1, term2, term3, &rem1, &rem2, &rem3 );
7475	while ( (int64_t) rem1 < `0` ) {
7476	--zSig1;
7477	add192( rem1, rem2, rem3, `0`, bSig0, bSig1, &rem1, &rem2, &rem3 );
7478	}
7479	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != `0` );
7480	}
7481	shift128ExtraRightJamming( zSig0, zSig1, `0`, `15`, &zSig0, &zSig1, &zSig2 );
7482	return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
7483
7484	}
7485
/*----------------------------------------------------------------------------
| Returns the remainder of the quadruple-precision floating-point value `a'
| with respect to the corresponding value `b'.  The operation is performed
| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
*----------------------------------------------------------------------------*/
7491
7492	float128 float128_rem(float128 a, float128 b, float_status *status)
7493	{
7494	flag aSign, zSign;
7495	int32_t aExp, bExp, expDiff;
7496	uint64_t aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2;
7497	uint64_t allZero, alternateASig0, alternateASig1, sigMean1;
7498	int64_t sigMean0;
7499
7500	aSig1 = extractFloat128Frac1( a );
7501	aSig0 = extractFloat128Frac0( a );
7502	aExp = extractFloat128Exp( a );
7503	aSign = extractFloat128Sign( a );
7504	bSig1 = extractFloat128Frac1( b );
7505	bSig0 = extractFloat128Frac0( b );
7506	bExp = extractFloat128Exp( b );
7507	if ( aExp == `0x7FFF` ) {
7508	if ( ( aSig0 \| aSig1 )
7509	\|\| ( ( bExp == `0x7FFF` ) && ( bSig0 \| bSig1 ) ) ) {
7510	return propagateFloat128NaN(a, b, status);
7511	}
7512	goto invalid;
7513	}
7514	if ( bExp == `0x7FFF` ) {
7515	if (bSig0 \| bSig1) {
7516	return propagateFloat128NaN(a, b, status);
7517	}
7518	return a;
7519	}
7520	if ( bExp == `0` ) {
7521	if ( ( bSig0 \| bSig1 ) == `0` ) {
7522	invalid:
7523	float_raise(float_flag_invalid, status);
7524	return float128_default_nan(status);
7525	}
7526	normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 );
7527	}
7528	if ( aExp == `0` ) {
7529	if ( ( aSig0 \| aSig1 ) == `0` ) return a;
7530	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7531	}
7532	expDiff = aExp - bExp;
7533	if ( expDiff < -`1` ) return a;
7534	shortShift128Left(
7535	aSig0 \| UINT64_C(`0x0001000000000000`),
7536	aSig1,
7537	`15` - ( expDiff < `0` ),
7538	&aSig0,
7539	&aSig1
7540	);
7541	shortShift128Left(
7542	bSig0 \| UINT64_C(`0x0001000000000000`), bSig1, `15`, &bSig0, &bSig1 );
7543	q = le128( bSig0, bSig1, aSig0, aSig1 );
7544	if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7545	expDiff -= `64`;
7546	while ( `0` < expDiff ) {
7547	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7548	q = ( `4` < q ) ? q - `4` : `0`;
7549	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7550	shortShift192Left( term0, term1, term2, `61`, &term1, &term2, &allZero );
7551	shortShift128Left( aSig0, aSig1, `61`, &aSig0, &allZero );
7552	sub128( aSig0, `0`, term1, term2, &aSig0, &aSig1 );
7553	expDiff -= `61`;
7554	}
7555	if ( -`64` < expDiff ) {
7556	q = estimateDiv128To64( aSig0, aSig1, bSig0 );
7557	q = ( `4` < q ) ? q - `4` : `0`;
7558	q >>= - expDiff;
7559	shift128Right( bSig0, bSig1, `12`, &bSig0, &bSig1 );
7560	expDiff += `52`;
7561	if ( expDiff < `0` ) {
7562	shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
7563	}
7564	else {
7565	shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 );
7566	}
7567	mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 );
7568	sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 );
7569	}
7570	else {
7571	shift128Right( aSig0, aSig1, `12`, &aSig0, &aSig1 );
7572	shift128Right( bSig0, bSig1, `12`, &bSig0, &bSig1 );
7573	}
7574	do {
7575	alternateASig0 = aSig0;
7576	alternateASig1 = aSig1;
7577	++q;
7578	sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 );
7579	} while ( `0` <= (int64_t) aSig0 );
7580	add128(
7581	aSig0, aSig1, alternateASig0, alternateASig1, (uint64_t *)&sigMean0, &sigMean1 );
7582	if ( ( sigMean0 < `0` )
7583	\|\| ( ( ( sigMean0 \| sigMean1 ) == `0` ) && ( q & `1` ) ) ) {
7584	aSig0 = alternateASig0;
7585	aSig1 = alternateASig1;
7586	}
7587	zSign = ( (int64_t) aSig0 < `0` );
7588	if ( zSign ) sub128( `0`, `0`, aSig0, aSig1, &aSig0, &aSig1 );
7589	return normalizeRoundAndPackFloat128(aSign ^ zSign, bExp - `4`, aSig0, aSig1,
7590	status);
7591	}
7592
7593	/----------------------------------------------------------------------------*
7594	\| Returns the square root of the quadruple-precision floating-point value `a'.
7595	\| The operation is performed according to the IEC/IEEE Standard for Binary
7596	\| Floating-Point Arithmetic.
7597	----------------------------------------------------------------------------/
7598
7599	float128 float128_sqrt(float128 a, float_status *status)
7600	{
7601	flag aSign;
7602	int32_t aExp, zExp;
7603	uint64_t aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0;
7604	uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
7605
7606	aSig1 = extractFloat128Frac1( a );
7607	aSig0 = extractFloat128Frac0( a );
7608	aExp = extractFloat128Exp( a );
7609	aSign = extractFloat128Sign( a );
7610	if ( aExp == `0x7FFF` ) {
7611	if (aSig0 \| aSig1) {
7612	return propagateFloat128NaN(a, a, status);
7613	}
7614	if ( ! aSign ) return a;
7615	goto invalid;
7616	}
7617	if ( aSign ) {
7618	if ( ( aExp \| aSig0 \| aSig1 ) == `0` ) return a;
7619	invalid:
7620	float_raise(float_flag_invalid, status);
7621	return float128_default_nan(status);
7622	}
7623	if ( aExp == `0` ) {
7624	if ( ( aSig0 \| aSig1 ) == `0` ) return packFloat128( `0`, `0`, `0`, `0` );
7625	normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
7626	}
7627	zExp = ( ( aExp - `0x3FFF` )>>`1` ) + `0x3FFE`;
7628	aSig0 \|= UINT64_C(`0x0001000000000000`);
7629	zSig0 = estimateSqrt32( aExp, aSig0>>`17` );
7630	shortShift128Left( aSig0, aSig1, `13` - ( aExp & `1` ), &aSig0, &aSig1 );
7631	zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<`32` ) + ( zSig0<<`30` );
7632	doubleZSig0 = zSig0<<`1`;
7633	mul64To128( zSig0, zSig0, &term0, &term1 );
7634	sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
7635	while ( (int64_t) rem0 < `0` ) {
7636	--zSig0;
7637	doubleZSig0 -= `2`;
7638	add128( rem0, rem1, zSig0>>`63`, doubleZSig0 \| `1`, &rem0, &rem1 );
7639	}
7640	zSig1 = estimateDiv128To64( rem1, `0`, doubleZSig0 );
7641	if ( ( zSig1 & `0x1FFF` ) <= `5` ) {
7642	if ( zSig1 == `0` ) zSig1 = `1`;
7643	mul64To128( doubleZSig0, zSig1, &term1, &term2 );
7644	sub128( rem1, `0`, term1, term2, &rem1, &rem2 );
7645	mul64To128( zSig1, zSig1, &term2, &term3 );
7646	sub192( rem1, rem2, `0`, `0`, term2, term3, &rem1, &rem2, &rem3 );
7647	while ( (int64_t) rem1 < `0` ) {
7648	--zSig1;
7649	shortShift128Left( `0`, zSig1, `1`, &term2, &term3 );
7650	term3 \|= `1`;
7651	term2 \|= doubleZSig0;
7652	add192( rem1, rem2, rem3, `0`, term2, term3, &rem1, &rem2, &rem3 );
7653	}
7654	zSig1 \|= ( ( rem1 \| rem2 \| rem3 ) != `0` );
7655	}
7656	shift128ExtraRightJamming( zSig0, zSig1, `0`, `14`, &zSig0, &zSig1, &zSig2 );
7657	return roundAndPackFloat128(`0`, zExp, zSig0, zSig1, zSig2, status);
7658
7659	}
7660
7661	/----------------------------------------------------------------------------*
7662	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
7663	\| the corresponding value `b', and 0 otherwise. The invalid exception is
7664	\| raised if either operand is a NaN. Otherwise, the comparison is performed
7665	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7666	----------------------------------------------------------------------------/
7667
7668	int float128_eq(float128 a, float128 b, float_status *status)
7669	{
7670
7671	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7672	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7673	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7674	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7675	) {
7676	float_raise(float_flag_invalid, status);
7677	return `0`;
7678	}
7679	return
7680	( a.low == b.low )
7681	&& ( ( a.high == b.high )
7682	\|\| ( ( a.low == `0` )
7683	&& ( (uint64_t) ( ( a.high \| b.high )<<`1` ) == `0` ) )
7684	);
7685
7686	}
7687
7688	/----------------------------------------------------------------------------*
7689	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
7690	\| or equal to the corresponding value `b', and 0 otherwise. The invalid
7691	\| exception is raised if either operand is a NaN. The comparison is performed
7692	\| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7693	----------------------------------------------------------------------------/
7694
7695	int float128_le(float128 a, float128 b, float_status *status)
7696	{
7697	flag aSign, bSign;
7698
7699	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7700	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7701	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7702	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7703	) {
7704	float_raise(float_flag_invalid, status);
7705	return `0`;
7706	}
7707	aSign = extractFloat128Sign( a );
7708	bSign = extractFloat128Sign( b );
7709	if ( aSign != bSign ) {
7710	return
7711	aSign
7712	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
7713	== `0` );
7714	}
7715	return
7716	aSign ? le128( b.high, b.low, a.high, a.low )
7717	: le128( a.high, a.low, b.high, b.low );
7718
7719	}
7720
7721	/----------------------------------------------------------------------------*
7722	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
7723	\| the corresponding value `b', and 0 otherwise. The invalid exception is
7724	\| raised if either operand is a NaN. The comparison is performed according
7725	\| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7726	----------------------------------------------------------------------------/
7727
7728	int float128_lt(float128 a, float128 b, float_status *status)
7729	{
7730	flag aSign, bSign;
7731
7732	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7733	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7734	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7735	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7736	) {
7737	float_raise(float_flag_invalid, status);
7738	return `0`;
7739	}
7740	aSign = extractFloat128Sign( a );
7741	bSign = extractFloat128Sign( b );
7742	if ( aSign != bSign ) {
7743	return
7744	aSign
7745	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
7746	!= `0` );
7747	}
7748	return
7749	aSign ? lt128( b.high, b.low, a.high, a.low )
7750	: lt128( a.high, a.low, b.high, b.low );
7751
7752	}
7753
7754	/----------------------------------------------------------------------------*
7755	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7756	\| be compared, and 0 otherwise. The invalid exception is raised if either
7757	\| operand is a NaN. The comparison is performed according to the IEC/IEEE
7758	\| Standard for Binary Floating-Point Arithmetic.
7759	----------------------------------------------------------------------------/
7760
7761	int float128_unordered(float128 a, float128 b, float_status *status)
7762	{
7763	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7764	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7765	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7766	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7767	) {
7768	float_raise(float_flag_invalid, status);
7769	return `1`;
7770	}
7771	return `0`;
7772	}
7773
7774	/----------------------------------------------------------------------------*
7775	\| Returns 1 if the quadruple-precision floating-point value `a' is equal to
7776	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7777	\| exception. The comparison is performed according to the IEC/IEEE Standard
7778	\| for Binary Floating-Point Arithmetic.
7779	----------------------------------------------------------------------------/
7780
7781	int float128_eq_quiet(float128 a, float128 b, float_status *status)
7782	{
7783
7784	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7785	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7786	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7787	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7788	) {
7789	if (float128_is_signaling_nan(a, status)
7790	\|\| float128_is_signaling_nan(b, status)) {
7791	float_raise(float_flag_invalid, status);
7792	}
7793	return `0`;
7794	}
7795	return
7796	( a.low == b.low )
7797	&& ( ( a.high == b.high )
7798	\|\| ( ( a.low == `0` )
7799	&& ( (uint64_t) ( ( a.high \| b.high )<<`1` ) == `0` ) )
7800	);
7801
7802	}
7803
7804	/----------------------------------------------------------------------------*
7805	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
7806	\| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
7807	\| cause an exception. Otherwise, the comparison is performed according to the
7808	\| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
7809	----------------------------------------------------------------------------/
7810
7811	int float128_le_quiet(float128 a, float128 b, float_status *status)
7812	{
7813	flag aSign, bSign;
7814
7815	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7816	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7817	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7818	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7819	) {
7820	if (float128_is_signaling_nan(a, status)
7821	\|\| float128_is_signaling_nan(b, status)) {
7822	float_raise(float_flag_invalid, status);
7823	}
7824	return `0`;
7825	}
7826	aSign = extractFloat128Sign( a );
7827	bSign = extractFloat128Sign( b );
7828	if ( aSign != bSign ) {
7829	return
7830	aSign
7831	\|\| ( ( ( (uint64_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
7832	== `0` );
7833	}
7834	return
7835	aSign ? le128( b.high, b.low, a.high, a.low )
7836	: le128( a.high, a.low, b.high, b.low );
7837
7838	}
7839
7840	/----------------------------------------------------------------------------*
7841	\| Returns 1 if the quadruple-precision floating-point value `a' is less than
7842	\| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
7843	\| exception. Otherwise, the comparison is performed according to the IEC/IEEE
7844	\| Standard for Binary Floating-Point Arithmetic.
7845	----------------------------------------------------------------------------/
7846
7847	int float128_lt_quiet(float128 a, float128 b, float_status *status)
7848	{
7849	flag aSign, bSign;
7850
7851	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7852	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7853	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7854	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7855	) {
7856	if (float128_is_signaling_nan(a, status)
7857	\|\| float128_is_signaling_nan(b, status)) {
7858	float_raise(float_flag_invalid, status);
7859	}
7860	return `0`;
7861	}
7862	aSign = extractFloat128Sign( a );
7863	bSign = extractFloat128Sign( b );
7864	if ( aSign != bSign ) {
7865	return
7866	aSign
7867	&& ( ( ( (uint64_t) ( ( a.high \| b.high )<<`1` ) ) \| a.low \| b.low )
7868	!= `0` );
7869	}
7870	return
7871	aSign ? lt128( b.high, b.low, a.high, a.low )
7872	: lt128( a.high, a.low, b.high, b.low );
7873
7874	}
7875
7876	/----------------------------------------------------------------------------*
7877	\| Returns 1 if the quadruple-precision floating-point values `a' and `b' cannot
7878	\| be compared, and 0 otherwise. Quiet NaNs do not cause an exception. The
7879	\| comparison is performed according to the IEC/IEEE Standard for Binary
7880	\| Floating-Point Arithmetic.
7881	----------------------------------------------------------------------------/
7882
7883	int float128_unordered_quiet(float128 a, float128 b, float_status *status)
7884	{
7885	if ( ( ( extractFloat128Exp( a ) == `0x7FFF` )
7886	&& ( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) )
7887	\|\| ( ( extractFloat128Exp( b ) == `0x7FFF` )
7888	&& ( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )
7889	) {
7890	if (float128_is_signaling_nan(a, status)
7891	\|\| float128_is_signaling_nan(b, status)) {
7892	float_raise(float_flag_invalid, status);
7893	}
7894	return `1`;
7895	}
7896	return `0`;
7897	}
7898
7899	static inline int floatx80_compare_internal(floatx80 a, floatx80 b,
7900	int is_quiet, float_status *status)
7901	{
7902	flag aSign, bSign;
7903
7904	if (floatx80_invalid_encoding(a) \|\| floatx80_invalid_encoding(b)) {
7905	float_raise(float_flag_invalid, status);
7906	return float_relation_unordered;
7907	}
7908	if (( ( extractFloatx80Exp( a ) == `0x7fff` ) &&
7909	( extractFloatx80Frac( a )<<`1` ) ) \|\|
7910	( ( extractFloatx80Exp( b ) == `0x7fff` ) &&
7911	( extractFloatx80Frac( b )<<`1` ) )) {
7912	if (!is_quiet \|\|
7913	floatx80_is_signaling_nan(a, status) \|\|
7914	floatx80_is_signaling_nan(b, status)) {
7915	float_raise(float_flag_invalid, status);
7916	}
7917	return float_relation_unordered;
7918	}
7919	aSign = extractFloatx80Sign( a );
7920	bSign = extractFloatx80Sign( b );
7921	if ( aSign != bSign ) {
7922
7923	if ( ( ( (uint16_t) ( ( a.high \| b.high ) << `1` ) ) == `0`) &&
7924	( ( a.low \| b.low ) == `0` ) ) {
7925	/ zero case /
7926	return float_relation_equal;
7927	} else {
7928	return `1` - (`2` * aSign);
7929	}
7930	} else {
7931	if (a.low == b.low && a.high == b.high) {
7932	return float_relation_equal;
7933	} else {
7934	return `1` - `2` * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7935	}
7936	}
7937	}
7938
7939	int floatx80_compare(floatx80 a, floatx80 b, float_status *status)
7940	{
7941	return floatx80_compare_internal(a, b, `0`, status);
7942	}
7943
7944	int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status *status)
7945	{
7946	return floatx80_compare_internal(a, b, `1`, status);
7947	}
7948
7949	static inline int float128_compare_internal(float128 a, float128 b,
7950	int is_quiet, float_status *status)
7951	{
7952	flag aSign, bSign;
7953
7954	if (( ( extractFloat128Exp( a ) == `0x7fff` ) &&
7955	( extractFloat128Frac0( a ) \| extractFloat128Frac1( a ) ) ) \|\|
7956	( ( extractFloat128Exp( b ) == `0x7fff` ) &&
7957	( extractFloat128Frac0( b ) \| extractFloat128Frac1( b ) ) )) {
7958	if (!is_quiet \|\|
7959	float128_is_signaling_nan(a, status) \|\|
7960	float128_is_signaling_nan(b, status)) {
7961	float_raise(float_flag_invalid, status);
7962	}
7963	return float_relation_unordered;
7964	}
7965	aSign = extractFloat128Sign( a );
7966	bSign = extractFloat128Sign( b );
7967	if ( aSign != bSign ) {
7968	if ( ( ( ( a.high \| b.high )<<`1` ) \| a.low \| b.low ) == `0` ) {
7969	/ zero case /
7970	return float_relation_equal;
7971	} else {
7972	return `1` - (`2` * aSign);
7973	}
7974	} else {
7975	if (a.low == b.low && a.high == b.high) {
7976	return float_relation_equal;
7977	} else {
7978	return `1` - `2` * (aSign ^ ( lt128( a.high, a.low, b.high, b.low ) ));
7979	}
7980	}
7981	}
7982
7983	int float128_compare(float128 a, float128 b, float_status *status)
7984	{
7985	return float128_compare_internal(a, b, `0`, status);
7986	}
7987
7988	int float128_compare_quiet(float128 a, float128 b, float_status *status)
7989	{
7990	return float128_compare_internal(a, b, `1`, status);
7991	}
7992
7993	floatx80 floatx80_scalbn(floatx80 a, int n, float_status *status)
7994	{
7995	flag aSign;
7996	int32_t aExp;
7997	uint64_t aSig;
7998
7999	if (floatx80_invalid_encoding(a)) {
8000	float_raise(float_flag_invalid, status);
8001	return floatx80_default_nan(status);
8002	}
8003	aSig = extractFloatx80Frac( a );
8004	aExp = extractFloatx80Exp( a );
8005	aSign = extractFloatx80Sign( a );
8006
8007	if ( aExp == `0x7FFF` ) {
8008	if ( aSig<<`1` ) {
8009	return propagateFloatx80NaN(a, a, status);
8010	}
8011	return a;
8012	}
8013
8014	if (aExp == `0`) {
8015	if (aSig == `0`) {
8016	return a;
8017	}
8018	aExp++;
8019	}
8020
8021	if (n > `0x10000`) {
8022	n = `0x10000`;
8023	} else if (n < -`0x10000`) {
8024	n = -`0x10000`;
8025	}
8026
8027	aExp += n;
8028	return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
8029	aSign, aExp, aSig, `0`, status);
8030	}
8031
8032	float128 float128_scalbn(float128 a, int n, float_status *status)
8033	{
8034	flag aSign;
8035	int32_t aExp;
8036	uint64_t aSig0, aSig1;
8037
8038	aSig1 = extractFloat128Frac1( a );
8039	aSig0 = extractFloat128Frac0( a );
8040	aExp = extractFloat128Exp( a );
8041	aSign = extractFloat128Sign( a );
8042	if ( aExp == `0x7FFF` ) {
8043	if ( aSig0 \| aSig1 ) {
8044	return propagateFloat128NaN(a, a, status);
8045	}
8046	return a;
8047	}
8048	if (aExp != `0`) {
8049	aSig0 \|= UINT64_C(`0x0001000000000000`);
8050	} else if (aSig0 == `0` && aSig1 == `0`) {
8051	return a;
8052	} else {
8053	aExp++;
8054	}
8055
8056	if (n > `0x10000`) {
8057	n = `0x10000`;
8058	} else if (n < -`0x10000`) {
8059	n = -`0x10000`;
8060	}
8061
8062	aExp += n - `1`;
8063	return normalizeRoundAndPackFloat128( aSign, aExp, aSig0, aSig1
8064	, status);
8065
8066	}
8067
8068	static void __attribute__((constructor)) softfloat_init(void)
8069	{
8070	union_float64 ua, ub, uc, ur;
8071
8072	if (QEMU_NO_HARDFLOAT) {
8073	return;
8074	}
8075	/*
8076	* Test that the host's FMA is not obviously broken. For example,
8077	* glibc < 2.23 can perform an incorrect FMA on certain hosts; see
8078	* https://sourceware.org/bugzilla/show_bug.cgi?id=13304
8079	*/
8080	ua.s = `0x0020000000000001ULL`;
8081	ub.s = `0x3ca0000000000000ULL`;
8082	uc.s = `0x0020000000000000ULL`;
8083	ur.h = fma(ua.h, ub.h, uc.h);
8084	if (ur.s != `0x0020000000000001ULL`) {
8085	force_soft_fma = true;
8086	}
8087	}
8088

Browse the source code of qemu/fpu/softfloat.c