softfloat-macros.h source code [qemu/include/fpu/softfloat-macros.h]

1	/*
2	* QEMU float support macros
3	*
4	* The code in this source file is derived from release 2a of the SoftFloat
5	* IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
6	* some later contributions) are provided under that license, as detailed below.
7	* It has subsequently been modified by contributors to the QEMU Project,
8	* so some portions are provided under:
9	* the SoftFloat-2a license
10	* the BSD license
11	* GPL-v2-or-later
12	*
13	* Any future contributions to this file after December 1st 2014 will be
14	* taken to be licensed under the Softfloat-2a license unless specifically
15	* indicated otherwise.
16	*/
17
18	/*
19	===============================================================================
20	This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
21	Arithmetic Package, Release 2a.
22
23	Written by John R. Hauser. This work was made possible in part by the
24	International Computer Science Institute, located at Suite 600, 1947 Center
25	Street, Berkeley, California 94704. Funding was partially provided by the
26	National Science Foundation under grant MIP-9311980. The original version
27	of this code was written as part of a project to build a fixed-point vector
28	processor in collaboration with the University of California at Berkeley,
29	overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30	is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31	arithmetic/SoftFloat.html'.
32
33	THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34	has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35	TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36	PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37	AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38
39	Derivative works are acceptable, even for commercial purposes, so long as
40	(1) they include prominent notice that the work is derivative, and (2) they
41	include prominent notice akin to these four paragraphs for those parts of
42	this code that are retained.
43
44	===============================================================================
45	*/
46
47	/ BSD licensing:*
48	* Copyright (c) 2006, Fabrice Bellard
49	* All rights reserved.
50	*
51	* Redistribution and use in source and binary forms, with or without
52	* modification, are permitted provided that the following conditions are met:
53	*
54	* 1. Redistributions of source code must retain the above copyright notice,
55	* this list of conditions and the following disclaimer.
56	*
57	* 2. Redistributions in binary form must reproduce the above copyright notice,
58	* this list of conditions and the following disclaimer in the documentation
59	* and/or other materials provided with the distribution.
60	*
61	* 3. Neither the name of the copyright holder nor the names of its contributors
62	* may be used to endorse or promote products derived from this software without
63	* specific prior written permission.
64	*
65	* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
66	* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
67	* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
68	* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
69	* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
70	* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
71	* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
72	* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
73	* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
74	* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
75	* THE POSSIBILITY OF SUCH DAMAGE.
76	*/
77
78	/ Portions of this work are licensed under the terms of the GNU GPL,*
79	* version 2 or later. See the COPYING file in the top-level directory.
80	*/
81
82	#ifndef FPU_SOFTFLOAT_MACROS_H
83	#define FPU_SOFTFLOAT_MACROS_H
84
85	#include "fpu/softfloat-types.h"
86
87	/----------------------------------------------------------------------------*
88	\| Shifts `a' right by the number of bits given in `count'. If any nonzero
89	\| bits are shifted off, they are ``jammed'' into the least significant bit of
90	\| the result by setting the least significant bit to 1. The value of `count'
91	\| can be arbitrarily large; in particular, if `count' is greater than 32, the
92	\| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
93	\| The result is stored in the location pointed to by `zPtr'.
94	----------------------------------------------------------------------------/
95
96	static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr)
97	{
98	uint32_t z;
99
100	if ( count == `0` ) {
101	z = a;
102	}
103	else if ( count < `32` ) {
104	z = ( a>>count ) \| ( ( a<<( ( - count ) & `31` ) ) != `0` );
105	}
106	else {
107	z = ( a != `0` );
108	}
109	*zPtr = z;
110
111	}
112
113	/----------------------------------------------------------------------------*
114	\| Shifts `a' right by the number of bits given in `count'. If any nonzero
115	\| bits are shifted off, they are ``jammed'' into the least significant bit of
116	\| the result by setting the least significant bit to 1. The value of `count'
117	\| can be arbitrarily large; in particular, if `count' is greater than 64, the
118	\| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
119	\| The result is stored in the location pointed to by `zPtr'.
120	----------------------------------------------------------------------------/
121
122	static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr)
123	{
124	uint64_t z;
125
126	if ( count == `0` ) {
127	z = a;
128	}
129	else if ( count < `64` ) {
130	z = ( a>>count ) \| ( ( a<<( ( - count ) & `63` ) ) != `0` );
131	}
132	else {
133	z = ( a != `0` );
134	}
135	*zPtr = z;
136
137	}
138
139	/----------------------------------------------------------------------------*
140	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
141	\| _plus_ the number of bits given in `count'. The shifted result is at most
142	\| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The
143	\| bits shifted off form a second 64-bit result as follows: The _last_ bit
144	\| shifted off is the most-significant bit of the extra result, and the other
145	\| 63 bits of the extra result are all zero if and only if _all_but_the_last_
146	\| bits shifted off were all zero. This extra result is stored in the location
147	\| pointed to by `z1Ptr'. The value of `count' can be arbitrarily large.
148	\| (This routine makes more sense if `a0' and `a1' are considered to form a
149	\| fixed-point value with binary point between `a0' and `a1'. This fixed-point
150	\| value is shifted right by the number of bits given in `count', and the
151	\| integer part of the result is returned at the location pointed to by
152	\| `z0Ptr'. The fractional part of the result may be slightly corrupted as
153	\| described above, and is returned at the location pointed to by `z1Ptr'.)
154	----------------------------------------------------------------------------/
155
156	static inline void
157	shift64ExtraRightJamming(
158	uint64_t a0, uint64_t a1, int count, uint64_t z0Ptr, uint64_t z1Ptr)
159	{
160	uint64_t z0, z1;
161	int8_t negCount = ( - count ) & `63`;
162
163	if ( count == `0` ) {
164	z1 = a1;
165	z0 = a0;
166	}
167	else if ( count < `64` ) {
168	z1 = ( a0<<negCount ) \| ( a1 != `0` );
169	z0 = a0>>count;
170	}
171	else {
172	if ( count == `64` ) {
173	z1 = a0 \| ( a1 != `0` );
174	}
175	else {
176	z1 = ( ( a0 \| a1 ) != `0` );
177	}
178	z0 = `0`;
179	}
180	*z1Ptr = z1;
181	*z0Ptr = z0;
182
183	}
184
185	/----------------------------------------------------------------------------*
186	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
187	\| number of bits given in `count'. Any bits shifted off are lost. The value
188	\| of `count' can be arbitrarily large; in particular, if `count' is greater
189	\| than 128, the result will be 0. The result is broken into two 64-bit pieces
190	\| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
191	----------------------------------------------------------------------------/
192
193	static inline void
194	shift128Right(
195	uint64_t a0, uint64_t a1, int count, uint64_t z0Ptr, uint64_t z1Ptr)
196	{
197	uint64_t z0, z1;
198	int8_t negCount = ( - count ) & `63`;
199
200	if ( count == `0` ) {
201	z1 = a1;
202	z0 = a0;
203	}
204	else if ( count < `64` ) {
205	z1 = ( a0<<negCount ) \| ( a1>>count );
206	z0 = a0>>count;
207	}
208	else {
209	z1 = (count < `128`) ? (a0 >> (count & `63`)) : `0`;
210	z0 = `0`;
211	}
212	*z1Ptr = z1;
213	*z0Ptr = z0;
214
215	}
216
217	/----------------------------------------------------------------------------*
218	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
219	\| number of bits given in `count'. If any nonzero bits are shifted off, they
220	\| are ``jammed'' into the least significant bit of the result by setting the
221	\| least significant bit to 1. The value of `count' can be arbitrarily large;
222	\| in particular, if `count' is greater than 128, the result will be either
223	\| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
224	\| nonzero. The result is broken into two 64-bit pieces which are stored at
225	\| the locations pointed to by `z0Ptr' and `z1Ptr'.
226	----------------------------------------------------------------------------/
227
228	static inline void
229	shift128RightJamming(
230	uint64_t a0, uint64_t a1, int count, uint64_t z0Ptr, uint64_t z1Ptr)
231	{
232	uint64_t z0, z1;
233	int8_t negCount = ( - count ) & `63`;
234
235	if ( count == `0` ) {
236	z1 = a1;
237	z0 = a0;
238	}
239	else if ( count < `64` ) {
240	z1 = ( a0<<negCount ) \| ( a1>>count ) \| ( ( a1<<negCount ) != `0` );
241	z0 = a0>>count;
242	}
243	else {
244	if ( count == `64` ) {
245	z1 = a0 \| ( a1 != `0` );
246	}
247	else if ( count < `128` ) {
248	z1 = ( a0>>( count & `63` ) ) \| ( ( ( a0<<negCount ) \| a1 ) != `0` );
249	}
250	else {
251	z1 = ( ( a0 \| a1 ) != `0` );
252	}
253	z0 = `0`;
254	}
255	*z1Ptr = z1;
256	*z0Ptr = z0;
257
258	}
259
260	/----------------------------------------------------------------------------*
261	\| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
262	\| by 64 _plus_ the number of bits given in `count'. The shifted result is
263	\| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
264	\| stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted
265	\| off form a third 64-bit result as follows: The _last_ bit shifted off is
266	\| the most-significant bit of the extra result, and the other 63 bits of the
267	\| extra result are all zero if and only if _all_but_the_last_ bits shifted off
268	\| were all zero. This extra result is stored in the location pointed to by
269	\| `z2Ptr'. The value of `count' can be arbitrarily large.
270	\| (This routine makes more sense if `a0', `a1', and `a2' are considered
271	\| to form a fixed-point value with binary point between `a1' and `a2'. This
272	\| fixed-point value is shifted right by the number of bits given in `count',
273	\| and the integer part of the result is returned at the locations pointed to
274	\| by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly
275	\| corrupted as described above, and is returned at the location pointed to by
276	\| `z2Ptr'.)
277	----------------------------------------------------------------------------/
278
279	static inline void
280	shift128ExtraRightJamming(
281	uint64_t a0,
282	uint64_t a1,
283	uint64_t a2,
284	int count,
285	uint64_t *z0Ptr,
286	uint64_t *z1Ptr,
287	uint64_t *z2Ptr
288	)
289	{
290	uint64_t z0, z1, z2;
291	int8_t negCount = ( - count ) & `63`;
292
293	if ( count == `0` ) {
294	z2 = a2;
295	z1 = a1;
296	z0 = a0;
297	}
298	else {
299	if ( count < `64` ) {
300	z2 = a1<<negCount;
301	z1 = ( a0<<negCount ) \| ( a1>>count );
302	z0 = a0>>count;
303	}
304	else {
305	if ( count == `64` ) {
306	z2 = a1;
307	z1 = a0;
308	}
309	else {
310	a2 \|= a1;
311	if ( count < `128` ) {
312	z2 = a0<<negCount;
313	z1 = a0>>( count & `63` );
314	}
315	else {
316	z2 = ( count == `128` ) ? a0 : ( a0 != `0` );
317	z1 = `0`;
318	}
319	}
320	z0 = `0`;
321	}
322	z2 \|= ( a2 != `0` );
323	}
324	*z2Ptr = z2;
325	*z1Ptr = z1;
326	*z0Ptr = z0;
327
328	}
329
330	/----------------------------------------------------------------------------*
331	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
332	\| number of bits given in `count'. Any bits shifted off are lost. The value
333	\| of `count' must be less than 64. The result is broken into two 64-bit
334	\| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
335	----------------------------------------------------------------------------/
336
337	static inline void shortShift128Left(uint64_t a0, uint64_t a1, int count,
338	uint64_t z0Ptr, uint64_t z1Ptr)
339	{
340	*z1Ptr = a1 << count;
341	*z0Ptr = count == `0` ? a0 : (a0 << count) \| (a1 >> (-count & `63`));
342	}
343
344	/----------------------------------------------------------------------------*
345	\| Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the
346	\| number of bits given in `count'. Any bits shifted off are lost. The value
347	\| of `count' may be greater than 64. The result is broken into two 64-bit
348	\| pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
349	----------------------------------------------------------------------------/
350
351	static inline void shift128Left(uint64_t a0, uint64_t a1, int count,
352	uint64_t z0Ptr, uint64_t z1Ptr)
353	{
354	if (count < `64`) {
355	*z1Ptr = a1 << count;
356	*z0Ptr = count == `0` ? a0 : (a0 << count) \| (a1 >> (-count & `63`));
357	} else {
358	*z1Ptr = `0`;
359	*z0Ptr = a1 << (count - `64`);
360	}
361	}
362
363	/----------------------------------------------------------------------------*
364	\| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left
365	\| by the number of bits given in `count'. Any bits shifted off are lost.
366	\| The value of `count' must be less than 64. The result is broken into three
367	\| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
368	\| `z1Ptr', and `z2Ptr'.
369	----------------------------------------------------------------------------/
370
371	static inline void
372	shortShift192Left(
373	uint64_t a0,
374	uint64_t a1,
375	uint64_t a2,
376	int count,
377	uint64_t *z0Ptr,
378	uint64_t *z1Ptr,
379	uint64_t *z2Ptr
380	)
381	{
382	uint64_t z0, z1, z2;
383	int8_t negCount;
384
385	z2 = a2<<count;
386	z1 = a1<<count;
387	z0 = a0<<count;
388	if ( `0` < count ) {
389	negCount = ( ( - count ) & `63` );
390	z1 \|= a2>>negCount;
391	z0 \|= a1>>negCount;
392	}
393	*z2Ptr = z2;
394	*z1Ptr = z1;
395	*z0Ptr = z0;
396
397	}
398
399	/----------------------------------------------------------------------------*
400	\| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
401	\| value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so
402	\| any carry out is lost. The result is broken into two 64-bit pieces which
403	\| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
404	----------------------------------------------------------------------------/
405
406	static inline void
407	add128(
408	uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t z0Ptr, uint64_t z1Ptr )
409	{
410	uint64_t z1;
411
412	z1 = a1 + b1;
413	*z1Ptr = z1;
414	*z0Ptr = a0 + b0 + ( z1 < a1 );
415
416	}
417
418	/----------------------------------------------------------------------------*
419	\| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
420	\| 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is
421	\| modulo 2^192, so any carry out is lost. The result is broken into three
422	\| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
423	\| `z1Ptr', and `z2Ptr'.
424	----------------------------------------------------------------------------/
425
426	static inline void
427	add192(
428	uint64_t a0,
429	uint64_t a1,
430	uint64_t a2,
431	uint64_t b0,
432	uint64_t b1,
433	uint64_t b2,
434	uint64_t *z0Ptr,
435	uint64_t *z1Ptr,
436	uint64_t *z2Ptr
437	)
438	{
439	uint64_t z0, z1, z2;
440	int8_t carry0, carry1;
441
442	z2 = a2 + b2;
443	carry1 = ( z2 < a2 );
444	z1 = a1 + b1;
445	carry0 = ( z1 < a1 );
446	z0 = a0 + b0;
447	z1 += carry1;
448	z0 += ( z1 < carry1 );
449	z0 += carry0;
450	*z2Ptr = z2;
451	*z1Ptr = z1;
452	*z0Ptr = z0;
453
454	}
455
456	/----------------------------------------------------------------------------*
457	\| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
458	\| 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo
459	\| 2^128, so any borrow out (carry out) is lost. The result is broken into two
460	\| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
461	\| `z1Ptr'.
462	----------------------------------------------------------------------------/
463
464	static inline void
465	sub128(
466	uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t z0Ptr, uint64_t z1Ptr )
467	{
468
469	*z1Ptr = a1 - b1;
470	*z0Ptr = a0 - b0 - ( a1 < b1 );
471
472	}
473
474	/----------------------------------------------------------------------------*
475	\| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
476	\| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
477	\| Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The
478	\| result is broken into three 64-bit pieces which are stored at the locations
479	\| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
480	----------------------------------------------------------------------------/
481
482	static inline void
483	sub192(
484	uint64_t a0,
485	uint64_t a1,
486	uint64_t a2,
487	uint64_t b0,
488	uint64_t b1,
489	uint64_t b2,
490	uint64_t *z0Ptr,
491	uint64_t *z1Ptr,
492	uint64_t *z2Ptr
493	)
494	{
495	uint64_t z0, z1, z2;
496	int8_t borrow0, borrow1;
497
498	z2 = a2 - b2;
499	borrow1 = ( a2 < b2 );
500	z1 = a1 - b1;
501	borrow0 = ( a1 < b1 );
502	z0 = a0 - b0;
503	z0 -= ( z1 < borrow1 );
504	z1 -= borrow1;
505	z0 -= borrow0;
506	*z2Ptr = z2;
507	*z1Ptr = z1;
508	*z0Ptr = z0;
509
510	}
511
512	/----------------------------------------------------------------------------*
513	\| Multiplies `a' by `b' to obtain a 128-bit product. The product is broken
514	\| into two 64-bit pieces which are stored at the locations pointed to by
515	\| `z0Ptr' and `z1Ptr'.
516	----------------------------------------------------------------------------/
517
518	static inline void mul64To128( uint64_t a, uint64_t b, uint64_t z0Ptr, uint64_t z1Ptr )
519	{
520	uint32_t aHigh, aLow, bHigh, bLow;
521	uint64_t z0, zMiddleA, zMiddleB, z1;
522
523	aLow = a;
524	aHigh = a>>`32`;
525	bLow = b;
526	bHigh = b>>`32`;
527	z1 = ( (uint64_t) aLow ) * bLow;
528	zMiddleA = ( (uint64_t) aLow ) * bHigh;
529	zMiddleB = ( (uint64_t) aHigh ) * bLow;
530	z0 = ( (uint64_t) aHigh ) * bHigh;
531	zMiddleA += zMiddleB;
532	z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<`32` ) + ( zMiddleA>>`32` );
533	zMiddleA <<= `32`;
534	z1 += zMiddleA;
535	z0 += ( z1 < zMiddleA );
536	*z1Ptr = z1;
537	*z0Ptr = z0;
538
539	}
540
541	/----------------------------------------------------------------------------*
542	\| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
543	\| `b' to obtain a 192-bit product. The product is broken into three 64-bit
544	\| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
545	\| `z2Ptr'.
546	----------------------------------------------------------------------------/
547
548	static inline void
549	mul128By64To192(
550	uint64_t a0,
551	uint64_t a1,
552	uint64_t b,
553	uint64_t *z0Ptr,
554	uint64_t *z1Ptr,
555	uint64_t *z2Ptr
556	)
557	{
558	uint64_t z0, z1, z2, more1;
559
560	mul64To128( a1, b, &z1, &z2 );
561	mul64To128( a0, b, &z0, &more1 );
562	add128( z0, more1, `0`, z1, &z0, &z1 );
563	*z2Ptr = z2;
564	*z1Ptr = z1;
565	*z0Ptr = z0;
566
567	}
568
569	/----------------------------------------------------------------------------*
570	\| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
571	\| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
572	\| product. The product is broken into four 64-bit pieces which are stored at
573	\| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
574	----------------------------------------------------------------------------/
575
576	static inline void
577	mul128To256(
578	uint64_t a0,
579	uint64_t a1,
580	uint64_t b0,
581	uint64_t b1,
582	uint64_t *z0Ptr,
583	uint64_t *z1Ptr,
584	uint64_t *z2Ptr,
585	uint64_t *z3Ptr
586	)
587	{
588	uint64_t z0, z1, z2, z3;
589	uint64_t more1, more2;
590
591	mul64To128( a1, b1, &z2, &z3 );
592	mul64To128( a1, b0, &z1, &more2 );
593	add128( z1, more2, `0`, z2, &z1, &z2 );
594	mul64To128( a0, b0, &z0, &more1 );
595	add128( z0, more1, `0`, z1, &z0, &z1 );
596	mul64To128( a0, b1, &more1, &more2 );
597	add128( more1, more2, `0`, z2, &more1, &z2 );
598	add128( z0, z1, `0`, more1, &z0, &z1 );
599	*z3Ptr = z3;
600	*z2Ptr = z2;
601	*z1Ptr = z1;
602	*z0Ptr = z0;
603
604	}
605
606	/----------------------------------------------------------------------------*
607	\| Returns an approximation to the 64-bit integer quotient obtained by dividing
608	\| `b' into the 128-bit value formed by concatenating `a0' and `a1'. The
609	\| divisor `b' must be at least 2^63. If q is the exact quotient truncated
610	\| toward zero, the approximation returned lies between q and q + 2 inclusive.
611	\| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
612	\| unsigned integer is returned.
613	----------------------------------------------------------------------------/
614
615	static inline uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
616	{
617	uint64_t b0, b1;
618	uint64_t rem0, rem1, term0, term1;
619	uint64_t z;
620
621	if ( b <= a0 ) return UINT64_C(`0xFFFFFFFFFFFFFFFF`);
622	b0 = b>>`32`;
623	z = ( b0<<`32` <= a0 ) ? UINT64_C(`0xFFFFFFFF00000000`) : ( a0 / b0 )<<`32`;
624	mul64To128( b, z, &term0, &term1 );
625	sub128( a0, a1, term0, term1, &rem0, &rem1 );
626	while ( ( (int64_t) rem0 ) < `0` ) {
627	z -= UINT64_C(`0x100000000`);
628	b1 = b<<`32`;
629	add128( rem0, rem1, b0, b1, &rem0, &rem1 );
630	}
631	rem0 = ( rem0<<`32` ) \| ( rem1>>`32` );
632	z \|= ( b0<<`32` <= rem0 ) ? `0xFFFFFFFF` : rem0 / b0;
633	return z;
634
635	}
636
637	/ From the GNU Multi Precision Library - longlong.h __udiv_qrnnd*
638	* (https://gmplib.org/repo/gmp/file/tip/longlong.h)
639	*
640	* Licensed under the GPLv2/LGPLv3
641	*/
642	static inline uint64_t udiv_qrnnd(uint64_t *r, uint64_t n1,
643	uint64_t n0, uint64_t d)
644	{
645	#if defined(__x86_64__)
646	uint64_t q;
647	asm("divq %4" : "=a"(q), "=d"(*r) : "0"(n0), "1"(n1), "rm"(d));
648	return q;
649	#elif defined(__s390x__) && !defined(__clang__)
650	/ Need to use a TImode type to get an even register pair for DLGR. /
651	unsigned __int128 n = (unsigned __int128)n1 << `64` \| n0;
652	asm("dlgr %0, %1" : "+r"(n) : "r"(d));
653	*r = n >> `64`;
654	return n;
655	#elif defined(_ARCH_PPC64) && defined(_ARCH_PWR7)
656	/ From Power ISA 2.06, programming note for divdeu. /
657	uint64_t q1, q2, Q, r1, r2, R;
658	asm("divdeu %0,%2,%4; divdu %1,%3,%4"
659	: "=&r"(q1), "=r"(q2)
660	: "r"(n1), "r"(n0), "r"(d));
661	r1 = -(q1 * d); / low part of (n1<<64) - (q1 * d) /
662	r2 = n0 - (q2 * d);
663	Q = q1 + q2;
664	R = r1 + r2;
665	if (R >= d \|\| R < r2) { / overflow implies R > d /
666	Q += `1`;
667	R -= d;
668	}
669	*r = R;
670	return Q;
671	#else
672	uint64_t d0, d1, q0, q1, r1, r0, m;
673
674	d0 = (uint32_t)d;
675	d1 = d >> `32`;
676
677	r1 = n1 % d1;
678	q1 = n1 / d1;
679	m = q1 * d0;
680	r1 = (r1 << `32`) \| (n0 >> `32`);
681	if (r1 < m) {
682	q1 -= `1`;
683	r1 += d;
684	if (r1 >= d) {
685	if (r1 < m) {
686	q1 -= `1`;
687	r1 += d;
688	}
689	}
690	}
691	r1 -= m;
692
693	r0 = r1 % d1;
694	q0 = r1 / d1;
695	m = q0 * d0;
696	r0 = (r0 << `32`) \| (uint32_t)n0;
697	if (r0 < m) {
698	q0 -= `1`;
699	r0 += d;
700	if (r0 >= d) {
701	if (r0 < m) {
702	q0 -= `1`;
703	r0 += d;
704	}
705	}
706	}
707	r0 -= m;
708
709	*r = r0;
710	return (q1 << `32`) \| q0;
711	#endif
712	}
713
714	/----------------------------------------------------------------------------*
715	\| Returns an approximation to the square root of the 32-bit significand given
716	\| by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of
717	\| `aExp' (the least significant bit) is 1, the integer returned approximates
718	\| 2^31sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp'*
719	\| is 0, the integer returned approximates 2^31sqrt(`a'/2^30). In either*
720	\| case, the approximation returned lies strictly within +/-2 of the exact
721	\| value.
722	----------------------------------------------------------------------------/
723
724	static inline uint32_t estimateSqrt32(int aExp, uint32_t a)
725	{
726	static const uint16_t sqrtOddAdjustments[] = {
727	`0x0004`, `0x0022`, `0x005D`, `0x00B1`, `0x011D`, `0x019F`, `0x0236`, `0x02E0`,
728	`0x039C`, `0x0468`, `0x0545`, `0x0631`, `0x072B`, `0x0832`, `0x0946`, `0x0A67`
729	};
730	static const uint16_t sqrtEvenAdjustments[] = {
731	`0x0A2D`, `0x08AF`, `0x075A`, `0x0629`, `0x051A`, `0x0429`, `0x0356`, `0x029E`,
732	`0x0200`, `0x0179`, `0x0109`, `0x00AF`, `0x0068`, `0x0034`, `0x0012`, `0x0002`
733	};
734	int8_t index;
735	uint32_t z;
736
737	index = ( a>>`27` ) & `15`;
738	if ( aExp & `1` ) {
739	z = `0x4000` + ( a>>`17` ) - sqrtOddAdjustments[ (int)index ];
740	z = ( ( a / z )<<`14` ) + ( z<<`15` );
741	a >>= `1`;
742	}
743	else {
744	z = `0x8000` + ( a>>`17` ) - sqrtEvenAdjustments[ (int)index ];
745	z = a / z + z;
746	z = ( `0x20000` <= z ) ? `0xFFFF8000` : ( z<<`15` );
747	if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>`1` );
748	}
749	return ( (uint32_t) ( ( ( (uint64_t) a )<<`31` ) / z ) ) + ( z>>`1` );
750
751	}
752
753	/----------------------------------------------------------------------------*
754	\| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
755	\| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
756	\| Otherwise, returns 0.
757	----------------------------------------------------------------------------/
758
759	static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
760	{
761
762	return ( a0 == b0 ) && ( a1 == b1 );
763
764	}
765
766	/----------------------------------------------------------------------------*
767	\| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
768	\| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
769	\| Otherwise, returns 0.
770	----------------------------------------------------------------------------/
771
772	static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
773	{
774
775	return ( a0 < b0 ) \|\| ( ( a0 == b0 ) && ( a1 <= b1 ) );
776
777	}
778
779	/----------------------------------------------------------------------------*
780	\| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
781	\| than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise,
782	\| returns 0.
783	----------------------------------------------------------------------------/
784
785	static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
786	{
787
788	return ( a0 < b0 ) \|\| ( ( a0 == b0 ) && ( a1 < b1 ) );
789
790	}
791
792	/----------------------------------------------------------------------------*
793	\| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is
794	\| not equal to the 128-bit value formed by concatenating `b0' and `b1'.
795	\| Otherwise, returns 0.
796	----------------------------------------------------------------------------/
797
798	static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 )
799	{
800
801	return ( a0 != b0 ) \|\| ( a1 != b1 );
802
803	}
804
805	#endif
806

Browse the source code of qemu/include/fpu/softfloat-macros.h