| 1 | #include "SDL_internal.h" | 
|---|
| 2 | /* | 
|---|
| 3 | * ==================================================== | 
|---|
| 4 | * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. | 
|---|
| 5 | * | 
|---|
| 6 | * Developed at SunPro, a Sun Microsystems, Inc. business. | 
|---|
| 7 | * Permission to use, copy, modify, and distribute this | 
|---|
| 8 | * software is freely granted, provided that this notice | 
|---|
| 9 | * is preserved. | 
|---|
| 10 | * ==================================================== | 
|---|
| 11 | */ | 
|---|
| 12 |  | 
|---|
| 13 | /* __ieee754_exp(x) | 
|---|
| 14 | * Returns the exponential of x. | 
|---|
| 15 | * | 
|---|
| 16 | * Method | 
|---|
| 17 | *   1. Argument reduction: | 
|---|
| 18 | *      Reduce x to an r so that |r| <= 0.5*ln2 ~ 0.34658. | 
|---|
| 19 | *	Given x, find r and integer k such that | 
|---|
| 20 | * | 
|---|
| 21 | *               x = k*ln2 + r,  |r| <= 0.5*ln2. | 
|---|
| 22 | * | 
|---|
| 23 | *      Here r will be represented as r = hi-lo for better | 
|---|
| 24 | *	accuracy. | 
|---|
| 25 | * | 
|---|
| 26 | *   2. Approximation of exp(r) by a special rational function on | 
|---|
| 27 | *	the interval [0,0.34658]: | 
|---|
| 28 | *	Write | 
|---|
| 29 | *	    R(r**2) = r*(exp(r)+1)/(exp(r)-1) = 2 + r*r/6 - r**4/360 + ... | 
|---|
 *      We use a special Remez algorithm on [0,0.34658] to generate
|---|
| 31 | * 	a polynomial of degree 5 to approximate R. The maximum error | 
|---|
| 32 | *	of this polynomial approximation is bounded by 2**-59. In | 
|---|
| 33 | *	other words, | 
|---|
| 34 | *	    R(z) ~ 2.0 + P1*z + P2*z**2 + P3*z**3 + P4*z**4 + P5*z**5 | 
|---|
| 35 | *  	(where z=r*r, and the values of P1 to P5 are listed below) | 
|---|
| 36 | *	and | 
|---|
| 37 | *	    |                  5          |     -59 | 
|---|
| 38 | *	    | 2.0+P1*z+...+P5*z   -  R(z) | <= 2 | 
|---|
| 39 | *	    |                             | | 
|---|
| 40 | *	The computation of exp(r) thus becomes | 
|---|
| 41 | *                             2*r | 
|---|
| 42 | *		exp(r) = 1 + ------- | 
|---|
| 43 | *		              R - r | 
|---|
| 44 | *                                 r*R1(r) | 
|---|
| 45 | *		       = 1 + r + ----------- (for better accuracy) | 
|---|
| 46 | *		                  2 - R1(r) | 
|---|
| 47 | *	where | 
|---|
| 48 | *			         2       4             10 | 
|---|
| 49 | *		R1(r) = r - (P1*r  + P2*r  + ... + P5*r   ). | 
|---|
| 50 | * | 
|---|
| 51 | *   3. Scale back to obtain exp(x): | 
|---|
| 52 | *	From step 1, we have | 
|---|
| 53 | *	   exp(x) = 2^k * exp(r) | 
|---|
| 54 | * | 
|---|
| 55 | * Special cases: | 
|---|
| 56 | *	exp(INF) is INF, exp(NaN) is NaN; | 
|---|
| 57 | *	exp(-INF) is 0, and | 
|---|
| 58 | *	for finite argument, only exp(0)=1 is exact. | 
|---|
| 59 | * | 
|---|
| 60 | * Accuracy: | 
|---|
| 61 | *	according to an error analysis, the error is always less than | 
|---|
| 62 | *	1 ulp (unit in the last place). | 
|---|
| 63 | * | 
|---|
| 64 | * Misc. info. | 
|---|
| 65 | *	For IEEE double | 
|---|
 *	    if x >  7.09782712893383973096e+02 then exp(x) overflows
 *	    if x < -7.45133219101941108420e+02 then exp(x) underflows
|---|
| 68 | * | 
|---|
| 69 | * Constants: | 
|---|
| 70 | * The hexadecimal values are the intended ones for the following | 
|---|
| 71 | * constants. The decimal values may be used, provided that the | 
|---|
| 72 | * compiler will convert from decimal to binary accurately enough | 
|---|
| 73 | * to produce the hexadecimal values shown. | 
|---|
| 74 | */ | 
|---|
| 75 |  | 
|---|
| 76 | #include "math_libm.h" | 
|---|
| 77 | #include "math_private.h" | 
|---|
| 78 |  | 
|---|
| 79 | #ifdef __WATCOMC__ /* Watcom defines huge=__huge */ | 
|---|
| 80 | #undef huge | 
|---|
| 81 | #endif | 
|---|
| 82 |  | 
|---|
/*
 * Constants used by __ieee754_exp().  The hexadecimal words in the
 * comments are the intended bit patterns (high word, low word); the
 * decimal literals must convert to exactly those values.
 */

/* Plain 1.0. */
static const double one = 1.0;

/* +0.5 / -0.5, selected by the sign bit of x when rounding k. */
static const double halF[2] = { 0.5, -0.5 };

/* Large value used only to force an inexact result for tiny |x|. */
static const double huge = 1.0e+300;

/* 2**-1000 = 0x01700000, 0 */
static const double twom1000 = 9.33263618503218878990e-302;

/* Above this, exp(x) overflows: 0x40862E42, 0xFEFA39EF */
static const double o_threshold = 7.09782712893383973096e+02;

/* Below this, exp(x) underflows: 0xc0874910, 0xD52D3051 */
static const double u_threshold = -7.45133219101941108420e+02;

/* High part of ln2, positive and negated (indexed by sign bit). */
static const double ln2HI[2] = {
     6.93147180369123816490e-01,    /* 0x3fe62e42, 0xfee00000 */
    -6.93147180369123816490e-01     /* 0xbfe62e42, 0xfee00000 */
};

/* Low part of ln2, positive and negated (indexed by sign bit). */
static const double ln2LO[2] = {
     1.90821492927058770002e-10,    /* 0x3dea39ef, 0x35793c76 */
    -1.90821492927058770002e-10     /* 0xbdea39ef, 0x35793c76 */
};

/* 1/ln2: 0x3ff71547, 0x652b82fe */
static const double invln2 = 1.44269504088896338700e+00;

/* Coefficients P1..P5 of the polynomial approximating R(z), z = r*r. */
static const double P1 =  1.66666666666666019037e-01;   /* 0x3FC55555, 0x5555553E */
static const double P2 = -2.77777777770155933842e-03;   /* 0xBF66C16C, 0x16BEBD93 */
static const double P3 =  6.61375632143793436117e-05;   /* 0x3F11566A, 0xAF25DE2C */
static const double P4 = -1.65339022054652515390e-06;   /* 0xBEBBBD41, 0xC5D26BF1 */
static const double P5 =  4.13813679705723846039e-08;   /* 0x3E663769, 0x72BEA4D0 */
|---|
| 100 |  | 
|---|
| 101 | union { | 
|---|
| 102 | Uint64 u64; | 
|---|
| 103 | double d; | 
|---|
| 104 | } inf_union = { | 
|---|
| 105 | SDL_UINT64_C(0x7ff0000000000000)  /* Binary representation of a 64-bit infinite double (sign=0, exponent=2047, mantissa=0) */ | 
|---|
| 106 | }; | 
|---|
| 107 |  | 
|---|
| 108 | double __ieee754_exp(double x)	/* default IEEE double exp */ | 
|---|
| 109 | { | 
|---|
| 110 | double y; | 
|---|
| 111 | double hi = 0.0; | 
|---|
| 112 | double lo = 0.0; | 
|---|
| 113 | double c; | 
|---|
| 114 | double t; | 
|---|
| 115 | int32_t k=0; | 
|---|
| 116 | int32_t xsb; | 
|---|
| 117 | u_int32_t hx; | 
|---|
| 118 |  | 
|---|
| 119 | GET_HIGH_WORD(hx,x); | 
|---|
| 120 | xsb = (hx>>31)&1;		/* sign bit of x */ | 
|---|
| 121 | hx &= 0x7fffffff;		/* high word of |x| */ | 
|---|
| 122 |  | 
|---|
| 123 | /* filter out non-finite argument */ | 
|---|
| 124 | if(hx >= 0x40862E42) {			/* if |x|>=709.78... */ | 
|---|
| 125 | if(hx>=0x7ff00000) { | 
|---|
| 126 | u_int32_t lx; | 
|---|
| 127 | GET_LOW_WORD(lx,x); | 
|---|
| 128 | if(((hx&0xfffff)|lx)!=0) | 
|---|
| 129 | return x+x; 		/* NaN */ | 
|---|
| 130 | else return (xsb==0)? x:0.0;	/* exp(+-inf)={inf,0} */ | 
|---|
| 131 | } | 
|---|
| 132 | #if 1 | 
|---|
| 133 | if(x > o_threshold) return inf_union.d; /* overflow */ | 
|---|
| 134 | #elif 1 | 
|---|
| 135 | if(x > o_threshold) return huge*huge; /* overflow */ | 
|---|
| 136 | #else  /* !!! FIXME: check this: "huge * huge" is a compiler warning, maybe they wanted +Inf? */ | 
|---|
| 137 | if(x > o_threshold) return INFINITY; /* overflow */ | 
|---|
| 138 | #endif | 
|---|
| 139 |  | 
|---|
| 140 | if(x < u_threshold) return twom1000*twom1000; /* underflow */ | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
| 143 | /* argument reduction */ | 
|---|
| 144 | if(hx > 0x3fd62e42) {		/* if  |x| > 0.5 ln2 */ | 
|---|
| 145 | if(hx < 0x3FF0A2B2) {	/* and |x| < 1.5 ln2 */ | 
|---|
| 146 | hi = x-ln2HI[xsb]; lo=ln2LO[xsb]; k = 1-xsb-xsb; | 
|---|
| 147 | } else { | 
|---|
| 148 | k  = (int32_t) (invln2*x+halF[xsb]); | 
|---|
| 149 | t  = k; | 
|---|
| 150 | hi = x - t*ln2HI[0];	/* t*ln2HI is exact here */ | 
|---|
| 151 | lo = t*ln2LO[0]; | 
|---|
| 152 | } | 
|---|
| 153 | x  = hi - lo; | 
|---|
| 154 | } | 
|---|
| 155 | else if(hx < 0x3e300000)  {	/* when |x|<2**-28 */ | 
|---|
| 156 | if(huge+x>one) return one+x;/* trigger inexact */ | 
|---|
| 157 | } | 
|---|
| 158 | else k = 0; | 
|---|
| 159 |  | 
|---|
| 160 | /* x is now in primary range */ | 
|---|
| 161 | t  = x*x; | 
|---|
| 162 | c  = x - t*(P1+t*(P2+t*(P3+t*(P4+t*P5)))); | 
|---|
| 163 | if(k==0) 	return one-((x*c)/(c-2.0)-x); | 
|---|
| 164 | else 		y = one-((lo-(x*c)/(2.0-c))-hi); | 
|---|
| 165 | if(k >= -1021) { | 
|---|
| 166 | u_int32_t hy; | 
|---|
| 167 | GET_HIGH_WORD(hy,y); | 
|---|
| 168 | SET_HIGH_WORD(y,hy+(k<<20));	/* add k to y's exponent */ | 
|---|
| 169 | return y; | 
|---|
| 170 | } else { | 
|---|
| 171 | u_int32_t hy; | 
|---|
| 172 | GET_HIGH_WORD(hy,y); | 
|---|
| 173 | SET_HIGH_WORD(y,hy+((k+1000)<<20));	/* add k to y's exponent */ | 
|---|
| 174 | return y*twom1000; | 
|---|
| 175 | } | 
|---|
| 176 | } | 
|---|
| 177 |  | 
|---|
| 178 | /* | 
|---|
| 179 | * wrapper exp(x) | 
|---|
| 180 | */ | 
|---|
| 181 | #ifndef _IEEE_LIBM | 
|---|
| 182 | double exp(double x) | 
|---|
| 183 | { | 
|---|
| 184 | static const double o_threshold =  7.09782712893383973096e+02; /* 0x40862E42, 0xFEFA39EF */ | 
|---|
| 185 | static const double u_threshold = -7.45133219101941108420e+02; /* 0xc0874910, 0xD52D3051 */ | 
|---|
| 186 |  | 
|---|
| 187 | double z = __ieee754_exp(x); | 
|---|
| 188 | if (_LIB_VERSION == _IEEE_) | 
|---|
| 189 | return z; | 
|---|
| 190 | if (isfinite(x)) { | 
|---|
| 191 | if (x > o_threshold) | 
|---|
| 192 | return __kernel_standard(x, x, 6); /* exp overflow */ | 
|---|
| 193 | if (x < u_threshold) | 
|---|
| 194 | return __kernel_standard(x, x, 7); /* exp underflow */ | 
|---|
| 195 | } | 
|---|
| 196 | return z; | 
|---|
| 197 | } | 
|---|
| 198 | #else | 
|---|
| 199 | strong_alias(__ieee754_exp, exp) | 
|---|
| 200 | #endif | 
|---|
| 201 | libm_hidden_def(exp) | 
|---|
| 202 |  | 
|---|