| 1 | /* |
| 2 | * Written by J.T. Conklin <jtc@netbsd.org>. |
| 3 | * Public domain. |
| 4 | * |
| 5 | * Adapted for `long double' by Ulrich Drepper <drepper@cygnus.com>. |
| 6 | */ |
| 7 | |
| 8 | /* |
| 9 | * The 8087 method for the exponential function is to calculate |
| 10 | * exp(x) = 2^(x log2(e)) |
| 11 | * after separating integer and fractional parts |
| 12 | * x log2(e) = i + f, |f| <= .5 |
| 13 | * 2^i is immediate but f needs to be precise for long double accuracy. |
| 14 | * Suppress range reduction error in computing f by the following. |
| 15 | * Separate x into integer and fractional parts |
| 16 | * x = xi + xf, |xf| <= .5 |
| 17 | * Separate log2(e) into the sum of an exact number c0 and small part c1. |
| 18 | * c0 + c1 = log2(e) to extra precision |
| 19 | * Then |
| 20 | * f = (c0 xi - i) + c0 xf + c1 x |
| 21 | * where c0 xi is exact and so also is (c0 xi - i). |
| 22 | * -- moshier@na-net.ornl.gov |
| 23 | */ |
| 24 | |
| 25 | #include <libm-alias-ldouble.h> |
| 26 | #include <machine/asm.h> |
| 27 | #include <x86_64-math-asm.h> |
| 28 | #include <libm-alias-finite.h> |
| 29 | |
| 30 | #if !defined USE_AS_EXP10L && !defined USE_AS_EXPM1L /* Plain expl build. */ |
| 31 | # define FLDLOG fldl2e /* Instruction that pushes log2(e). */ |
| 32 | # define EXPL_FINITE __expl_finite |
| 33 | # define IEEE754_EXPL __ieee754_expl |
| 34 | #elif defined USE_AS_EXP10L /* exp10l build (takes priority over expm1l). */ |
| 35 | # define FLDLOG fldl2t /* Instruction that pushes log2(10). */ |
| 36 | # define EXPL_FINITE __exp10l_finite |
| 37 | # define IEEE754_EXPL __ieee754_exp10l |
| 38 | #else /* Only USE_AS_EXPM1L is defined: expm1l build. */ |
| 39 | # define FLDLOG fldl2e /* Instruction that pushes log2(e). */ |
| 40 | # undef EXPL_FINITE /* No _finite name is defined for this variant. */ |
| 41 | # define IEEE754_EXPL __expm1l |
| 42 | #endif |
| 43 | |
| 44 | .section .rodata.cst16,"aM" ,@progbits,16 /* Read-only mergeable constants, entity size 16 bytes. */ |
| 45 | |
| 46 | .p2align 4 /* Align to 16 bytes. */ |
| 47 | #ifdef USE_AS_EXP10L /* Base 10: c0 + c1 approximates log2(10). */ |
| 48 | .type c0,@object |
| 49 | c0: .byte 0, 0, 0, 0, 0, 0, 0x9a, 0xd4, 0x00, 0x40 /* 80-bit extended, little-endian: 0xd49a * 2^-14 = 3.3218994140625, i.e. log2(10) cut to 16 significant bits. */ |
| 50 | .byte 0, 0, 0, 0, 0, 0 /* Pad the 10-byte value to the 16-byte entity size. */ |
| 51 | ASM_SIZE_DIRECTIVE(c0) |
| 52 | .type c1,@object |
| 53 | c1: .byte 0x58, 0x92, 0xfc, 0x15, 0x37, 0x9a, 0x97, 0xf0, 0xef, 0x3f /* About 2.868e-5: the remainder log2(10) - c0. */ |
| 54 | .byte 0, 0, 0, 0, 0, 0 /* Padding to 16 bytes. */ |
| 55 | ASM_SIZE_DIRECTIVE(c1) |
| 56 | #else /* Base e (expl and expm1l): c0 + c1 approximates log2(e). */ |
| 57 | .type c0,@object |
| 58 | c0: .byte 0, 0, 0, 0, 0, 0, 0xaa, 0xb8, 0xff, 0x3f /* 80-bit extended, little-endian: 0xb8aa * 2^-15 = 1.44268798828125, i.e. log2(e) cut to 16 significant bits. */ |
| 59 | .byte 0, 0, 0, 0, 0, 0 /* Pad the 10-byte value to the 16-byte entity size. */ |
| 60 | ASM_SIZE_DIRECTIVE(c0) |
| 61 | .type c1,@object |
| 62 | c1: .byte 0x20, 0xfa, 0xee, 0xc2, 0x5f, 0x70, 0xa5, 0xec, 0xed, 0x3f /* About 7.0526e-6: the remainder log2(e) - c0. */ |
| 63 | .byte 0, 0, 0, 0, 0, 0 /* Padding to 16 bytes. */ |
| 64 | ASM_SIZE_DIRECTIVE(c1) |
| 65 | #endif |
| 66 | #ifndef USE_AS_EXPM1L /* The expm1l variant never loads csat. */ |
| 67 | .type csat,@object |
| 68 | csat: .byte 0, 0, 0, 0, 0, 0, 0, 0x80, 0x0e, 0x40 /* 2^15 = 32768.0: magnitude substituted for finite arguments whose exponent field exceeds 0x400d. */ |
| 69 | .byte 0, 0, 0, 0, 0, 0 /* Padding to 16 bytes. */ |
| 70 | ASM_SIZE_DIRECTIVE(csat) |
| 71 | DEFINE_LDBL_MIN /* Macro defined outside this file; presumably emits the LDBL_MIN constant consumed by LDBL_CHECK_FORCE_UFLOW_NONNEG -- TODO confirm in x86_64-math-asm.h. */ |
| 72 | #endif |
| 73 | |
| 74 | #ifndef PIC /* Non-PIC build: reference the symbol directly. */ |
| 75 | # define MO(op) op |
| 76 | #else /* PIC build: address the symbol relative to %rip. */ |
| 77 | # define MO(op) op##(%rip) |
| 78 | #endif |
| 79 | /* IEEE754_EXPL (long double x): x87 implementation of expl, exp10l or expm1l, chosen by USE_AS_EXP10L / USE_AS_EXPM1L. Reads x from 8(%rsp) and returns the result in %st(0); uses %eax, %edx, EFLAGS and 8 bytes of scratch just below %rsp. */ |
| 80 | .text /* In the stack comments below, the leading number is the highest live %st(n) index after the instruction. */ |
| 81 | ENTRY(IEEE754_EXPL) |
| 82 | #ifdef USE_AS_EXPM1L |
| 83 | movzwl 8+8(%rsp), %eax /* eax = sign and biased-exponent word of x (bytes 8-9 of the argument). */ |
| 84 | xorb $0x80, %ah // invert sign bit (now 1 is "positive") |
| 85 | cmpl $0xc006, %eax // is num positive and exp >= 6 (number is >= 128.0)? |
| 86 | jae HIDDEN_JUMPTARGET (__expl) // (if num is denormal, it is at least >= 64.0) |
| 87 | #endif |
| 88 | fldt 8(%rsp) /* 0 x */ |
| 89 | /* I added the following ugly construct because expl(+-Inf) resulted |
| 90 | in NaN. The ugliness results from the bright minds at Intel. |
| 91 | For the i686 the code can be written better. |
| 92 | -- drepper@cygnus.com. */ |
| 93 | fxam /* Classify x into the status word (read back with fstsw): is NaN or +-Inf? */ |
| 94 | #ifdef USE_AS_EXPM1L |
| 95 | xorb $0x80, %ah /* Undo the sign inversion: eax is the original sign/exponent word again. */ |
| 96 | cmpl $0xc006, %eax /* Negative with exponent field >= 0x4006? The flags are consumed by the jb below. */ |
| 97 | fstsw %ax /* ax = x87 status word with the fxam result; EFLAGS is left untouched. */ |
| 98 | movb $0x45, %dh /* Mask selecting the fxam class bits C3, C2 and C0 as they appear in %ah. */ |
| 99 | jb 4f /* Still the cmpl flags: not a large negative value, take the in-range path. */ |
| 100 | |
| 101 | /* Below -64.0 (may be -NaN or -Inf). */ |
| 102 | andb %ah, %dh /* dh = fxam class bits of x. */ |
| 103 | cmpb $0x01, %dh /* Only C0 set means NaN. */ |
| 104 | je 6f /* Is +-NaN, jump. */ |
| 105 | jmp 1f /* -large, possibly -Inf. */ |
| 106 | |
| 107 | 4: /* In range -64.0 to 64.0 (may be +-0 but not NaN or +-Inf). */ |
| 108 | /* Test for +-0 as argument. */ |
| 109 | andb %ah, %dh /* dh = fxam class bits of x. */ |
| 110 | cmpb $0x40, %dh /* Only C3 set means zero. */ |
| 111 | je 2f /* Zero argument: return it unchanged (it is already in %st(0)). */ |
| 112 | |
| 113 | /* Test for arguments that are small but not subnormal. */ |
| 114 | movzwl 8+8(%rsp), %eax /* Reload the sign/exponent word; ax was overwritten by fstsw. */ |
| 115 | andl $0x7fff, %eax /* Drop the sign, keep the biased exponent. */ |
| 116 | cmpl $0x3fbf, %eax /* 0x3fbf is the biased exponent of 2^-64. */ |
| 117 | jge 3f /* Exponent at least -64: use the general computation. */ |
| 118 | /* Argument's exponent below -64; avoid spurious underflow if |
| 119 | normal. */ |
| 120 | cmpl $0x0001, %eax /* Biased exponent 0 means subnormal. */ |
| 121 | jge 2f /* Normal but tiny: return x itself. */ |
| 122 | /* Force underflow and return the argument, to avoid wrong signs |
| 123 | of zero results from the code below in some rounding modes. */ |
| 124 | fld %st /* Duplicate x. */ |
| 125 | fmul %st /* Square the copy; this is the operation meant to raise underflow. */ |
| 126 | fstp %st /* Discard the product, leaving x in %st(0). */ |
| 127 | jmp 2f |
| 128 | #else |
| 129 | movzwl 8+8(%rsp), %eax /* eax = sign and biased-exponent word of x. */ |
| 130 | andl $0x7fff, %eax /* Keep only the biased exponent. */ |
| 131 | cmpl $0x400d, %eax /* 0x400d is the biased exponent of 2^14. */ |
| 132 | jg 5f /* Exponent field above 0x400d: huge, infinite or NaN argument. */ |
| 133 | cmpl $0x3fbc, %eax /* 0x3fbc is the biased exponent of 2^-67. */ |
| 134 | jge 3f /* Ordinary magnitude: use the general computation. */ |
| 135 | /* Argument's exponent below -67, result rounds to 1. */ |
| 136 | fld1 /* Push 1.0. */ |
| 137 | faddp /* Result = x + 1.0. */ |
| 138 | jmp 2f |
| 139 | 5: /* Overflow, underflow or infinity or NaN as argument. */ |
| 140 | fstsw %ax /* ax = x87 status word holding the fxam classification. */ |
| 141 | movb $0x45, %dh /* Mask selecting C3, C2 and C0 as they appear in %ah. */ |
| 142 | andb %ah, %dh /* dh = fxam class bits of x. */ |
| 143 | cmpb $0x05, %dh /* C2 and C0 set means infinity. */ |
| 144 | je 1f /* Is +-Inf, jump. */ |
| 145 | cmpb $0x01, %dh /* Only C0 set means NaN. */ |
| 146 | je 6f /* Is +-NaN, jump. */ |
| 147 | /* Overflow or underflow; saturate. */ |
| 148 | fstp %st /* Drop the out-of-range argument. */ |
| 149 | fldt MO(csat) /* Replace it with the saturation constant csat. */ |
| 150 | andb $2, %ah /* Bit 1 of %ah is C1, the sign reported by fxam. */ |
| 151 | jz 3f /* Positive argument: continue with +csat. */ |
| 152 | fchs /* Negative argument: continue with -csat. */ |
| 153 | #endif |
| 154 | 3: FLDLOG /* 1 log2(base) */ |
| 155 | fmul %st(1), %st /* 1 x log2(base) */ |
| 156 | /* Set round-to-nearest temporarily. */ |
| 157 | fstcw -4(%rsp) /* Save the caller's x87 control word in scratch below %rsp. */ |
| 158 | movl $0xf3ff, %edx /* Mask that clears the rounding-control field (bits 10-11). */ |
| 159 | andl -4(%rsp), %edx /* edx = saved control word with RC = 00; the mask also zeroes the upper 16 bits read from memory. */ |
| 160 | movl %edx, -8(%rsp) /* Store the modified control word in a second scratch slot. */ |
| 161 | fldcw -8(%rsp) /* Activate round-to-nearest. */ |
| 162 | frndint /* 1 i */ |
| 163 | fld %st(1) /* 2 x */ |
| 164 | frndint /* 2 xi */ |
| 165 | fldcw -4(%rsp) /* Restore the caller's control word (and rounding mode). */ |
| 166 | fld %st(1) /* 3 i */ |
| 167 | fldt MO(c0) /* 4 c0 */ |
| 168 | fld %st(2) /* 5 xi */ |
| 169 | fmul %st(1), %st /* 5 c0 xi */ |
| 170 | fsubp %st, %st(2) /* 4 f = c0 xi - i */ |
| 171 | fld %st(4) /* 5 x */ |
| 172 | fsub %st(3), %st /* 5 xf = x - xi */ |
| 173 | fmulp %st, %st(1) /* 4 c0 xf */ |
| 174 | faddp %st, %st(1) /* 3 f = f + c0 xf */ |
| 175 | fldt MO(c1) /* 4 */ |
| 176 | fmul %st(4), %st /* 4 c1 * x */ |
| 177 | faddp %st, %st(1) /* 3 f = f + c1 * x */ |
| 178 | f2xm1 /* 3 2^(fract(x * log2(base))) - 1 */ |
| 179 | #ifdef USE_AS_EXPM1L |
| 180 | fstp %st(1) /* 2 */ |
| 181 | fscale /* 2 scale factor is st(1); base^x - 2^i */ |
| 182 | fxch /* 2 i */ |
| 183 | fld1 /* 3 1.0 */ |
| 184 | fscale /* 3 2^i */ |
| 185 | fld1 /* 4 1.0 */ |
| 186 | fsubrp %st, %st(1) /* 3 2^i - 1.0 */ |
| 187 | fstp %st(1) /* 2 */ |
| 188 | faddp %st, %st(1) /* 1 base^x - 1.0 */ |
| 189 | #else |
| 190 | fld1 /* 4 1.0 */ |
| 191 | faddp /* 3 2^(fract(x * log2(base))) */ |
| 192 | fstp %st(1) /* 2 */ |
| 193 | fscale /* 2 scale factor is st(1); base^x */ |
| 194 | fstp %st(1) /* 1 */ |
| 195 | LDBL_CHECK_FORCE_UFLOW_NONNEG /* Macro defined outside this file; presumably forces an underflow exception for tiny non-negative results -- TODO confirm in x86_64-math-asm.h. */ |
| 196 | #endif |
| 197 | fstp %st(1) /* 0 */ |
| 198 | jmp 2f |
| 199 | 1: |
| 200 | #ifdef USE_AS_EXPM1L |
| 201 | /* For expm1l, only negative sign gets here. */ |
| 202 | fstp %st /* Discard the argument. */ |
| 203 | fld1 /* Push 1.0. */ |
| 204 | fchs /* Result = -1.0. */ |
| 205 | #else |
| 206 | testl $0x200, %eax /* Test sign. */ |
| 207 | jz 2f /* If positive, jump. */ |
| 208 | fstp %st /* Discard the negative infinity. */ |
| 209 | fldz /* Set result to 0. */ |
| 210 | #endif |
| 211 | 2: ret /* Common exit: the result is in %st(0). */ |
| 212 | 6: /* NaN argument. */ |
| 213 | fadd %st /* x + x: propagates the NaN as the result. */ |
| 214 | ret |
| 215 | END(IEEE754_EXPL) |
| 216 | |
| 217 | #if !defined USE_AS_EXPM1L && !defined USE_AS_EXP10L /* Plain expl build. */ |
| 218 | libm_alias_finite (__ieee754_expl, __expl) |
| 219 | #elif defined USE_AS_EXPM1L /* expm1l build (takes priority over exp10l here). */ |
| 220 | libm_hidden_def (__expm1l) |
| 221 | libm_alias_ldouble (__expm1, expm1) |
| 222 | #else /* Only USE_AS_EXP10L is defined: exp10l build. */ |
| 223 | libm_alias_finite (__ieee754_exp10l, __exp10l) |
| 224 | #endif |
| 225 | |