| 1 | // This file is part of Eigen, a lightweight C++ template library |
| 2 | // for linear algebra. |
| 3 | // |
| 4 | // Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) |
| 5 | // Copyright (C) 2016 Gael Guennebaud <gael.guennebaud@inria.fr> |
| 6 | // |
| 7 | // This Source Code Form is subject to the terms of the Mozilla |
| 8 | // Public License v. 2.0. If a copy of the MPL was not distributed |
| 9 | // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. |
| 10 | |
| 11 | #ifndef EIGEN_MATHFUNCTIONSIMPL_H |
| 12 | #define EIGEN_MATHFUNCTIONSIMPL_H |
| 13 | |
| 14 | namespace Eigen { |
| 15 | |
| 16 | namespace internal { |
| 17 | |
| 18 | /** \internal \returns the hyperbolic tan of \a a (coeff-wise) |
| 19 | Doesn't do anything fancy, just a 13/6-degree rational interpolant which |
| 20 | is accurate up to a couple of ulp in the range [-9, 9], outside of which |
| 21 | the tanh(x) = +/-1. |
| 22 | |
| 23 | This implementation works on both scalars and packets. |
| 24 | */ |
| 25 | template<typename T> |
| 26 | T generic_fast_tanh_float(const T& a_x) |
| 27 | { |
| 28 | // Clamp the inputs to the range [-9, 9] since anything outside |
| 29 | // this range is +/-1.0f in single-precision. |
| 30 | const T plus_9 = pset1<T>(9.f); |
| 31 | const T minus_9 = pset1<T>(-9.f); |
| 32 | // NOTE GCC prior to 6.3 might improperly optimize this max/min |
| 33 | // step such that if a_x is nan, x will be either 9 or -9, |
| 34 | // and tanh will return 1 or -1 instead of nan. |
| 35 | // This is supposed to be fixed in gcc6.3, |
| 36 | // see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=72867 |
| 37 | const T x = pmax(minus_9,pmin(plus_9,a_x)); |
| 38 | // The monomial coefficients of the numerator polynomial (odd). |
| 39 | const T alpha_1 = pset1<T>(4.89352455891786e-03f); |
| 40 | const T alpha_3 = pset1<T>(6.37261928875436e-04f); |
| 41 | const T alpha_5 = pset1<T>(1.48572235717979e-05f); |
| 42 | const T alpha_7 = pset1<T>(5.12229709037114e-08f); |
| 43 | const T alpha_9 = pset1<T>(-8.60467152213735e-11f); |
| 44 | const T alpha_11 = pset1<T>(2.00018790482477e-13f); |
| 45 | const T alpha_13 = pset1<T>(-2.76076847742355e-16f); |
| 46 | |
| 47 | // The monomial coefficients of the denominator polynomial (even). |
| 48 | const T beta_0 = pset1<T>(4.89352518554385e-03f); |
| 49 | const T beta_2 = pset1<T>(2.26843463243900e-03f); |
| 50 | const T beta_4 = pset1<T>(1.18534705686654e-04f); |
| 51 | const T beta_6 = pset1<T>(1.19825839466702e-06f); |
| 52 | |
| 53 | // Since the polynomials are odd/even, we need x^2. |
| 54 | const T x2 = pmul(x, x); |
| 55 | |
| 56 | // Evaluate the numerator polynomial p. |
| 57 | T p = pmadd(x2, alpha_13, alpha_11); |
| 58 | p = pmadd(x2, p, alpha_9); |
| 59 | p = pmadd(x2, p, alpha_7); |
| 60 | p = pmadd(x2, p, alpha_5); |
| 61 | p = pmadd(x2, p, alpha_3); |
| 62 | p = pmadd(x2, p, alpha_1); |
| 63 | p = pmul(x, p); |
| 64 | |
| 65 | // Evaluate the denominator polynomial p. |
| 66 | T q = pmadd(x2, beta_6, beta_4); |
| 67 | q = pmadd(x2, q, beta_2); |
| 68 | q = pmadd(x2, q, beta_0); |
| 69 | |
| 70 | // Divide the numerator by the denominator. |
| 71 | return pdiv(p, q); |
| 72 | } |
| 73 | |
| 74 | template<typename RealScalar> |
| 75 | EIGEN_STRONG_INLINE |
| 76 | RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) |
| 77 | { |
| 78 | EIGEN_USING_STD_MATH(sqrt); |
| 79 | RealScalar p, qp; |
| 80 | p = numext::maxi(x,y); |
| 81 | if(p==RealScalar(0)) return RealScalar(0); |
| 82 | qp = numext::mini(y,x) / p; |
| 83 | return p * sqrt(RealScalar(1) + qp*qp); |
| 84 | } |
| 85 | |
| 86 | template<typename Scalar> |
| 87 | struct hypot_impl |
| 88 | { |
| 89 | typedef typename NumTraits<Scalar>::Real RealScalar; |
| 90 | static inline RealScalar run(const Scalar& x, const Scalar& y) |
| 91 | { |
| 92 | EIGEN_USING_STD_MATH(abs); |
| 93 | return positive_real_hypot<RealScalar>(abs(x), abs(y)); |
| 94 | } |
| 95 | }; |
| 96 | |
| 97 | } // end namespace internal |
| 98 | |
| 99 | } // end namespace Eigen |
| 100 | |
| 101 | #endif // EIGEN_MATHFUNCTIONSIMPL_H |
| 102 | |