| 1 | // Copyright 2009 Google Inc. All Rights Reserved. |
| 2 | |
| 3 | #include "util/math/exactfloat/exactfloat.h" |
| 4 | |
| 5 | #include <cstdarg> |
| 6 | #include <cstddef> |
| 7 | #include <cstdlib> |
| 8 | #include <cstring> |
| 9 | #include <cstdio> |
| 10 | |
| 11 | #include <math.h> |
| 12 | #include <algorithm> |
| 13 | using std::min; |
| 14 | using std::max; |
| 15 | using std::swap; |
| 16 | using std::reverse; |
| 17 | |
| 18 | #include <limits> |
| 19 | using std::numeric_limits; |
| 20 | |
| 21 | #include "base/integral_types.h" |
| 22 | #include "base/logging.h" |
| 23 | |
| 24 | namespace bn { |
| 25 | #include "bn/crypto.h" |
| 26 | #include "bn/bn.c" |
| 27 | #include "bn/bn_asm.c" |
| 28 | #include "bn/bn_ctx.c" |
| 29 | #include "bn/bn_mul.c" |
| 30 | #include "bn/bn_sqr.c" |
| 31 | } |
| 32 | |
| 33 | using namespace bn; |
| 34 | |
| 35 | // Define storage for constants. |
| 36 | const int ExactFloat::kMinExp; |
| 37 | const int ExactFloat::kMaxExp; |
| 38 | const int ExactFloat::kMaxPrec; |
| 39 | const int32 ExactFloat::kExpNaN; |
| 40 | const int32 ExactFloat::kExpInfinity; |
| 41 | const int32 ExactFloat::kExpZero; |
| 42 | const int ExactFloat::kDoubleMantissaBits; |
| 43 | |
| 44 | // To simplify the overflow/underflow logic, we limit the exponent and |
| 45 | // precision range so that (2 * bn_exp_) does not overflow an "int". We take |
| 46 | // advantage of this, for example, by only checking for overflow/underflow |
| 47 | // *after* multiplying two numbers. |
| 48 | COMPILE_ASSERT( |
| 49 | ExactFloat::kMaxExp <= INT_MAX / 2 && |
| 50 | ExactFloat::kMinExp - ExactFloat::kMaxPrec >= INT_MIN / 2, |
| 51 | exactfloat_exponent_might_overflow); |
| 52 | |
| 53 | // We define a few simple extensions to the BIGNUM interface. In some cases |
| 54 | // these depend on BIGNUM internal fields, so they might require tweaking if |
| 55 | // the BIGNUM implementation changes significantly. |
| 56 | |
| 57 | // Set a BIGNUM to the given unsigned 64-bit value. |
| 58 | inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) { |
| 59 | #if BN_BITS2 == 64 |
| 60 | CHECK(BN_set_word(bn, v)); |
| 61 | #else |
| 62 | COMPILE_ASSERT(BN_BITS2 == 32, at_least_32_bit_openssl_build_needed); |
| 63 | CHECK(BN_set_word(bn, static_cast<uint32>(v >> 32))); |
| 64 | CHECK(BN_lshift(bn, bn, 32)); |
| 65 | CHECK(BN_add_word(bn, static_cast<uint32>(v))); |
| 66 | #endif |
| 67 | } |
| 68 | |
| 69 | // Return the absolute value of a BIGNUM as a 64-bit unsigned integer. |
| 70 | // Requires that BIGNUM fits into 64 bits. |
| 71 | inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) { |
| 72 | DCHECK_LE(BN_num_bytes(bn), sizeof(uint64)); |
| 73 | #if BN_BITS2 == 64 |
| 74 | return BN_get_word(bn); |
| 75 | #else |
| 76 | COMPILE_ASSERT(BN_BITS2 == 32, at_least_32_bit_openssl_build_needed); |
| 77 | if (bn->top == 0) return 0; |
| 78 | if (bn->top == 1) return BN_get_word(bn); |
| 79 | DCHECK_EQ(bn->top, 2); |
| 80 | return (static_cast<uint64>(bn->d[1]) << 32) + bn->d[0]; |
| 81 | #endif |
| 82 | } |
| 83 | |
| 84 | // Count the number of low-order zero bits in the given BIGNUM (ignoring its |
| 85 | // sign). Returns 0 if the argument is zero. |
| 86 | static int BN_ext_count_low_zero_bits(const BIGNUM* bn) { |
| 87 | int count = 0; |
| 88 | for (int i = 0; i < bn->top; ++i) { |
| 89 | BN_ULONG w = bn->d[i]; |
| 90 | if (w == 0) { |
| 91 | count += 8 * sizeof(BN_ULONG); |
| 92 | } else { |
| 93 | for (; (w & 1) == 0; w >>= 1) { |
| 94 | ++count; |
| 95 | } |
| 96 | break; |
| 97 | } |
| 98 | } |
| 99 | return count; |
| 100 | } |
| 101 | |
| 102 | ExactFloat::ExactFloat(double v) { |
| 103 | BN_init(&bn_); |
| 104 | sign_ = signbit(v) ? -1 : 1; |
| 105 | if (isnan(v)) { |
| 106 | set_nan(); |
| 107 | } else if (isinf(v)) { |
| 108 | set_inf(sign_); |
| 109 | } else { |
| 110 | // The following code is much simpler than messing about with bit masks, |
| 111 | // has the advantage of handling denormalized numbers and zero correctly, |
| 112 | // and is actually quite efficient (at least compared to the rest of this |
| 113 | // code). "f" is a fraction in the range [0.5, 1), so if we shift it left |
| 114 | // by the number of mantissa bits in a double (53, including the leading |
| 115 | // "1") then the result is always an integer. |
| 116 | int exp; |
| 117 | double f = frexp(fabs(v), &exp); |
| 118 | uint64 m = static_cast<uint64>(ldexp(f, kDoubleMantissaBits)); |
| 119 | BN_ext_set_uint64(&bn_, m); |
| 120 | bn_exp_ = exp - kDoubleMantissaBits; |
| 121 | Canonicalize(); |
| 122 | } |
| 123 | } |
| 124 | |
| 125 | ExactFloat::ExactFloat(int v) { |
| 126 | BN_init(&bn_); |
| 127 | sign_ = (v >= 0) ? 1 : -1; |
| 128 | // Note that this works even for INT_MIN because the parameter type for |
| 129 | // BN_set_word() is unsigned. |
| 130 | CHECK(BN_set_word(&bn_, abs(v))); |
| 131 | bn_exp_ = 0; |
| 132 | Canonicalize(); |
| 133 | } |
| 134 | |
| 135 | ExactFloat::ExactFloat(const ExactFloat& b) |
| 136 | : sign_(b.sign_), |
| 137 | bn_exp_(b.bn_exp_) { |
| 138 | BN_init(&bn_); |
| 139 | BN_copy(&bn_, &b.bn_); |
| 140 | } |
| 141 | |
| 142 | ExactFloat ExactFloat::SignedZero(int sign) { |
| 143 | ExactFloat r; |
| 144 | r.set_zero(sign); |
| 145 | return r; |
| 146 | } |
| 147 | |
| 148 | ExactFloat ExactFloat::Infinity(int sign) { |
| 149 | ExactFloat r; |
| 150 | r.set_inf(sign); |
| 151 | return r; |
| 152 | } |
| 153 | |
| 154 | ExactFloat ExactFloat::NaN() { |
| 155 | ExactFloat r; |
| 156 | r.set_nan(); |
| 157 | return r; |
| 158 | } |
| 159 | |
| 160 | int ExactFloat::prec() const { |
| 161 | return BN_num_bits(&bn_); |
| 162 | } |
| 163 | |
| 164 | int ExactFloat::exp() const { |
| 165 | DCHECK(is_normal()); |
| 166 | return bn_exp_ + BN_num_bits(&bn_); |
| 167 | } |
| 168 | |
| 169 | void ExactFloat::set_zero(int sign) { |
| 170 | sign_ = sign; |
| 171 | bn_exp_ = kExpZero; |
| 172 | if (!BN_is_zero(&bn_)) BN_zero(&bn_); |
| 173 | } |
| 174 | |
| 175 | void ExactFloat::set_inf(int sign) { |
| 176 | sign_ = sign; |
| 177 | bn_exp_ = kExpInfinity; |
| 178 | if (!BN_is_zero(&bn_)) BN_zero(&bn_); |
| 179 | } |
| 180 | |
| 181 | void ExactFloat::set_nan() { |
| 182 | sign_ = 1; |
| 183 | bn_exp_ = kExpNaN; |
| 184 | if (!BN_is_zero(&bn_)) BN_zero(&bn_); |
| 185 | } |
| 186 | |
| 187 | double ExactFloat::ToDouble() const { |
| 188 | // If the mantissa has too many bits, we need to round it. |
| 189 | if (prec() <= kDoubleMantissaBits) { |
| 190 | return ToDoubleHelper(); |
| 191 | } else { |
| 192 | ExactFloat r = RoundToMaxPrec(kDoubleMantissaBits, kRoundTiesToEven); |
| 193 | return r.ToDoubleHelper(); |
| 194 | } |
| 195 | } |
| 196 | |
| 197 | double ExactFloat::ToDoubleHelper() const { |
| 198 | DCHECK_LE(BN_num_bits(&bn_), kDoubleMantissaBits); |
| 199 | if (!is_normal()) { |
| 200 | if (is_zero()) return copysign(0, sign_); |
| 201 | if (is_inf()) return copysign(INFINITY, sign_); |
| 202 | return copysign(NAN, sign_); |
| 203 | } |
| 204 | uint64 d_mantissa = BN_ext_get_uint64(&bn_); |
| 205 | // We rely on ldexp() to handle overflow and underflow. (It will return a |
| 206 | // signed zero or infinity if the result is too small or too large.) |
| 207 | return sign_ * ldexp(static_cast<double>(d_mantissa), bn_exp_); |
| 208 | } |
| 209 | |
| 210 | ExactFloat ExactFloat::RoundToMaxPrec(int max_prec, RoundingMode mode) const { |
| 211 | // The "kRoundTiesToEven" mode requires at least 2 bits of precision |
| 212 | // (otherwise both adjacent representable values may be odd). |
| 213 | DCHECK_GE(max_prec, 2); |
| 214 | DCHECK_LE(max_prec, kMaxPrec); |
| 215 | |
| 216 | // The following test also catches zero, infinity, and NaN. |
| 217 | int shift = prec() - max_prec; |
| 218 | if (shift <= 0) return *this; |
| 219 | |
| 220 | // Round by removing the appropriate number of bits from the mantissa. Note |
| 221 | // that if the value is rounded up to a power of 2, the high-order bit |
| 222 | // position may increase, but in that case Canonicalize() will remove at |
| 223 | // least one zero bit and so the output will still have prec() <= max_prec. |
| 224 | return RoundToPowerOf2(bn_exp_ + shift, mode); |
| 225 | } |
| 226 | |
| 227 | ExactFloat ExactFloat::RoundToPowerOf2(int bit_exp, RoundingMode mode) const { |
| 228 | DCHECK_GE(bit_exp, kMinExp - kMaxPrec); |
| 229 | DCHECK_LE(bit_exp, kMaxExp); |
| 230 | |
| 231 | // If the exponent is already large enough, or the value is zero, infinity, |
| 232 | // or NaN, then there is nothing to do. |
| 233 | int shift = bit_exp - bn_exp_; |
| 234 | if (shift <= 0) return *this; |
| 235 | DCHECK(is_normal()); |
| 236 | |
| 237 | // Convert rounding up/down to toward/away from zero, so that we don't need |
| 238 | // to consider the sign of the number from this point onward. |
| 239 | if (mode == kRoundTowardPositive) { |
| 240 | mode = (sign_ > 0) ? kRoundAwayFromZero : kRoundTowardZero; |
| 241 | } else if (mode == kRoundTowardNegative) { |
| 242 | mode = (sign_ > 0) ? kRoundTowardZero : kRoundAwayFromZero; |
| 243 | } |
| 244 | |
| 245 | // Rounding consists of right-shifting the mantissa by "shift", and then |
| 246 | // possibly incrementing the result (depending on the rounding mode, the |
| 247 | // bits that were discarded, and sometimes the lowest kept bit). The |
| 248 | // following code figures out whether we need to increment. |
| 249 | ExactFloat r; |
| 250 | bool increment = false; |
| 251 | if (mode == kRoundTowardZero) { |
| 252 | // Never increment. |
| 253 | } else if (mode == kRoundTiesAwayFromZero) { |
| 254 | // Increment if the highest discarded bit is 1. |
| 255 | if (BN_is_bit_set(&bn_, shift - 1)) |
| 256 | increment = true; |
| 257 | } else if (mode == kRoundAwayFromZero) { |
| 258 | // Increment unless all discarded bits are zero. |
| 259 | if (BN_ext_count_low_zero_bits(&bn_) < shift) |
| 260 | increment = true; |
| 261 | } else { |
| 262 | DCHECK_EQ(mode, kRoundTiesToEven); |
| 263 | // Let "w/xyz" denote a mantissa where "w" is the lowest kept bit and |
| 264 | // "xyz" are the discarded bits. Then using regexp notation: |
| 265 | // ./0.* -> Don't increment (fraction < 1/2) |
| 266 | // 0/10* -> Don't increment (fraction = 1/2, kept part even) |
| 267 | // 1/10* -> Increment (fraction = 1/2, kept part odd) |
| 268 | // ./1.*1.* -> Increment (fraction > 1/2) |
| 269 | if (BN_is_bit_set(&bn_, shift - 1) && |
| 270 | ((BN_is_bit_set(&bn_, shift) || |
| 271 | BN_ext_count_low_zero_bits(&bn_) < shift - 1))) { |
| 272 | increment = true; |
| 273 | } |
| 274 | } |
| 275 | r.bn_exp_ = bn_exp_ + shift; |
| 276 | CHECK(BN_rshift(&r.bn_, &bn_, shift)); |
| 277 | if (increment) { |
| 278 | CHECK(BN_add_word(&r.bn_, 1)); |
| 279 | } |
| 280 | r.sign_ = sign_; |
| 281 | r.Canonicalize(); |
| 282 | return r; |
| 283 | } |
| 284 | |
| 285 | int ExactFloat::NumSignificantDigitsForPrec(int prec) { |
| 286 | // The simplest bound is |
| 287 | // |
| 288 | // d <= 1 + ceil(prec * log10(2)) |
| 289 | // |
| 290 | // The following bound is tighter by 0.5 digits on average, but requires |
| 291 | // the exponent to be known as well: |
| 292 | // |
| 293 | // d <= ceil(exp * log10(2)) - floor((exp - prec) * log10(2)) |
| 294 | // |
| 295 | // Since either of these bounds can be too large by 0, 1, or 2 digits, we |
| 296 | // stick with the simpler first bound. |
| 297 | return static_cast<int>(1 + ceil(prec * (M_LN2 / M_LN10))); |
| 298 | } |
| 299 | |
| 300 | // Numbers are always formatted with at least this many significant digits. |
| 301 | // This prevents small integers from being formatted in exponential notation |
| 302 | // (e.g. 1024 formatted as 1e+03), and also avoids the confusion of having |
| 303 | // supposedly "high precision" numbers formatted with just 1 or 2 digits |
| 304 | // (e.g. 1/512 == 0.001953125 formatted as 0.002). |
| 305 | static const int kMinSignificantDigits = 10; |
| 306 | |
| 307 | string ExactFloat::ToString() const { |
| 308 | int max_digits = max(kMinSignificantDigits, |
| 309 | NumSignificantDigitsForPrec(prec())); |
| 310 | return ToStringWithMaxDigits(max_digits); |
| 311 | } |
| 312 | |
| 313 | string ExactFloat::ToStringWithMaxDigits(int max_digits) const { |
| 314 | DCHECK_GT(max_digits, 0); |
| 315 | if (!is_normal()) { |
| 316 | if (is_nan()) return "nan" ; |
| 317 | if (is_zero()) return (sign_ < 0) ? "-0" : "0" ; |
| 318 | return (sign_ < 0) ? "-inf" : "inf" ; |
| 319 | } |
| 320 | string digits; |
| 321 | int exp10 = GetDecimalDigits(max_digits, &digits); |
| 322 | string str; |
| 323 | if (sign_ < 0) str.push_back('-'); |
| 324 | |
| 325 | // We use the standard '%g' formatting rules. If the exponent is less than |
| 326 | // -4 or greater than or equal to the requested precision (i.e., max_digits) |
| 327 | // then we use exponential notation. |
| 328 | // |
| 329 | // But since "exp10" is the base-10 exponent corresponding to a mantissa in |
| 330 | // the range [0.1, 1), whereas the '%g' rules assume a mantissa in the range |
| 331 | // [1.0, 10), we need to adjust these parameters by 1. |
| 332 | if (exp10 <= -4 || exp10 > max_digits) { |
| 333 | // Use exponential format. |
| 334 | str.push_back(digits[0]); |
| 335 | if (digits.size() > 1) { |
| 336 | str.push_back('.'); |
| 337 | str.append(digits.begin() + 1, digits.end()); |
| 338 | } |
| 339 | char exp_buf[20]; |
| 340 | sprintf(exp_buf, "e%+02d" , exp10 - 1); |
| 341 | str += exp_buf; |
| 342 | } else { |
| 343 | // Use fixed format. We split this into two cases depending on whether |
| 344 | // the integer portion is non-zero or not. |
| 345 | if (exp10 > 0) { |
| 346 | if ((size_t)exp10 >= digits.size()) { |
| 347 | str += digits; |
| 348 | for (int i = exp10 - digits.size(); i > 0; --i) { |
| 349 | str.push_back('0'); |
| 350 | } |
| 351 | } else { |
| 352 | str.append(digits.begin(), digits.begin() + exp10); |
| 353 | str.push_back('.'); |
| 354 | str.append(digits.begin() + exp10, digits.end()); |
| 355 | } |
| 356 | } else { |
| 357 | str += "0." ; |
| 358 | for (int i = exp10; i < 0; ++i) { |
| 359 | str.push_back('0'); |
| 360 | } |
| 361 | str += digits; |
| 362 | } |
| 363 | } |
| 364 | return str; |
| 365 | } |
| 366 | |
| 367 | // Increment an unsigned integer represented as a string of ASCII digits. |
| 368 | static void IncrementDecimalDigits(string* digits) { |
| 369 | string::iterator pos = digits->end(); |
| 370 | while (--pos >= digits->begin()) { |
| 371 | if (*pos < '9') { ++*pos; return; } |
| 372 | *pos = '0'; |
| 373 | } |
| 374 | digits->insert(0, "1" ); |
| 375 | } |
| 376 | |
| 377 | int ExactFloat::GetDecimalDigits(int max_digits, string* digits) const { |
| 378 | DCHECK(is_normal()); |
| 379 | // Convert the value to the form (bn * (10 ** bn_exp10)) where "bn" is a |
| 380 | // positive integer (BIGNUM). |
| 381 | BIGNUM* bn = BN_new(); |
| 382 | int bn_exp10; |
| 383 | if (bn_exp_ >= 0) { |
| 384 | // The easy case: bn = bn_ * (2 ** bn_exp_)), bn_exp10 = 0. |
| 385 | CHECK(BN_lshift(bn, &bn_, bn_exp_)); |
| 386 | bn_exp10 = 0; |
| 387 | } else { |
| 388 | // Set bn = bn_ * (5 ** -bn_exp_) and bn_exp10 = bn_exp_. This is |
| 389 | // equivalent to the original value of (bn_ * (2 ** bn_exp_)). |
| 390 | BIGNUM* power = BN_new(); |
| 391 | CHECK(BN_set_word(power, -bn_exp_)); |
| 392 | CHECK(BN_set_word(bn, 5)); |
| 393 | BN_CTX* ctx = BN_CTX_new(); |
| 394 | CHECK(BN_exp(bn, bn, power, ctx)); |
| 395 | CHECK(BN_mul(bn, bn, &bn_, ctx)); |
| 396 | BN_CTX_free(ctx); |
| 397 | BN_free(power); |
| 398 | bn_exp10 = bn_exp_; |
| 399 | } |
| 400 | // Now convert "bn" to a decimal string. |
| 401 | char* all_digits = BN_bn2dec(bn); |
| 402 | DCHECK(all_digits != NULL); |
| 403 | BN_free(bn); |
| 404 | // Check whether we have too many digits and round if necessary. |
| 405 | int num_digits = strlen(all_digits); |
| 406 | if (num_digits <= max_digits) { |
| 407 | *digits = all_digits; |
| 408 | } else { |
| 409 | digits->assign(all_digits, max_digits); |
| 410 | // Standard "printf" formatting rounds ties to an even number. This means |
| 411 | // that we round up (away from zero) if highest discarded digit is '5' or |
| 412 | // more, unless all other discarded digits are zero in which case we round |
| 413 | // up only if the lowest kept digit is odd. |
| 414 | if (all_digits[max_digits] >= '5' && |
| 415 | ((all_digits[max_digits-1] & 1) == 1 || |
| 416 | strpbrk(all_digits + max_digits + 1, "123456789" ) != NULL)) { |
| 417 | // This can increase the number of digits by 1, but in that case at |
| 418 | // least one trailing zero will be stripped off below. |
| 419 | IncrementDecimalDigits(digits); |
| 420 | } |
| 421 | // Adjust the base-10 exponent to reflect the digits we have removed. |
| 422 | bn_exp10 += num_digits - max_digits; |
| 423 | } |
| 424 | OPENSSL_free(all_digits); |
| 425 | |
| 426 | // Now strip any trailing zeros. |
| 427 | DCHECK_NE((*digits)[0], '0'); |
| 428 | string::iterator pos = digits->end(); |
| 429 | while (pos[-1] == '0') --pos; |
| 430 | if (pos < digits->end()) { |
| 431 | bn_exp10 += digits->end() - pos; |
| 432 | digits->erase(pos, digits->end()); |
| 433 | } |
| 434 | DCHECK_LE(digits->size(), max_digits); |
| 435 | |
| 436 | // Finally, we adjust the base-10 exponent so that the mantissa is a |
| 437 | // fraction in the range [0.1, 1) rather than an integer. |
| 438 | return bn_exp10 + digits->size(); |
| 439 | } |
| 440 | |
| 441 | string ExactFloat::ToUniqueString() const { |
| 442 | char prec_buf[20]; |
| 443 | sprintf(prec_buf, "<%d>" , prec()); |
| 444 | return ToString() + prec_buf; |
| 445 | } |
| 446 | |
| 447 | ExactFloat& ExactFloat::operator=(const ExactFloat& b) { |
| 448 | if (this != &b) { |
| 449 | sign_ = b.sign_; |
| 450 | bn_exp_ = b.bn_exp_; |
| 451 | BN_copy(&bn_, &b.bn_); |
| 452 | } |
| 453 | return *this; |
| 454 | } |
| 455 | |
| 456 | ExactFloat ExactFloat::operator-() const { |
| 457 | return CopyWithSign(-sign_); |
| 458 | } |
| 459 | |
| 460 | ExactFloat operator+(const ExactFloat& a, const ExactFloat& b) { |
| 461 | return ExactFloat::SignedSum(a.sign_, &a, b.sign_, &b); |
| 462 | } |
| 463 | |
| 464 | ExactFloat operator-(const ExactFloat& a, const ExactFloat& b) { |
| 465 | return ExactFloat::SignedSum(a.sign_, &a, -b.sign_, &b); |
| 466 | } |
| 467 | |
| 468 | ExactFloat ExactFloat::SignedSum(int a_sign, const ExactFloat* a, |
| 469 | int b_sign, const ExactFloat* b) { |
| 470 | if (!a->is_normal() || !b->is_normal()) { |
| 471 | // Handle zero, infinity, and NaN according to IEEE 754-2008. |
| 472 | if (a->is_nan()) return *a; |
| 473 | if (b->is_nan()) return *b; |
| 474 | if (a->is_inf()) { |
| 475 | // Adding two infinities with opposite sign yields NaN. |
| 476 | if (b->is_inf() && a_sign != b_sign) return NaN(); |
| 477 | return Infinity(a_sign); |
| 478 | } |
| 479 | if (b->is_inf()) return Infinity(b_sign); |
| 480 | if (a->is_zero()) { |
| 481 | if (!b->is_zero()) return b->CopyWithSign(b_sign); |
| 482 | // Adding two zeros with the same sign preserves the sign. |
| 483 | if (a_sign == b_sign) return SignedZero(a_sign); |
| 484 | // Adding two zeros of opposite sign produces +0. |
| 485 | return SignedZero(+1); |
| 486 | } |
| 487 | DCHECK(b->is_zero()); |
| 488 | return a->CopyWithSign(a_sign); |
| 489 | } |
| 490 | // Swap the numbers if necessary so that "a" has the larger bn_exp_. |
| 491 | if (a->bn_exp_ < b->bn_exp_) { |
| 492 | swap(a_sign, b_sign); |
| 493 | swap(a, b); |
| 494 | } |
| 495 | // Shift "a" if necessary so that both values have the same bn_exp_. |
| 496 | ExactFloat r; |
| 497 | if (a->bn_exp_ > b->bn_exp_) { |
| 498 | CHECK(BN_lshift(&r.bn_, &a->bn_, a->bn_exp_ - b->bn_exp_)); |
| 499 | a = &r; // The only field of "a" used below is bn_. |
| 500 | } |
| 501 | r.bn_exp_ = b->bn_exp_; |
| 502 | if (a_sign == b_sign) { |
| 503 | CHECK(BN_add(&r.bn_, &a->bn_, &b->bn_)); |
| 504 | r.sign_ = a_sign; |
| 505 | } else { |
| 506 | // Note that the BIGNUM documentation is out of date -- all methods now |
| 507 | // allow the result to be the same as any input argument, so it is okay if |
| 508 | // (a == &r) due to the shift above. |
| 509 | CHECK(BN_sub(&r.bn_, &a->bn_, &b->bn_)); |
| 510 | if (BN_is_zero(&r.bn_)) { |
| 511 | r.sign_ = +1; |
| 512 | } else if (BN_is_negative(&r.bn_)) { |
| 513 | // The magnitude of "b" was larger. |
| 514 | r.sign_ = b_sign; |
| 515 | BN_set_negative(&r.bn_, false); |
| 516 | } else { |
| 517 | // They were equal, or the magnitude of "a" was larger. |
| 518 | r.sign_ = a_sign; |
| 519 | } |
| 520 | } |
| 521 | r.Canonicalize(); |
| 522 | return r; |
| 523 | } |
| 524 | |
| 525 | void ExactFloat::Canonicalize() { |
| 526 | if (!is_normal()) return; |
| 527 | |
| 528 | // Underflow/overflow occurs if exp() is not in [kMinExp, kMaxExp]. |
| 529 | // We also convert a zero mantissa to signed zero. |
| 530 | int my_exp = exp(); |
| 531 | if (my_exp < kMinExp || BN_is_zero(&bn_)) { |
| 532 | set_zero(sign_); |
| 533 | } else if (my_exp > kMaxExp) { |
| 534 | set_inf(sign_); |
| 535 | } else if (!BN_is_odd(&bn_)) { |
| 536 | // Remove any low-order zero bits from the mantissa. |
| 537 | DCHECK(!BN_is_zero(&bn_)); |
| 538 | int shift = BN_ext_count_low_zero_bits(&bn_); |
| 539 | if (shift > 0) { |
| 540 | CHECK(BN_rshift(&bn_, &bn_, shift)); |
| 541 | bn_exp_ += shift; |
| 542 | } |
| 543 | } |
| 544 | // If the mantissa has too many bits, we replace it by NaN to indicate |
| 545 | // that an inexact calculation has occurred. |
| 546 | if (prec() > kMaxPrec) { |
| 547 | set_nan(); |
| 548 | } |
| 549 | } |
| 550 | |
| 551 | ExactFloat operator*(const ExactFloat& a, const ExactFloat& b) { |
| 552 | int result_sign = a.sign_ * b.sign_; |
| 553 | if (!a.is_normal() || !b.is_normal()) { |
| 554 | // Handle zero, infinity, and NaN according to IEEE 754-2008. |
| 555 | if (a.is_nan()) return a; |
| 556 | if (b.is_nan()) return b; |
| 557 | if (a.is_inf()) { |
| 558 | // Infinity times zero yields NaN. |
| 559 | if (b.is_zero()) return ExactFloat::NaN(); |
| 560 | return ExactFloat::Infinity(result_sign); |
| 561 | } |
| 562 | if (b.is_inf()) { |
| 563 | if (a.is_zero()) return ExactFloat::NaN(); |
| 564 | return ExactFloat::Infinity(result_sign); |
| 565 | } |
| 566 | DCHECK(a.is_zero() || b.is_zero()); |
| 567 | return ExactFloat::SignedZero(result_sign); |
| 568 | } |
| 569 | ExactFloat r; |
| 570 | r.sign_ = result_sign; |
| 571 | r.bn_exp_ = a.bn_exp_ + b.bn_exp_; |
| 572 | BN_CTX* ctx = BN_CTX_new(); |
| 573 | CHECK(BN_mul(&r.bn_, &a.bn_, &b.bn_, ctx)); |
| 574 | BN_CTX_free(ctx); |
| 575 | r.Canonicalize(); |
| 576 | return r; |
| 577 | } |
| 578 | |
| 579 | bool operator==(const ExactFloat& a, const ExactFloat& b) { |
| 580 | // NaN is not equal to anything, not even itself. |
| 581 | if (a.is_nan() || b.is_nan()) return false; |
| 582 | |
| 583 | // Since Canonicalize() strips low-order zero bits, all other cases |
| 584 | // (including non-normal values) require bn_exp_ to be equal. |
| 585 | if (a.bn_exp_ != b.bn_exp_) return false; |
| 586 | |
| 587 | // Positive and negative zero are equal. |
| 588 | if (a.is_zero() && b.is_zero()) return true; |
| 589 | |
| 590 | // Otherwise, the signs and mantissas must match. Note that non-normal |
| 591 | // values such as infinity have a mantissa of zero. |
| 592 | return a.sign_ == b.sign_ && BN_ucmp(&a.bn_, &b.bn_) == 0; |
| 593 | } |
| 594 | |
| 595 | int ExactFloat::ScaleAndCompare(const ExactFloat& b) const { |
| 596 | DCHECK(is_normal() && b.is_normal() && bn_exp_ >= b.bn_exp_); |
| 597 | ExactFloat tmp = *this; |
| 598 | CHECK(BN_lshift(&tmp.bn_, &tmp.bn_, bn_exp_ - b.bn_exp_)); |
| 599 | return BN_ucmp(&tmp.bn_, &b.bn_); |
| 600 | } |
| 601 | |
| 602 | bool ExactFloat::UnsignedLess(const ExactFloat& b) const { |
| 603 | // Handle the zero/infinity cases (NaN has already been done). |
| 604 | if (is_inf() || b.is_zero()) return false; |
| 605 | if (is_zero() || b.is_inf()) return true; |
| 606 | // If the high-order bit positions differ, we are done. |
| 607 | int cmp = exp() - b.exp(); |
| 608 | if (cmp != 0) return cmp < 0; |
| 609 | // Otherwise shift one of the two values so that they both have the same |
| 610 | // bn_exp_ and then compare the mantissas. |
| 611 | return (bn_exp_ >= b.bn_exp_ ? |
| 612 | ScaleAndCompare(b) < 0 : b.ScaleAndCompare(*this) > 0); |
| 613 | } |
| 614 | |
| 615 | bool operator<(const ExactFloat& a, const ExactFloat& b) { |
| 616 | // NaN is unordered compared to everything, including itself. |
| 617 | if (a.is_nan() || b.is_nan()) return false; |
| 618 | // Positive and negative zero are equal. |
| 619 | if (a.is_zero() && b.is_zero()) return false; |
| 620 | // Otherwise, anything negative is less than anything positive. |
| 621 | if (a.sign_ != b.sign_) return a.sign_ < b.sign_; |
| 622 | // Now we just compare absolute values. |
| 623 | return (a.sign_ > 0) ? a.UnsignedLess(b) : b.UnsignedLess(a); |
| 624 | } |
| 625 | |
| 626 | ExactFloat fabs(const ExactFloat& a) { |
| 627 | return a.CopyWithSign(+1); |
| 628 | } |
| 629 | |
| 630 | ExactFloat fmax(const ExactFloat& a, const ExactFloat& b) { |
| 631 | // If one argument is NaN, return the other argument. |
| 632 | if (a.is_nan()) return b; |
| 633 | if (b.is_nan()) return a; |
| 634 | // Not required by IEEE 754, but we prefer +0 over -0. |
| 635 | if (a.sign_ != b.sign_) { |
| 636 | return (a.sign_ < b.sign_) ? b : a; |
| 637 | } |
| 638 | return (a < b) ? b : a; |
| 639 | } |
| 640 | |
| 641 | ExactFloat fmin(const ExactFloat& a, const ExactFloat& b) { |
| 642 | // If one argument is NaN, return the other argument. |
| 643 | if (a.is_nan()) return b; |
| 644 | if (b.is_nan()) return a; |
| 645 | // Not required by IEEE 754, but we prefer -0 over +0. |
| 646 | if (a.sign_ != b.sign_) { |
| 647 | return (a.sign_ < b.sign_) ? a : b; |
| 648 | } |
| 649 | return (a < b) ? a : b; |
| 650 | } |
| 651 | |
| 652 | ExactFloat fdim(const ExactFloat& a, const ExactFloat& b) { |
| 653 | // This formulation has the correct behavior for NaNs. |
| 654 | return (a <= b) ? 0 : (a - b); |
| 655 | } |
| 656 | |
| 657 | ExactFloat ceil(const ExactFloat& a) { |
| 658 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardPositive); |
| 659 | } |
| 660 | |
| 661 | ExactFloat floor(const ExactFloat& a) { |
| 662 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardNegative); |
| 663 | } |
| 664 | |
| 665 | ExactFloat trunc(const ExactFloat& a) { |
| 666 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardZero); |
| 667 | } |
| 668 | |
| 669 | ExactFloat round(const ExactFloat& a) { |
| 670 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesAwayFromZero); |
| 671 | } |
| 672 | |
| 673 | ExactFloat rint(const ExactFloat& a) { |
| 674 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesToEven); |
| 675 | } |
| 676 | |
| 677 | template <class T> |
| 678 | T ExactFloat::ToInteger(RoundingMode mode) const { |
| 679 | COMPILE_ASSERT(sizeof(T) <= sizeof(uint64), max_64_bits_supported); |
| 680 | COMPILE_ASSERT(numeric_limits<T>::is_signed, only_signed_types_supported); |
| 681 | const int64 kMinValue = numeric_limits<T>::min(); |
| 682 | const int64 kMaxValue = numeric_limits<T>::max(); |
| 683 | |
| 684 | ExactFloat r = RoundToPowerOf2(0, mode); |
| 685 | if (r.is_nan()) return kMaxValue; |
| 686 | if (r.is_zero()) return 0; |
| 687 | if (!r.is_inf()) { |
| 688 | // If the unsigned value has more than 63 bits it is always clamped. |
| 689 | if (r.exp() < 64) { |
| 690 | int64 value = BN_ext_get_uint64(&r.bn_) << r.bn_exp_; |
| 691 | if (r.sign_ < 0) value = -value; |
| 692 | return max(kMinValue, min(kMaxValue, value)); |
| 693 | } |
| 694 | } |
| 695 | return (r.sign_ < 0) ? kMinValue : kMaxValue; |
| 696 | } |
| 697 | |
| 698 | long lrint(const ExactFloat& a) { |
| 699 | return a.ToInteger<long>(ExactFloat::kRoundTiesToEven); |
| 700 | } |
| 701 | |
| 702 | long long llrint(const ExactFloat& a) { |
| 703 | return a.ToInteger<long long>(ExactFloat::kRoundTiesToEven); |
| 704 | } |
| 705 | |
| 706 | long lround(const ExactFloat& a) { |
| 707 | return a.ToInteger<long>(ExactFloat::kRoundTiesAwayFromZero); |
| 708 | } |
| 709 | |
| 710 | long long llround(const ExactFloat& a) { |
| 711 | return a.ToInteger<long long>(ExactFloat::kRoundTiesAwayFromZero); |
| 712 | } |
| 713 | |
| 714 | ExactFloat copysign(const ExactFloat& a, const ExactFloat& b) { |
| 715 | return a.CopyWithSign(b.sign_); |
| 716 | } |
| 717 | |
| 718 | ExactFloat frexp(const ExactFloat& a, int* exp) { |
| 719 | if (!a.is_normal()) { |
| 720 | // If a == 0, exp should be zero. If a.is_inf() or a.is_nan(), exp is not |
| 721 | // defined but the glibc implementation returns zero. |
| 722 | *exp = 0; |
| 723 | return a; |
| 724 | } |
| 725 | *exp = a.exp(); |
| 726 | return ldexp(a, -a.exp()); |
| 727 | } |
| 728 | |
| 729 | ExactFloat ldexp(const ExactFloat& a, int exp) { |
| 730 | if (!a.is_normal()) return a; |
| 731 | |
| 732 | // To prevent integer overflow, we first clamp "exp" so that |
| 733 | // (kMinExp - 1) <= (a_exp + exp) <= (kMaxExp + 1). |
| 734 | int a_exp = a.exp(); |
| 735 | exp = min(ExactFloat::kMaxExp + 1 - a_exp, |
| 736 | max(ExactFloat::kMinExp - 1 + a_exp, exp)); |
| 737 | |
| 738 | // Now modify the exponent and check for overflow/underflow. |
| 739 | ExactFloat r = a; |
| 740 | r.bn_exp_ += exp; |
| 741 | r.Canonicalize(); |
| 742 | return r; |
| 743 | } |
| 744 | |
| 745 | int ilogb(const ExactFloat& a) { |
| 746 | if (a.is_zero()) return FP_ILOGB0; |
| 747 | if (a.is_inf()) return INT_MAX; |
| 748 | if (a.is_nan()) return FP_ILOGBNAN; |
| 749 | // a.exp() assumes the significand is in the range [0.5, 1). |
| 750 | return a.exp() - 1; |
| 751 | } |
| 752 | |
| 753 | ExactFloat logb(const ExactFloat& a) { |
| 754 | if (a.is_zero()) return ExactFloat::Infinity(-1); |
| 755 | if (a.is_inf()) return ExactFloat::Infinity(+1); // Even if a < 0. |
| 756 | if (a.is_nan()) return a; |
| 757 | // exp() assumes the significand is in the range [0.5,1). |
| 758 | return ExactFloat(a.exp() - 1); |
| 759 | } |
| 760 | |
| 761 | ExactFloat ExactFloat::Unimplemented() { |
| 762 | LOG(FATAL) << "Unimplemented ExactFloat method called" ; |
| 763 | return NaN(); |
| 764 | } |
| 765 | |