1 | // Copyright 2009 Google Inc. All Rights Reserved. |
2 | |
3 | #include "util/math/exactfloat/exactfloat.h" |
4 | |
5 | #include <cstdarg> |
6 | #include <cstddef> |
7 | #include <cstdlib> |
8 | #include <cstring> |
9 | #include <cstdio> |
10 | |
11 | #include <math.h> |
12 | #include <algorithm> |
13 | using std::min; |
14 | using std::max; |
15 | using std::swap; |
16 | using std::reverse; |
17 | |
18 | #include <limits> |
19 | using std::numeric_limits; |
20 | |
21 | #include "base/integral_types.h" |
22 | #include "base/logging.h" |
23 | |
24 | namespace bn { |
25 | #include "bn/crypto.h" |
26 | #include "bn/bn.c" |
27 | #include "bn/bn_asm.c" |
28 | #include "bn/bn_ctx.c" |
29 | #include "bn/bn_mul.c" |
30 | #include "bn/bn_sqr.c" |
31 | } |
32 | |
33 | using namespace bn; |
34 | |
35 | // Define storage for constants. |
36 | const int ExactFloat::kMinExp; |
37 | const int ExactFloat::kMaxExp; |
38 | const int ExactFloat::kMaxPrec; |
39 | const int32 ExactFloat::kExpNaN; |
40 | const int32 ExactFloat::kExpInfinity; |
41 | const int32 ExactFloat::kExpZero; |
42 | const int ExactFloat::kDoubleMantissaBits; |
43 | |
44 | // To simplify the overflow/underflow logic, we limit the exponent and |
45 | // precision range so that (2 * bn_exp_) does not overflow an "int". We take |
46 | // advantage of this, for example, by only checking for overflow/underflow |
47 | // *after* multiplying two numbers. |
48 | COMPILE_ASSERT( |
49 | ExactFloat::kMaxExp <= INT_MAX / 2 && |
50 | ExactFloat::kMinExp - ExactFloat::kMaxPrec >= INT_MIN / 2, |
51 | exactfloat_exponent_might_overflow); |
52 | |
53 | // We define a few simple extensions to the BIGNUM interface. In some cases |
54 | // these depend on BIGNUM internal fields, so they might require tweaking if |
55 | // the BIGNUM implementation changes significantly. |
56 | |
57 | // Set a BIGNUM to the given unsigned 64-bit value. |
58 | inline static void BN_ext_set_uint64(BIGNUM* bn, uint64 v) { |
59 | #if BN_BITS2 == 64 |
60 | CHECK(BN_set_word(bn, v)); |
61 | #else |
62 | COMPILE_ASSERT(BN_BITS2 == 32, at_least_32_bit_openssl_build_needed); |
63 | CHECK(BN_set_word(bn, static_cast<uint32>(v >> 32))); |
64 | CHECK(BN_lshift(bn, bn, 32)); |
65 | CHECK(BN_add_word(bn, static_cast<uint32>(v))); |
66 | #endif |
67 | } |
68 | |
69 | // Return the absolute value of a BIGNUM as a 64-bit unsigned integer. |
70 | // Requires that BIGNUM fits into 64 bits. |
71 | inline static uint64 BN_ext_get_uint64(const BIGNUM* bn) { |
72 | DCHECK_LE(BN_num_bytes(bn), sizeof(uint64)); |
73 | #if BN_BITS2 == 64 |
74 | return BN_get_word(bn); |
75 | #else |
76 | COMPILE_ASSERT(BN_BITS2 == 32, at_least_32_bit_openssl_build_needed); |
77 | if (bn->top == 0) return 0; |
78 | if (bn->top == 1) return BN_get_word(bn); |
79 | DCHECK_EQ(bn->top, 2); |
80 | return (static_cast<uint64>(bn->d[1]) << 32) + bn->d[0]; |
81 | #endif |
82 | } |
83 | |
84 | // Count the number of low-order zero bits in the given BIGNUM (ignoring its |
85 | // sign). Returns 0 if the argument is zero. |
86 | static int BN_ext_count_low_zero_bits(const BIGNUM* bn) { |
87 | int count = 0; |
88 | for (int i = 0; i < bn->top; ++i) { |
89 | BN_ULONG w = bn->d[i]; |
90 | if (w == 0) { |
91 | count += 8 * sizeof(BN_ULONG); |
92 | } else { |
93 | for (; (w & 1) == 0; w >>= 1) { |
94 | ++count; |
95 | } |
96 | break; |
97 | } |
98 | } |
99 | return count; |
100 | } |
101 | |
102 | ExactFloat::ExactFloat(double v) { |
103 | BN_init(&bn_); |
104 | sign_ = signbit(v) ? -1 : 1; |
105 | if (isnan(v)) { |
106 | set_nan(); |
107 | } else if (isinf(v)) { |
108 | set_inf(sign_); |
109 | } else { |
110 | // The following code is much simpler than messing about with bit masks, |
111 | // has the advantage of handling denormalized numbers and zero correctly, |
112 | // and is actually quite efficient (at least compared to the rest of this |
113 | // code). "f" is a fraction in the range [0.5, 1), so if we shift it left |
114 | // by the number of mantissa bits in a double (53, including the leading |
115 | // "1") then the result is always an integer. |
116 | int exp; |
117 | double f = frexp(fabs(v), &exp); |
118 | uint64 m = static_cast<uint64>(ldexp(f, kDoubleMantissaBits)); |
119 | BN_ext_set_uint64(&bn_, m); |
120 | bn_exp_ = exp - kDoubleMantissaBits; |
121 | Canonicalize(); |
122 | } |
123 | } |
124 | |
125 | ExactFloat::ExactFloat(int v) { |
126 | BN_init(&bn_); |
127 | sign_ = (v >= 0) ? 1 : -1; |
128 | // Note that this works even for INT_MIN because the parameter type for |
129 | // BN_set_word() is unsigned. |
130 | CHECK(BN_set_word(&bn_, abs(v))); |
131 | bn_exp_ = 0; |
132 | Canonicalize(); |
133 | } |
134 | |
135 | ExactFloat::ExactFloat(const ExactFloat& b) |
136 | : sign_(b.sign_), |
137 | bn_exp_(b.bn_exp_) { |
138 | BN_init(&bn_); |
139 | BN_copy(&bn_, &b.bn_); |
140 | } |
141 | |
142 | ExactFloat ExactFloat::SignedZero(int sign) { |
143 | ExactFloat r; |
144 | r.set_zero(sign); |
145 | return r; |
146 | } |
147 | |
148 | ExactFloat ExactFloat::Infinity(int sign) { |
149 | ExactFloat r; |
150 | r.set_inf(sign); |
151 | return r; |
152 | } |
153 | |
154 | ExactFloat ExactFloat::NaN() { |
155 | ExactFloat r; |
156 | r.set_nan(); |
157 | return r; |
158 | } |
159 | |
160 | int ExactFloat::prec() const { |
161 | return BN_num_bits(&bn_); |
162 | } |
163 | |
164 | int ExactFloat::exp() const { |
165 | DCHECK(is_normal()); |
166 | return bn_exp_ + BN_num_bits(&bn_); |
167 | } |
168 | |
169 | void ExactFloat::set_zero(int sign) { |
170 | sign_ = sign; |
171 | bn_exp_ = kExpZero; |
172 | if (!BN_is_zero(&bn_)) BN_zero(&bn_); |
173 | } |
174 | |
175 | void ExactFloat::set_inf(int sign) { |
176 | sign_ = sign; |
177 | bn_exp_ = kExpInfinity; |
178 | if (!BN_is_zero(&bn_)) BN_zero(&bn_); |
179 | } |
180 | |
181 | void ExactFloat::set_nan() { |
182 | sign_ = 1; |
183 | bn_exp_ = kExpNaN; |
184 | if (!BN_is_zero(&bn_)) BN_zero(&bn_); |
185 | } |
186 | |
187 | double ExactFloat::ToDouble() const { |
188 | // If the mantissa has too many bits, we need to round it. |
189 | if (prec() <= kDoubleMantissaBits) { |
190 | return ToDoubleHelper(); |
191 | } else { |
192 | ExactFloat r = RoundToMaxPrec(kDoubleMantissaBits, kRoundTiesToEven); |
193 | return r.ToDoubleHelper(); |
194 | } |
195 | } |
196 | |
197 | double ExactFloat::ToDoubleHelper() const { |
198 | DCHECK_LE(BN_num_bits(&bn_), kDoubleMantissaBits); |
199 | if (!is_normal()) { |
200 | if (is_zero()) return copysign(0, sign_); |
201 | if (is_inf()) return copysign(INFINITY, sign_); |
202 | return copysign(NAN, sign_); |
203 | } |
204 | uint64 d_mantissa = BN_ext_get_uint64(&bn_); |
205 | // We rely on ldexp() to handle overflow and underflow. (It will return a |
206 | // signed zero or infinity if the result is too small or too large.) |
207 | return sign_ * ldexp(static_cast<double>(d_mantissa), bn_exp_); |
208 | } |
209 | |
210 | ExactFloat ExactFloat::RoundToMaxPrec(int max_prec, RoundingMode mode) const { |
211 | // The "kRoundTiesToEven" mode requires at least 2 bits of precision |
212 | // (otherwise both adjacent representable values may be odd). |
213 | DCHECK_GE(max_prec, 2); |
214 | DCHECK_LE(max_prec, kMaxPrec); |
215 | |
216 | // The following test also catches zero, infinity, and NaN. |
217 | int shift = prec() - max_prec; |
218 | if (shift <= 0) return *this; |
219 | |
220 | // Round by removing the appropriate number of bits from the mantissa. Note |
221 | // that if the value is rounded up to a power of 2, the high-order bit |
222 | // position may increase, but in that case Canonicalize() will remove at |
223 | // least one zero bit and so the output will still have prec() <= max_prec. |
224 | return RoundToPowerOf2(bn_exp_ + shift, mode); |
225 | } |
226 | |
227 | ExactFloat ExactFloat::RoundToPowerOf2(int bit_exp, RoundingMode mode) const { |
228 | DCHECK_GE(bit_exp, kMinExp - kMaxPrec); |
229 | DCHECK_LE(bit_exp, kMaxExp); |
230 | |
231 | // If the exponent is already large enough, or the value is zero, infinity, |
232 | // or NaN, then there is nothing to do. |
233 | int shift = bit_exp - bn_exp_; |
234 | if (shift <= 0) return *this; |
235 | DCHECK(is_normal()); |
236 | |
237 | // Convert rounding up/down to toward/away from zero, so that we don't need |
238 | // to consider the sign of the number from this point onward. |
239 | if (mode == kRoundTowardPositive) { |
240 | mode = (sign_ > 0) ? kRoundAwayFromZero : kRoundTowardZero; |
241 | } else if (mode == kRoundTowardNegative) { |
242 | mode = (sign_ > 0) ? kRoundTowardZero : kRoundAwayFromZero; |
243 | } |
244 | |
245 | // Rounding consists of right-shifting the mantissa by "shift", and then |
246 | // possibly incrementing the result (depending on the rounding mode, the |
247 | // bits that were discarded, and sometimes the lowest kept bit). The |
248 | // following code figures out whether we need to increment. |
249 | ExactFloat r; |
250 | bool increment = false; |
251 | if (mode == kRoundTowardZero) { |
252 | // Never increment. |
253 | } else if (mode == kRoundTiesAwayFromZero) { |
254 | // Increment if the highest discarded bit is 1. |
255 | if (BN_is_bit_set(&bn_, shift - 1)) |
256 | increment = true; |
257 | } else if (mode == kRoundAwayFromZero) { |
258 | // Increment unless all discarded bits are zero. |
259 | if (BN_ext_count_low_zero_bits(&bn_) < shift) |
260 | increment = true; |
261 | } else { |
262 | DCHECK_EQ(mode, kRoundTiesToEven); |
263 | // Let "w/xyz" denote a mantissa where "w" is the lowest kept bit and |
264 | // "xyz" are the discarded bits. Then using regexp notation: |
265 | // ./0.* -> Don't increment (fraction < 1/2) |
266 | // 0/10* -> Don't increment (fraction = 1/2, kept part even) |
267 | // 1/10* -> Increment (fraction = 1/2, kept part odd) |
268 | // ./1.*1.* -> Increment (fraction > 1/2) |
269 | if (BN_is_bit_set(&bn_, shift - 1) && |
270 | ((BN_is_bit_set(&bn_, shift) || |
271 | BN_ext_count_low_zero_bits(&bn_) < shift - 1))) { |
272 | increment = true; |
273 | } |
274 | } |
275 | r.bn_exp_ = bn_exp_ + shift; |
276 | CHECK(BN_rshift(&r.bn_, &bn_, shift)); |
277 | if (increment) { |
278 | CHECK(BN_add_word(&r.bn_, 1)); |
279 | } |
280 | r.sign_ = sign_; |
281 | r.Canonicalize(); |
282 | return r; |
283 | } |
284 | |
285 | int ExactFloat::NumSignificantDigitsForPrec(int prec) { |
286 | // The simplest bound is |
287 | // |
288 | // d <= 1 + ceil(prec * log10(2)) |
289 | // |
290 | // The following bound is tighter by 0.5 digits on average, but requires |
291 | // the exponent to be known as well: |
292 | // |
293 | // d <= ceil(exp * log10(2)) - floor((exp - prec) * log10(2)) |
294 | // |
295 | // Since either of these bounds can be too large by 0, 1, or 2 digits, we |
296 | // stick with the simpler first bound. |
297 | return static_cast<int>(1 + ceil(prec * (M_LN2 / M_LN10))); |
298 | } |
299 | |
300 | // Numbers are always formatted with at least this many significant digits. |
301 | // This prevents small integers from being formatted in exponential notation |
302 | // (e.g. 1024 formatted as 1e+03), and also avoids the confusion of having |
303 | // supposedly "high precision" numbers formatted with just 1 or 2 digits |
304 | // (e.g. 1/512 == 0.001953125 formatted as 0.002). |
305 | static const int kMinSignificantDigits = 10; |
306 | |
307 | string ExactFloat::ToString() const { |
308 | int max_digits = max(kMinSignificantDigits, |
309 | NumSignificantDigitsForPrec(prec())); |
310 | return ToStringWithMaxDigits(max_digits); |
311 | } |
312 | |
313 | string ExactFloat::ToStringWithMaxDigits(int max_digits) const { |
314 | DCHECK_GT(max_digits, 0); |
315 | if (!is_normal()) { |
316 | if (is_nan()) return "nan" ; |
317 | if (is_zero()) return (sign_ < 0) ? "-0" : "0" ; |
318 | return (sign_ < 0) ? "-inf" : "inf" ; |
319 | } |
320 | string digits; |
321 | int exp10 = GetDecimalDigits(max_digits, &digits); |
322 | string str; |
323 | if (sign_ < 0) str.push_back('-'); |
324 | |
325 | // We use the standard '%g' formatting rules. If the exponent is less than |
326 | // -4 or greater than or equal to the requested precision (i.e., max_digits) |
327 | // then we use exponential notation. |
328 | // |
329 | // But since "exp10" is the base-10 exponent corresponding to a mantissa in |
330 | // the range [0.1, 1), whereas the '%g' rules assume a mantissa in the range |
331 | // [1.0, 10), we need to adjust these parameters by 1. |
332 | if (exp10 <= -4 || exp10 > max_digits) { |
333 | // Use exponential format. |
334 | str.push_back(digits[0]); |
335 | if (digits.size() > 1) { |
336 | str.push_back('.'); |
337 | str.append(digits.begin() + 1, digits.end()); |
338 | } |
339 | char exp_buf[20]; |
340 | sprintf(exp_buf, "e%+02d" , exp10 - 1); |
341 | str += exp_buf; |
342 | } else { |
343 | // Use fixed format. We split this into two cases depending on whether |
344 | // the integer portion is non-zero or not. |
345 | if (exp10 > 0) { |
346 | if ((size_t)exp10 >= digits.size()) { |
347 | str += digits; |
348 | for (int i = exp10 - digits.size(); i > 0; --i) { |
349 | str.push_back('0'); |
350 | } |
351 | } else { |
352 | str.append(digits.begin(), digits.begin() + exp10); |
353 | str.push_back('.'); |
354 | str.append(digits.begin() + exp10, digits.end()); |
355 | } |
356 | } else { |
357 | str += "0." ; |
358 | for (int i = exp10; i < 0; ++i) { |
359 | str.push_back('0'); |
360 | } |
361 | str += digits; |
362 | } |
363 | } |
364 | return str; |
365 | } |
366 | |
// Adds one to an unsigned integer stored as a string of ASCII digits,
// growing the string by one digit when every digit was '9' ("999" becomes
// "1000").  An empty string is treated as zero and becomes "1".
static void IncrementDecimalDigits(std::string* digits) {
  // Walk from the least significant digit using reverse iterators.  This
  // never forms an iterator before begin(), which is undefined behavior, and
  // is also safe for an empty string.
  for (std::string::reverse_iterator it = digits->rbegin();
       it != digits->rend(); ++it) {
    if (*it < '9') {
      ++*it;
      return;
    }
    // A '9' wraps to '0' and the carry moves to the next digit.
    *it = '0';
  }
  // The carry ran past the most significant digit.
  digits->insert(0, "1");
}
376 | |
377 | int ExactFloat::GetDecimalDigits(int max_digits, string* digits) const { |
378 | DCHECK(is_normal()); |
379 | // Convert the value to the form (bn * (10 ** bn_exp10)) where "bn" is a |
380 | // positive integer (BIGNUM). |
381 | BIGNUM* bn = BN_new(); |
382 | int bn_exp10; |
383 | if (bn_exp_ >= 0) { |
384 | // The easy case: bn = bn_ * (2 ** bn_exp_)), bn_exp10 = 0. |
385 | CHECK(BN_lshift(bn, &bn_, bn_exp_)); |
386 | bn_exp10 = 0; |
387 | } else { |
388 | // Set bn = bn_ * (5 ** -bn_exp_) and bn_exp10 = bn_exp_. This is |
389 | // equivalent to the original value of (bn_ * (2 ** bn_exp_)). |
390 | BIGNUM* power = BN_new(); |
391 | CHECK(BN_set_word(power, -bn_exp_)); |
392 | CHECK(BN_set_word(bn, 5)); |
393 | BN_CTX* ctx = BN_CTX_new(); |
394 | CHECK(BN_exp(bn, bn, power, ctx)); |
395 | CHECK(BN_mul(bn, bn, &bn_, ctx)); |
396 | BN_CTX_free(ctx); |
397 | BN_free(power); |
398 | bn_exp10 = bn_exp_; |
399 | } |
400 | // Now convert "bn" to a decimal string. |
401 | char* all_digits = BN_bn2dec(bn); |
402 | DCHECK(all_digits != NULL); |
403 | BN_free(bn); |
404 | // Check whether we have too many digits and round if necessary. |
405 | int num_digits = strlen(all_digits); |
406 | if (num_digits <= max_digits) { |
407 | *digits = all_digits; |
408 | } else { |
409 | digits->assign(all_digits, max_digits); |
410 | // Standard "printf" formatting rounds ties to an even number. This means |
411 | // that we round up (away from zero) if highest discarded digit is '5' or |
412 | // more, unless all other discarded digits are zero in which case we round |
413 | // up only if the lowest kept digit is odd. |
414 | if (all_digits[max_digits] >= '5' && |
415 | ((all_digits[max_digits-1] & 1) == 1 || |
416 | strpbrk(all_digits + max_digits + 1, "123456789" ) != NULL)) { |
417 | // This can increase the number of digits by 1, but in that case at |
418 | // least one trailing zero will be stripped off below. |
419 | IncrementDecimalDigits(digits); |
420 | } |
421 | // Adjust the base-10 exponent to reflect the digits we have removed. |
422 | bn_exp10 += num_digits - max_digits; |
423 | } |
424 | OPENSSL_free(all_digits); |
425 | |
426 | // Now strip any trailing zeros. |
427 | DCHECK_NE((*digits)[0], '0'); |
428 | string::iterator pos = digits->end(); |
429 | while (pos[-1] == '0') --pos; |
430 | if (pos < digits->end()) { |
431 | bn_exp10 += digits->end() - pos; |
432 | digits->erase(pos, digits->end()); |
433 | } |
434 | DCHECK_LE(digits->size(), max_digits); |
435 | |
436 | // Finally, we adjust the base-10 exponent so that the mantissa is a |
437 | // fraction in the range [0.1, 1) rather than an integer. |
438 | return bn_exp10 + digits->size(); |
439 | } |
440 | |
441 | string ExactFloat::ToUniqueString() const { |
442 | char prec_buf[20]; |
443 | sprintf(prec_buf, "<%d>" , prec()); |
444 | return ToString() + prec_buf; |
445 | } |
446 | |
447 | ExactFloat& ExactFloat::operator=(const ExactFloat& b) { |
448 | if (this != &b) { |
449 | sign_ = b.sign_; |
450 | bn_exp_ = b.bn_exp_; |
451 | BN_copy(&bn_, &b.bn_); |
452 | } |
453 | return *this; |
454 | } |
455 | |
456 | ExactFloat ExactFloat::operator-() const { |
457 | return CopyWithSign(-sign_); |
458 | } |
459 | |
460 | ExactFloat operator+(const ExactFloat& a, const ExactFloat& b) { |
461 | return ExactFloat::SignedSum(a.sign_, &a, b.sign_, &b); |
462 | } |
463 | |
464 | ExactFloat operator-(const ExactFloat& a, const ExactFloat& b) { |
465 | return ExactFloat::SignedSum(a.sign_, &a, -b.sign_, &b); |
466 | } |
467 | |
468 | ExactFloat ExactFloat::SignedSum(int a_sign, const ExactFloat* a, |
469 | int b_sign, const ExactFloat* b) { |
470 | if (!a->is_normal() || !b->is_normal()) { |
471 | // Handle zero, infinity, and NaN according to IEEE 754-2008. |
472 | if (a->is_nan()) return *a; |
473 | if (b->is_nan()) return *b; |
474 | if (a->is_inf()) { |
475 | // Adding two infinities with opposite sign yields NaN. |
476 | if (b->is_inf() && a_sign != b_sign) return NaN(); |
477 | return Infinity(a_sign); |
478 | } |
479 | if (b->is_inf()) return Infinity(b_sign); |
480 | if (a->is_zero()) { |
481 | if (!b->is_zero()) return b->CopyWithSign(b_sign); |
482 | // Adding two zeros with the same sign preserves the sign. |
483 | if (a_sign == b_sign) return SignedZero(a_sign); |
484 | // Adding two zeros of opposite sign produces +0. |
485 | return SignedZero(+1); |
486 | } |
487 | DCHECK(b->is_zero()); |
488 | return a->CopyWithSign(a_sign); |
489 | } |
490 | // Swap the numbers if necessary so that "a" has the larger bn_exp_. |
491 | if (a->bn_exp_ < b->bn_exp_) { |
492 | swap(a_sign, b_sign); |
493 | swap(a, b); |
494 | } |
495 | // Shift "a" if necessary so that both values have the same bn_exp_. |
496 | ExactFloat r; |
497 | if (a->bn_exp_ > b->bn_exp_) { |
498 | CHECK(BN_lshift(&r.bn_, &a->bn_, a->bn_exp_ - b->bn_exp_)); |
499 | a = &r; // The only field of "a" used below is bn_. |
500 | } |
501 | r.bn_exp_ = b->bn_exp_; |
502 | if (a_sign == b_sign) { |
503 | CHECK(BN_add(&r.bn_, &a->bn_, &b->bn_)); |
504 | r.sign_ = a_sign; |
505 | } else { |
506 | // Note that the BIGNUM documentation is out of date -- all methods now |
507 | // allow the result to be the same as any input argument, so it is okay if |
508 | // (a == &r) due to the shift above. |
509 | CHECK(BN_sub(&r.bn_, &a->bn_, &b->bn_)); |
510 | if (BN_is_zero(&r.bn_)) { |
511 | r.sign_ = +1; |
512 | } else if (BN_is_negative(&r.bn_)) { |
513 | // The magnitude of "b" was larger. |
514 | r.sign_ = b_sign; |
515 | BN_set_negative(&r.bn_, false); |
516 | } else { |
517 | // They were equal, or the magnitude of "a" was larger. |
518 | r.sign_ = a_sign; |
519 | } |
520 | } |
521 | r.Canonicalize(); |
522 | return r; |
523 | } |
524 | |
525 | void ExactFloat::Canonicalize() { |
526 | if (!is_normal()) return; |
527 | |
528 | // Underflow/overflow occurs if exp() is not in [kMinExp, kMaxExp]. |
529 | // We also convert a zero mantissa to signed zero. |
530 | int my_exp = exp(); |
531 | if (my_exp < kMinExp || BN_is_zero(&bn_)) { |
532 | set_zero(sign_); |
533 | } else if (my_exp > kMaxExp) { |
534 | set_inf(sign_); |
535 | } else if (!BN_is_odd(&bn_)) { |
536 | // Remove any low-order zero bits from the mantissa. |
537 | DCHECK(!BN_is_zero(&bn_)); |
538 | int shift = BN_ext_count_low_zero_bits(&bn_); |
539 | if (shift > 0) { |
540 | CHECK(BN_rshift(&bn_, &bn_, shift)); |
541 | bn_exp_ += shift; |
542 | } |
543 | } |
544 | // If the mantissa has too many bits, we replace it by NaN to indicate |
545 | // that an inexact calculation has occurred. |
546 | if (prec() > kMaxPrec) { |
547 | set_nan(); |
548 | } |
549 | } |
550 | |
551 | ExactFloat operator*(const ExactFloat& a, const ExactFloat& b) { |
552 | int result_sign = a.sign_ * b.sign_; |
553 | if (!a.is_normal() || !b.is_normal()) { |
554 | // Handle zero, infinity, and NaN according to IEEE 754-2008. |
555 | if (a.is_nan()) return a; |
556 | if (b.is_nan()) return b; |
557 | if (a.is_inf()) { |
558 | // Infinity times zero yields NaN. |
559 | if (b.is_zero()) return ExactFloat::NaN(); |
560 | return ExactFloat::Infinity(result_sign); |
561 | } |
562 | if (b.is_inf()) { |
563 | if (a.is_zero()) return ExactFloat::NaN(); |
564 | return ExactFloat::Infinity(result_sign); |
565 | } |
566 | DCHECK(a.is_zero() || b.is_zero()); |
567 | return ExactFloat::SignedZero(result_sign); |
568 | } |
569 | ExactFloat r; |
570 | r.sign_ = result_sign; |
571 | r.bn_exp_ = a.bn_exp_ + b.bn_exp_; |
572 | BN_CTX* ctx = BN_CTX_new(); |
573 | CHECK(BN_mul(&r.bn_, &a.bn_, &b.bn_, ctx)); |
574 | BN_CTX_free(ctx); |
575 | r.Canonicalize(); |
576 | return r; |
577 | } |
578 | |
579 | bool operator==(const ExactFloat& a, const ExactFloat& b) { |
580 | // NaN is not equal to anything, not even itself. |
581 | if (a.is_nan() || b.is_nan()) return false; |
582 | |
583 | // Since Canonicalize() strips low-order zero bits, all other cases |
584 | // (including non-normal values) require bn_exp_ to be equal. |
585 | if (a.bn_exp_ != b.bn_exp_) return false; |
586 | |
587 | // Positive and negative zero are equal. |
588 | if (a.is_zero() && b.is_zero()) return true; |
589 | |
590 | // Otherwise, the signs and mantissas must match. Note that non-normal |
591 | // values such as infinity have a mantissa of zero. |
592 | return a.sign_ == b.sign_ && BN_ucmp(&a.bn_, &b.bn_) == 0; |
593 | } |
594 | |
595 | int ExactFloat::ScaleAndCompare(const ExactFloat& b) const { |
596 | DCHECK(is_normal() && b.is_normal() && bn_exp_ >= b.bn_exp_); |
597 | ExactFloat tmp = *this; |
598 | CHECK(BN_lshift(&tmp.bn_, &tmp.bn_, bn_exp_ - b.bn_exp_)); |
599 | return BN_ucmp(&tmp.bn_, &b.bn_); |
600 | } |
601 | |
602 | bool ExactFloat::UnsignedLess(const ExactFloat& b) const { |
603 | // Handle the zero/infinity cases (NaN has already been done). |
604 | if (is_inf() || b.is_zero()) return false; |
605 | if (is_zero() || b.is_inf()) return true; |
606 | // If the high-order bit positions differ, we are done. |
607 | int cmp = exp() - b.exp(); |
608 | if (cmp != 0) return cmp < 0; |
609 | // Otherwise shift one of the two values so that they both have the same |
610 | // bn_exp_ and then compare the mantissas. |
611 | return (bn_exp_ >= b.bn_exp_ ? |
612 | ScaleAndCompare(b) < 0 : b.ScaleAndCompare(*this) > 0); |
613 | } |
614 | |
615 | bool operator<(const ExactFloat& a, const ExactFloat& b) { |
616 | // NaN is unordered compared to everything, including itself. |
617 | if (a.is_nan() || b.is_nan()) return false; |
618 | // Positive and negative zero are equal. |
619 | if (a.is_zero() && b.is_zero()) return false; |
620 | // Otherwise, anything negative is less than anything positive. |
621 | if (a.sign_ != b.sign_) return a.sign_ < b.sign_; |
622 | // Now we just compare absolute values. |
623 | return (a.sign_ > 0) ? a.UnsignedLess(b) : b.UnsignedLess(a); |
624 | } |
625 | |
626 | ExactFloat fabs(const ExactFloat& a) { |
627 | return a.CopyWithSign(+1); |
628 | } |
629 | |
630 | ExactFloat fmax(const ExactFloat& a, const ExactFloat& b) { |
631 | // If one argument is NaN, return the other argument. |
632 | if (a.is_nan()) return b; |
633 | if (b.is_nan()) return a; |
634 | // Not required by IEEE 754, but we prefer +0 over -0. |
635 | if (a.sign_ != b.sign_) { |
636 | return (a.sign_ < b.sign_) ? b : a; |
637 | } |
638 | return (a < b) ? b : a; |
639 | } |
640 | |
641 | ExactFloat fmin(const ExactFloat& a, const ExactFloat& b) { |
642 | // If one argument is NaN, return the other argument. |
643 | if (a.is_nan()) return b; |
644 | if (b.is_nan()) return a; |
645 | // Not required by IEEE 754, but we prefer -0 over +0. |
646 | if (a.sign_ != b.sign_) { |
647 | return (a.sign_ < b.sign_) ? a : b; |
648 | } |
649 | return (a < b) ? a : b; |
650 | } |
651 | |
652 | ExactFloat fdim(const ExactFloat& a, const ExactFloat& b) { |
653 | // This formulation has the correct behavior for NaNs. |
654 | return (a <= b) ? 0 : (a - b); |
655 | } |
656 | |
657 | ExactFloat ceil(const ExactFloat& a) { |
658 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardPositive); |
659 | } |
660 | |
661 | ExactFloat floor(const ExactFloat& a) { |
662 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardNegative); |
663 | } |
664 | |
665 | ExactFloat trunc(const ExactFloat& a) { |
666 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTowardZero); |
667 | } |
668 | |
669 | ExactFloat round(const ExactFloat& a) { |
670 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesAwayFromZero); |
671 | } |
672 | |
673 | ExactFloat rint(const ExactFloat& a) { |
674 | return a.RoundToPowerOf2(0, ExactFloat::kRoundTiesToEven); |
675 | } |
676 | |
677 | template <class T> |
678 | T ExactFloat::ToInteger(RoundingMode mode) const { |
679 | COMPILE_ASSERT(sizeof(T) <= sizeof(uint64), max_64_bits_supported); |
680 | COMPILE_ASSERT(numeric_limits<T>::is_signed, only_signed_types_supported); |
681 | const int64 kMinValue = numeric_limits<T>::min(); |
682 | const int64 kMaxValue = numeric_limits<T>::max(); |
683 | |
684 | ExactFloat r = RoundToPowerOf2(0, mode); |
685 | if (r.is_nan()) return kMaxValue; |
686 | if (r.is_zero()) return 0; |
687 | if (!r.is_inf()) { |
688 | // If the unsigned value has more than 63 bits it is always clamped. |
689 | if (r.exp() < 64) { |
690 | int64 value = BN_ext_get_uint64(&r.bn_) << r.bn_exp_; |
691 | if (r.sign_ < 0) value = -value; |
692 | return max(kMinValue, min(kMaxValue, value)); |
693 | } |
694 | } |
695 | return (r.sign_ < 0) ? kMinValue : kMaxValue; |
696 | } |
697 | |
698 | long lrint(const ExactFloat& a) { |
699 | return a.ToInteger<long>(ExactFloat::kRoundTiesToEven); |
700 | } |
701 | |
702 | long long llrint(const ExactFloat& a) { |
703 | return a.ToInteger<long long>(ExactFloat::kRoundTiesToEven); |
704 | } |
705 | |
706 | long lround(const ExactFloat& a) { |
707 | return a.ToInteger<long>(ExactFloat::kRoundTiesAwayFromZero); |
708 | } |
709 | |
710 | long long llround(const ExactFloat& a) { |
711 | return a.ToInteger<long long>(ExactFloat::kRoundTiesAwayFromZero); |
712 | } |
713 | |
714 | ExactFloat copysign(const ExactFloat& a, const ExactFloat& b) { |
715 | return a.CopyWithSign(b.sign_); |
716 | } |
717 | |
718 | ExactFloat frexp(const ExactFloat& a, int* exp) { |
719 | if (!a.is_normal()) { |
720 | // If a == 0, exp should be zero. If a.is_inf() or a.is_nan(), exp is not |
721 | // defined but the glibc implementation returns zero. |
722 | *exp = 0; |
723 | return a; |
724 | } |
725 | *exp = a.exp(); |
726 | return ldexp(a, -a.exp()); |
727 | } |
728 | |
729 | ExactFloat ldexp(const ExactFloat& a, int exp) { |
730 | if (!a.is_normal()) return a; |
731 | |
732 | // To prevent integer overflow, we first clamp "exp" so that |
733 | // (kMinExp - 1) <= (a_exp + exp) <= (kMaxExp + 1). |
734 | int a_exp = a.exp(); |
735 | exp = min(ExactFloat::kMaxExp + 1 - a_exp, |
736 | max(ExactFloat::kMinExp - 1 + a_exp, exp)); |
737 | |
738 | // Now modify the exponent and check for overflow/underflow. |
739 | ExactFloat r = a; |
740 | r.bn_exp_ += exp; |
741 | r.Canonicalize(); |
742 | return r; |
743 | } |
744 | |
745 | int ilogb(const ExactFloat& a) { |
746 | if (a.is_zero()) return FP_ILOGB0; |
747 | if (a.is_inf()) return INT_MAX; |
748 | if (a.is_nan()) return FP_ILOGBNAN; |
749 | // a.exp() assumes the significand is in the range [0.5, 1). |
750 | return a.exp() - 1; |
751 | } |
752 | |
753 | ExactFloat logb(const ExactFloat& a) { |
754 | if (a.is_zero()) return ExactFloat::Infinity(-1); |
755 | if (a.is_inf()) return ExactFloat::Infinity(+1); // Even if a < 0. |
756 | if (a.is_nan()) return a; |
757 | // exp() assumes the significand is in the range [0.5,1). |
758 | return ExactFloat(a.exp() - 1); |
759 | } |
760 | |
761 | ExactFloat ExactFloat::Unimplemented() { |
762 | LOG(FATAL) << "Unimplemented ExactFloat method called" ; |
763 | return NaN(); |
764 | } |
765 | |