1// Copyright 2018 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
16#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
17
18#include <algorithm>
19#include <cstdint>
20#include <iostream>
21#include <string>
22
23#include "absl/strings/ascii.h"
24#include "absl/strings/internal/charconv_parse.h"
25#include "absl/strings/string_view.h"
26
27namespace absl {
28namespace strings_internal {
29
30// The largest power that 5 that can be raised to, and still fit in a uint32_t.
31constexpr int kMaxSmallPowerOfFive = 13;
32// The largest power that 10 that can be raised to, and still fit in a uint32_t.
33constexpr int kMaxSmallPowerOfTen = 9;
34
35extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1];
36extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1];
37
38// Large, fixed-width unsigned integer.
39//
40// Exact rounding for decimal-to-binary floating point conversion requires very
41// large integer math, but a design goal of absl::from_chars is to avoid
42// allocating memory. The integer precision needed for decimal-to-binary
43// conversions is large but bounded, so a huge fixed-width integer class
44// suffices.
45//
46// This is an intentionally limited big integer class. Only needed operations
47// are implemented. All storage lives in an array data member, and all
48// arithmetic is done in-place, to avoid requiring separate storage for operand
49// and result.
50//
51// This is an internal class. Some methods live in the .cc file, and are
52// instantiated only for the values of max_words we need.
53template <int max_words>
54class BigUnsigned {
55 public:
56 static_assert(max_words == 4 || max_words == 84,
57 "unsupported max_words value");
58
59 BigUnsigned() : size_(0), words_{} {}
60 explicit constexpr BigUnsigned(uint64_t v)
61 : size_((v >> 32) ? 2 : v ? 1 : 0),
62 words_{static_cast<uint32_t>(v & 0xffffffffu),
63 static_cast<uint32_t>(v >> 32)} {}
64
65 // Constructs a BigUnsigned from the given string_view containing a decimal
66 // value. If the input std::string is not a decimal integer, constructs a 0
67 // instead.
68 explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} {
69 // Check for valid input, returning a 0 otherwise. This is reasonable
70 // behavior only because this constructor is for unit tests.
71 if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() ||
72 sv.empty()) {
73 return;
74 }
75 int exponent_adjust =
76 ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1);
77 if (exponent_adjust > 0) {
78 MultiplyByTenToTheNth(exponent_adjust);
79 }
80 }
81
82 // Loads the mantissa value of a previously-parsed float.
83 //
84 // Returns the associated decimal exponent. The value of the parsed float is
85 // exactly *this * 10**exponent.
86 int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
87
88 // Returns the number of decimal digits of precision this type provides. All
89 // numbers with this many decimal digits or fewer are representable by this
90 // type.
91 //
92 // Analagous to std::numeric_limits<BigUnsigned>::digits10.
93 static constexpr int Digits10() {
94 // 9975007/1035508 is very slightly less than log10(2**32).
95 return static_cast<uint64_t>(max_words) * 9975007 / 1035508;
96 }
97
98 // Shifts left by the given number of bits.
99 void ShiftLeft(int count) {
100 if (count > 0) {
101 const int word_shift = count / 32;
102 if (word_shift >= max_words) {
103 SetToZero();
104 return;
105 }
106 size_ = (std::min)(size_ + word_shift, max_words);
107 count %= 32;
108 if (count == 0) {
109 std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
110 } else {
111 for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) {
112 words_[i] = (words_[i - word_shift] << count) |
113 (words_[i - word_shift - 1] >> (32 - count));
114 }
115 words_[word_shift] = words_[0] << count;
116 // Grow size_ if necessary.
117 if (size_ < max_words && words_[size_]) {
118 ++size_;
119 }
120 }
121 std::fill(words_, words_ + word_shift, 0u);
122 }
123 }
124
125
126 // Multiplies by v in-place.
127 void MultiplyBy(uint32_t v) {
128 if (size_ == 0 || v == 1) {
129 return;
130 }
131 if (v == 0) {
132 SetToZero();
133 return;
134 }
135 const uint64_t factor = v;
136 uint64_t window = 0;
137 for (int i = 0; i < size_; ++i) {
138 window += factor * words_[i];
139 words_[i] = window & 0xffffffff;
140 window >>= 32;
141 }
142 // If carry bits remain and there's space for them, grow size_.
143 if (window && size_ < max_words) {
144 words_[size_] = window & 0xffffffff;
145 ++size_;
146 }
147 }
148
149 void MultiplyBy(uint64_t v) {
150 uint32_t words[2];
151 words[0] = static_cast<uint32_t>(v);
152 words[1] = static_cast<uint32_t>(v >> 32);
153 if (words[1] == 0) {
154 MultiplyBy(words[0]);
155 } else {
156 MultiplyBy(2, words);
157 }
158 }
159
160 // Multiplies in place by 5 to the power of n. n must be non-negative.
161 void MultiplyByFiveToTheNth(int n) {
162 while (n >= kMaxSmallPowerOfFive) {
163 MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
164 n -= kMaxSmallPowerOfFive;
165 }
166 if (n > 0) {
167 MultiplyBy(kFiveToNth[n]);
168 }
169 }
170
171 // Multiplies in place by 10 to the power of n. n must be non-negative.
172 void MultiplyByTenToTheNth(int n) {
173 if (n > kMaxSmallPowerOfTen) {
174 // For large n, raise to a power of 5, then shift left by the same amount.
175 // (10**n == 5**n * 2**n.) This requires fewer multiplications overall.
176 MultiplyByFiveToTheNth(n);
177 ShiftLeft(n);
178 } else if (n > 0) {
179 // We can do this more quickly for very small N by using a single
180 // multiplication.
181 MultiplyBy(kTenToNth[n]);
182 }
183 }
184
185 // Returns the value of 5**n, for non-negative n. This implementation uses
186 // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
187 // MultiplyByFiveToTheNth().
188 static BigUnsigned FiveToTheNth(int n);
189
190 // Multiplies by another BigUnsigned, in-place.
191 template <int M>
192 void MultiplyBy(const BigUnsigned<M>& other) {
193 MultiplyBy(other.size(), other.words());
194 }
195
196 void SetToZero() {
197 std::fill(words_, words_ + size_, 0u);
198 size_ = 0;
199 }
200
201 // Returns the value of the nth word of this BigUnsigned. This is
202 // range-checked, and returns 0 on out-of-bounds accesses.
203 uint32_t GetWord(int index) const {
204 if (index < 0 || index >= size_) {
205 return 0;
206 }
207 return words_[index];
208 }
209
210 // Returns this integer as a decimal std::string. This is not used in the decimal-
211 // to-binary conversion; it is intended to aid in testing.
212 std::string ToString() const;
213
214 int size() const { return size_; }
215 const uint32_t* words() const { return words_; }
216
217 private:
218 // Reads the number between [begin, end), possibly containing a decimal point,
219 // into this BigUnsigned.
220 //
221 // Callers are required to ensure [begin, end) contains a valid number, with
222 // one or more decimal digits and at most one decimal point. This routine
223 // will behave unpredictably if these preconditions are not met.
224 //
225 // Only the first `significant_digits` digits are read. Digits beyond this
226 // limit are "sticky": If the final significant digit is 0 or 5, and if any
227 // dropped digit is nonzero, then that final significant digit is adjusted up
228 // to 1 or 6. This adjustment allows for precise rounding.
229 //
230 // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
231 // account for the decimal point and for dropped significant digits. After
232 // this function returns,
233 // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment.
234 int ReadDigits(const char* begin, const char* end, int significant_digits);
235
236 // Performs a step of big integer multiplication. This computes the full
237 // (64-bit-wide) values that should be added at the given index (step), and
238 // adds to that location in-place.
239 //
240 // Because our math all occurs in place, we must multiply starting from the
241 // highest word working downward. (This is a bit more expensive due to the
242 // extra carries involved.)
243 //
244 // This must be called in steps, for each word to be calculated, starting from
245 // the high end and working down to 0. The first value of `step` should be
246 // `std::min(original_size + other.size_ - 2, max_words - 1)`.
247 // The reason for this expression is that multiplying the i'th word from one
248 // multiplicand and the j'th word of another multiplicand creates a
249 // two-word-wide value to be stored at the (i+j)'th element. The highest
250 // word indices we will access are `original_size - 1` from this object, and
251 // `other.size_ - 1` from our operand. Therefore,
252 // `original_size + other.size_ - 2` is the first step we should calculate,
253 // but limited on an upper bound by max_words.
254
255 // Working from high-to-low ensures that we do not overwrite the portions of
256 // the initial value of *this which are still needed for later steps.
257 //
258 // Once called with step == 0, *this contains the result of the
259 // multiplication.
260 //
261 // `original_size` is the size_ of *this before the first call to
262 // MultiplyStep(). `other_words` and `other_size` are the contents of our
263 // operand. `step` is the step to perform, as described above.
264 void MultiplyStep(int original_size, const uint32_t* other_words,
265 int other_size, int step);
266
267 void MultiplyBy(int other_size, const uint32_t* other_words) {
268 const int original_size = size_;
269 const int first_step =
270 (std::min)(original_size + other_size - 2, max_words - 1);
271 for (int step = first_step; step >= 0; --step) {
272 MultiplyStep(original_size, other_words, other_size, step);
273 }
274 }
275
276 // Adds a 32-bit value to the index'th word, with carry.
277 void AddWithCarry(int index, uint32_t value) {
278 if (value) {
279 while (index < max_words && value > 0) {
280 words_[index] += value;
281 // carry if we overflowed in this word:
282 if (value > words_[index]) {
283 value = 1;
284 ++index;
285 } else {
286 value = 0;
287 }
288 }
289 size_ = (std::min)(max_words, (std::max)(index + 1, size_));
290 }
291 }
292
293 void AddWithCarry(int index, uint64_t value) {
294 if (value && index < max_words) {
295 uint32_t high = value >> 32;
296 uint32_t low = value & 0xffffffff;
297 words_[index] += low;
298 if (words_[index] < low) {
299 ++high;
300 if (high == 0) {
301 // Carry from the low word caused our high word to overflow.
302 // Short circuit here to do the right thing.
303 AddWithCarry(index + 2, static_cast<uint32_t>(1));
304 return;
305 }
306 }
307 if (high > 0) {
308 AddWithCarry(index + 1, high);
309 } else {
310 // Normally 32-bit AddWithCarry() sets size_, but since we don't call
311 // it when `high` is 0, do it ourselves here.
312 size_ = (std::min)(max_words, (std::max)(index + 1, size_));
313 }
314 }
315 }
316
317 // Divide this in place by a constant divisor. Returns the remainder of the
318 // division.
319 template <uint32_t divisor>
320 uint32_t DivMod() {
321 uint64_t accumulator = 0;
322 for (int i = size_ - 1; i >= 0; --i) {
323 accumulator <<= 32;
324 accumulator += words_[i];
325 // accumulator / divisor will never overflow an int32_t in this loop
326 words_[i] = static_cast<uint32_t>(accumulator / divisor);
327 accumulator = accumulator % divisor;
328 }
329 while (size_ > 0 && words_[size_ - 1] == 0) {
330 --size_;
331 }
332 return static_cast<uint32_t>(accumulator);
333 }
334
335 // The number of elements in words_ that may carry significant values.
336 // All elements beyond this point are 0.
337 //
338 // When size_ is 0, this BigUnsigned stores the value 0.
339 // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is
340 // nonzero. This can occur due to overflow truncation.
341 // In particular, x.size_ != y.size_ does *not* imply x != y.
342 int size_;
343 uint32_t words_[max_words];
344};
345
346// Compares two big integer instances.
347//
348// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
349template <int N, int M>
350int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
351 int limit = (std::max)(lhs.size(), rhs.size());
352 for (int i = limit - 1; i >= 0; --i) {
353 const uint32_t lhs_word = lhs.GetWord(i);
354 const uint32_t rhs_word = rhs.GetWord(i);
355 if (lhs_word < rhs_word) {
356 return -1;
357 } else if (lhs_word > rhs_word) {
358 return 1;
359 }
360 }
361 return 0;
362}
363
364template <int N, int M>
365bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
366 int limit = (std::max)(lhs.size(), rhs.size());
367 for (int i = 0; i < limit; ++i) {
368 if (lhs.GetWord(i) != rhs.GetWord(i)) {
369 return false;
370 }
371 }
372 return true;
373}
374
375template <int N, int M>
376bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
377 return !(lhs == rhs);
378}
379
380template <int N, int M>
381bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
382 return Compare(lhs, rhs) == -1;
383}
384
385template <int N, int M>
386bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
387 return rhs < lhs;
388}
389template <int N, int M>
390bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
391 return !(rhs < lhs);
392}
393template <int N, int M>
394bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
395 return !(lhs < rhs);
396}
397
398// Output operator for BigUnsigned, for testing purposes only.
399template <int N>
400std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
401 return os << num.ToString();
402}
403
404// Explicit instantiation declarations for the sizes of BigUnsigned that we
405// are using.
406//
407// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
408// still bigger than an int128, and 84 is a large value we will want to use
409// in the from_chars implementation.
410//
411// Comments justifying the use of 84 belong in the from_chars implementation,
412// and will be added in a follow-up CL.
413extern template class BigUnsigned<4>;
414extern template class BigUnsigned<84>;
415
416} // namespace strings_internal
417} // namespace absl
418
419#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
420