1 | // Copyright 2018 The Abseil Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |
16 | #define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |
17 | |
18 | #include <algorithm> |
19 | #include <cstdint> |
20 | #include <iostream> |
21 | #include <string> |
22 | |
23 | #include "absl/strings/ascii.h" |
24 | #include "absl/strings/internal/charconv_parse.h" |
25 | #include "absl/strings/string_view.h" |
26 | |
27 | namespace absl { |
28 | namespace strings_internal { |
29 | |
30 | // The largest power that 5 that can be raised to, and still fit in a uint32_t. |
31 | constexpr int kMaxSmallPowerOfFive = 13; |
32 | // The largest power that 10 that can be raised to, and still fit in a uint32_t. |
33 | constexpr int kMaxSmallPowerOfTen = 9; |
34 | |
35 | extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + 1]; |
36 | extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + 1]; |
37 | |
38 | // Large, fixed-width unsigned integer. |
39 | // |
40 | // Exact rounding for decimal-to-binary floating point conversion requires very |
41 | // large integer math, but a design goal of absl::from_chars is to avoid |
42 | // allocating memory. The integer precision needed for decimal-to-binary |
43 | // conversions is large but bounded, so a huge fixed-width integer class |
44 | // suffices. |
45 | // |
46 | // This is an intentionally limited big integer class. Only needed operations |
47 | // are implemented. All storage lives in an array data member, and all |
48 | // arithmetic is done in-place, to avoid requiring separate storage for operand |
49 | // and result. |
50 | // |
51 | // This is an internal class. Some methods live in the .cc file, and are |
52 | // instantiated only for the values of max_words we need. |
53 | template <int max_words> |
54 | class BigUnsigned { |
55 | public: |
56 | static_assert(max_words == 4 || max_words == 84, |
57 | "unsupported max_words value" ); |
58 | |
59 | BigUnsigned() : size_(0), words_{} {} |
60 | explicit constexpr BigUnsigned(uint64_t v) |
61 | : size_((v >> 32) ? 2 : v ? 1 : 0), |
62 | words_{static_cast<uint32_t>(v & 0xffffffffu), |
63 | static_cast<uint32_t>(v >> 32)} {} |
64 | |
65 | // Constructs a BigUnsigned from the given string_view containing a decimal |
66 | // value. If the input std::string is not a decimal integer, constructs a 0 |
67 | // instead. |
68 | explicit BigUnsigned(absl::string_view sv) : size_(0), words_{} { |
69 | // Check for valid input, returning a 0 otherwise. This is reasonable |
70 | // behavior only because this constructor is for unit tests. |
71 | if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() || |
72 | sv.empty()) { |
73 | return; |
74 | } |
75 | int exponent_adjust = |
76 | ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + 1); |
77 | if (exponent_adjust > 0) { |
78 | MultiplyByTenToTheNth(exponent_adjust); |
79 | } |
80 | } |
81 | |
82 | // Loads the mantissa value of a previously-parsed float. |
83 | // |
84 | // Returns the associated decimal exponent. The value of the parsed float is |
85 | // exactly *this * 10**exponent. |
86 | int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits); |
87 | |
88 | // Returns the number of decimal digits of precision this type provides. All |
89 | // numbers with this many decimal digits or fewer are representable by this |
90 | // type. |
91 | // |
92 | // Analagous to std::numeric_limits<BigUnsigned>::digits10. |
93 | static constexpr int Digits10() { |
94 | // 9975007/1035508 is very slightly less than log10(2**32). |
95 | return static_cast<uint64_t>(max_words) * 9975007 / 1035508; |
96 | } |
97 | |
98 | // Shifts left by the given number of bits. |
99 | void ShiftLeft(int count) { |
100 | if (count > 0) { |
101 | const int word_shift = count / 32; |
102 | if (word_shift >= max_words) { |
103 | SetToZero(); |
104 | return; |
105 | } |
106 | size_ = (std::min)(size_ + word_shift, max_words); |
107 | count %= 32; |
108 | if (count == 0) { |
109 | std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_); |
110 | } else { |
111 | for (int i = (std::min)(size_, max_words - 1); i > word_shift; --i) { |
112 | words_[i] = (words_[i - word_shift] << count) | |
113 | (words_[i - word_shift - 1] >> (32 - count)); |
114 | } |
115 | words_[word_shift] = words_[0] << count; |
116 | // Grow size_ if necessary. |
117 | if (size_ < max_words && words_[size_]) { |
118 | ++size_; |
119 | } |
120 | } |
121 | std::fill(words_, words_ + word_shift, 0u); |
122 | } |
123 | } |
124 | |
125 | |
126 | // Multiplies by v in-place. |
127 | void MultiplyBy(uint32_t v) { |
128 | if (size_ == 0 || v == 1) { |
129 | return; |
130 | } |
131 | if (v == 0) { |
132 | SetToZero(); |
133 | return; |
134 | } |
135 | const uint64_t factor = v; |
136 | uint64_t window = 0; |
137 | for (int i = 0; i < size_; ++i) { |
138 | window += factor * words_[i]; |
139 | words_[i] = window & 0xffffffff; |
140 | window >>= 32; |
141 | } |
142 | // If carry bits remain and there's space for them, grow size_. |
143 | if (window && size_ < max_words) { |
144 | words_[size_] = window & 0xffffffff; |
145 | ++size_; |
146 | } |
147 | } |
148 | |
149 | void MultiplyBy(uint64_t v) { |
150 | uint32_t words[2]; |
151 | words[0] = static_cast<uint32_t>(v); |
152 | words[1] = static_cast<uint32_t>(v >> 32); |
153 | if (words[1] == 0) { |
154 | MultiplyBy(words[0]); |
155 | } else { |
156 | MultiplyBy(2, words); |
157 | } |
158 | } |
159 | |
160 | // Multiplies in place by 5 to the power of n. n must be non-negative. |
161 | void MultiplyByFiveToTheNth(int n) { |
162 | while (n >= kMaxSmallPowerOfFive) { |
163 | MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]); |
164 | n -= kMaxSmallPowerOfFive; |
165 | } |
166 | if (n > 0) { |
167 | MultiplyBy(kFiveToNth[n]); |
168 | } |
169 | } |
170 | |
171 | // Multiplies in place by 10 to the power of n. n must be non-negative. |
172 | void MultiplyByTenToTheNth(int n) { |
173 | if (n > kMaxSmallPowerOfTen) { |
174 | // For large n, raise to a power of 5, then shift left by the same amount. |
175 | // (10**n == 5**n * 2**n.) This requires fewer multiplications overall. |
176 | MultiplyByFiveToTheNth(n); |
177 | ShiftLeft(n); |
178 | } else if (n > 0) { |
179 | // We can do this more quickly for very small N by using a single |
180 | // multiplication. |
181 | MultiplyBy(kTenToNth[n]); |
182 | } |
183 | } |
184 | |
185 | // Returns the value of 5**n, for non-negative n. This implementation uses |
186 | // a lookup table, and is faster then seeding a BigUnsigned with 1 and calling |
187 | // MultiplyByFiveToTheNth(). |
188 | static BigUnsigned FiveToTheNth(int n); |
189 | |
190 | // Multiplies by another BigUnsigned, in-place. |
191 | template <int M> |
192 | void MultiplyBy(const BigUnsigned<M>& other) { |
193 | MultiplyBy(other.size(), other.words()); |
194 | } |
195 | |
196 | void SetToZero() { |
197 | std::fill(words_, words_ + size_, 0u); |
198 | size_ = 0; |
199 | } |
200 | |
201 | // Returns the value of the nth word of this BigUnsigned. This is |
202 | // range-checked, and returns 0 on out-of-bounds accesses. |
203 | uint32_t GetWord(int index) const { |
204 | if (index < 0 || index >= size_) { |
205 | return 0; |
206 | } |
207 | return words_[index]; |
208 | } |
209 | |
210 | // Returns this integer as a decimal std::string. This is not used in the decimal- |
211 | // to-binary conversion; it is intended to aid in testing. |
212 | std::string ToString() const; |
213 | |
214 | int size() const { return size_; } |
215 | const uint32_t* words() const { return words_; } |
216 | |
217 | private: |
218 | // Reads the number between [begin, end), possibly containing a decimal point, |
219 | // into this BigUnsigned. |
220 | // |
221 | // Callers are required to ensure [begin, end) contains a valid number, with |
222 | // one or more decimal digits and at most one decimal point. This routine |
223 | // will behave unpredictably if these preconditions are not met. |
224 | // |
225 | // Only the first `significant_digits` digits are read. Digits beyond this |
226 | // limit are "sticky": If the final significant digit is 0 or 5, and if any |
227 | // dropped digit is nonzero, then that final significant digit is adjusted up |
228 | // to 1 or 6. This adjustment allows for precise rounding. |
229 | // |
230 | // Returns `exponent_adjustment`, a power-of-ten exponent adjustment to |
231 | // account for the decimal point and for dropped significant digits. After |
232 | // this function returns, |
233 | // actual_value_of_parsed_string ~= *this * 10**exponent_adjustment. |
234 | int ReadDigits(const char* begin, const char* end, int significant_digits); |
235 | |
236 | // Performs a step of big integer multiplication. This computes the full |
237 | // (64-bit-wide) values that should be added at the given index (step), and |
238 | // adds to that location in-place. |
239 | // |
240 | // Because our math all occurs in place, we must multiply starting from the |
241 | // highest word working downward. (This is a bit more expensive due to the |
242 | // extra carries involved.) |
243 | // |
244 | // This must be called in steps, for each word to be calculated, starting from |
245 | // the high end and working down to 0. The first value of `step` should be |
246 | // `std::min(original_size + other.size_ - 2, max_words - 1)`. |
247 | // The reason for this expression is that multiplying the i'th word from one |
248 | // multiplicand and the j'th word of another multiplicand creates a |
249 | // two-word-wide value to be stored at the (i+j)'th element. The highest |
250 | // word indices we will access are `original_size - 1` from this object, and |
251 | // `other.size_ - 1` from our operand. Therefore, |
252 | // `original_size + other.size_ - 2` is the first step we should calculate, |
253 | // but limited on an upper bound by max_words. |
254 | |
255 | // Working from high-to-low ensures that we do not overwrite the portions of |
256 | // the initial value of *this which are still needed for later steps. |
257 | // |
258 | // Once called with step == 0, *this contains the result of the |
259 | // multiplication. |
260 | // |
261 | // `original_size` is the size_ of *this before the first call to |
262 | // MultiplyStep(). `other_words` and `other_size` are the contents of our |
263 | // operand. `step` is the step to perform, as described above. |
264 | void MultiplyStep(int original_size, const uint32_t* other_words, |
265 | int other_size, int step); |
266 | |
267 | void MultiplyBy(int other_size, const uint32_t* other_words) { |
268 | const int original_size = size_; |
269 | const int first_step = |
270 | (std::min)(original_size + other_size - 2, max_words - 1); |
271 | for (int step = first_step; step >= 0; --step) { |
272 | MultiplyStep(original_size, other_words, other_size, step); |
273 | } |
274 | } |
275 | |
276 | // Adds a 32-bit value to the index'th word, with carry. |
277 | void AddWithCarry(int index, uint32_t value) { |
278 | if (value) { |
279 | while (index < max_words && value > 0) { |
280 | words_[index] += value; |
281 | // carry if we overflowed in this word: |
282 | if (value > words_[index]) { |
283 | value = 1; |
284 | ++index; |
285 | } else { |
286 | value = 0; |
287 | } |
288 | } |
289 | size_ = (std::min)(max_words, (std::max)(index + 1, size_)); |
290 | } |
291 | } |
292 | |
293 | void AddWithCarry(int index, uint64_t value) { |
294 | if (value && index < max_words) { |
295 | uint32_t high = value >> 32; |
296 | uint32_t low = value & 0xffffffff; |
297 | words_[index] += low; |
298 | if (words_[index] < low) { |
299 | ++high; |
300 | if (high == 0) { |
301 | // Carry from the low word caused our high word to overflow. |
302 | // Short circuit here to do the right thing. |
303 | AddWithCarry(index + 2, static_cast<uint32_t>(1)); |
304 | return; |
305 | } |
306 | } |
307 | if (high > 0) { |
308 | AddWithCarry(index + 1, high); |
309 | } else { |
310 | // Normally 32-bit AddWithCarry() sets size_, but since we don't call |
311 | // it when `high` is 0, do it ourselves here. |
312 | size_ = (std::min)(max_words, (std::max)(index + 1, size_)); |
313 | } |
314 | } |
315 | } |
316 | |
317 | // Divide this in place by a constant divisor. Returns the remainder of the |
318 | // division. |
319 | template <uint32_t divisor> |
320 | uint32_t DivMod() { |
321 | uint64_t accumulator = 0; |
322 | for (int i = size_ - 1; i >= 0; --i) { |
323 | accumulator <<= 32; |
324 | accumulator += words_[i]; |
325 | // accumulator / divisor will never overflow an int32_t in this loop |
326 | words_[i] = static_cast<uint32_t>(accumulator / divisor); |
327 | accumulator = accumulator % divisor; |
328 | } |
329 | while (size_ > 0 && words_[size_ - 1] == 0) { |
330 | --size_; |
331 | } |
332 | return static_cast<uint32_t>(accumulator); |
333 | } |
334 | |
335 | // The number of elements in words_ that may carry significant values. |
336 | // All elements beyond this point are 0. |
337 | // |
338 | // When size_ is 0, this BigUnsigned stores the value 0. |
339 | // When size_ is nonzero, is *not* guaranteed that words_[size_ - 1] is |
340 | // nonzero. This can occur due to overflow truncation. |
341 | // In particular, x.size_ != y.size_ does *not* imply x != y. |
342 | int size_; |
343 | uint32_t words_[max_words]; |
344 | }; |
345 | |
346 | // Compares two big integer instances. |
347 | // |
348 | // Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs. |
349 | template <int N, int M> |
350 | int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
351 | int limit = (std::max)(lhs.size(), rhs.size()); |
352 | for (int i = limit - 1; i >= 0; --i) { |
353 | const uint32_t lhs_word = lhs.GetWord(i); |
354 | const uint32_t rhs_word = rhs.GetWord(i); |
355 | if (lhs_word < rhs_word) { |
356 | return -1; |
357 | } else if (lhs_word > rhs_word) { |
358 | return 1; |
359 | } |
360 | } |
361 | return 0; |
362 | } |
363 | |
364 | template <int N, int M> |
365 | bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
366 | int limit = (std::max)(lhs.size(), rhs.size()); |
367 | for (int i = 0; i < limit; ++i) { |
368 | if (lhs.GetWord(i) != rhs.GetWord(i)) { |
369 | return false; |
370 | } |
371 | } |
372 | return true; |
373 | } |
374 | |
375 | template <int N, int M> |
376 | bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
377 | return !(lhs == rhs); |
378 | } |
379 | |
380 | template <int N, int M> |
381 | bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
382 | return Compare(lhs, rhs) == -1; |
383 | } |
384 | |
385 | template <int N, int M> |
386 | bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
387 | return rhs < lhs; |
388 | } |
389 | template <int N, int M> |
390 | bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
391 | return !(rhs < lhs); |
392 | } |
393 | template <int N, int M> |
394 | bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) { |
395 | return !(lhs < rhs); |
396 | } |
397 | |
398 | // Output operator for BigUnsigned, for testing purposes only. |
399 | template <int N> |
400 | std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) { |
401 | return os << num.ToString(); |
402 | } |
403 | |
404 | // Explicit instantiation declarations for the sizes of BigUnsigned that we |
405 | // are using. |
406 | // |
407 | // For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is |
408 | // still bigger than an int128, and 84 is a large value we will want to use |
409 | // in the from_chars implementation. |
410 | // |
411 | // Comments justifying the use of 84 belong in the from_chars implementation, |
412 | // and will be added in a follow-up CL. |
413 | extern template class BigUnsigned<4>; |
414 | extern template class BigUnsigned<84>; |
415 | |
416 | } // namespace strings_internal |
417 | } // namespace absl |
418 | |
419 | #endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_ |
420 | |