charconv_bigint.h source code [Abseil/strings/internal/charconv_bigint.h]

1	// Copyright 2018 The Abseil Authors.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// https://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#ifndef ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
16	#define ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
17
18	#include <algorithm>
19	#include <cstdint>
20	#include <iostream>
21	#include <string>
22
23	#include "absl/strings/ascii.h"
24	#include "absl/strings/internal/charconv_parse.h"
25	#include "absl/strings/string_view.h"
26
27	namespace absl {
28	namespace strings_internal {
29
30	// The largest power that 5 that can be raised to, and still fit in a uint32_t.
31	constexpr int kMaxSmallPowerOfFive = `13`;
32	// The largest power that 10 that can be raised to, and still fit in a uint32_t.
33	constexpr int kMaxSmallPowerOfTen = `9`;
34
35	extern const uint32_t kFiveToNth[kMaxSmallPowerOfFive + `1`];
36	extern const uint32_t kTenToNth[kMaxSmallPowerOfTen + `1`];
37
38	// Large, fixed-width unsigned integer.
39	//
40	// Exact rounding for decimal-to-binary floating point conversion requires very
41	// large integer math, but a design goal of absl::from_chars is to avoid
42	// allocating memory. The integer precision needed for decimal-to-binary
43	// conversions is large but bounded, so a huge fixed-width integer class
44	// suffices.
45	//
46	// This is an intentionally limited big integer class. Only needed operations
47	// are implemented. All storage lives in an array data member, and all
48	// arithmetic is done in-place, to avoid requiring separate storage for operand
49	// and result.
50	//
51	// This is an internal class. Some methods live in the .cc file, and are
52	// instantiated only for the values of max_words we need.
53	template <int max_words>
54	class BigUnsigned {
55	public:
56	static_assert(max_words == `4` \|\| max_words == `84`,
57	"unsupported max_words value");
58
59	BigUnsigned() : size_(`0`), words_{} {}
60	explicit constexpr BigUnsigned(uint64_t v)
61	: size_((v >> `32`) ? `2` : v ? `1` : `0`),
62	words_{static_cast<uint32_t>(v & `0xffffffffu`),
63	static_cast<uint32_t>(v >> `32`)} {}
64
65	// Constructs a BigUnsigned from the given string_view containing a decimal
66	// value. If the input std::string is not a decimal integer, constructs a 0
67	// instead.
68	explicit BigUnsigned(absl::string_view sv) : size_(`0`), words_{} {
69	// Check for valid input, returning a 0 otherwise. This is reasonable
70	// behavior only because this constructor is for unit tests.
71	if (std::find_if_not(sv.begin(), sv.end(), ascii_isdigit) != sv.end() \|\|
72	sv.empty()) {
73	return;
74	}
75	int exponent_adjust =
76	ReadDigits(sv.data(), sv.data() + sv.size(), Digits10() + `1`);
77	if (exponent_adjust > `0`) {
78	MultiplyByTenToTheNth(exponent_adjust);
79	}
80	}
81
82	// Loads the mantissa value of a previously-parsed float.
83	//
84	// Returns the associated decimal exponent. The value of the parsed float is
85	// exactly this * 10*exponent.
86	int ReadFloatMantissa(const ParsedFloat& fp, int significant_digits);
87
88	// Returns the number of decimal digits of precision this type provides. All
89	// numbers with this many decimal digits or fewer are representable by this
90	// type.
91	//
92	// Analagous to std::numeric_limits<BigUnsigned>::digits10.
93	static constexpr int Digits10() {
94	// 9975007/1035508 is very slightly less than log10(232).
95	return static_cast<uint64_t>(max_words) * `9975007` / `1035508`;
96	}
97
98	// Shifts left by the given number of bits.
99	void ShiftLeft(int count) {
100	if (count > `0`) {
101	const int word_shift = count / `32`;
102	if (word_shift >= max_words) {
103	SetToZero();
104	return;
105	}
106	size_ = (std::min)(size_ + word_shift, max_words);
107	count %= `32`;
108	if (count == `0`) {
109	std::copy_backward(words_, words_ + size_ - word_shift, words_ + size_);
110	} else {
111	for (int i = (std::min)(size_, max_words - `1`); i > word_shift; --i) {
112	words_[i] = (words_[i - word_shift] << count) \|
113	(words_[i - word_shift - `1`] >> (`32` - count));
114	}
115	words_[word_shift] = words_[`0`] << count;
116	// Grow size_ if necessary.
117	if (size_ < max_words && words_[size_]) {
118	++size_;
119	}
120	}
121	std::fill(words_, words_ + word_shift, `0u`);
122	}
123	}
124
125
126	// Multiplies by v in-place.
127	void MultiplyBy(uint32_t v) {
128	if (size_ == `0` \|\| v == `1`) {
129	return;
130	}
131	if (v == `0`) {
132	SetToZero();
133	return;
134	}
135	const uint64_t factor = v;
136	uint64_t window = `0`;
137	for (int i = `0`; i < size_; ++i) {
138	window += factor * words_[i];
139	words_[i] = window & `0xffffffff`;
140	window >>= `32`;
141	}
142	// If carry bits remain and there's space for them, grow size_.
143	if (window && size_ < max_words) {
144	words_[size_] = window & `0xffffffff`;
145	++size_;
146	}
147	}
148
149	void MultiplyBy(uint64_t v) {
150	uint32_t words[`2`];
151	words[`0`] = static_cast<uint32_t>(v);
152	words[`1`] = static_cast<uint32_t>(v >> `32`);
153	if (words[`1`] == `0`) {
154	MultiplyBy(words[`0`]);
155	} else {
156	MultiplyBy(`2`, words);
157	}
158	}
159
160	// Multiplies in place by 5 to the power of n. n must be non-negative.
161	void MultiplyByFiveToTheNth(int n) {
162	while (n >= kMaxSmallPowerOfFive) {
163	MultiplyBy(kFiveToNth[kMaxSmallPowerOfFive]);
164	n -= kMaxSmallPowerOfFive;
165	}
166	if (n > `0`) {
167	MultiplyBy(kFiveToNth[n]);
168	}
169	}
170
171	// Multiplies in place by 10 to the power of n. n must be non-negative.
172	void MultiplyByTenToTheNth(int n) {
173	if (n > kMaxSmallPowerOfTen) {
174	// For large n, raise to a power of 5, then shift left by the same amount.
175	// (10n == 5n 2*n.) This requires fewer multiplications overall.
176	MultiplyByFiveToTheNth(n);
177	ShiftLeft(n);
178	} else if (n > `0`) {
179	// We can do this more quickly for very small N by using a single
180	// multiplication.
181	MultiplyBy(kTenToNth[n]);
182	}
183	}
184
185	// Returns the value of 5n, for non-negative n. This implementation uses
186	// a lookup table, and is faster then seeding a BigUnsigned with 1 and calling
187	// MultiplyByFiveToTheNth().
188	static BigUnsigned FiveToTheNth(int n);
189
190	// Multiplies by another BigUnsigned, in-place.
191	template <int M>
192	void MultiplyBy(const BigUnsigned<M>& other) {
193	MultiplyBy(other.size(), other.words());
194	}
195
196	void SetToZero() {
197	std::fill(words_, words_ + size_, `0u`);
198	size_ = `0`;
199	}
200
201	// Returns the value of the nth word of this BigUnsigned. This is
202	// range-checked, and returns 0 on out-of-bounds accesses.
203	uint32_t GetWord(int index) const {
204	if (index < `0` \|\| index >= size_) {
205	return `0`;
206	}
207	return words_[index];
208	}
209
210	// Returns this integer as a decimal std::string. This is not used in the decimal-
211	// to-binary conversion; it is intended to aid in testing.
212	std::string ToString() const;
213
214	int size() const { return size_; }
215	const uint32_t* words() const { return words_; }
216
217	private:
218	// Reads the number between [begin, end), possibly containing a decimal point,
219	// into this BigUnsigned.
220	//
221	// Callers are required to ensure [begin, end) contains a valid number, with
222	// one or more decimal digits and at most one decimal point. This routine
223	// will behave unpredictably if these preconditions are not met.
224	//
225	// Only the first `significant_digits` digits are read. Digits beyond this
226	// limit are "sticky": If the final significant digit is 0 or 5, and if any
227	// dropped digit is nonzero, then that final significant digit is adjusted up
228	// to 1 or 6. This adjustment allows for precise rounding.
229	//
230	// Returns `exponent_adjustment`, a power-of-ten exponent adjustment to
231	// account for the decimal point and for dropped significant digits. After
232	// this function returns,
233	// actual_value_of_parsed_string ~= this * 10*exponent_adjustment.
234	int ReadDigits(const char* begin, const char* end, int significant_digits);
235
236	// Performs a step of big integer multiplication. This computes the full
237	// (64-bit-wide) values that should be added at the given index (step), and
238	// adds to that location in-place.
239	//
240	// Because our math all occurs in place, we must multiply starting from the
241	// highest word working downward. (This is a bit more expensive due to the
242	// extra carries involved.)
243	//
244	// This must be called in steps, for each word to be calculated, starting from
245	// the high end and working down to 0. The first value of `step` should be
246	// `std::min(original_size + other.size_ - 2, max_words - 1)`.
247	// The reason for this expression is that multiplying the i'th word from one
248	// multiplicand and the j'th word of another multiplicand creates a
249	// two-word-wide value to be stored at the (i+j)'th element. The highest
250	// word indices we will access are `original_size - 1` from this object, and
251	// `other.size_ - 1` from our operand. Therefore,
252	// `original_size + other.size_ - 2` is the first step we should calculate,
253	// but limited on an upper bound by max_words.
254
255	// Working from high-to-low ensures that we do not overwrite the portions of
256	// the initial value of this which are still needed for later steps.*
257	//
258	// Once called with step == 0, this contains the result of the*
259	// multiplication.
260	//
261	// `original_size` is the size_ of this before the first call to*
262	// MultiplyStep(). `other_words` and `other_size` are the contents of our
263	// operand. `step` is the step to perform, as described above.
264	void MultiplyStep(int original_size, const uint32_t* other_words,
265	int other_size, int step);
266
267	void MultiplyBy(int other_size, const uint32_t* other_words) {
268	const int original_size = size_;
269	const int first_step =
270	(std::min)(original_size + other_size - `2`, max_words - `1`);
271	for (int step = first_step; step >= `0`; --step) {
272	MultiplyStep(original_size, other_words, other_size, step);
273	}
274	}
275
276	// Adds a 32-bit value to the index'th word, with carry.
277	void AddWithCarry(int index, uint32_t value) {
278	if (value) {
279	while (index < max_words && value > `0`) {
280	words_[index] += value;
281	// carry if we overflowed in this word:
282	if (value > words_[index]) {
283	value = `1`;
284	++index;
285	} else {
286	value = `0`;
287	}
288	}
289	size_ = (std::min)(max_words, (std::max)(index + `1`, size_));
290	}
291	}
292
293	void AddWithCarry(int index, uint64_t value) {
294	if (value && index < max_words) {
295	uint32_t high = value >> `32`;
296	uint32_t low = value & `0xffffffff`;
297	words_[index] += low;
298	if (words_[index] < low) {
299	++high;
300	if (high == `0`) {
301	// Carry from the low word caused our high word to overflow.
302	// Short circuit here to do the right thing.
303	AddWithCarry(index + `2`, static_cast<uint32_t>(`1`));
304	return;
305	}
306	}
307	if (high > `0`) {
308	AddWithCarry(index + `1`, high);
309	} else {
310	// Normally 32-bit AddWithCarry() sets size_, but since we don't call
311	// it when `high` is 0, do it ourselves here.
312	size_ = (std::min)(max_words, (std::max)(index + `1`, size_));
313	}
314	}
315	}
316
317	// Divide this in place by a constant divisor. Returns the remainder of the
318	// division.
319	template <uint32_t divisor>
320	uint32_t DivMod() {
321	uint64_t accumulator = `0`;
322	for (int i = size_ - `1`; i >= `0`; --i) {
323	accumulator <<= `32`;
324	accumulator += words_[i];
325	// accumulator / divisor will never overflow an int32_t in this loop
326	words_[i] = static_cast<uint32_t>(accumulator / divisor);
327	accumulator = accumulator % divisor;
328	}
329	while (size_ > `0` && words_[size_ - `1`] == `0`) {
330	--size_;
331	}
332	return static_cast<uint32_t>(accumulator);
333	}
334
335	// The number of elements in words_ that may carry significant values.
336	// All elements beyond this point are 0.
337	//
338	// When size_ is 0, this BigUnsigned stores the value 0.
339	// When size_ is nonzero, is not* guaranteed that words_[size_ - 1] is*
340	// nonzero. This can occur due to overflow truncation.
341	// In particular, x.size_ != y.size_ does not* imply x != y.*
342	int size_;
343	uint32_t words_[max_words];
344	};
345
346	// Compares two big integer instances.
347	//
348	// Returns -1 if lhs < rhs, 0 if lhs == rhs, and 1 if lhs > rhs.
349	template <int N, int M>
350	int Compare(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
351	int limit = (std::max)(lhs.size(), rhs.size());
352	for (int i = limit - `1`; i >= `0`; --i) {
353	const uint32_t lhs_word = lhs.GetWord(i);
354	const uint32_t rhs_word = rhs.GetWord(i);
355	if (lhs_word < rhs_word) {
356	return -`1`;
357	} else if (lhs_word > rhs_word) {
358	return `1`;
359	}
360	}
361	return `0`;
362	}
363
364	template <int N, int M>
365	bool operator==(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
366	int limit = (std::max)(lhs.size(), rhs.size());
367	for (int i = `0`; i < limit; ++i) {
368	if (lhs.GetWord(i) != rhs.GetWord(i)) {
369	return false;
370	}
371	}
372	return true;
373	}
374
375	template <int N, int M>
376	bool operator!=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
377	return !(lhs == rhs);
378	}
379
380	template <int N, int M>
381	bool operator<(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
382	return Compare(lhs, rhs) == -`1`;
383	}
384
385	template <int N, int M>
386	bool operator>(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
387	return rhs < lhs;
388	}
389	template <int N, int M>
390	bool operator<=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
391	return !(rhs < lhs);
392	}
393	template <int N, int M>
394	bool operator>=(const BigUnsigned<N>& lhs, const BigUnsigned<M>& rhs) {
395	return !(lhs < rhs);
396	}
397
398	// Output operator for BigUnsigned, for testing purposes only.
399	template <int N>
400	std::ostream& operator<<(std::ostream& os, const BigUnsigned<N>& num) {
401	return os << num.ToString();
402	}
403
404	// Explicit instantiation declarations for the sizes of BigUnsigned that we
405	// are using.
406	//
407	// For now, the choices of 4 and 84 are arbitrary; 4 is a small value that is
408	// still bigger than an int128, and 84 is a large value we will want to use
409	// in the from_chars implementation.
410	//
411	// Comments justifying the use of 84 belong in the from_chars implementation,
412	// and will be added in a follow-up CL.
413	extern template class BigUnsigned<`4`>;
414	extern template class BigUnsigned<`84`>;
415
416	} // namespace strings_internal
417	} // namespace absl
418
419	#endif // ABSL_STRINGS_INTERNAL_CHARCONV_BIGINT_H_
420

Browse the source code of Abseil/strings/internal/charconv_bigint.h