1 | // Copyright 2018 The Abseil Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "absl/strings/internal/charconv_bigint.h" |
16 | |
17 | #include <algorithm> |
18 | #include <cassert> |
19 | #include <string> |
20 | |
21 | namespace absl { |
22 | namespace strings_internal { |
23 | |
24 | namespace { |
25 | |
26 | // Table containing some large powers of 5, for fast computation. |
27 | |
28 | // Constant step size for entries in the kLargePowersOfFive table. Each entry |
29 | // is larger than the previous entry by a factor of 5**kLargePowerOfFiveStep |
30 | // (or 5**27). |
31 | // |
32 | // In other words, the Nth entry in the table is 5**(27*N). |
33 | // |
34 | // 5**27 is the largest power of 5 that fits in 64 bits. |
35 | constexpr int kLargePowerOfFiveStep = 27; |
36 | |
37 | // The largest legal index into the kLargePowersOfFive table. |
38 | // |
39 | // In other words, the largest precomputed power of 5 is 5**(27*20). |
40 | constexpr int kLargestPowerOfFiveIndex = 20; |
41 | |
42 | // Table of powers of (5**27), up to (5**27)**20 == 5**540. |
43 | // |
44 | // Used to generate large powers of 5 while limiting the number of repeated |
45 | // multiplications required. |
46 | // |
47 | // clang-format off |
48 | const uint32_t kLargePowersOfFive[] = { |
49 | // 5**27 (i=1), start=0, end=2 |
50 | 0xfa10079dU, 0x6765c793U, |
51 | // 5**54 (i=2), start=2, end=6 |
52 | 0x97d9f649U, 0x6664242dU, 0x29939b14U, 0x29c30f10U, |
53 | // 5**81 (i=3), start=6, end=12 |
54 | 0xc4f809c5U, 0x7bf3f22aU, 0x67bdae34U, 0xad340517U, 0x369d1b5fU, 0x10de1593U, |
55 | // 5**108 (i=4), start=12, end=20 |
56 | 0x92b260d1U, 0x9efff7c7U, 0x81de0ec6U, 0xaeba5d56U, 0x410664a4U, 0x4f40737aU, |
57 | 0x20d3846fU, 0x06d00f73U, |
58 | // 5**135 (i=5), start=20, end=30 |
59 | 0xff1b172dU, 0x13a1d71cU, 0xefa07617U, 0x7f682d3dU, 0xff8c90c0U, 0x3f0131e7U, |
60 | 0x3fdcb9feU, 0x917b0177U, 0x16c407a7U, 0x02c06b9dU, |
61 | // 5**162 (i=6), start=30, end=42 |
62 | 0x960f7199U, 0x056667ecU, 0xe07aefd8U, 0x80f2b9ccU, 0x8273f5e3U, 0xeb9a214aU, |
63 | 0x40b38005U, 0x0e477ad4U, 0x277d08e6U, 0xfa28b11eU, 0xd3f7d784U, 0x011c835bU, |
64 | // 5**189 (i=7), start=42, end=56 |
65 | 0xf723d9d5U, 0x3282d3f3U, 0xe00857d1U, 0x69659d25U, 0x2cf117cfU, 0x24da6d07U, |
66 | 0x954d1417U, 0x3e5d8cedU, 0x7a8bb766U, 0xfd785ae6U, 0x645436d2U, 0x40c78b34U, |
67 | 0x94151217U, 0x0072e9f7U, |
68 | // 5**216 (i=8), start=56, end=72 |
69 | 0x2b416aa1U, 0x7893c5a7U, 0xe37dc6d4U, 0x2bad2beaU, 0xf0fc846cU, 0x7575ae4bU, |
70 | 0x62587b14U, 0x83b67a34U, 0x02110cdbU, 0xf7992f55U, 0x00deb022U, 0xa4a23becU, |
71 | 0x8af5c5cdU, 0xb85b654fU, 0x818df38bU, 0x002e69d2U, |
72 | // 5**243 (i=9), start=72, end=90 |
73 | 0x3518cbbdU, 0x20b0c15fU, 0x38756c2fU, 0xfb5dc3ddU, 0x22ad2d94U, 0xbf35a952U, |
74 | 0xa699192aU, 0x9a613326U, 0xad2a9cedU, 0xd7f48968U, 0xe87dfb54U, 0xc8f05db6U, |
75 | 0x5ef67531U, 0x31c1ab49U, 0xe202ac9fU, 0x9b2957b5U, 0xa143f6d3U, 0x0012bf07U, |
76 | // 5**270 (i=10), start=90, end=110 |
77 | 0x8b971de9U, 0x21aba2e1U, 0x63944362U, 0x57172336U, 0xd9544225U, 0xfb534166U, |
78 | 0x08c563eeU, 0x14640ee2U, 0x24e40d31U, 0x02b06537U, 0x03887f14U, 0x0285e533U, |
79 | 0xb744ef26U, 0x8be3a6c4U, 0x266979b4U, 0x6761ece2U, 0xd9cb39e4U, 0xe67de319U, |
80 | 0x0d39e796U, 0x00079250U, |
81 | // 5**297 (i=11), start=110, end=132 |
82 | 0x260eb6e5U, 0xf414a796U, 0xee1a7491U, 0xdb9368ebU, 0xf50c105bU, 0x59157750U, |
83 | 0x9ed2fb5cU, 0xf6e56d8bU, 0xeaee8d23U, 0x0f319f75U, 0x2aa134d6U, 0xac2908e9U, |
84 | 0xd4413298U, 0x02f02a55U, 0x989d5a7aU, 0x70dde184U, 0xba8040a7U, 0x03200981U, |
85 | 0xbe03b11cU, 0x3c1c2a18U, 0xd60427a1U, 0x00030ee0U, |
86 | // 5**324 (i=12), start=132, end=156 |
87 | 0xce566d71U, 0xf1c4aa25U, 0x4e93ca53U, 0xa72283d0U, 0x551a73eaU, 0x3d0538e2U, |
88 | 0x8da4303fU, 0x6a58de60U, 0x0e660221U, 0x49cf61a6U, 0x8d058fc1U, 0xb9d1a14cU, |
89 | 0x4bab157dU, 0xc85c6932U, 0x518c8b9eU, 0x9b92b8d0U, 0x0d8a0e21U, 0xbd855df9U, |
90 | 0xb3ea59a1U, 0x8da29289U, 0x4584d506U, 0x3752d80fU, 0xb72569c6U, 0x00013c33U, |
91 | // 5**351 (i=13), start=156, end=182 |
92 | 0x190f354dU, 0x83695cfeU, 0xe5a4d0c7U, 0xb60fb7e8U, 0xee5bbcc4U, 0xb922054cU, |
93 | 0xbb4f0d85U, 0x48394028U, 0x1d8957dbU, 0x0d7edb14U, 0x4ecc7587U, 0x505e9e02U, |
94 | 0x4c87f36bU, 0x99e66bd6U, 0x44b9ed35U, 0x753037d4U, 0xe5fe5f27U, 0x2742c203U, |
95 | 0x13b2ed2bU, 0xdc525d2cU, 0xe6fde59aU, 0x77ffb18fU, 0x13c5752cU, 0x08a84bccU, |
96 | 0x859a4940U, 0x00007fb6U, |
97 | // 5**378 (i=14), start=182, end=210 |
98 | 0x4f98cb39U, 0xa60edbbcU, 0x83b5872eU, 0xa501acffU, 0x9cc76f78U, 0xbadd4c73U, |
99 | 0x43e989faU, 0xca7acf80U, 0x2e0c824fU, 0xb19f4ffcU, 0x092fd81cU, 0xe4eb645bU, |
100 | 0xa1ff84c2U, 0x8a5a83baU, 0xa8a1fae9U, 0x1db43609U, 0xb0fed50bU, 0x0dd7d2bdU, |
101 | 0x7d7accd8U, 0x91fa640fU, 0x37dcc6c5U, 0x1c417fd5U, 0xe4d462adU, 0xe8a43399U, |
102 | 0x131bf9a5U, 0x8df54d29U, 0x36547dc1U, 0x00003395U, |
103 | // 5**405 (i=15), start=210, end=240 |
104 | 0x5bd330f5U, 0x77d21967U, 0x1ac481b7U, 0x6be2f7ceU, 0x7f4792a9U, 0xe84c2c52U, |
105 | 0x84592228U, 0x9dcaf829U, 0xdab44ce1U, 0x3d0c311bU, 0x532e297dU, 0x4704e8b4U, |
106 | 0x9cdc32beU, 0x41e64d9dU, 0x7717bea1U, 0xa824c00dU, 0x08f50b27U, 0x0f198d77U, |
107 | 0x49bbfdf0U, 0x025c6c69U, 0xd4e55cd3U, 0xf083602bU, 0xb9f0fecdU, 0xc0864aeaU, |
108 | 0x9cb98681U, 0xaaf620e9U, 0xacb6df30U, 0x4faafe66U, 0x8af13c3bU, 0x000014d5U, |
109 | // 5**432 (i=16), start=240, end=272 |
110 | 0x682bb941U, 0x89a9f297U, 0xcba75d7bU, 0x404217b1U, 0xb4e519e9U, 0xa1bc162bU, |
111 | 0xf7f5910aU, 0x98715af5U, 0x2ff53e57U, 0xe3ef118cU, 0x490c4543U, 0xbc9b1734U, |
112 | 0x2affbe4dU, 0x4cedcb4cU, 0xfb14e99eU, 0x35e34212U, 0xece39c24U, 0x07673ab3U, |
113 | 0xe73115ddU, 0xd15d38e7U, 0x093eed3bU, 0xf8e7eac5U, 0x78a8cc80U, 0x25227aacU, |
114 | 0x3f590551U, 0x413da1cbU, 0xdf643a55U, 0xab65ad44U, 0xd70b23d7U, 0xc672cd76U, |
115 | 0x3364ea62U, 0x0000086aU, |
116 | // 5**459 (i=17), start=272, end=306 |
117 | 0x22f163ddU, 0x23cf07acU, 0xbe2af6c2U, 0xf412f6f6U, 0xc3ff541eU, 0x6eeaf7deU, |
118 | 0xa47047e0U, 0x408cda92U, 0x0f0eeb08U, 0x56deba9dU, 0xcfc6b090U, 0x8bbbdf04U, |
119 | 0x3933cdb3U, 0x9e7bb67dU, 0x9f297035U, 0x38946244U, 0xee1d37bbU, 0xde898174U, |
120 | 0x63f3559dU, 0x705b72fbU, 0x138d27d9U, 0xf8603a78U, 0x735eec44U, 0xe30987d5U, |
121 | 0xc6d38070U, 0x9cfe548eU, 0x9ff01422U, 0x7c564aa8U, 0x91cc60baU, 0xcbc3565dU, |
122 | 0x7550a50bU, 0x6909aeadU, 0x13234c45U, 0x00000366U, |
123 | // 5**486 (i=18), start=306, end=342 |
124 | 0x17954989U, 0x3a7d7709U, 0x98042de5U, 0xa9011443U, 0x45e723c2U, 0x269ffd6fU, |
125 | 0x58852a46U, 0xaaa1042aU, 0x2eee8153U, 0xb2b6c39eU, 0xaf845b65U, 0xf6c365d7U, |
126 | 0xe4cffb2bU, 0xc840e90cU, 0xabea8abbU, 0x5c58f8d2U, 0x5c19fa3aU, 0x4670910aU, |
127 | 0x4449f21cU, 0xefa645b3U, 0xcc427decU, 0x083c3d73U, 0x467cb413U, 0x6fe10ae4U, |
128 | 0x3caffc72U, 0x9f8da55eU, 0x5e5c8ea7U, 0x490594bbU, 0xf0871b0bU, 0xdd89816cU, |
129 | 0x8e931df8U, 0xe85ce1c9U, 0xcca090a5U, 0x575fa16bU, 0x6b9f106cU, 0x0000015fU, |
130 | // 5**513 (i=19), start=342, end=380 |
131 | 0xee20d805U, 0x57bc3c07U, 0xcdea624eU, 0xd3f0f52dU, 0x9924b4f4U, 0xcf968640U, |
132 | 0x61d41962U, 0xe87fb464U, 0xeaaf51c7U, 0x564c8b60U, 0xccda4028U, 0x529428bbU, |
133 | 0x313a1fa8U, 0x96bd0f94U, 0x7a82ebaaU, 0xad99e7e9U, 0xf2668cd4U, 0xbe33a45eU, |
134 | 0xfd0db669U, 0x87ee369fU, 0xd3ec20edU, 0x9c4d7db7U, 0xdedcf0d8U, 0x7cd2ca64U, |
135 | 0xe25a6577U, 0x61003fd4U, 0xe56f54ccU, 0x10b7c748U, 0x40526e5eU, 0x7300ae87U, |
136 | 0x5c439261U, 0x2c0ff469U, 0xbf723f12U, 0xb2379b61U, 0xbf59b4f5U, 0xc91b1c3fU, |
137 | 0xf0046d27U, 0x0000008dU, |
138 | // 5**540 (i=20), start=380, end=420 |
139 | 0x525c9e11U, 0xf4e0eb41U, 0xebb2895dU, 0x5da512f9U, 0x7d9b29d4U, 0x452f4edcU, |
140 | 0x0b90bc37U, 0x341777cbU, 0x63d269afU, 0x1da77929U, 0x0a5c1826U, 0x77991898U, |
141 | 0x5aeddf86U, 0xf853a877U, 0x538c31ccU, 0xe84896daU, 0xb7a0010bU, 0x17ef4de5U, |
142 | 0xa52a2adeU, 0x029fd81cU, 0x987ce701U, 0x27fefd77U, 0xdb46c66fU, 0x5d301900U, |
143 | 0x496998c0U, 0xbb6598b9U, 0x5eebb607U, 0xe547354aU, 0xdf4a2f7eU, 0xf06c4955U, |
144 | 0x96242ffaU, 0x1775fb27U, 0xbecc58ceU, 0xebf2a53bU, 0x3eaad82aU, 0xf41137baU, |
145 | 0x573e6fbaU, 0xfb4866b8U, 0x54002148U, 0x00000039U, |
146 | }; |
147 | // clang-format on |
148 | |
149 | // Returns a pointer to the big integer data for (5**27)**i. i must be |
150 | // between 1 and 20, inclusive. |
151 | const uint32_t* LargePowerOfFiveData(int i) { |
152 | return kLargePowersOfFive + i * (i - 1); |
153 | } |
154 | |
155 | // Returns the size of the big integer data for (5**27)**i, in words. i must be |
156 | // between 1 and 20, inclusive. |
157 | int LargePowerOfFiveSize(int i) { return 2 * i; } |
158 | } // namespace |
159 | |
160 | const uint32_t kFiveToNth[14] = { |
161 | 1, 5, 25, 125, 625, 3125, 15625, |
162 | 78125, 390625, 1953125, 9765625, 48828125, 244140625, 1220703125, |
163 | }; |
164 | |
165 | const uint32_t kTenToNth[10] = { |
166 | 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, |
167 | }; |
168 | |
169 | template <int max_words> |
170 | int BigUnsigned<max_words>::ReadFloatMantissa(const ParsedFloat& fp, |
171 | int significant_digits) { |
172 | SetToZero(); |
173 | assert(fp.type == FloatType::kNumber); |
174 | |
175 | if (fp.subrange_begin == nullptr) { |
176 | // We already exactly parsed the mantissa, so no more work is necessary. |
177 | words_[0] = fp.mantissa & 0xffffffffu; |
178 | words_[1] = fp.mantissa >> 32; |
179 | if (words_[1]) { |
180 | size_ = 2; |
181 | } else if (words_[0]) { |
182 | size_ = 1; |
183 | } |
184 | return fp.exponent; |
185 | } |
186 | int exponent_adjust = |
187 | ReadDigits(fp.subrange_begin, fp.subrange_end, significant_digits); |
188 | return fp.literal_exponent + exponent_adjust; |
189 | } |
190 | |
191 | template <int max_words> |
192 | int BigUnsigned<max_words>::ReadDigits(const char* begin, const char* end, |
193 | int significant_digits) { |
194 | assert(significant_digits <= Digits10() + 1); |
195 | SetToZero(); |
196 | |
197 | bool after_decimal_point = false; |
198 | // Discard any leading zeroes before the decimal point |
199 | while (begin < end && *begin == '0') { |
200 | ++begin; |
201 | } |
202 | int dropped_digits = 0; |
203 | // Discard any trailing zeroes. These may or may not be after the decimal |
204 | // point. |
205 | while (begin < end && *std::prev(end) == '0') { |
206 | --end; |
207 | ++dropped_digits; |
208 | } |
209 | if (begin < end && *std::prev(end) == '.') { |
210 | // If the std::string ends in '.', either before or after dropping zeroes, then |
211 | // drop the decimal point and look for more digits to drop. |
212 | dropped_digits = 0; |
213 | --end; |
214 | while (begin < end && *std::prev(end) == '0') { |
215 | --end; |
216 | ++dropped_digits; |
217 | } |
218 | } else if (dropped_digits) { |
219 | // We dropped digits, and aren't sure if they're before or after the decimal |
220 | // point. Figure that out now. |
221 | const char* dp = std::find(begin, end, '.'); |
222 | if (dp != end) { |
223 | // The dropped trailing digits were after the decimal point, so don't |
224 | // count them. |
225 | dropped_digits = 0; |
226 | } |
227 | } |
228 | // Any non-fraction digits we dropped need to be accounted for in our exponent |
229 | // adjustment. |
230 | int exponent_adjust = dropped_digits; |
231 | |
232 | uint32_t queued = 0; |
233 | int digits_queued = 0; |
234 | for (; begin != end && significant_digits > 0; ++begin) { |
235 | if (*begin == '.') { |
236 | after_decimal_point = true; |
237 | continue; |
238 | } |
239 | if (after_decimal_point) { |
240 | // For each fractional digit we emit in our parsed integer, adjust our |
241 | // decimal exponent to compensate. |
242 | --exponent_adjust; |
243 | } |
244 | int digit = (*begin - '0'); |
245 | --significant_digits; |
246 | if (significant_digits == 0 && std::next(begin) != end && |
247 | (digit == 0 || digit == 5)) { |
248 | // If this is the very last significant digit, but insignificant digits |
249 | // remain, we know that the last of those remaining significant digits is |
250 | // nonzero. (If it wasn't, we would have stripped it before we got here.) |
251 | // So if this final digit is a 0 or 5, adjust it upward by 1. |
252 | // |
253 | // This adjustment is what allows incredibly large mantissas ending in |
254 | // 500000...000000000001 to correctly round up, rather than to nearest. |
255 | ++digit; |
256 | } |
257 | queued = 10 * queued + digit; |
258 | ++digits_queued; |
259 | if (digits_queued == kMaxSmallPowerOfTen) { |
260 | MultiplyBy(kTenToNth[kMaxSmallPowerOfTen]); |
261 | AddWithCarry(0, queued); |
262 | queued = digits_queued = 0; |
263 | } |
264 | } |
265 | // Encode any remaining digits. |
266 | if (digits_queued) { |
267 | MultiplyBy(kTenToNth[digits_queued]); |
268 | AddWithCarry(0, queued); |
269 | } |
270 | |
271 | // If any insignificant digits remain, we will drop them. But if we have not |
272 | // yet read the decimal point, then we have to adjust the exponent to account |
273 | // for the dropped digits. |
274 | if (begin < end && !after_decimal_point) { |
275 | // This call to std::find will result in a pointer either to the decimal |
276 | // point, or to the end of our buffer if there was none. |
277 | // |
278 | // Either way, [begin, decimal_point) will contain the set of dropped digits |
279 | // that require an exponent adjustment. |
280 | const char* decimal_point = std::find(begin, end, '.'); |
281 | exponent_adjust += (decimal_point - begin); |
282 | } |
283 | return exponent_adjust; |
284 | } |
285 | |
286 | template <int max_words> |
287 | /* static */ BigUnsigned<max_words> BigUnsigned<max_words>::FiveToTheNth( |
288 | int n) { |
289 | BigUnsigned answer(1u); |
290 | |
291 | // Seed from the table of large powers, if possible. |
292 | bool first_pass = true; |
293 | while (n >= kLargePowerOfFiveStep) { |
294 | int big_power = |
295 | std::min(n / kLargePowerOfFiveStep, kLargestPowerOfFiveIndex); |
296 | if (first_pass) { |
297 | // just copy, rather than multiplying by 1 |
298 | std::copy( |
299 | LargePowerOfFiveData(big_power), |
300 | LargePowerOfFiveData(big_power) + LargePowerOfFiveSize(big_power), |
301 | answer.words_); |
302 | answer.size_ = LargePowerOfFiveSize(big_power); |
303 | first_pass = false; |
304 | } else { |
305 | answer.MultiplyBy(LargePowerOfFiveSize(big_power), |
306 | LargePowerOfFiveData(big_power)); |
307 | } |
308 | n -= kLargePowerOfFiveStep * big_power; |
309 | } |
310 | answer.MultiplyByFiveToTheNth(n); |
311 | return answer; |
312 | } |
313 | |
314 | template <int max_words> |
315 | void BigUnsigned<max_words>::MultiplyStep(int original_size, |
316 | const uint32_t* other_words, |
317 | int other_size, int step) { |
318 | int this_i = std::min(original_size - 1, step); |
319 | int other_i = step - this_i; |
320 | |
321 | uint64_t this_word = 0; |
322 | uint64_t carry = 0; |
323 | for (; this_i >= 0 && other_i < other_size; --this_i, ++other_i) { |
324 | uint64_t product = words_[this_i]; |
325 | product *= other_words[other_i]; |
326 | this_word += product; |
327 | carry += (this_word >> 32); |
328 | this_word &= 0xffffffff; |
329 | } |
330 | AddWithCarry(step + 1, carry); |
331 | words_[step] = this_word & 0xffffffff; |
332 | if (this_word > 0 && size_ <= step) { |
333 | size_ = step + 1; |
334 | } |
335 | } |
336 | |
337 | template <int max_words> |
338 | std::string BigUnsigned<max_words>::ToString() const { |
339 | BigUnsigned<max_words> copy = *this; |
340 | std::string result; |
341 | // Build result in reverse order |
342 | while (copy.size() > 0) { |
343 | int next_digit = copy.DivMod<10>(); |
344 | result.push_back('0' + next_digit); |
345 | } |
346 | if (result.empty()) { |
347 | result.push_back('0'); |
348 | } |
349 | std::reverse(result.begin(), result.end()); |
350 | return result; |
351 | } |
352 | |
353 | template class BigUnsigned<4>; |
354 | template class BigUnsigned<84>; |
355 | |
356 | } // namespace strings_internal |
357 | } // namespace absl |
358 | |