numbers.cc source code [Abseil/strings/numbers.cc]

1	// Copyright 2017 The Abseil Authors.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// https://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	// This file contains string processing functions related to
16	// numeric values.
17
18	#include "absl/strings/numbers.h"
19
20	#include <algorithm>
21	#include <cassert>
22	#include <cfloat> // for DBL_DIG and FLT_DIG
23	#include <cmath> // for HUGE_VAL
24	#include <cstdint>
25	#include <cstdio>
26	#include <cstdlib>
27	#include <cstring>
28	#include <iterator>
29	#include <limits>
30	#include <memory>
31	#include <utility>
32
33	#include "absl/base/internal/bits.h"
34	#include "absl/base/internal/raw_logging.h"
35	#include "absl/strings/ascii.h"
36	#include "absl/strings/charconv.h"
37	#include "absl/strings/internal/memutil.h"
38	#include "absl/strings/match.h"
39	#include "absl/strings/str_cat.h"
40
41	namespace absl {
42
43	bool SimpleAtof(absl::string_view str, float* out) {
44	*out = `0.0`;
45	str = StripAsciiWhitespace(str);
46	if (!str.empty() && str [`0`] == `'+'`) {
47	str.remove_prefix(`1`);
48	}
49	auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
50	if (result.ec == std::errc::invalid_argument) {
51	return false;
52	}
53	if (result.ptr != str.data() + str.size()) {
54	// not all non-whitespace characters consumed
55	return false;
56	}
57	// from_chars() with DR 3081's current wording will return max() on
58	// overflow. SimpleAtof returns infinity instead.
59	if (result.ec == std::errc::result_out_of_range) {
60	if (*out > `1.0`) {
61	out = std::numeric_limits<float*>::infinity();
62	} else if (*out < -`1.0`) {
63	out = -std::numeric_limits<float*>::infinity();
64	}
65	}
66	return true;
67	}
68
69	bool SimpleAtod(absl::string_view str, double* out) {
70	*out = `0.0`;
71	str = StripAsciiWhitespace(str);
72	if (!str.empty() && str [`0`] == `'+'`) {
73	str.remove_prefix(`1`);
74	}
75	auto result = absl::from_chars(str.data(), str.data() + str.size(), *out);
76	if (result.ec == std::errc::invalid_argument) {
77	return false;
78	}
79	if (result.ptr != str.data() + str.size()) {
80	// not all non-whitespace characters consumed
81	return false;
82	}
83	// from_chars() with DR 3081's current wording will return max() on
84	// overflow. SimpleAtod returns infinity instead.
85	if (result.ec == std::errc::result_out_of_range) {
86	if (*out > `1.0`) {
87	out = std::numeric_limits<double*>::infinity();
88	} else if (*out < -`1.0`) {
89	out = -std::numeric_limits<double*>::infinity();
90	}
91	}
92	return true;
93	}
94
95	namespace {
96
97	// Writes a two-character representation of 'i' to 'buf'. 'i' must be in the
98	// range 0 <= i < 100, and buf must have space for two characters. Example:
99	// char buf[2];
100	// PutTwoDigits(42, buf);
101	// // buf[0] == '4'
102	// // buf[1] == '2'
103	inline void PutTwoDigits(size_t i, char* buf) {
104	static const char two_ASCII_digits[`100`][`2`] = {
105	{`'0'`, `'0'`}, {`'0'`, `'1'`}, {`'0'`, `'2'`}, {`'0'`, `'3'`}, {`'0'`, `'4'`},
106	{`'0'`, `'5'`}, {`'0'`, `'6'`}, {`'0'`, `'7'`}, {`'0'`, `'8'`}, {`'0'`, `'9'`},
107	{`'1'`, `'0'`}, {`'1'`, `'1'`}, {`'1'`, `'2'`}, {`'1'`, `'3'`}, {`'1'`, `'4'`},
108	{`'1'`, `'5'`}, {`'1'`, `'6'`}, {`'1'`, `'7'`}, {`'1'`, `'8'`}, {`'1'`, `'9'`},
109	{`'2'`, `'0'`}, {`'2'`, `'1'`}, {`'2'`, `'2'`}, {`'2'`, `'3'`}, {`'2'`, `'4'`},
110	{`'2'`, `'5'`}, {`'2'`, `'6'`}, {`'2'`, `'7'`}, {`'2'`, `'8'`}, {`'2'`, `'9'`},
111	{`'3'`, `'0'`}, {`'3'`, `'1'`}, {`'3'`, `'2'`}, {`'3'`, `'3'`}, {`'3'`, `'4'`},
112	{`'3'`, `'5'`}, {`'3'`, `'6'`}, {`'3'`, `'7'`}, {`'3'`, `'8'`}, {`'3'`, `'9'`},
113	{`'4'`, `'0'`}, {`'4'`, `'1'`}, {`'4'`, `'2'`}, {`'4'`, `'3'`}, {`'4'`, `'4'`},
114	{`'4'`, `'5'`}, {`'4'`, `'6'`}, {`'4'`, `'7'`}, {`'4'`, `'8'`}, {`'4'`, `'9'`},
115	{`'5'`, `'0'`}, {`'5'`, `'1'`}, {`'5'`, `'2'`}, {`'5'`, `'3'`}, {`'5'`, `'4'`},
116	{`'5'`, `'5'`}, {`'5'`, `'6'`}, {`'5'`, `'7'`}, {`'5'`, `'8'`}, {`'5'`, `'9'`},
117	{`'6'`, `'0'`}, {`'6'`, `'1'`}, {`'6'`, `'2'`}, {`'6'`, `'3'`}, {`'6'`, `'4'`},
118	{`'6'`, `'5'`}, {`'6'`, `'6'`}, {`'6'`, `'7'`}, {`'6'`, `'8'`}, {`'6'`, `'9'`},
119	{`'7'`, `'0'`}, {`'7'`, `'1'`}, {`'7'`, `'2'`}, {`'7'`, `'3'`}, {`'7'`, `'4'`},
120	{`'7'`, `'5'`}, {`'7'`, `'6'`}, {`'7'`, `'7'`}, {`'7'`, `'8'`}, {`'7'`, `'9'`},
121	{`'8'`, `'0'`}, {`'8'`, `'1'`}, {`'8'`, `'2'`}, {`'8'`, `'3'`}, {`'8'`, `'4'`},
122	{`'8'`, `'5'`}, {`'8'`, `'6'`}, {`'8'`, `'7'`}, {`'8'`, `'8'`}, {`'8'`, `'9'`},
123	{`'9'`, `'0'`}, {`'9'`, `'1'`}, {`'9'`, `'2'`}, {`'9'`, `'3'`}, {`'9'`, `'4'`},
124	{`'9'`, `'5'`}, {`'9'`, `'6'`}, {`'9'`, `'7'`}, {`'9'`, `'8'`}, {`'9'`, `'9'`}
125	};
126	assert(i < `100`);
127	memcpy(buf, two_ASCII_digits[i], `2`);
128	}
129
130	} // namespace
131
132	bool SimpleAtob(absl::string_view str, bool* out) {
133	ABSL_RAW_CHECK(out != nullptr, "Output pointer must not be nullptr.");
134	if (EqualsIgnoreCase(str, "true") \|\| EqualsIgnoreCase(str, "t") \|\|
135	EqualsIgnoreCase(str, "yes") \|\| EqualsIgnoreCase(str, "y") \|\|
136	EqualsIgnoreCase(str, "1")) {
137	out = true*;
138	return true;
139	}
140	if (EqualsIgnoreCase(str, "false") \|\| EqualsIgnoreCase(str, "f") \|\|
141	EqualsIgnoreCase(str, "no") \|\| EqualsIgnoreCase(str, "n") \|\|
142	EqualsIgnoreCase(str, "0")) {
143	out = false*;
144	return true;
145	}
146	return false;
147	}
148
149	// ----------------------------------------------------------------------
150	// FastIntToBuffer() overloads
151	//
152	// Like the FastToBuffer() functions above, these are intended for speed.*
153	// Unlike the FastToBuffer() functions, however, these functions write*
154	// their output to the beginning of the buffer. The caller is responsible
155	// for ensuring that the buffer has enough space to hold the output.
156	//
157	// Returns a pointer to the end of the string (i.e. the null character
158	// terminating the string).
159	// ----------------------------------------------------------------------
160
161	namespace {
162
163	// Used to optimize printing a decimal number's final digit.
164	const char one_ASCII_final_digits[`10`][`2`] {
165	{`'0'`, `0`}, {`'1'`, `0`}, {`'2'`, `0`}, {`'3'`, `0`}, {`'4'`, `0`},
166	{`'5'`, `0`}, {`'6'`, `0`}, {`'7'`, `0`}, {`'8'`, `0`}, {`'9'`, `0`},
167	};
168
169	} // namespace
170
171	char* numbers_internal::FastIntToBuffer(uint32_t i, char* buffer) {
172	uint32_t digits;
173	// The idea of this implementation is to trim the number of divides to as few
174	// as possible, and also reducing memory stores and branches, by going in
175	// steps of two digits at a time rather than one whenever possible.
176	// The huge-number case is first, in the hopes that the compiler will output
177	// that case in one branch-free block of code, and only output conditional
178	// branches into it from below.
179	if (i >= `1000000000`) { // >= 1,000,000,000
180	digits = i / `100000000`; // 100,000,000
181	i -= digits * `100000000`;
182	PutTwoDigits(digits, buffer);
183	buffer += `2`;
184	lt100_000_000:
185	digits = i / `1000000`; // 1,000,000
186	i -= digits * `1000000`;
187	PutTwoDigits(digits, buffer);
188	buffer += `2`;
189	lt1_000_000:
190	digits = i / `10000`; // 10,000
191	i -= digits * `10000`;
192	PutTwoDigits(digits, buffer);
193	buffer += `2`;
194	lt10_000:
195	digits = i / `100`;
196	i -= digits * `100`;
197	PutTwoDigits(digits, buffer);
198	buffer += `2`;
199	lt100:
200	digits = i;
201	PutTwoDigits(digits, buffer);
202	buffer += `2`;
203	*buffer = `0`;
204	return buffer;
205	}
206
207	if (i < `100`) {
208	digits = i;
209	if (i >= `10`) goto lt100;
210	memcpy(buffer, one_ASCII_final_digits[i], `2`);
211	return buffer + `1`;
212	}
213	if (i < `10000`) { // 10,000
214	if (i >= `1000`) goto lt10_000;
215	digits = i / `100`;
216	i -= digits * `100`;
217	*buffer++ = `'0'` + digits;
218	goto lt100;
219	}
220	if (i < `1000000`) { // 1,000,000
221	if (i >= `100000`) goto lt1_000_000;
222	digits = i / `10000`; // 10,000
223	i -= digits * `10000`;
224	*buffer++ = `'0'` + digits;
225	goto lt10_000;
226	}
227	if (i < `100000000`) { // 100,000,000
228	if (i >= `10000000`) goto lt100_000_000;
229	digits = i / `1000000`; // 1,000,000
230	i -= digits * `1000000`;
231	*buffer++ = `'0'` + digits;
232	goto lt1_000_000;
233	}
234	// we already know that i < 1,000,000,000
235	digits = i / `100000000`; // 100,000,000
236	i -= digits * `100000000`;
237	*buffer++ = `'0'` + digits;
238	goto lt100_000_000;
239	}
240
241	char* numbers_internal::FastIntToBuffer(int32_t i, char* buffer) {
242	uint32_t u = i;
243	if (i < `0`) {
244	*buffer++ = `'-'`;
245	// We need to do the negation in modular (i.e., "unsigned")
246	// arithmetic; MSVC++ apprently warns for plain "-u", so
247	// we write the equivalent expression "0 - u" instead.
248	u = `0` - u;
249	}
250	return numbers_internal::FastIntToBuffer(u, buffer);
251	}
252
253	char* numbers_internal::FastIntToBuffer(uint64_t i, char* buffer) {
254	uint32_t u32 = static_cast<uint32_t>(i);
255	if (u32 == i) return numbers_internal::FastIntToBuffer(u32, buffer);
256
257	// Here we know i has at least 10 decimal digits.
258	uint64_t top_1to11 = i / `1000000000`;
259	u32 = static_cast<uint32_t>(i - top_1to11 * `1000000000`);
260	uint32_t top_1to11_32 = static_cast<uint32_t>(top_1to11);
261
262	if (top_1to11_32 == top_1to11) {
263	buffer = numbers_internal::FastIntToBuffer(top_1to11_32, buffer);
264	} else {
265	// top_1to11 has more than 32 bits too; print it in two steps.
266	uint32_t top_8to9 = static_cast<uint32_t>(top_1to11 / `100`);
267	uint32_t mid_2 = static_cast<uint32_t>(top_1to11 - top_8to9 * `100`);
268	buffer = numbers_internal::FastIntToBuffer(top_8to9, buffer);
269	PutTwoDigits(mid_2, buffer);
270	buffer += `2`;
271	}
272
273	// We have only 9 digits now, again the maximum uint32_t can handle fully.
274	uint32_t digits = u32 / `10000000`; // 10,000,000
275	u32 -= digits * `10000000`;
276	PutTwoDigits(digits, buffer);
277	buffer += `2`;
278	digits = u32 / `100000`; // 100,000
279	u32 -= digits * `100000`;
280	PutTwoDigits(digits, buffer);
281	buffer += `2`;
282	digits = u32 / `1000`; // 1,000
283	u32 -= digits * `1000`;
284	PutTwoDigits(digits, buffer);
285	buffer += `2`;
286	digits = u32 / `10`;
287	u32 -= digits * `10`;
288	PutTwoDigits(digits, buffer);
289	buffer += `2`;
290	memcpy(buffer, one_ASCII_final_digits[u32], `2`);
291	return buffer + `1`;
292	}
293
294	char* numbers_internal::FastIntToBuffer(int64_t i, char* buffer) {
295	uint64_t u = i;
296	if (i < `0`) {
297	*buffer++ = `'-'`;
298	u = `0` - u;
299	}
300	return numbers_internal::FastIntToBuffer(u, buffer);
301	}
302
303	// Given a 128-bit number expressed as a pair of uint64_t, high half first,
304	// return that number multiplied by the given 32-bit value. If the result is
305	// too large to fit in a 128-bit number, divide it by 2 until it fits.
306	static std::pair<uint64_t, uint64_t> Mul32(std::pair<uint64_t, uint64_t> num,
307	uint32_t mul) {
308	uint64_t bits0_31 = num.second & `0xFFFFFFFF`;
309	uint64_t bits32_63 = num.second >> `32`;
310	uint64_t bits64_95 = num.first & `0xFFFFFFFF`;
311	uint64_t bits96_127 = num.first >> `32`;
312
313	// The picture so far: each of these 64-bit values has only the lower 32 bits
314	// filled in.
315	// bits96_127: [ 00000000 xxxxxxxx ]
316	// bits64_95: [ 00000000 xxxxxxxx ]
317	// bits32_63: [ 00000000 xxxxxxxx ]
318	// bits0_31: [ 00000000 xxxxxxxx ]
319
320	bits0_31 *= mul;
321	bits32_63 *= mul;
322	bits64_95 *= mul;
323	bits96_127 *= mul;
324
325	// Now the top halves may also have value, though all 64 of their bits will
326	// never be set at the same time, since they are a result of a 32x32 bit
327	// multiply. This makes the carry calculation slightly easier.
328	// bits96_127: [ mmmmmmmm \| mmmmmmmm ]
329	// bits64_95: [ \| mmmmmmmm mmmmmmmm \| ]
330	// bits32_63: \| [ mmmmmmmm \| mmmmmmmm ]
331	// bits0_31: \| [ \| mmmmmmmm mmmmmmmm ]
332	// eventually: [ bits128_up \| ...bits64_127.... \| ..bits0_63... ]
333
334	uint64_t bits0_63 = bits0_31 + (bits32_63 << `32`);
335	uint64_t bits64_127 = bits64_95 + (bits96_127 << `32`) + (bits32_63 >> `32`) +
336	(bits0_63 < bits0_31);
337	uint64_t bits128_up = (bits96_127 >> `32`) + (bits64_127 < bits64_95);
338	if (bits128_up == `0`) return {bits64_127, bits0_63};
339
340	int shift = `64` - base_internal::CountLeadingZeros64(bits128_up);
341	uint64_t lo = (bits0_63 >> shift) + (bits64_127 << (`64` - shift));
342	uint64_t hi = (bits64_127 >> shift) + (bits128_up << (`64` - shift));
343	return {hi, lo};
344	}
345
346	// Compute num 5 ^ expfive, and return the first 128 bits of the result,*
347	// where the first bit is always a one. So PowFive(1, 0) starts 0b100000,
348	// PowFive(1, 1) starts 0b101000, PowFive(1, 2) starts 0b110010, etc.
349	static std::pair<uint64_t, uint64_t> PowFive(uint64_t num, int expfive) {
350	std::pair<uint64_t, uint64_t> result = {num, `0`};
351	while (expfive >= `13`) {
352	// 5^13 is the highest power of five that will fit in a 32-bit integer.
353	result = Mul32(result, `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5`);
354	expfive -= `13`;
355	}
356	constexpr int powers_of_five[`13`] = {
357	`1`,
358	`5`,
359	`5` * `5`,
360	`5` * `5` * `5`,
361	`5` * `5` * `5` * `5`,
362	`5` * `5` * `5` * `5` * `5`,
363	`5` * `5` * `5` * `5` * `5` * `5`,
364	`5` * `5` * `5` * `5` * `5` * `5` * `5`,
365	`5` * `5` * `5` * `5` * `5` * `5` * `5` * `5`,
366	`5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5`,
367	`5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5`,
368	`5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5`,
369	`5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5` * `5`};
370	result = Mul32(result, powers_of_five[expfive & `15`]);
371	int shift = base_internal::CountLeadingZeros64(result.first);
372	if (shift != `0`) {
373	result.first = (result.first << shift) + (result.second >> (`64` - shift));
374	result.second = (result.second << shift);
375	}
376	return result;
377	}
378
379	struct ExpDigits {
380	int32_t exponent;
381	char digits[`6`];
382	};
383
384	// SplitToSix converts value, a positive double-precision floating-point number,
385	// into a base-10 exponent and 6 ASCII digits, where the first digit is never
386	// zero. For example, SplitToSix(1) returns an exponent of zero and a digits
387	// array of {'1', '0', '0', '0', '0', '0'}. If value is exactly halfway between
388	// two possible representations, e.g. value = 100000.5, then "round to even" is
389	// performed.
390	static ExpDigits SplitToSix(const double value) {
391	ExpDigits exp_dig;
392	int exp = `5`;
393	double d = value;
394	// First step: calculate a close approximation of the output, where the
395	// value d will be between 100,000 and 999,999, representing the digits
396	// in the output ASCII array, and exp is the base-10 exponent. It would be
397	// faster to use a table here, and to look up the base-2 exponent of value,
398	// however value is an IEEE-754 64-bit number, so the table would have 2,000
399	// entries, which is not cache-friendly.
400	if (d >= `999999.5`) {
401	if (d >= `1e+261`) exp += `256`, d *= `1e-256`;
402	if (d >= `1e+133`) exp += `128`, d *= `1e-128`;
403	if (d >= `1e+69`) exp += `64`, d *= `1e-64`;
404	if (d >= `1e+37`) exp += `32`, d *= `1e-32`;
405	if (d >= `1e+21`) exp += `16`, d *= `1e-16`;
406	if (d >= `1e+13`) exp += `8`, d *= `1e-8`;
407	if (d >= `1e+9`) exp += `4`, d *= `1e-4`;
408	if (d >= `1e+7`) exp += `2`, d *= `1e-2`;
409	if (d >= `1e+6`) exp += `1`, d *= `1e-1`;
410	} else {
411	if (d < `1e-250`) exp -= `256`, d *= `1e256`;
412	if (d < `1e-122`) exp -= `128`, d *= `1e128`;
413	if (d < `1e-58`) exp -= `64`, d *= `1e64`;
414	if (d < `1e-26`) exp -= `32`, d *= `1e32`;
415	if (d < `1e-10`) exp -= `16`, d *= `1e16`;
416	if (d < `1e-2`) exp -= `8`, d *= `1e8`;
417	if (d < `1e+2`) exp -= `4`, d *= `1e4`;
418	if (d < `1e+4`) exp -= `2`, d *= `1e2`;
419	if (d < `1e+5`) exp -= `1`, d *= `1e1`;
420	}
421	// At this point, d is in the range [99999.5..999999.5) and exp is in the
422	// range [-324..308]. Since we need to round d up, we want to add a half
423	// and truncate.
424	// However, the technique above may have lost some precision, due to its
425	// repeated multiplication by constants that each may be off by half a bit
426	// of precision. This only matters if we're close to the edge though.
427	// Since we'd like to know if the fractional part of d is close to a half,
428	// we multiply it by 65536 and see if the fractional part is close to 32768.
429	// (The number doesn't have to be a power of two,but powers of two are faster)
430	uint64_t d64k = d * `65536`;
431	int dddddd; // A 6-digit decimal integer.
432	if ((d64k % `65536`) == `32767` \|\| (d64k % `65536`) == `32768`) {
433	// OK, it's fairly likely that precision was lost above, which is
434	// not a surprise given only 52 mantissa bits are available. Therefore
435	// redo the calculation using 128-bit numbers. (64 bits are not enough).
436
437	// Start out with digits rounded down; maybe add one below.
438	dddddd = static_cast<int>(d64k / `65536`);
439
440	// mantissa is a 64-bit integer representing M.mmm... 2^63. The actual*
441	// value we're representing, of course, is M.mmm... 2^exp2.*
442	int exp2;
443	double m = std::frexp(value, &exp2);
444	uint64_t mantissa = m * (`32768.0` * `65536.0` * `65536.0` * `65536.0`);
445	// std::frexp returns an m value in the range [0.5, 1.0), however we
446	// can't multiply it by 2^64 and convert to an integer because some FPUs
447	// throw an exception when converting an number higher than 2^63 into an
448	// integer - even an unsigned 64-bit integer! Fortunately it doesn't matter
449	// since m only has 52 significant bits anyway.
450	mantissa <<= `1`;
451	exp2 -= `64`; // not needed, but nice for debugging
452
453	// OK, we are here to compare:
454	// (dddddd + 0.5) 10^(exp-5) vs. mantissa * 2^exp2*
455	// so we can round up dddddd if appropriate. Those values span the full
456	// range of 600 orders of magnitude of IEE 64-bit floating-point.
457	// Fortunately, we already know they are very close, so we don't need to
458	// track the base-2 exponent of both sides. This greatly simplifies the
459	// the math since the 2^exp2 calculation is unnecessary and the power-of-10
460	// calculation can become a power-of-5 instead.
461
462	std::pair<uint64_t, uint64_t> edge, val;
463	if (exp >= `6`) {
464	// Compare (dddddd + 0.5) 5 ^ (exp - 5) to mantissa*
465	// Since we're tossing powers of two, 2 dddddd + 1 is the*
466	// same as dddddd + 0.5
467	edge = PowFive(`2` * dddddd + `1`, exp - `5`);
468
469	val.first = mantissa;
470	val.second = `0`;
471	} else {
472	// We can't compare (dddddd + 0.5) 5 ^ (exp - 5) to mantissa as we did*
473	// above because (exp - 5) is negative. So we compare (dddddd + 0.5) to
474	// mantissa 5 ^ (5 - exp)*
475	edge = PowFive(`2` * dddddd + `1`, `0`);
476
477	val = PowFive(mantissa, `5` - exp);
478	}
479	// printf("exp=%d %016lx %016lx vs %016lx %016lx\n", exp, val.first,
480	// val.second, edge.first, edge.second);
481	if (val > edge) {
482	dddddd++;
483	} else if (val == edge) {
484	dddddd += (dddddd & `1`);
485	}
486	} else {
487	// Here, we are not close to the edge.
488	dddddd = static_cast<int>((d64k + `32768`) / `65536`);
489	}
490	if (dddddd == `1000000`) {
491	dddddd = `100000`;
492	exp += `1`;
493	}
494	exp_dig.exponent = exp;
495
496	int two_digits = dddddd / `10000`;
497	dddddd -= two_digits * `10000`;
498	PutTwoDigits(two_digits, &exp_dig.digits[`0`]);
499
500	two_digits = dddddd / `100`;
501	dddddd -= two_digits * `100`;
502	PutTwoDigits(two_digits, &exp_dig.digits[`2`]);
503
504	PutTwoDigits(dddddd, &exp_dig.digits[`4`]);
505	return exp_dig;
506	}
507
508	// Helper function for fast formatting of floating-point.
509	// The result is the same as "%g", a.k.a. "%.6g".
510	size_t numbers_internal::SixDigitsToBuffer(double d, char* const buffer) {
511	static_assert(std::numeric_limits<float>::is_iec559,
512	"IEEE-754/IEC-559 support only");
513
514	char* out = buffer; // we write data to out, incrementing as we go, but
515	// FloatToBuffer always returns the address of the buffer
516	// passed in.
517
518	if (std::isnan(d)) {
519	strcpy(out, "nan"); // NOLINT(runtime/printf)
520	return `3`;
521	}
522	if (d == `0`) { // +0 and -0 are handled here
523	if (std::signbit(d)) *out++ = `'-'`;
524	*out++ = `'0'`;
525	*out = `0`;
526	return out - buffer;
527	}
528	if (d < `0`) {
529	*out++ = `'-'`;
530	d = -d;
531	}
532	if (std::isinf(d)) {
533	strcpy(out, "inf"); // NOLINT(runtime/printf)
534	return out + `3` - buffer;
535	}
536
537	auto exp_dig = SplitToSix(d);
538	int exp = exp_dig.exponent;
539	const char* digits = exp_dig.digits;
540	out[`0`] = `'0'`;
541	out[`1`] = `'.'`;
542	switch (exp) {
543	case `5`:
544	memcpy(out, &digits[`0`], `6`), out += `6`;
545	*out = `0`;
546	return out - buffer;
547	case `4`:
548	memcpy(out, &digits[`0`], `5`), out += `5`;
549	if (digits[`5`] != `'0'`) {
550	*out++ = `'.'`;
551	*out++ = digits[`5`];
552	}
553	*out = `0`;
554	return out - buffer;
555	case `3`:
556	memcpy(out, &digits[`0`], `4`), out += `4`;
557	if ((digits[`5`] \| digits[`4`]) != `'0'`) {
558	*out++ = `'.'`;
559	*out++ = digits[`4`];
560	if (digits[`5`] != `'0'`) *out++ = digits[`5`];
561	}
562	*out = `0`;
563	return out - buffer;
564	case `2`:
565	memcpy(out, &digits[`0`], `3`), out += `3`;
566	*out++ = `'.'`;
567	memcpy(out, &digits[`3`], `3`);
568	out += `3`;
569	while (out[-`1`] == `'0'`) --out;
570	if (out[-`1`] == `'.'`) --out;
571	*out = `0`;
572	return out - buffer;
573	case `1`:
574	memcpy(out, &digits[`0`], `2`), out += `2`;
575	*out++ = `'.'`;
576	memcpy(out, &digits[`2`], `4`);
577	out += `4`;
578	while (out[-`1`] == `'0'`) --out;
579	if (out[-`1`] == `'.'`) --out;
580	*out = `0`;
581	return out - buffer;
582	case `0`:
583	memcpy(out, &digits[`0`], `1`), out += `1`;
584	*out++ = `'.'`;
585	memcpy(out, &digits[`1`], `5`);
586	out += `5`;
587	while (out[-`1`] == `'0'`) --out;
588	if (out[-`1`] == `'.'`) --out;
589	*out = `0`;
590	return out - buffer;
591	case -`4`:
592	out[`2`] = `'0'`;
593	++out;
594	ABSL_FALLTHROUGH_INTENDED;
595	case -`3`:
596	out[`2`] = `'0'`;
597	++out;
598	ABSL_FALLTHROUGH_INTENDED;
599	case -`2`:
600	out[`2`] = `'0'`;
601	++out;
602	ABSL_FALLTHROUGH_INTENDED;
603	case -`1`:
604	out += `2`;
605	memcpy(out, &digits[`0`], `6`);
606	out += `6`;
607	while (out[-`1`] == `'0'`) --out;
608	*out = `0`;
609	return out - buffer;
610	}
611	assert(exp < -`4` \|\| exp >= `6`);
612	out[`0`] = digits[`0`];
613	assert(out[`1`] == `'.'`);
614	out += `2`;
615	memcpy(out, &digits[`1`], `5`), out += `5`;
616	while (out[-`1`] == `'0'`) --out;
617	if (out[-`1`] == `'.'`) --out;
618	*out++ = `'e'`;
619	if (exp > `0`) {
620	*out++ = `'+'`;
621	} else {
622	*out++ = `'-'`;
623	exp = -exp;
624	}
625	if (exp > `99`) {
626	int dig1 = exp / `100`;
627	exp -= dig1 * `100`;
628	*out++ = `'0'` + dig1;
629	}
630	PutTwoDigits(exp, out);
631	out += `2`;
632	*out = `0`;
633	return out - buffer;
634	}
635
636	namespace {
637	// Represents integer values of digits.
638	// Uses 36 to indicate an invalid character since we support
639	// bases up to 36.
640	static const int8_t kAsciiToInt[`256`] = {
641	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, // 16 36s.
642	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
643	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `0`, `1`, `2`, `3`, `4`, `5`,
644	`6`, `7`, `8`, `9`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`,
645	`18`, `19`, `20`, `21`, `22`, `23`, `24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`, `33`, `34`, `35`, `36`,
646	`36`, `36`, `36`, `36`, `36`, `10`, `11`, `12`, `13`, `14`, `15`, `16`, `17`, `18`, `19`, `20`, `21`, `22`, `23`,
647	`24`, `25`, `26`, `27`, `28`, `29`, `30`, `31`, `32`, `33`, `34`, `35`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
648	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
649	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
650	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
651	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
652	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
653	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`,
654	`36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`, `36`};
655
656	// Parse the sign and optional hex or oct prefix in text.
657	inline bool safe_parse_sign_and_base(absl::string_view* text /inout/,
658	int* base_ptr /inout/,
659	bool* negative_ptr /output/) {
660	if (text->data() == nullptr) {
661	return false;
662	}
663
664	const char* start = text->data();
665	const char* end = start + text->size();
666	int base = *base_ptr;
667
668	// Consume whitespace.
669	while (start < end && absl::ascii_isspace(start[`0`])) {
670	++start;
671	}
672	while (start < end && absl::ascii_isspace(end[-`1`])) {
673	--end;
674	}
675	if (start >= end) {
676	return false;
677	}
678
679	// Consume sign.
680	*negative_ptr = (start[`0`] == `'-'`);
681	if (*negative_ptr \|\| start[`0`] == `'+'`) {
682	++start;
683	if (start >= end) {
684	return false;
685	}
686	}
687
688	// Consume base-dependent prefix.
689	// base 0: "0x" -> base 16, "0" -> base 8, default -> base 10
690	// base 16: "0x" -> base 16
691	// Also validate the base.
692	if (base == `0`) {
693	if (end - start >= `2` && start[`0`] == `'0'` &&
694	(start[`1`] == `'x'` \|\| start[`1`] == `'X'`)) {
695	base = `16`;
696	start += `2`;
697	if (start >= end) {
698	// "0x" with no digits after is invalid.
699	return false;
700	}
701	} else if (end - start >= `1` && start[`0`] == `'0'`) {
702	base = `8`;
703	start += `1`;
704	} else {
705	base = `10`;
706	}
707	} else if (base == `16`) {
708	if (end - start >= `2` && start[`0`] == `'0'` &&
709	(start[`1`] == `'x'` \|\| start[`1`] == `'X'`)) {
710	start += `2`;
711	if (start >= end) {
712	// "0x" with no digits after is invalid.
713	return false;
714	}
715	}
716	} else if (base >= `2` && base <= `36`) {
717	// okay
718	} else {
719	return false;
720	}
721	*text = absl::string_view (start, end - start);
722	*base_ptr = base;
723	return true;
724	}
725
726	// Consume digits.
727	//
728	// The classic loop:
729	//
730	// for each digit
731	// value = value base + digit*
732	// value = sign*
733	//
734	// The classic loop needs overflow checking. It also fails on the most
735	// negative integer, -2147483648 in 32-bit two's complement representation.
736	//
737	// My improved loop:
738	//
739	// if (!negative)
740	// for each digit
741	// value = value base*
742	// value = value + digit
743	// else
744	// for each digit
745	// value = value base*
746	// value = value - digit
747	//
748	// Overflow checking becomes simple.
749
750	// Lookup tables per IntType:
751	// vmax/base and vmin/base are precomputed because division costs at least 8ns.
752	// TODO(junyer): Doing this per base instead (i.e. an array of structs, not a
753	// struct of arrays) would probably be better in terms of d-cache for the most
754	// commonly used bases.
755	template <typename IntType>
756	struct LookupTables {
757	static const IntType kVmaxOverBase[];
758	static const IntType kVminOverBase[];
759	};
760
761	// An array initializer macro for X/base where base in [0, 36].
762	// However, note that lookups for base in [0, 1] should never happen because
763	// base has been validated to be in [2, 36] by safe_parse_sign_and_base().
764	#define X_OVER_BASE_INITIALIZER(X) \
765	{ \
766	0, 0, X / 2, X / 3, X / 4, X / 5, X / 6, X / 7, X / 8, X / 9, X / 10, \
767	X / 11, X / 12, X / 13, X / 14, X / 15, X / 16, X / 17, X / 18, \
768	X / 19, X / 20, X / 21, X / 22, X / 23, X / 24, X / 25, X / 26, \
769	X / 27, X / 28, X / 29, X / 30, X / 31, X / 32, X / 33, X / 34, \
770	X / 35, X / 36, \
771	}
772
773	template <typename IntType>
774	const IntType LookupTables<IntType>::kVmaxOverBase[] =
775	X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::max());
776
777	template <typename IntType>
778	const IntType LookupTables<IntType>::kVminOverBase[] =
779	X_OVER_BASE_INITIALIZER(std::numeric_limits<IntType>::min());
780
781	#undef X_OVER_BASE_INITIALIZER
782
783	template <typename IntType>
784	inline bool safe_parse_positive_int(absl::string_view text, int base,
785	IntType* value_p) {
786	IntType value = `0`;
787	const IntType vmax = std::numeric_limits<IntType>::max();
788	assert(vmax > `0`);
789	assert(base >= `0`);
790	assert(vmax >= static_cast<IntType>(base));
791	const IntType vmax_over_base = LookupTables<IntType>::kVmaxOverBase[base];
792	const char* start = text.data();
793	const char* end = start + text.size();
794	// loop over digits
795	for (; start < end; ++start) {
796	unsigned char c = static_cast<unsigned char>(start[`0`]);
797	int digit = kAsciiToInt[c];
798	if (digit >= base) {
799	*value_p = value;
800	return false;
801	}
802	if (value > vmax_over_base) {
803	*value_p = vmax;
804	return false;
805	}
806	value *= base;
807	if (value > vmax - digit) {
808	*value_p = vmax;
809	return false;
810	}
811	value += digit;
812	}
813	*value_p = value;
814	return true;
815	}
816
817	template <typename IntType>
818	inline bool safe_parse_negative_int(absl::string_view text, int base,
819	IntType* value_p) {
820	IntType value = `0`;
821	const IntType vmin = std::numeric_limits<IntType>::min();
822	assert(vmin < `0`);
823	assert(vmin <= `0` - base);
824	IntType vmin_over_base = LookupTables<IntType>::kVminOverBase[base];
825	// 2003 c++ standard [expr.mul]
826	// "... the sign of the remainder is implementation-defined."
827	// Although (vmin/base)base + vmin%base is always vmin.*
828	// 2011 c++ standard tightens the spec but we cannot rely on it.
829	// TODO(junyer): Handle this in the lookup table generation.
830	if (vmin % base > `0`) {
831	vmin_over_base += `1`;
832	}
833	const char* start = text.data();
834	const char* end = start + text.size();
835	// loop over digits
836	for (; start < end; ++start) {
837	unsigned char c = static_cast<unsigned char>(start[`0`]);
838	int digit = kAsciiToInt[c];
839	if (digit >= base) {
840	*value_p = value;
841	return false;
842	}
843	if (value < vmin_over_base) {
844	*value_p = vmin;
845	return false;
846	}
847	value *= base;
848	if (value < vmin + digit) {
849	*value_p = vmin;
850	return false;
851	}
852	value -= digit;
853	}
854	*value_p = value;
855	return true;
856	}
857
858	// Input format based on POSIX.1-2008 strtol
859	// http://pubs.opengroup.org/onlinepubs/9699919799/functions/strtol.html
860	template <typename IntType>
861	inline bool safe_int_internal(absl::string_view text, IntType* value_p,
862	int base) {
863	*value_p = `0`;
864	bool negative;
865	if (!safe_parse_sign_and_base(&text, &base, &negative)) {
866	return false;
867	}
868	if (!negative) {
869	return safe_parse_positive_int(text, base, value_p);
870	} else {
871	return safe_parse_negative_int(text, base, value_p);
872	}
873	}
874
875	template <typename IntType>
876	inline bool safe_uint_internal(absl::string_view text, IntType* value_p,
877	int base) {
878	*value_p = `0`;
879	bool negative;
880	if (!safe_parse_sign_and_base(&text, &base, &negative) \|\| negative) {
881	return false;
882	}
883	return safe_parse_positive_int(text, base, value_p);
884	}
885	} // anonymous namespace
886
887	namespace numbers_internal {
888	bool safe_strto32_base(absl::string_view text, int32_t* value, int base) {
889	return safe_int_internal<int32_t>(text, value, base);
890	}
891
892	bool safe_strto64_base(absl::string_view text, int64_t* value, int base) {
893	return safe_int_internal<int64_t>(text, value, base);
894	}
895
896	bool safe_strtou32_base(absl::string_view text, uint32_t* value, int base) {
897	return safe_uint_internal<uint32_t>(text, value, base);
898	}
899
900	bool safe_strtou64_base(absl::string_view text, uint64_t* value, int base) {
901	return safe_uint_internal<uint64_t>(text, value, base);
902	}
903	} // namespace numbers_internal
904
905	} // namespace absl
906

Browse the source code of Abseil/strings/numbers.cc