1// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
2// Licensed under the MIT License:
3//
4// Permission is hereby granted, free of charge, to any person obtaining a copy
5// of this software and associated documentation files (the "Software"), to deal
6// in the Software without restriction, including without limitation the rights
7// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8// copies of the Software, and to permit persons to whom the Software is
9// furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20// THE SOFTWARE.
21
22#include "string.h"
23#include "debug.h"
24#include <stdio.h>
25#include <float.h>
26#include <errno.h>
27#include <stdlib.h>
28#include <stdint.h>
29
30namespace kj {
31
32#if _MSC_VER
33#pragma warning(disable: 4996)
34// Warns that sprintf() is buffer-overrunny. We know that, it's cool.
35#endif
36
37namespace {
38bool isHex(const char *s) {
39 if (*s == '-') s++;
40 return s[0] == '0' && (s[1] == 'x' || s[1] == 'X');
41}
42
43long long parseSigned(const StringPtr& s, long long min, long long max) {
44 KJ_REQUIRE(s != nullptr, "String does not contain valid number", s) { return 0; }
45 char *endPtr;
46 errno = 0;
47 auto value = strtoll(s.begin(), &endPtr, isHex(s.cStr()) ? 16 : 10);
48 KJ_REQUIRE(endPtr == s.end(), "String does not contain valid number", s) { return 0; }
49 KJ_REQUIRE(errno != ERANGE, "Value out-of-range", s) { return 0; }
50 KJ_REQUIRE(value >= min && value <= max, "Value out-of-range", value, min, max) { return 0; }
51 return value;
52}
53
54unsigned long long parseUnsigned(const StringPtr& s, unsigned long long max) {
55 KJ_REQUIRE(s != nullptr, "String does not contain valid number", s) { return 0; }
56 char *endPtr;
57 errno = 0;
58 auto value = strtoull(s.begin(), &endPtr, isHex(s.cStr()) ? 16 : 10);
59 KJ_REQUIRE(endPtr == s.end(), "String does not contain valid number", s) { return 0; }
60 KJ_REQUIRE(errno != ERANGE, "Value out-of-range", s) { return 0; }
61 KJ_REQUIRE(value <= max, "Value out-of-range", value, max) { return 0; }
62 //strtoull("-1") does not fail with ERANGE
63 KJ_REQUIRE(s[0] != '-', "Value out-of-range", s) { return 0; }
64 return value;
65}
66
67template <typename T>
68T parseInteger(const StringPtr& s) {
69 if (static_cast<T>(minValue) < 0) {
70 long long min = static_cast<T>(minValue);
71 long long max = static_cast<T>(maxValue);
72 return static_cast<T>(parseSigned(s, min, max));
73 } else {
74 unsigned long long max = static_cast<T>(maxValue);
75 return static_cast<T>(parseUnsigned(s, max));
76 }
77}
78
79double parseDouble(const StringPtr& s) {
80 KJ_REQUIRE(s != nullptr, "String does not contain valid number", s) { return 0; }
81 char *endPtr;
82 errno = 0;
83 auto value = strtod(s.begin(), &endPtr);
84 KJ_REQUIRE(endPtr == s.end(), "String does not contain valid floating number", s) { return 0; }
85#if _WIN32 || __CYGWIN__ || __BIONIC__
86 // When Windows' strtod() parses "nan", it returns a value with the sign bit set. But, our
87 // preferred canonical value for NaN does not have the sign bit set, and all other platforms
88 // return one without the sign bit set. So, on Windows, detect NaN and return our preferred
89 // version.
90 //
91 // Cygwin seemingly does not try to emulate Linux behavior here, but rather allows Windows'
92 // behavior to leak through. (Conversely, WINE actually produces the Linux behavior despite
93 // trying to behave like Win32...)
94 //
95 // Bionic (Android) failed the unit test and so I added it to the list without investigating
96 // further.
97 if (isNaN(value)) {
98 // NaN
99 return kj::nan();
100 }
101#endif
102 return value;
103}
104
105} // namespace
106
107#define PARSE_AS_INTEGER(T) \
108 template <> T StringPtr::parseAs<T>() const { return parseInteger<T>(*this); }
109PARSE_AS_INTEGER(char);
110PARSE_AS_INTEGER(signed char);
111PARSE_AS_INTEGER(unsigned char);
112PARSE_AS_INTEGER(short);
113PARSE_AS_INTEGER(unsigned short);
114PARSE_AS_INTEGER(int);
115PARSE_AS_INTEGER(unsigned int);
116PARSE_AS_INTEGER(long);
117PARSE_AS_INTEGER(unsigned long);
118PARSE_AS_INTEGER(long long);
119PARSE_AS_INTEGER(unsigned long long);
120#undef PARSE_AS_INTEGER
121template <> double StringPtr::parseAs<double>() const { return parseDouble(*this); }
122template <> float StringPtr::parseAs<float>() const { return parseDouble(*this); }
123
124String heapString(size_t size) {
125 char* buffer = _::HeapArrayDisposer::allocate<char>(size + 1);
126 buffer[size] = '\0';
127 return String(buffer, size, _::HeapArrayDisposer::instance);
128}
129
130String heapString(const char* value, size_t size) {
131 char* buffer = _::HeapArrayDisposer::allocate<char>(size + 1);
132 if (size != 0u) {
133 memcpy(buffer, value, size);
134 }
135 buffer[size] = '\0';
136 return String(buffer, size, _::HeapArrayDisposer::instance);
137}
138
139template <typename T>
140static CappedArray<char, sizeof(T) * 2 + 1> hexImpl(T i) {
141 // We don't use sprintf() because it's not async-signal-safe (for strPreallocated()).
142 CappedArray<char, sizeof(T) * 2 + 1> result;
143 uint8_t reverse[sizeof(T) * 2];
144 uint8_t* p = reverse;
145 if (i == 0) {
146 *p++ = 0;
147 } else {
148 while (i > 0) {
149 *p++ = i % 16;
150 i /= 16;
151 }
152 }
153
154 char* p2 = result.begin();
155 while (p > reverse) {
156 *p2++ = "0123456789abcdef"[*--p];
157 }
158 result.setSize(p2 - result.begin());
159 return result;
160}
161
162#define HEXIFY_INT(type) \
163CappedArray<char, sizeof(type) * 2 + 1> hex(type i) { \
164 return hexImpl<type>(i); \
165}
166
167HEXIFY_INT(unsigned char);
168HEXIFY_INT(unsigned short);
169HEXIFY_INT(unsigned int);
170HEXIFY_INT(unsigned long);
171HEXIFY_INT(unsigned long long);
172
173#undef HEXIFY_INT
174
175namespace _ { // private
176
177StringPtr Stringifier::operator*(decltype(nullptr)) const {
178 return "nullptr";
179}
180
181StringPtr Stringifier::operator*(bool b) const {
182 return b ? StringPtr("true") : StringPtr("false");
183}
184
185template <typename T, typename Unsigned>
186static CappedArray<char, sizeof(T) * 3 + 2> stringifyImpl(T i) {
187 // We don't use sprintf() because it's not async-signal-safe (for strPreallocated()).
188 CappedArray<char, sizeof(T) * 3 + 2> result;
189 bool negative = i < 0;
190 Unsigned u = negative ? -i : i;
191 uint8_t reverse[sizeof(T) * 3 + 1];
192 uint8_t* p = reverse;
193 if (u == 0) {
194 *p++ = 0;
195 } else {
196 while (u > 0) {
197 *p++ = u % 10;
198 u /= 10;
199 }
200 }
201
202 char* p2 = result.begin();
203 if (negative) *p2++ = '-';
204 while (p > reverse) {
205 *p2++ = '0' + *--p;
206 }
207 result.setSize(p2 - result.begin());
208 return result;
209}
210
211#define STRINGIFY_INT(type, unsigned) \
212CappedArray<char, sizeof(type) * 3 + 2> Stringifier::operator*(type i) const { \
213 return stringifyImpl<type, unsigned>(i); \
214}
215
216STRINGIFY_INT(signed char, uint);
217STRINGIFY_INT(unsigned char, uint);
218STRINGIFY_INT(short, uint);
219STRINGIFY_INT(unsigned short, uint);
220STRINGIFY_INT(int, uint);
221STRINGIFY_INT(unsigned int, uint);
222STRINGIFY_INT(long, unsigned long);
223STRINGIFY_INT(unsigned long, unsigned long);
224STRINGIFY_INT(long long, unsigned long long);
225STRINGIFY_INT(unsigned long long, unsigned long long);
226
227#undef STRINGIFY_INT
228
229CappedArray<char, sizeof(const void*) * 2 + 1> Stringifier::operator*(const void* i) const { \
230 return hexImpl<uintptr_t>(reinterpret_cast<uintptr_t>(i));
231}
232
233namespace {
234
235// ----------------------------------------------------------------------
236// DoubleToBuffer()
237// FloatToBuffer()
238// Copied from Protocol Buffers, (C) Google, BSD license.
239// Kenton wrote this code originally. The following commentary is
240// from the original.
241//
242// Description: converts a double or float to a string which, if
243// passed to NoLocaleStrtod(), will produce the exact same original double
244// (except in case of NaN; all NaNs are considered the same value).
245// We try to keep the string short but it's not guaranteed to be as
246// short as possible.
247//
248// DoubleToBuffer() and FloatToBuffer() write the text to the given
249// buffer and return it. The buffer must be at least
250// kDoubleToBufferSize bytes for doubles and kFloatToBufferSize
251// bytes for floats. kFastToBufferSize is also guaranteed to be large
252// enough to hold either.
253//
254// We want to print the value without losing precision, but we also do
255// not want to print more digits than necessary. This turns out to be
256// trickier than it sounds. Numbers like 0.2 cannot be represented
257// exactly in binary. If we print 0.2 with a very large precision,
258// e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
259// On the other hand, if we set the precision too low, we lose
260// significant digits when printing numbers that actually need them.
261// It turns out there is no precision value that does the right thing
262// for all numbers.
263//
264// Our strategy is to first try printing with a precision that is never
265// over-precise, then parse the result with strtod() to see if it
266// matches. If not, we print again with a precision that will always
267// give a precise result, but may use more digits than necessary.
268//
269// An arguably better strategy would be to use the algorithm described
270// in "How to Print Floating-Point Numbers Accurately" by Steele &
271// White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
272// however, that the following implementation is about as fast as
273// DMG's code. Furthermore, DMG's code locks mutexes, which means it
274// will not scale well on multi-core machines. DMG's code is slightly
275// more accurate (in that it will never use more digits than
276// necessary), but this is probably irrelevant for most users.
277//
278// Rob Pike and Ken Thompson also have an implementation of dtoa() in
279// third_party/fmt/fltfmt.cc. Their implementation is similar to this
280// one in that it makes guesses and then uses strtod() to check them.
281// Their implementation is faster because they use their own code to
282// generate the digits in the first place rather than use snprintf(),
283// thus avoiding format string parsing overhead. However, this makes
284// it considerably more complicated than the following implementation,
285// and it is embedded in a larger library. If speed turns out to be
286// an issue, we could re-implement this in terms of their
287// implementation.
288// ----------------------------------------------------------------------
289
290#ifdef _WIN32
291// MSVC has only _snprintf, not snprintf.
292//
293// MinGW has both snprintf and _snprintf, but they appear to be different
294// functions. The former is buggy. When invoked like so:
295// char buffer[32];
296// snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
297// it prints "1.23000e+10". This is plainly wrong: %g should never print
298// trailing zeros after the decimal point. For some reason this bug only
299// occurs with some input values, not all. In any case, _snprintf does the
300// right thing, so we use it.
301#define snprintf _snprintf
302#endif
303
304inline bool IsNaN(double value) {
305 // NaN is never equal to anything, even itself.
306 return value != value;
307}
308
309// In practice, doubles should never need more than 24 bytes and floats
310// should never need more than 14 (including null terminators), but we
311// overestimate to be safe.
312static const int kDoubleToBufferSize = 32;
313static const int kFloatToBufferSize = 24;
314
315static inline bool IsValidFloatChar(char c) {
316 return ('0' <= c && c <= '9') ||
317 c == 'e' || c == 'E' ||
318 c == '+' || c == '-';
319}
320
321void DelocalizeRadix(char* buffer) {
322 // Fast check: if the buffer has a normal decimal point, assume no
323 // translation is needed.
324 if (strchr(buffer, '.') != NULL) return;
325
326 // Find the first unknown character.
327 while (IsValidFloatChar(*buffer)) ++buffer;
328
329 if (*buffer == '\0') {
330 // No radix character found.
331 return;
332 }
333
334 // We are now pointing at the locale-specific radix character. Replace it
335 // with '.'.
336 *buffer = '.';
337 ++buffer;
338
339 if (!IsValidFloatChar(*buffer) && *buffer != '\0') {
340 // It appears the radix was a multi-byte character. We need to remove the
341 // extra bytes.
342 char* target = buffer;
343 do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0');
344 memmove(target, buffer, strlen(buffer) + 1);
345 }
346}
347
348void RemovePlus(char* buffer) {
349 // Remove any + characters because they are redundant and ugly.
350
351 for (;;) {
352 buffer = strchr(buffer, '+');
353 if (buffer == NULL) {
354 return;
355 }
356 memmove(buffer, buffer + 1, strlen(buffer + 1) + 1);
357 }
358}
359
360#if _WIN32
361void RemoveE0(char* buffer) {
362 // Remove redundant leading 0's after an e, e.g. 1e012. Seems to appear on
363 // Windows.
364
365 // Find and skip 'e'.
366 char* ptr = strchr(buffer, 'e');
367 if (ptr == nullptr) return;
368 ++ptr;
369
370 // Skip '-'.
371 if (*ptr == '-') ++ptr;
372
373 // Skip '0's.
374 char* ptr2 = ptr;
375 while (*ptr2 == '0') ++ptr2;
376
377 // If we went past the last digit, back up one.
378 if (*ptr2 < '0' || *ptr2 > '9') --ptr2;
379
380 // Move bytes backwards.
381 if (ptr2 > ptr) {
382 memmove(ptr, ptr2, strlen(ptr2) + 1);
383 }
384}
385#endif
386
387char* DoubleToBuffer(double value, char* buffer) {
388 // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
389 // platforms these days. Just in case some system exists where DBL_DIG
390 // is significantly larger -- and risks overflowing our buffer -- we have
391 // this assert.
392 static_assert(DBL_DIG < 20, "DBL_DIG is too big.");
393
394 if (value == inf()) {
395 strcpy(buffer, "inf");
396 return buffer;
397 } else if (value == -inf()) {
398 strcpy(buffer, "-inf");
399 return buffer;
400 } else if (IsNaN(value)) {
401 strcpy(buffer, "nan");
402 return buffer;
403 }
404
405 int snprintf_result KJ_UNUSED =
406 snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
407
408 // The snprintf should never overflow because the buffer is significantly
409 // larger than the precision we asked for.
410 KJ_DASSERT(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
411
412 // We need to make parsed_value volatile in order to force the compiler to
413 // write it out to the stack. Otherwise, it may keep the value in a
414 // register, and if it does that, it may keep it as a long double instead
415 // of a double. This long double may have extra bits that make it compare
416 // unequal to "value" even though it would be exactly equal if it were
417 // truncated to a double.
418 volatile double parsed_value = strtod(buffer, NULL);
419 if (parsed_value != value) {
420 int snprintf_result2 KJ_UNUSED =
421 snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
422
423 // Should never overflow; see above.
424 KJ_DASSERT(snprintf_result2 > 0 && snprintf_result2 < kDoubleToBufferSize);
425 }
426
427 DelocalizeRadix(buffer);
428 RemovePlus(buffer);
429#if _WIN32
430 RemoveE0(buffer);
431#endif // _WIN32
432 return buffer;
433}
434
435bool safe_strtof(const char* str, float* value) {
436 char* endptr;
437 errno = 0; // errno only gets set on errors
438#if defined(_WIN32) || defined (__hpux) // has no strtof()
439 *value = static_cast<float>(strtod(str, &endptr));
440#else
441 *value = strtof(str, &endptr);
442#endif
443 return *str != 0 && *endptr == 0 && errno == 0;
444}
445
446char* FloatToBuffer(float value, char* buffer) {
447 // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
448 // platforms these days. Just in case some system exists where FLT_DIG
449 // is significantly larger -- and risks overflowing our buffer -- we have
450 // this assert.
451 static_assert(FLT_DIG < 10, "FLT_DIG is too big");
452
453 if (value == inf()) {
454 strcpy(buffer, "inf");
455 return buffer;
456 } else if (value == -inf()) {
457 strcpy(buffer, "-inf");
458 return buffer;
459 } else if (IsNaN(value)) {
460 strcpy(buffer, "nan");
461 return buffer;
462 }
463
464 int snprintf_result KJ_UNUSED =
465 snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
466
467 // The snprintf should never overflow because the buffer is significantly
468 // larger than the precision we asked for.
469 KJ_DASSERT(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
470
471 float parsed_value;
472 if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
473 int snprintf_result2 KJ_UNUSED =
474 snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+2, value);
475
476 // Should never overflow; see above.
477 KJ_DASSERT(snprintf_result2 > 0 && snprintf_result2 < kFloatToBufferSize);
478 }
479
480 DelocalizeRadix(buffer);
481 RemovePlus(buffer);
482#if _WIN32
483 RemoveE0(buffer);
484#endif // _WIN32
485 return buffer;
486}
487
488} // namespace
489
490CappedArray<char, kFloatToBufferSize> Stringifier::operator*(float f) const {
491 CappedArray<char, kFloatToBufferSize> result;
492 result.setSize(strlen(FloatToBuffer(f, result.begin())));
493 return result;
494}
495
496CappedArray<char, kDoubleToBufferSize> Stringifier::operator*(double f) const {
497 CappedArray<char, kDoubleToBufferSize> result;
498 result.setSize(strlen(DoubleToBuffer(f, result.begin())));
499 return result;
500}
501
502} // namespace _ (private)
503} // namespace kj
504