1 | // Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors |
2 | // Licensed under the MIT License: |
3 | // |
4 | // Permission is hereby granted, free of charge, to any person obtaining a copy |
5 | // of this software and associated documentation files (the "Software"), to deal |
6 | // in the Software without restriction, including without limitation the rights |
7 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
8 | // copies of the Software, and to permit persons to whom the Software is |
9 | // furnished to do so, subject to the following conditions: |
10 | // |
11 | // The above copyright notice and this permission notice shall be included in |
12 | // all copies or substantial portions of the Software. |
13 | // |
14 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
17 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
18 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
19 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
20 | // THE SOFTWARE. |
21 | |
22 | #include "string.h" |
23 | #include "debug.h" |
24 | #include <stdio.h> |
25 | #include <float.h> |
26 | #include <errno.h> |
27 | #include <stdlib.h> |
28 | #include <stdint.h> |
29 | |
30 | namespace kj { |
31 | |
32 | #if _MSC_VER |
33 | #pragma warning(disable: 4996) |
34 | // Warns that sprintf() is buffer-overrunny. We know that, it's cool. |
35 | #endif |
36 | |
37 | namespace { |
// Reports whether `s` begins with a hexadecimal prefix ("0x" or "0X"), optionally preceded by a
// single '-' sign. Used to pick the base handed to strtoll()/strtoull().
bool isHex(const char *s) {
  const char* digits = (*s == '-') ? s + 1 : s;
  if (digits[0] != '0') {
    return false;
  }
  // digits[0] is '0', hence not the terminator, so reading digits[1] stays inside the string.
  const char marker = digits[1];
  return marker == 'x' || marker == 'X';
}
42 | |
43 | long long parseSigned(const StringPtr& s, long long min, long long max) { |
44 | KJ_REQUIRE(s != nullptr, "String does not contain valid number" , s) { return 0; } |
45 | char *endPtr; |
46 | errno = 0; |
47 | auto value = strtoll(s.begin(), &endPtr, isHex(s.cStr()) ? 16 : 10); |
48 | KJ_REQUIRE(endPtr == s.end(), "String does not contain valid number" , s) { return 0; } |
49 | KJ_REQUIRE(errno != ERANGE, "Value out-of-range" , s) { return 0; } |
50 | KJ_REQUIRE(value >= min && value <= max, "Value out-of-range" , value, min, max) { return 0; } |
51 | return value; |
52 | } |
53 | |
54 | unsigned long long parseUnsigned(const StringPtr& s, unsigned long long max) { |
55 | KJ_REQUIRE(s != nullptr, "String does not contain valid number" , s) { return 0; } |
56 | char *endPtr; |
57 | errno = 0; |
58 | auto value = strtoull(s.begin(), &endPtr, isHex(s.cStr()) ? 16 : 10); |
59 | KJ_REQUIRE(endPtr == s.end(), "String does not contain valid number" , s) { return 0; } |
60 | KJ_REQUIRE(errno != ERANGE, "Value out-of-range" , s) { return 0; } |
61 | KJ_REQUIRE(value <= max, "Value out-of-range" , value, max) { return 0; } |
62 | //strtoull("-1") does not fail with ERANGE |
63 | KJ_REQUIRE(s[0] != '-', "Value out-of-range" , s) { return 0; } |
64 | return value; |
65 | } |
66 | |
67 | template <typename T> |
68 | T parseInteger(const StringPtr& s) { |
69 | if (static_cast<T>(minValue) < 0) { |
70 | long long min = static_cast<T>(minValue); |
71 | long long max = static_cast<T>(maxValue); |
72 | return static_cast<T>(parseSigned(s, min, max)); |
73 | } else { |
74 | unsigned long long max = static_cast<T>(maxValue); |
75 | return static_cast<T>(parseUnsigned(s, max)); |
76 | } |
77 | } |
78 | |
79 | double parseDouble(const StringPtr& s) { |
80 | KJ_REQUIRE(s != nullptr, "String does not contain valid number" , s) { return 0; } |
81 | char *endPtr; |
82 | errno = 0; |
83 | auto value = strtod(s.begin(), &endPtr); |
84 | KJ_REQUIRE(endPtr == s.end(), "String does not contain valid floating number" , s) { return 0; } |
85 | #if _WIN32 || __CYGWIN__ || __BIONIC__ |
86 | // When Windows' strtod() parses "nan", it returns a value with the sign bit set. But, our |
87 | // preferred canonical value for NaN does not have the sign bit set, and all other platforms |
88 | // return one without the sign bit set. So, on Windows, detect NaN and return our preferred |
89 | // version. |
90 | // |
91 | // Cygwin seemingly does not try to emulate Linux behavior here, but rather allows Windows' |
92 | // behavior to leak through. (Conversely, WINE actually produces the Linux behavior despite |
93 | // trying to behave like Win32...) |
94 | // |
95 | // Bionic (Android) failed the unit test and so I added it to the list without investigating |
96 | // further. |
97 | if (isNaN(value)) { |
98 | // NaN |
99 | return kj::nan(); |
100 | } |
101 | #endif |
102 | return value; |
103 | } |
104 | |
105 | } // namespace |
106 | |
107 | #define PARSE_AS_INTEGER(T) \ |
108 | template <> T StringPtr::parseAs<T>() const { return parseInteger<T>(*this); } |
109 | PARSE_AS_INTEGER(char); |
110 | PARSE_AS_INTEGER(signed char); |
111 | PARSE_AS_INTEGER(unsigned char); |
112 | PARSE_AS_INTEGER(short); |
113 | PARSE_AS_INTEGER(unsigned short); |
114 | PARSE_AS_INTEGER(int); |
115 | PARSE_AS_INTEGER(unsigned int); |
116 | PARSE_AS_INTEGER(long); |
117 | PARSE_AS_INTEGER(unsigned long); |
118 | PARSE_AS_INTEGER(long long); |
119 | PARSE_AS_INTEGER(unsigned long long); |
120 | #undef PARSE_AS_INTEGER |
121 | template <> double StringPtr::parseAs<double>() const { return parseDouble(*this); } |
122 | template <> float StringPtr::parseAs<float>() const { return parseDouble(*this); } |
123 | |
124 | String heapString(size_t size) { |
125 | char* buffer = _::HeapArrayDisposer::allocate<char>(size + 1); |
126 | buffer[size] = '\0'; |
127 | return String(buffer, size, _::HeapArrayDisposer::instance); |
128 | } |
129 | |
130 | String heapString(const char* value, size_t size) { |
131 | char* buffer = _::HeapArrayDisposer::allocate<char>(size + 1); |
132 | if (size != 0u) { |
133 | memcpy(buffer, value, size); |
134 | } |
135 | buffer[size] = '\0'; |
136 | return String(buffer, size, _::HeapArrayDisposer::instance); |
137 | } |
138 | |
139 | template <typename T> |
140 | static CappedArray<char, sizeof(T) * 2 + 1> hexImpl(T i) { |
141 | // We don't use sprintf() because it's not async-signal-safe (for strPreallocated()). |
142 | CappedArray<char, sizeof(T) * 2 + 1> result; |
143 | uint8_t reverse[sizeof(T) * 2]; |
144 | uint8_t* p = reverse; |
145 | if (i == 0) { |
146 | *p++ = 0; |
147 | } else { |
148 | while (i > 0) { |
149 | *p++ = i % 16; |
150 | i /= 16; |
151 | } |
152 | } |
153 | |
154 | char* p2 = result.begin(); |
155 | while (p > reverse) { |
156 | *p2++ = "0123456789abcdef" [*--p]; |
157 | } |
158 | result.setSize(p2 - result.begin()); |
159 | return result; |
160 | } |
161 | |
162 | #define HEXIFY_INT(type) \ |
163 | CappedArray<char, sizeof(type) * 2 + 1> hex(type i) { \ |
164 | return hexImpl<type>(i); \ |
165 | } |
166 | |
167 | HEXIFY_INT(unsigned char); |
168 | HEXIFY_INT(unsigned short); |
169 | HEXIFY_INT(unsigned int); |
170 | HEXIFY_INT(unsigned long); |
171 | HEXIFY_INT(unsigned long long); |
172 | |
173 | #undef HEXIFY_INT |
174 | |
175 | namespace _ { // private |
176 | |
177 | StringPtr Stringifier::operator*(decltype(nullptr)) const { |
178 | return "nullptr" ; |
179 | } |
180 | |
181 | StringPtr Stringifier::operator*(bool b) const { |
182 | return b ? StringPtr("true" ) : StringPtr("false" ); |
183 | } |
184 | |
185 | template <typename T, typename Unsigned> |
186 | static CappedArray<char, sizeof(T) * 3 + 2> stringifyImpl(T i) { |
187 | // We don't use sprintf() because it's not async-signal-safe (for strPreallocated()). |
188 | CappedArray<char, sizeof(T) * 3 + 2> result; |
189 | bool negative = i < 0; |
190 | Unsigned u = negative ? -i : i; |
191 | uint8_t reverse[sizeof(T) * 3 + 1]; |
192 | uint8_t* p = reverse; |
193 | if (u == 0) { |
194 | *p++ = 0; |
195 | } else { |
196 | while (u > 0) { |
197 | *p++ = u % 10; |
198 | u /= 10; |
199 | } |
200 | } |
201 | |
202 | char* p2 = result.begin(); |
203 | if (negative) *p2++ = '-'; |
204 | while (p > reverse) { |
205 | *p2++ = '0' + *--p; |
206 | } |
207 | result.setSize(p2 - result.begin()); |
208 | return result; |
209 | } |
210 | |
211 | #define STRINGIFY_INT(type, unsigned) \ |
212 | CappedArray<char, sizeof(type) * 3 + 2> Stringifier::operator*(type i) const { \ |
213 | return stringifyImpl<type, unsigned>(i); \ |
214 | } |
215 | |
216 | STRINGIFY_INT(signed char, uint); |
217 | STRINGIFY_INT(unsigned char, uint); |
218 | STRINGIFY_INT(short, uint); |
219 | STRINGIFY_INT(unsigned short, uint); |
220 | STRINGIFY_INT(int, uint); |
221 | STRINGIFY_INT(unsigned int, uint); |
222 | STRINGIFY_INT(long, unsigned long); |
223 | STRINGIFY_INT(unsigned long, unsigned long); |
224 | STRINGIFY_INT(long long, unsigned long long); |
225 | STRINGIFY_INT(unsigned long long, unsigned long long); |
226 | |
227 | #undef STRINGIFY_INT |
228 | |
229 | CappedArray<char, sizeof(const void*) * 2 + 1> Stringifier::operator*(const void* i) const { \ |
230 | return hexImpl<uintptr_t>(reinterpret_cast<uintptr_t>(i)); |
231 | } |
232 | |
233 | namespace { |
234 | |
235 | // ---------------------------------------------------------------------- |
236 | // DoubleToBuffer() |
237 | // FloatToBuffer() |
238 | // Copied from Protocol Buffers, (C) Google, BSD license. |
239 | // Kenton wrote this code originally. The following commentary is |
240 | // from the original. |
241 | // |
242 | // Description: converts a double or float to a string which, if |
243 | // passed to NoLocaleStrtod(), will produce the exact same original double |
244 | // (except in case of NaN; all NaNs are considered the same value). |
245 | // We try to keep the string short but it's not guaranteed to be as |
246 | // short as possible. |
247 | // |
248 | // DoubleToBuffer() and FloatToBuffer() write the text to the given |
249 | // buffer and return it. The buffer must be at least |
250 | // kDoubleToBufferSize bytes for doubles and kFloatToBufferSize |
251 | // bytes for floats. kFastToBufferSize is also guaranteed to be large |
252 | // enough to hold either. |
253 | // |
254 | // We want to print the value without losing precision, but we also do |
255 | // not want to print more digits than necessary. This turns out to be |
256 | // trickier than it sounds. Numbers like 0.2 cannot be represented |
257 | // exactly in binary. If we print 0.2 with a very large precision, |
258 | // e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167". |
259 | // On the other hand, if we set the precision too low, we lose |
260 | // significant digits when printing numbers that actually need them. |
261 | // It turns out there is no precision value that does the right thing |
262 | // for all numbers. |
263 | // |
264 | // Our strategy is to first try printing with a precision that is never |
265 | // over-precise, then parse the result with strtod() to see if it |
266 | // matches. If not, we print again with a precision that will always |
267 | // give a precise result, but may use more digits than necessary. |
268 | // |
269 | // An arguably better strategy would be to use the algorithm described |
270 | // in "How to Print Floating-Point Numbers Accurately" by Steele & |
271 | // White, e.g. as implemented by David M. Gay's dtoa(). It turns out, |
272 | // however, that the following implementation is about as fast as |
273 | // DMG's code. Furthermore, DMG's code locks mutexes, which means it |
274 | // will not scale well on multi-core machines. DMG's code is slightly |
275 | // more accurate (in that it will never use more digits than |
276 | // necessary), but this is probably irrelevant for most users. |
277 | // |
278 | // Rob Pike and Ken Thompson also have an implementation of dtoa() in |
279 | // third_party/fmt/fltfmt.cc. Their implementation is similar to this |
280 | // one in that it makes guesses and then uses strtod() to check them. |
281 | // Their implementation is faster because they use their own code to |
282 | // generate the digits in the first place rather than use snprintf(), |
283 | // thus avoiding format string parsing overhead. However, this makes |
284 | // it considerably more complicated than the following implementation, |
285 | // and it is embedded in a larger library. If speed turns out to be |
286 | // an issue, we could re-implement this in terms of their |
287 | // implementation. |
288 | // ---------------------------------------------------------------------- |
289 | |
#if defined(_WIN32)
// On Windows, route snprintf() to _snprintf():
//
// - MSVC has only _snprintf, not snprintf.
// - MinGW has both, but they appear to be different functions, and snprintf is buggy. When
//   invoked like so:
//       char buffer[32];
//       snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
//   it prints "1.23000e+10". This is plainly wrong: %g should never print trailing zeros after
//   the decimal point. For some reason the bug only shows up for some input values, not all.
//   _snprintf does the right thing, so we use it.
#define snprintf _snprintf
#endif
303 | |
// Returns true when `value` is a NaN.
inline bool IsNaN(double value) {
  // A NaN is the only value that fails to compare equal to itself.
  return !(value == value);
}
308 | |
// Sizes of the buffers handed to DoubleToBuffer() and FloatToBuffer(). In practice a double never
// needs more than 24 bytes and a float never more than 14 (NUL terminator included), but we
// overestimate to be safe.
static constexpr int kDoubleToBufferSize = 32;
static constexpr int kFloatToBufferSize = 24;
314 | |
// Returns true for characters this file treats as locale-independent parts of a printed float:
// decimal digits, the exponent marker ('e'/'E') and the signs '+' and '-'. Everything else,
// including '.' and the NUL terminator, yields false.
static inline bool IsValidFloatChar(char c) {
  switch (c) {
    case 'e':
    case 'E':
    case '+':
    case '-':
      return true;
    default:
      return c >= '0' && c <= '9';
  }
}
320 | |
321 | void DelocalizeRadix(char* buffer) { |
322 | // Fast check: if the buffer has a normal decimal point, assume no |
323 | // translation is needed. |
324 | if (strchr(buffer, '.') != NULL) return; |
325 | |
326 | // Find the first unknown character. |
327 | while (IsValidFloatChar(*buffer)) ++buffer; |
328 | |
329 | if (*buffer == '\0') { |
330 | // No radix character found. |
331 | return; |
332 | } |
333 | |
334 | // We are now pointing at the locale-specific radix character. Replace it |
335 | // with '.'. |
336 | *buffer = '.'; |
337 | ++buffer; |
338 | |
339 | if (!IsValidFloatChar(*buffer) && *buffer != '\0') { |
340 | // It appears the radix was a multi-byte character. We need to remove the |
341 | // extra bytes. |
342 | char* target = buffer; |
343 | do { ++buffer; } while (!IsValidFloatChar(*buffer) && *buffer != '\0'); |
344 | memmove(target, buffer, strlen(buffer) + 1); |
345 | } |
346 | } |
347 | |
// Deletes every '+' from the NUL-terminated string `buffer`, in place. They are redundant and
// ugly in printed numbers.
void RemovePlus(char* buffer) {
  char* plus = strchr(buffer, '+');
  while (plus != NULL) {
    // Slide everything after the '+' (terminator included) one position to the left.
    char* tail = plus + 1;
    memmove(plus, tail, strlen(tail) + 1);
    // Resume searching from the same spot, which now holds the character that followed.
    plus = strchr(plus, '+');
  }
}
359 | |
#if _WIN32
// Strips redundant leading zeros from the exponent of the printed number `buffer`, in place --
// e.g. "1e012" becomes "1e12". Such exponents seem to appear on Windows. At least one exponent
// digit is always kept.
void RemoveE0(char* buffer) {
  // Locate the exponent marker; without one there is nothing to do.
  char* exponent = strchr(buffer, 'e');
  if (exponent == nullptr) {
    return;
  }
  ++exponent;

  // Step over a minus sign, if present.
  if (*exponent == '-') {
    ++exponent;
  }

  // Find the first character after the run of zeros.
  char* keep = exponent;
  for (; *keep == '0'; ++keep) {}

  // If the zeros ran to the end of the digits, retain the final zero.
  const bool atDigit = *keep >= '0' && *keep <= '9';
  if (!atDigit) {
    --keep;
  }

  // Close the gap left by the dropped zeros.
  if (keep > exponent) {
    memmove(exponent, keep, strlen(keep) + 1);
  }
}
#endif
386 | |
387 | char* DoubleToBuffer(double value, char* buffer) { |
388 | // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all |
389 | // platforms these days. Just in case some system exists where DBL_DIG |
390 | // is significantly larger -- and risks overflowing our buffer -- we have |
391 | // this assert. |
392 | static_assert(DBL_DIG < 20, "DBL_DIG is too big." ); |
393 | |
394 | if (value == inf()) { |
395 | strcpy(buffer, "inf" ); |
396 | return buffer; |
397 | } else if (value == -inf()) { |
398 | strcpy(buffer, "-inf" ); |
399 | return buffer; |
400 | } else if (IsNaN(value)) { |
401 | strcpy(buffer, "nan" ); |
402 | return buffer; |
403 | } |
404 | |
405 | int snprintf_result KJ_UNUSED = |
406 | snprintf(buffer, kDoubleToBufferSize, "%.*g" , DBL_DIG, value); |
407 | |
408 | // The snprintf should never overflow because the buffer is significantly |
409 | // larger than the precision we asked for. |
410 | KJ_DASSERT(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize); |
411 | |
412 | // We need to make parsed_value volatile in order to force the compiler to |
413 | // write it out to the stack. Otherwise, it may keep the value in a |
414 | // register, and if it does that, it may keep it as a long double instead |
415 | // of a double. This long double may have extra bits that make it compare |
416 | // unequal to "value" even though it would be exactly equal if it were |
417 | // truncated to a double. |
418 | volatile double parsed_value = strtod(buffer, NULL); |
419 | if (parsed_value != value) { |
420 | int snprintf_result2 KJ_UNUSED = |
421 | snprintf(buffer, kDoubleToBufferSize, "%.*g" , DBL_DIG+2, value); |
422 | |
423 | // Should never overflow; see above. |
424 | KJ_DASSERT(snprintf_result2 > 0 && snprintf_result2 < kDoubleToBufferSize); |
425 | } |
426 | |
427 | DelocalizeRadix(buffer); |
428 | RemovePlus(buffer); |
429 | #if _WIN32 |
430 | RemoveE0(buffer); |
431 | #endif // _WIN32 |
432 | return buffer; |
433 | } |
434 | |
// Parses `str` as a float and stores the conversion result in `*value` (written even when the
// parse is rejected). Returns true only if `str` is non-empty, the conversion consumed it
// through to the terminator, and errno was left at zero.
bool safe_strtof(const char* str, float* value) {
  char* parseEnd;
  errno = 0;  // errno is only ever set on failure, so start from a clean slate
#if defined(_WIN32) || defined (__hpux)  // has no strtof()
  const double wide = strtod(str, &parseEnd);
  *value = static_cast<float>(wide);
#else
  *value = strtof(str, &parseEnd);
#endif
  if (*str == 0) return false;
  if (*parseEnd != 0) return false;
  return errno == 0;
}
445 | |
446 | char* FloatToBuffer(float value, char* buffer) { |
447 | // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all |
448 | // platforms these days. Just in case some system exists where FLT_DIG |
449 | // is significantly larger -- and risks overflowing our buffer -- we have |
450 | // this assert. |
451 | static_assert(FLT_DIG < 10, "FLT_DIG is too big" ); |
452 | |
453 | if (value == inf()) { |
454 | strcpy(buffer, "inf" ); |
455 | return buffer; |
456 | } else if (value == -inf()) { |
457 | strcpy(buffer, "-inf" ); |
458 | return buffer; |
459 | } else if (IsNaN(value)) { |
460 | strcpy(buffer, "nan" ); |
461 | return buffer; |
462 | } |
463 | |
464 | int snprintf_result KJ_UNUSED = |
465 | snprintf(buffer, kFloatToBufferSize, "%.*g" , FLT_DIG, value); |
466 | |
467 | // The snprintf should never overflow because the buffer is significantly |
468 | // larger than the precision we asked for. |
469 | KJ_DASSERT(snprintf_result > 0 && snprintf_result < kFloatToBufferSize); |
470 | |
471 | float parsed_value; |
472 | if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { |
473 | int snprintf_result2 KJ_UNUSED = |
474 | snprintf(buffer, kFloatToBufferSize, "%.*g" , FLT_DIG+2, value); |
475 | |
476 | // Should never overflow; see above. |
477 | KJ_DASSERT(snprintf_result2 > 0 && snprintf_result2 < kFloatToBufferSize); |
478 | } |
479 | |
480 | DelocalizeRadix(buffer); |
481 | RemovePlus(buffer); |
482 | #if _WIN32 |
483 | RemoveE0(buffer); |
484 | #endif // _WIN32 |
485 | return buffer; |
486 | } |
487 | |
488 | } // namespace |
489 | |
490 | CappedArray<char, kFloatToBufferSize> Stringifier::operator*(float f) const { |
491 | CappedArray<char, kFloatToBufferSize> result; |
492 | result.setSize(strlen(FloatToBuffer(f, result.begin()))); |
493 | return result; |
494 | } |
495 | |
496 | CappedArray<char, kDoubleToBufferSize> Stringifier::operator*(double f) const { |
497 | CappedArray<char, kDoubleToBufferSize> result; |
498 | result.setSize(strlen(DoubleToBuffer(f, result.begin()))); |
499 | return result; |
500 | } |
501 | |
502 | } // namespace _ (private) |
503 | } // namespace kj |
504 | |