1 | // |
2 | // Copyright (C) 1999-2005 Google, Inc. |
3 | // |
4 | |
5 | #include "strutil.h" |
6 | |
7 | #include <ctype.h> |
8 | #include <errno.h> |
9 | #include <float.h> // for DBL_DIG and FLT_DIG |
10 | #include <math.h> // for HUGE_VAL |
11 | #include <pthread.h> // for gmtime_r (on Windows) |
12 | #include <stdarg.h> |
13 | #include <stdlib.h> |
14 | #include <stdio.h> |
15 | #include <string.h> |
16 | #include <time.h> // for FastTimeToBuffer() |
17 | |
18 | #include <algorithm> |
19 | using std::min; |
20 | using std::max; |
21 | using std::swap; |
22 | using std::reverse; |
23 | |
24 | #include <hash_map> |
25 | using __gnu_cxx::hash_map; |
26 | |
27 | #include <hash_set> |
28 | using __gnu_cxx::hash_set; |
29 | |
30 | #include <iterator> |
31 | #include <limits> |
32 | using std::numeric_limits; |
33 | |
34 | #include <set> |
35 | using std::set; |
36 | using std::multiset; |
37 | |
38 | #include <string> |
39 | using std::string; |
40 | |
41 | #include <vector> |
42 | using std::vector; |
43 | |
44 | |
45 | #include "base/logging.h" |
46 | #include "base/scoped_ptr.h" |
47 | //#include "strutil-inl.h" |
48 | //#include "third_party/utf/utf.h" // for runetochar |
49 | //#include "util/gtl/stl_util-inl.h" // for string_as_array |
50 | //#include "util/hash/hash.h" |
51 | #include "split.h" |
52 | |
53 | #ifdef OS_WINDOWS |
54 | #include <pthread.h> // for gmtime_r |
55 | #ifdef min // windows.h defines this to something silly |
56 | #undef min |
57 | #endif |
58 | #endif |
59 | |
60 | // ---------------------------------------------------------------------- |
61 | // FpToString() |
62 | // FloatToString() |
63 | // IntToString() |
64 | // Convert various types to their string representation. These |
65 | // all do the obvious, trivial thing. |
66 | // ---------------------------------------------------------------------- |
67 | |
68 | string FpToString(Fprint fp) { |
69 | char buf[17]; |
70 | snprintf(buf, sizeof(buf), "%016llx" , fp); |
71 | return string(buf); |
72 | } |
73 | |
74 | string FloatToString(float f, const char* format) { |
75 | char buf[80]; |
76 | snprintf(buf, sizeof(buf), format, f); |
77 | return string(buf); |
78 | } |
79 | |
80 | string IntToString(int i, const char* format) { |
81 | char buf[80]; |
82 | snprintf(buf, sizeof(buf), format, i); |
83 | return string(buf); |
84 | } |
85 | |
86 | string Int64ToString(int64 i64, const char* format) { |
87 | char buf[80]; |
88 | snprintf(buf, sizeof(buf), format, i64); |
89 | return string(buf); |
90 | } |
91 | |
92 | string UInt64ToString(uint64 ui64, const char* format) { |
93 | char buf[80]; |
94 | snprintf(buf, sizeof(buf), format, ui64); |
95 | return string(buf); |
96 | } |
97 | |
98 | // Default arguments |
99 | string FloatToString(float f) { return FloatToString(f, "%7f" ); } |
100 | string IntToString(int i) { return IntToString(i, "%7d" ); } |
101 | string Int64ToString(int64 i64) { |
102 | return Int64ToString(i64, "%7" GG_LL_FORMAT "d" ); |
103 | } |
104 | string UInt64ToString(uint64 ui64) { |
105 | return UInt64ToString(ui64, "%7" GG_LL_FORMAT "u" ); |
106 | } |
107 | |
108 | // ---------------------------------------------------------------------- |
109 | // FastIntToBuffer() |
110 | // FastInt64ToBuffer() |
111 | // FastHexToBuffer() |
112 | // FastHex64ToBuffer() |
113 | // FastHex32ToBuffer() |
114 | // FastTimeToBuffer() |
115 | // These are intended for speed. FastHexToBuffer() assumes the |
116 | // integer is non-negative. FastHexToBuffer() puts output in |
117 | // hex rather than decimal. FastTimeToBuffer() puts the output |
118 | // into RFC822 format. If time is 0, uses the current time. |
119 | // |
120 | // FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, |
121 | // padded to exactly 16 bytes (plus one byte for '\0') |
122 | // |
123 | // FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, |
124 | // padded to exactly 8 bytes (plus one byte for '\0') |
125 | // |
126 | // All functions take the output buffer as an arg. FastInt() |
127 | // uses at most 22 bytes, FastTime() uses exactly 30 bytes. |
128 | // They all return a pointer to the beginning of the output, |
129 | // which may not be the beginning of the input buffer. (Though |
130 | // for FastTimeToBuffer(), we guarantee that it is.) |
131 | // ---------------------------------------------------------------------- |
132 | |
133 | char *FastInt64ToBuffer(int64 i, char* buffer) { |
134 | FastInt64ToBufferLeft(i, buffer); |
135 | return buffer; |
136 | } |
137 | |
138 | // Offset into buffer where FastInt32ToBuffer places the end of string |
139 | // null character. Also used by FastInt32ToBufferLeft |
140 | static const int kFastInt32ToBufferOffset = 11; |
141 | |
142 | // Copied from http://gears.googlecode.com/svn-history/r395/trunk/gears/base/common/string16.cc |
143 | char *FastInt32ToBuffer(int32 i, char* buffer) { |
144 | // We could collapse the positive and negative sections, but that |
145 | // would be slightly slower for positive numbers... |
146 | // 12 bytes is enough to store -2**32, -4294967296. |
147 | char* p = buffer + kFastInt32ToBufferOffset; |
148 | *p-- = '\0'; |
149 | if (i >= 0) { |
150 | do { |
151 | *p-- = '0' + i % 10; |
152 | i /= 10; |
153 | } while (i > 0); |
154 | return p + 1; |
155 | } else { |
156 | // On different platforms, % and / have different behaviors for |
157 | // negative numbers, so we need to jump through hoops to make sure |
158 | // we don't divide negative numbers. |
159 | if (i > -10) { |
160 | i = -i; |
161 | *p-- = '0' + i; |
162 | *p = '-'; |
163 | return p; |
164 | } else { |
165 | // Make sure we aren't at MIN_INT, in which case we can't say i = -i |
166 | i = i + 10; |
167 | i = -i; |
168 | *p-- = '0' + i % 10; |
169 | // Undo what we did a moment ago |
170 | i = i / 10 + 1; |
171 | do { |
172 | *p-- = '0' + i % 10; |
173 | i /= 10; |
174 | } while (i > 0); |
175 | *p = '-'; |
176 | return p; |
177 | } |
178 | } |
179 | } |
180 | |
181 | char *FastHexToBuffer(int i, char* buffer) { |
182 | CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i; |
183 | |
184 | static const char *hexdigits = "0123456789abcdef" ; |
185 | char *p = buffer + 21; |
186 | *p-- = '\0'; |
187 | do { |
188 | *p-- = hexdigits[i & 15]; // mod by 16 |
189 | i >>= 4; // divide by 16 |
190 | } while (i > 0); |
191 | return p + 1; |
192 | } |
193 | |
194 | char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) { |
195 | static const char *hexdigits = "0123456789abcdef" ; |
196 | buffer[num_byte] = '\0'; |
197 | for (int i = num_byte - 1; i >= 0; i--) { |
198 | buffer[i] = hexdigits[uint32(value) & 0xf]; |
199 | value >>= 4; |
200 | } |
201 | return buffer; |
202 | } |
203 | |
204 | char *FastHex64ToBuffer(uint64 value, char* buffer) { |
205 | return InternalFastHexToBuffer(value, buffer, 16); |
206 | } |
207 | |
208 | char *FastHex32ToBuffer(uint32 value, char* buffer) { |
209 | return InternalFastHexToBuffer(value, buffer, 8); |
210 | } |
211 | |
212 | // Several converters use this table to reduce |
213 | // division and modulo operations. |
214 | static const char two_ASCII_digits[100][2] = { |
215 | {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, |
216 | {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'}, |
217 | {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, |
218 | {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'}, |
219 | {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, |
220 | {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'}, |
221 | {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, |
222 | {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'}, |
223 | {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, |
224 | {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'}, |
225 | {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, |
226 | {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'}, |
227 | {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, |
228 | {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'}, |
229 | {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, |
230 | {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'}, |
231 | {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, |
232 | {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'}, |
233 | {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, |
234 | {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'} |
235 | }; |
236 | |
237 | char* FastUInt32ToBufferLeft(uint32 u, char* buffer) { |
238 | int digits; |
239 | const char *ASCII_digits = NULL; |
240 | // The idea of this implementation is to trim the number of divides to as few |
241 | // as possible by using multiplication and subtraction rather than mod (%), |
242 | // and by outputting two digits at a time rather than one. |
243 | // The huge-number case is first, in the hopes that the compiler will output |
244 | // that case in one branch-free block of code, and only output conditional |
245 | // branches into it from below. |
246 | if (u >= 1000000000) { // >= 1,000,000,000 |
247 | digits = u / 100000000; // 100,000,000 |
248 | ASCII_digits = two_ASCII_digits[digits]; |
249 | buffer[0] = ASCII_digits[0]; |
250 | buffer[1] = ASCII_digits[1]; |
251 | buffer += 2; |
252 | sublt100_000_000: |
253 | u -= digits * 100000000; // 100,000,000 |
254 | lt100_000_000: |
255 | digits = u / 1000000; // 1,000,000 |
256 | ASCII_digits = two_ASCII_digits[digits]; |
257 | buffer[0] = ASCII_digits[0]; |
258 | buffer[1] = ASCII_digits[1]; |
259 | buffer += 2; |
260 | sublt1_000_000: |
261 | u -= digits * 1000000; // 1,000,000 |
262 | lt1_000_000: |
263 | digits = u / 10000; // 10,000 |
264 | ASCII_digits = two_ASCII_digits[digits]; |
265 | buffer[0] = ASCII_digits[0]; |
266 | buffer[1] = ASCII_digits[1]; |
267 | buffer += 2; |
268 | sublt10_000: |
269 | u -= digits * 10000; // 10,000 |
270 | lt10_000: |
271 | digits = u / 100; |
272 | ASCII_digits = two_ASCII_digits[digits]; |
273 | buffer[0] = ASCII_digits[0]; |
274 | buffer[1] = ASCII_digits[1]; |
275 | buffer += 2; |
276 | sublt100: |
277 | u -= digits * 100; |
278 | lt100: |
279 | digits = u; |
280 | ASCII_digits = two_ASCII_digits[digits]; |
281 | buffer[0] = ASCII_digits[0]; |
282 | buffer[1] = ASCII_digits[1]; |
283 | buffer += 2; |
284 | done: |
285 | *buffer = 0; |
286 | return buffer; |
287 | } |
288 | |
289 | if (u < 100) { |
290 | digits = u; |
291 | if (u >= 10) goto lt100; |
292 | *buffer++ = '0' + digits; |
293 | goto done; |
294 | } |
295 | if (u < 10000) { // 10,000 |
296 | if (u >= 1000) goto lt10_000; |
297 | digits = u / 100; |
298 | *buffer++ = '0' + digits; |
299 | goto sublt100; |
300 | } |
301 | if (u < 1000000) { // 1,000,000 |
302 | if (u >= 100000) goto lt1_000_000; |
303 | digits = u / 10000; // 10,000 |
304 | *buffer++ = '0' + digits; |
305 | goto sublt10_000; |
306 | } |
307 | if (u < 100000000) { // 100,000,000 |
308 | if (u >= 10000000) goto lt100_000_000; |
309 | digits = u / 1000000; // 1,000,000 |
310 | *buffer++ = '0' + digits; |
311 | goto sublt1_000_000; |
312 | } |
313 | // we already know that u < 1,000,000,000 |
314 | digits = u / 100000000; // 100,000,000 |
315 | *buffer++ = '0' + digits; |
316 | goto sublt100_000_000; |
317 | } |
318 | |
319 | char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) { |
320 | int digits; |
321 | const char *ASCII_digits = NULL; |
322 | |
323 | uint32 u = static_cast<uint32>(u64); |
324 | if (u == u64) return FastUInt32ToBufferLeft(u, buffer); |
325 | |
326 | uint64 top_11_digits = u64 / 1000000000; |
327 | buffer = FastUInt64ToBufferLeft(top_11_digits, buffer); |
328 | u = u64 - (top_11_digits * 1000000000); |
329 | |
330 | digits = u / 10000000; // 10,000,000 |
331 | DCHECK_LT(digits, 100); |
332 | ASCII_digits = two_ASCII_digits[digits]; |
333 | buffer[0] = ASCII_digits[0]; |
334 | buffer[1] = ASCII_digits[1]; |
335 | buffer += 2; |
336 | u -= digits * 10000000; // 10,000,000 |
337 | digits = u / 100000; // 100,000 |
338 | ASCII_digits = two_ASCII_digits[digits]; |
339 | buffer[0] = ASCII_digits[0]; |
340 | buffer[1] = ASCII_digits[1]; |
341 | buffer += 2; |
342 | u -= digits * 100000; // 100,000 |
343 | digits = u / 1000; // 1,000 |
344 | ASCII_digits = two_ASCII_digits[digits]; |
345 | buffer[0] = ASCII_digits[0]; |
346 | buffer[1] = ASCII_digits[1]; |
347 | buffer += 2; |
348 | u -= digits * 1000; // 1,000 |
349 | digits = u / 10; |
350 | ASCII_digits = two_ASCII_digits[digits]; |
351 | buffer[0] = ASCII_digits[0]; |
352 | buffer[1] = ASCII_digits[1]; |
353 | buffer += 2; |
354 | u -= digits * 10; |
355 | digits = u; |
356 | *buffer++ = '0' + digits; |
357 | *buffer = 0; |
358 | return buffer; |
359 | } |
360 | |
361 | char* FastInt64ToBufferLeft(int64 i, char* buffer) { |
362 | uint64 u = i; |
363 | if (i < 0) { |
364 | *buffer++ = '-'; |
365 | u = -i; |
366 | } |
367 | return FastUInt64ToBufferLeft(u, buffer); |
368 | } |
369 | |
370 | static inline void PutTwoDigits(int i, char* p) { |
371 | DCHECK_GE(i, 0); |
372 | DCHECK_LT(i, 100); |
373 | p[0] = two_ASCII_digits[i][0]; |
374 | p[1] = two_ASCII_digits[i][1]; |
375 | } |
376 | |
377 | char* FastTimeToBuffer(time_t s, char* buffer) { |
378 | if (s == 0) { |
379 | time(&s); |
380 | } |
381 | |
382 | struct tm tm; |
383 | if (gmtime_r(&s, &tm) == NULL) { |
384 | // Error message must fit in 30-char buffer. |
385 | memcpy(buffer, "Invalid:" , sizeof("Invalid:" )); |
386 | FastInt64ToBufferLeft(s, buffer+strlen(buffer)); |
387 | return buffer; |
388 | } |
389 | |
390 | // strftime format: "%a, %d %b %Y %H:%M:%S GMT", |
391 | // but strftime does locale stuff which we do not want |
392 | // plus strftime takes > 10x the time of hard code |
393 | |
394 | const char* weekday_name = "Xxx" ; |
395 | switch (tm.tm_wday) { |
396 | default: { DCHECK(false); } break; |
397 | case 0: weekday_name = "Sun" ; break; |
398 | case 1: weekday_name = "Mon" ; break; |
399 | case 2: weekday_name = "Tue" ; break; |
400 | case 3: weekday_name = "Wed" ; break; |
401 | case 4: weekday_name = "Thu" ; break; |
402 | case 5: weekday_name = "Fri" ; break; |
403 | case 6: weekday_name = "Sat" ; break; |
404 | } |
405 | |
406 | const char* month_name = "Xxx" ; |
407 | switch (tm.tm_mon) { |
408 | default: { DCHECK(false); } break; |
409 | case 0: month_name = "Jan" ; break; |
410 | case 1: month_name = "Feb" ; break; |
411 | case 2: month_name = "Mar" ; break; |
412 | case 3: month_name = "Apr" ; break; |
413 | case 4: month_name = "May" ; break; |
414 | case 5: month_name = "Jun" ; break; |
415 | case 6: month_name = "Jul" ; break; |
416 | case 7: month_name = "Aug" ; break; |
417 | case 8: month_name = "Sep" ; break; |
418 | case 9: month_name = "Oct" ; break; |
419 | case 10: month_name = "Nov" ; break; |
420 | case 11: month_name = "Dec" ; break; |
421 | } |
422 | |
423 | // Write out the buffer. |
424 | |
425 | memcpy(buffer+0, weekday_name, 3); |
426 | buffer[3] = ','; |
427 | buffer[4] = ' '; |
428 | |
429 | PutTwoDigits(tm.tm_mday, buffer+5); |
430 | buffer[7] = ' '; |
431 | |
432 | memcpy(buffer+8, month_name, 3); |
433 | buffer[11] = ' '; |
434 | |
435 | int32 year = tm.tm_year + 1900; |
436 | PutTwoDigits(year/100, buffer+12); |
437 | PutTwoDigits(year%100, buffer+14); |
438 | buffer[16] = ' '; |
439 | |
440 | PutTwoDigits(tm.tm_hour, buffer+17); |
441 | buffer[19] = ':'; |
442 | |
443 | PutTwoDigits(tm.tm_min, buffer+20); |
444 | buffer[22] = ':'; |
445 | |
446 | PutTwoDigits(tm.tm_sec, buffer+23); |
447 | |
448 | // includes ending NUL |
449 | memcpy(buffer+25, " GMT" , 5); |
450 | |
451 | return buffer; |
452 | } |
453 | |
454 | // ---------------------------------------------------------------------- |
455 | // ParseLeadingUInt64Value |
456 | // ParseLeadingInt64Value |
457 | // ParseLeadingHex64Value |
458 | // A simple parser for long long values. Returns the parsed value if a |
459 | // valid integer is found; else returns deflt |
460 | // UInt64 and Int64 cannot handle decimal numbers with leading 0s. |
461 | // -------------------------------------------------------------------- |
462 | uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) { |
463 | char *error = NULL; |
464 | const uint64 value = strtoull(str, &error, 0); |
465 | return (error == str) ? deflt : value; |
466 | } |
467 | |
468 | int64 ParseLeadingInt64Value(const char *str, int64 deflt) { |
469 | char *error = NULL; |
470 | const int64 value = strtoll(str, &error, 0); |
471 | return (error == str) ? deflt : value; |
472 | } |
473 | |
474 | uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) { |
475 | char *error = NULL; |
476 | const uint64 value = strtoull(str, &error, 16); |
477 | return (error == str) ? deflt : value; |
478 | } |
479 | |
480 | // ---------------------------------------------------------------------- |
481 | // ParseLeadingDec64Value |
482 | // ParseLeadingUDec64Value |
483 | // A simple parser for [u]int64 values. Returns the parsed value |
484 | // if a valid value is found; else returns deflt |
485 | // The string passed in is treated as *10 based*. |
486 | // This can handle strings with leading 0s. |
487 | // -------------------------------------------------------------------- |
488 | |
489 | int64 ParseLeadingDec64Value(const char *str, int64 deflt) { |
490 | char *error = NULL; |
491 | const int64 value = strtoll(str, &error, 10); |
492 | return (error == str) ? deflt : value; |
493 | } |
494 | |
495 | uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) { |
496 | char *error = NULL; |
497 | const uint64 value = strtoull(str, &error, 10); |
498 | return (error == str) ? deflt : value; |
499 | } |
500 | |
501 | bool DictionaryParse(const string& encoded_str, |
502 | vector<pair<string, string> >* items) { |
503 | vector<string> entries; |
504 | SplitStringUsing(encoded_str, "," , &entries); |
505 | for (int i = 0; i < entries.size(); ++i) { |
506 | vector<string> fields; |
507 | SplitStringAllowEmpty(entries[i], ":" , &fields); |
508 | if (fields.size() != 2) // parsing error |
509 | return false; |
510 | items->push_back(make_pair(fields[0], fields[1])); |
511 | } |
512 | return true; |
513 | } |
514 | |