1//
2// Copyright (C) 1999-2005 Google, Inc.
3//
4
5#include "strutil.h"
6
7#include <ctype.h>
8#include <errno.h>
9#include <float.h> // for DBL_DIG and FLT_DIG
10#include <math.h> // for HUGE_VAL
11#include <pthread.h> // for gmtime_r (on Windows)
12#include <stdarg.h>
13#include <stdlib.h>
14#include <stdio.h>
15#include <string.h>
16#include <time.h> // for FastTimeToBuffer()
17
18#include <algorithm>
19using std::min;
20using std::max;
21using std::swap;
22using std::reverse;
23
24#include <hash_map>
25using __gnu_cxx::hash_map;
26
27#include <hash_set>
28using __gnu_cxx::hash_set;
29
30#include <iterator>
31#include <limits>
32using std::numeric_limits;
33
34#include <set>
35using std::set;
36using std::multiset;
37
38#include <string>
39using std::string;
40
41#include <vector>
42using std::vector;
43
44
45#include "base/logging.h"
46#include "base/scoped_ptr.h"
47//#include "strutil-inl.h"
48//#include "third_party/utf/utf.h" // for runetochar
49//#include "util/gtl/stl_util-inl.h" // for string_as_array
50//#include "util/hash/hash.h"
51#include "split.h"
52
53#ifdef OS_WINDOWS
54#include <pthread.h> // for gmtime_r
55#ifdef min // windows.h defines this to something silly
56#undef min
57#endif
58#endif
59
60// ----------------------------------------------------------------------
61// FpToString()
62// FloatToString()
63// IntToString()
64// Convert various types to their string representation. These
65// all do the obvious, trivial thing.
66// ----------------------------------------------------------------------
67
68string FpToString(Fprint fp) {
69 char buf[17];
70 snprintf(buf, sizeof(buf), "%016llx", fp);
71 return string(buf);
72}
73
74string FloatToString(float f, const char* format) {
75 char buf[80];
76 snprintf(buf, sizeof(buf), format, f);
77 return string(buf);
78}
79
80string IntToString(int i, const char* format) {
81 char buf[80];
82 snprintf(buf, sizeof(buf), format, i);
83 return string(buf);
84}
85
86string Int64ToString(int64 i64, const char* format) {
87 char buf[80];
88 snprintf(buf, sizeof(buf), format, i64);
89 return string(buf);
90}
91
92string UInt64ToString(uint64 ui64, const char* format) {
93 char buf[80];
94 snprintf(buf, sizeof(buf), format, ui64);
95 return string(buf);
96}
97
98// Default arguments
99string FloatToString(float f) { return FloatToString(f, "%7f"); }
100string IntToString(int i) { return IntToString(i, "%7d"); }
101string Int64ToString(int64 i64) {
102 return Int64ToString(i64, "%7" GG_LL_FORMAT "d");
103}
104string UInt64ToString(uint64 ui64) {
105 return UInt64ToString(ui64, "%7" GG_LL_FORMAT "u");
106}
107
108// ----------------------------------------------------------------------
109// FastIntToBuffer()
110// FastInt64ToBuffer()
111// FastHexToBuffer()
112// FastHex64ToBuffer()
113// FastHex32ToBuffer()
114// FastTimeToBuffer()
115// These are intended for speed. FastHexToBuffer() assumes the
116// integer is non-negative. FastHexToBuffer() puts output in
117// hex rather than decimal. FastTimeToBuffer() puts the output
118// into RFC822 format. If time is 0, uses the current time.
119//
120// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
121// padded to exactly 16 bytes (plus one byte for '\0')
122//
123// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
124// padded to exactly 8 bytes (plus one byte for '\0')
125//
126// All functions take the output buffer as an arg. FastInt()
127// uses at most 22 bytes, FastTime() uses exactly 30 bytes.
128// They all return a pointer to the beginning of the output,
129// which may not be the beginning of the input buffer. (Though
130// for FastTimeToBuffer(), we guarantee that it is.)
131// ----------------------------------------------------------------------
132
133char *FastInt64ToBuffer(int64 i, char* buffer) {
134 FastInt64ToBufferLeft(i, buffer);
135 return buffer;
136}
137
138// Offset into buffer where FastInt32ToBuffer places the end of string
139// null character. Also used by FastInt32ToBufferLeft
140static const int kFastInt32ToBufferOffset = 11;
141
142// Copied from http://gears.googlecode.com/svn-history/r395/trunk/gears/base/common/string16.cc
143char *FastInt32ToBuffer(int32 i, char* buffer) {
144 // We could collapse the positive and negative sections, but that
145 // would be slightly slower for positive numbers...
146 // 12 bytes is enough to store -2**32, -4294967296.
147 char* p = buffer + kFastInt32ToBufferOffset;
148 *p-- = '\0';
149 if (i >= 0) {
150 do {
151 *p-- = '0' + i % 10;
152 i /= 10;
153 } while (i > 0);
154 return p + 1;
155 } else {
156 // On different platforms, % and / have different behaviors for
157 // negative numbers, so we need to jump through hoops to make sure
158 // we don't divide negative numbers.
159 if (i > -10) {
160 i = -i;
161 *p-- = '0' + i;
162 *p = '-';
163 return p;
164 } else {
165 // Make sure we aren't at MIN_INT, in which case we can't say i = -i
166 i = i + 10;
167 i = -i;
168 *p-- = '0' + i % 10;
169 // Undo what we did a moment ago
170 i = i / 10 + 1;
171 do {
172 *p-- = '0' + i % 10;
173 i /= 10;
174 } while (i > 0);
175 *p = '-';
176 return p;
177 }
178 }
179}
180
181char *FastHexToBuffer(int i, char* buffer) {
182 CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
183
184 static const char *hexdigits = "0123456789abcdef";
185 char *p = buffer + 21;
186 *p-- = '\0';
187 do {
188 *p-- = hexdigits[i & 15]; // mod by 16
189 i >>= 4; // divide by 16
190 } while (i > 0);
191 return p + 1;
192}
193
194char *InternalFastHexToBuffer(uint64 value, char* buffer, int num_byte) {
195 static const char *hexdigits = "0123456789abcdef";
196 buffer[num_byte] = '\0';
197 for (int i = num_byte - 1; i >= 0; i--) {
198 buffer[i] = hexdigits[uint32(value) & 0xf];
199 value >>= 4;
200 }
201 return buffer;
202}
203
204char *FastHex64ToBuffer(uint64 value, char* buffer) {
205 return InternalFastHexToBuffer(value, buffer, 16);
206}
207
208char *FastHex32ToBuffer(uint32 value, char* buffer) {
209 return InternalFastHexToBuffer(value, buffer, 8);
210}
211
212// Several converters use this table to reduce
213// division and modulo operations.
214static const char two_ASCII_digits[100][2] = {
215 {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
216 {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
217 {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
218 {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
219 {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
220 {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
221 {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
222 {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
223 {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
224 {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
225 {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
226 {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
227 {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
228 {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
229 {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
230 {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
231 {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
232 {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
233 {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
234 {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
235};
236
237char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
238 int digits;
239 const char *ASCII_digits = NULL;
240 // The idea of this implementation is to trim the number of divides to as few
241 // as possible by using multiplication and subtraction rather than mod (%),
242 // and by outputting two digits at a time rather than one.
243 // The huge-number case is first, in the hopes that the compiler will output
244 // that case in one branch-free block of code, and only output conditional
245 // branches into it from below.
246 if (u >= 1000000000) { // >= 1,000,000,000
247 digits = u / 100000000; // 100,000,000
248 ASCII_digits = two_ASCII_digits[digits];
249 buffer[0] = ASCII_digits[0];
250 buffer[1] = ASCII_digits[1];
251 buffer += 2;
252sublt100_000_000:
253 u -= digits * 100000000; // 100,000,000
254lt100_000_000:
255 digits = u / 1000000; // 1,000,000
256 ASCII_digits = two_ASCII_digits[digits];
257 buffer[0] = ASCII_digits[0];
258 buffer[1] = ASCII_digits[1];
259 buffer += 2;
260sublt1_000_000:
261 u -= digits * 1000000; // 1,000,000
262lt1_000_000:
263 digits = u / 10000; // 10,000
264 ASCII_digits = two_ASCII_digits[digits];
265 buffer[0] = ASCII_digits[0];
266 buffer[1] = ASCII_digits[1];
267 buffer += 2;
268sublt10_000:
269 u -= digits * 10000; // 10,000
270lt10_000:
271 digits = u / 100;
272 ASCII_digits = two_ASCII_digits[digits];
273 buffer[0] = ASCII_digits[0];
274 buffer[1] = ASCII_digits[1];
275 buffer += 2;
276sublt100:
277 u -= digits * 100;
278lt100:
279 digits = u;
280 ASCII_digits = two_ASCII_digits[digits];
281 buffer[0] = ASCII_digits[0];
282 buffer[1] = ASCII_digits[1];
283 buffer += 2;
284done:
285 *buffer = 0;
286 return buffer;
287 }
288
289 if (u < 100) {
290 digits = u;
291 if (u >= 10) goto lt100;
292 *buffer++ = '0' + digits;
293 goto done;
294 }
295 if (u < 10000) { // 10,000
296 if (u >= 1000) goto lt10_000;
297 digits = u / 100;
298 *buffer++ = '0' + digits;
299 goto sublt100;
300 }
301 if (u < 1000000) { // 1,000,000
302 if (u >= 100000) goto lt1_000_000;
303 digits = u / 10000; // 10,000
304 *buffer++ = '0' + digits;
305 goto sublt10_000;
306 }
307 if (u < 100000000) { // 100,000,000
308 if (u >= 10000000) goto lt100_000_000;
309 digits = u / 1000000; // 1,000,000
310 *buffer++ = '0' + digits;
311 goto sublt1_000_000;
312 }
313 // we already know that u < 1,000,000,000
314 digits = u / 100000000; // 100,000,000
315 *buffer++ = '0' + digits;
316 goto sublt100_000_000;
317}
318
319char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
320 int digits;
321 const char *ASCII_digits = NULL;
322
323 uint32 u = static_cast<uint32>(u64);
324 if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
325
326 uint64 top_11_digits = u64 / 1000000000;
327 buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
328 u = u64 - (top_11_digits * 1000000000);
329
330 digits = u / 10000000; // 10,000,000
331 DCHECK_LT(digits, 100);
332 ASCII_digits = two_ASCII_digits[digits];
333 buffer[0] = ASCII_digits[0];
334 buffer[1] = ASCII_digits[1];
335 buffer += 2;
336 u -= digits * 10000000; // 10,000,000
337 digits = u / 100000; // 100,000
338 ASCII_digits = two_ASCII_digits[digits];
339 buffer[0] = ASCII_digits[0];
340 buffer[1] = ASCII_digits[1];
341 buffer += 2;
342 u -= digits * 100000; // 100,000
343 digits = u / 1000; // 1,000
344 ASCII_digits = two_ASCII_digits[digits];
345 buffer[0] = ASCII_digits[0];
346 buffer[1] = ASCII_digits[1];
347 buffer += 2;
348 u -= digits * 1000; // 1,000
349 digits = u / 10;
350 ASCII_digits = two_ASCII_digits[digits];
351 buffer[0] = ASCII_digits[0];
352 buffer[1] = ASCII_digits[1];
353 buffer += 2;
354 u -= digits * 10;
355 digits = u;
356 *buffer++ = '0' + digits;
357 *buffer = 0;
358 return buffer;
359}
360
361char* FastInt64ToBufferLeft(int64 i, char* buffer) {
362 uint64 u = i;
363 if (i < 0) {
364 *buffer++ = '-';
365 u = -i;
366 }
367 return FastUInt64ToBufferLeft(u, buffer);
368}
369
370static inline void PutTwoDigits(int i, char* p) {
371 DCHECK_GE(i, 0);
372 DCHECK_LT(i, 100);
373 p[0] = two_ASCII_digits[i][0];
374 p[1] = two_ASCII_digits[i][1];
375}
376
377char* FastTimeToBuffer(time_t s, char* buffer) {
378 if (s == 0) {
379 time(&s);
380 }
381
382 struct tm tm;
383 if (gmtime_r(&s, &tm) == NULL) {
384 // Error message must fit in 30-char buffer.
385 memcpy(buffer, "Invalid:", sizeof("Invalid:"));
386 FastInt64ToBufferLeft(s, buffer+strlen(buffer));
387 return buffer;
388 }
389
390 // strftime format: "%a, %d %b %Y %H:%M:%S GMT",
391 // but strftime does locale stuff which we do not want
392 // plus strftime takes > 10x the time of hard code
393
394 const char* weekday_name = "Xxx";
395 switch (tm.tm_wday) {
396 default: { DCHECK(false); } break;
397 case 0: weekday_name = "Sun"; break;
398 case 1: weekday_name = "Mon"; break;
399 case 2: weekday_name = "Tue"; break;
400 case 3: weekday_name = "Wed"; break;
401 case 4: weekday_name = "Thu"; break;
402 case 5: weekday_name = "Fri"; break;
403 case 6: weekday_name = "Sat"; break;
404 }
405
406 const char* month_name = "Xxx";
407 switch (tm.tm_mon) {
408 default: { DCHECK(false); } break;
409 case 0: month_name = "Jan"; break;
410 case 1: month_name = "Feb"; break;
411 case 2: month_name = "Mar"; break;
412 case 3: month_name = "Apr"; break;
413 case 4: month_name = "May"; break;
414 case 5: month_name = "Jun"; break;
415 case 6: month_name = "Jul"; break;
416 case 7: month_name = "Aug"; break;
417 case 8: month_name = "Sep"; break;
418 case 9: month_name = "Oct"; break;
419 case 10: month_name = "Nov"; break;
420 case 11: month_name = "Dec"; break;
421 }
422
423 // Write out the buffer.
424
425 memcpy(buffer+0, weekday_name, 3);
426 buffer[3] = ',';
427 buffer[4] = ' ';
428
429 PutTwoDigits(tm.tm_mday, buffer+5);
430 buffer[7] = ' ';
431
432 memcpy(buffer+8, month_name, 3);
433 buffer[11] = ' ';
434
435 int32 year = tm.tm_year + 1900;
436 PutTwoDigits(year/100, buffer+12);
437 PutTwoDigits(year%100, buffer+14);
438 buffer[16] = ' ';
439
440 PutTwoDigits(tm.tm_hour, buffer+17);
441 buffer[19] = ':';
442
443 PutTwoDigits(tm.tm_min, buffer+20);
444 buffer[22] = ':';
445
446 PutTwoDigits(tm.tm_sec, buffer+23);
447
448 // includes ending NUL
449 memcpy(buffer+25, " GMT", 5);
450
451 return buffer;
452}
453
454// ----------------------------------------------------------------------
455// ParseLeadingUInt64Value
456// ParseLeadingInt64Value
457// ParseLeadingHex64Value
458// A simple parser for long long values. Returns the parsed value if a
459// valid integer is found; else returns deflt
460// UInt64 and Int64 cannot handle decimal numbers with leading 0s.
461// --------------------------------------------------------------------
462uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) {
463 char *error = NULL;
464 const uint64 value = strtoull(str, &error, 0);
465 return (error == str) ? deflt : value;
466}
467
468int64 ParseLeadingInt64Value(const char *str, int64 deflt) {
469 char *error = NULL;
470 const int64 value = strtoll(str, &error, 0);
471 return (error == str) ? deflt : value;
472}
473
474uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
475 char *error = NULL;
476 const uint64 value = strtoull(str, &error, 16);
477 return (error == str) ? deflt : value;
478}
479
480// ----------------------------------------------------------------------
481// ParseLeadingDec64Value
482// ParseLeadingUDec64Value
483// A simple parser for [u]int64 values. Returns the parsed value
484// if a valid value is found; else returns deflt
485// The string passed in is treated as *10 based*.
486// This can handle strings with leading 0s.
487// --------------------------------------------------------------------
488
489int64 ParseLeadingDec64Value(const char *str, int64 deflt) {
490 char *error = NULL;
491 const int64 value = strtoll(str, &error, 10);
492 return (error == str) ? deflt : value;
493}
494
495uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) {
496 char *error = NULL;
497 const uint64 value = strtoull(str, &error, 10);
498 return (error == str) ? deflt : value;
499}
500
501bool DictionaryParse(const string& encoded_str,
502 vector<pair<string, string> >* items) {
503 vector<string> entries;
504 SplitStringUsing(encoded_str, ",", &entries);
505 for (int i = 0; i < entries.size(); ++i) {
506 vector<string> fields;
507 SplitStringAllowEmpty(entries[i], ":", &fields);
508 if (fields.size() != 2) // parsing error
509 return false;
510 items->push_back(make_pair(fields[0], fields[1]));
511 }
512 return true;
513}
514