strutil.cc source code [Aerospike/modules/s2-geometry-library/geometry/strings/strutil.cc]

1	//
2	// Copyright (C) 1999-2005 Google, Inc.
3	//
4
5	#include "strutil.h"
6
7	#include <ctype.h>
8	#include <errno.h>
9	#include <float.h> // for DBL_DIG and FLT_DIG
10	#include <math.h> // for HUGE_VAL
11	#include <pthread.h> // for gmtime_r (on Windows)
12	#include <stdarg.h>
13	#include <stdlib.h>
14	#include <stdio.h>
15	#include <string.h>
16	#include <time.h> // for FastTimeToBuffer()
17
18	#include <algorithm>
19	using std::min;
20	using std::max;
21	using std::swap;
22	using std::reverse;
23
24	#include <hash_map>
25	using __gnu_cxx::hash_map;
26
27	#include <hash_set>
28	using __gnu_cxx::hash_set;
29
30	#include <iterator>
31	#include <limits>
32	using std::numeric_limits;
33
34	#include <set>
35	using std::set;
36	using std::multiset;
37
38	#include <string>
39	using std::string;
40
41	#include <vector>
42	using std::vector;
43
44
45	#include "base/logging.h"
46	#include "base/scoped_ptr.h"
47	//#include "strutil-inl.h"
48	//#include "third_party/utf/utf.h" // for runetochar
49	//#include "util/gtl/stl_util-inl.h" // for string_as_array
50	//#include "util/hash/hash.h"
51	#include "split.h"
52
53	#ifdef OS_WINDOWS
54	#include <pthread.h> // for gmtime_r
55	#ifdef min // windows.h defines this to something silly
56	#undef min
57	#endif
58	#endif
59
60	// ----------------------------------------------------------------------
61	// FpToString()
62	// FloatToString()
63	// IntToString()
64	// Convert various types to their string representation. These
65	// all do the obvious, trivial thing.
66	// ----------------------------------------------------------------------
67
68	string FpToString(Fprint fp) {
69	char buf[`17`];
70	snprintf(buf, sizeof(buf), "%016llx", fp);
71	return string (buf);
72	}
73
// Formats f using the caller-supplied printf-style format, which must
// consume exactly one floating-point argument.  The result is produced in
// an 80-byte scratch buffer, so output longer than 79 characters is
// truncated.
string FloatToString(float f, const char* format) {
  char buf[80];
  snprintf(buf, sizeof(buf), format, f);
  return string(buf);
}
79
// Formats i using the caller-supplied printf-style format, which must
// consume exactly one int argument.  The result is produced in an 80-byte
// scratch buffer, so output longer than 79 characters is truncated.
string IntToString(int i, const char* format) {
  char buf[80];
  snprintf(buf, sizeof(buf), format, i);
  return string(buf);
}
85
86	string Int64ToString(int64 i64, const char* format) {
87	char buf[`80`];
88	snprintf(buf, sizeof(buf), format, i64);
89	return string (buf);
90	}
91
92	string UInt64ToString(uint64 ui64, const char* format) {
93	char buf[`80`];
94	snprintf(buf, sizeof(buf), format, ui64);
95	return string (buf);
96	}
97
98	// Default arguments
99	string FloatToString(float f) { return FloatToString(f, "%7f"); }
100	string IntToString(int i) { return IntToString(i, "%7d"); }
101	string Int64ToString(int64 i64) {
102	return Int64ToString(i64, "%7" GG_LL_FORMAT "d");
103	}
104	string UInt64ToString(uint64 ui64) {
105	return UInt64ToString(ui64, "%7" GG_LL_FORMAT "u");
106	}
107
108	// ----------------------------------------------------------------------
109	// FastIntToBuffer()
110	// FastInt64ToBuffer()
111	// FastHexToBuffer()
112	// FastHex64ToBuffer()
113	// FastHex32ToBuffer()
114	// FastTimeToBuffer()
115	// These are intended for speed. FastHexToBuffer() assumes the
116	// integer is non-negative. FastHexToBuffer() puts output in
117	// hex rather than decimal. FastTimeToBuffer() puts the output
118	// into RFC822 format. If time is 0, uses the current time.
119	//
120	// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format,
121	// padded to exactly 16 bytes (plus one byte for '\0')
122	//
123	// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format,
124	// padded to exactly 8 bytes (plus one byte for '\0')
125	//
126	// All functions take the output buffer as an arg. FastInt()
127	// uses at most 22 bytes, FastTime() uses exactly 30 bytes.
128	// They all return a pointer to the beginning of the output,
129	// which may not be the beginning of the input buffer. (Though
130	// for FastTimeToBuffer(), we guarantee that it is.)
131	// ----------------------------------------------------------------------
132
133	char FastInt64ToBuffer(int64 i, char** buffer) {
134	FastInt64ToBufferLeft(i, buffer);
135	return buffer;
136	}
137
138	// Offset into buffer where FastInt32ToBuffer places the end of string
139	// null character. Also used by FastInt32ToBufferLeft
140	static const int kFastInt32ToBufferOffset = `11`;
141
142	// Copied from http://gears.googlecode.com/svn-history/r395/trunk/gears/base/common/string16.cc
143	char FastInt32ToBuffer(int32 i, char** buffer) {
144	// We could collapse the positive and negative sections, but that
145	// would be slightly slower for positive numbers...
146	// 12 bytes is enough to store -232, -4294967296.
147	char* p = buffer + kFastInt32ToBufferOffset;
148	*p-- = `'\0'`;
149	if (i >= `0`) {
150	do {
151	*p-- = `'0'` + i % `10`;
152	i /= `10`;
153	} while (i > `0`);
154	return p + `1`;
155	} else {
156	// On different platforms, % and / have different behaviors for
157	// negative numbers, so we need to jump through hoops to make sure
158	// we don't divide negative numbers.
159	if (i > -`10`) {
160	i = -i;
161	*p-- = `'0'` + i;
162	*p = `'-'`;
163	return p;
164	} else {
165	// Make sure we aren't at MIN_INT, in which case we can't say i = -i
166	i = i + `10`;
167	i = -i;
168	*p-- = `'0'` + i % `10`;
169	// Undo what we did a moment ago
170	i = i / `10` + `1`;
171	do {
172	*p-- = `'0'` + i % `10`;
173	i /= `10`;
174	} while (i > `0`);
175	*p = `'-'`;
176	return p;
177	}
178	}
179	}
180
181	char FastHexToBuffer(int* i, char* buffer) {
182	CHECK(i >= `0`) << "FastHexToBuffer() wants non-negative integers, not " << i;
183
184	static const char *hexdigits = "0123456789abcdef";
185	char *p = buffer + `21`;
186	*p-- = `'\0'`;
187	do {
188	p-- = hexdigits[i & `15`]; // mod by 16*
189	i >>= `4`; // divide by 16
190	} while (i > `0`);
191	return p + `1`;
192	}
193
194	char InternalFastHexToBuffer(uint64 value, char** buffer, int num_byte) {
195	static const char *hexdigits = "0123456789abcdef";
196	buffer[num_byte] = `'\0'`;
197	for (int i = num_byte - `1`; i >= `0`; i--) {
198	buffer[i] = hexdigits[uint32(value) & `0xf`];
199	value >>= `4`;
200	}
201	return buffer;
202	}
203
204	char FastHex64ToBuffer(uint64 value, char** buffer) {
205	return InternalFastHexToBuffer(value, buffer, `16`);
206	}
207
208	char FastHex32ToBuffer(uint32 value, char** buffer) {
209	return InternalFastHexToBuffer(value, buffer, `8`);
210	}
211
// Several converters use this table to reduce
// division and modulo operations.
//
// Entry n (0 <= n <= 99) holds the two ASCII digits of n: tens digit first,
// then units digit.  Entries are NOT NUL-terminated.  One row per decade.
static const char two_ASCII_digits[100][2] = {
  {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'}, {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
  {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'}, {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
  {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'}, {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
  {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'}, {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
  {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'}, {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
  {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'}, {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
  {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'}, {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
  {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'}, {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
  {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'}, {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
  {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'}, {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
};
236
237	char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
238	int digits;
239	const char *ASCII_digits = NULL;
240	// The idea of this implementation is to trim the number of divides to as few
241	// as possible by using multiplication and subtraction rather than mod (%),
242	// and by outputting two digits at a time rather than one.
243	// The huge-number case is first, in the hopes that the compiler will output
244	// that case in one branch-free block of code, and only output conditional
245	// branches into it from below.
246	if (u >= `1000000000`) { // >= 1,000,000,000
247	digits = u / `100000000`; // 100,000,000
248	ASCII_digits = two_ASCII_digits[digits];
249	buffer[`0`] = ASCII_digits[`0`];
250	buffer[`1`] = ASCII_digits[`1`];
251	buffer += `2`;
252	sublt100_000_000:
253	u -= digits * `100000000`; // 100,000,000
254	lt100_000_000:
255	digits = u / `1000000`; // 1,000,000
256	ASCII_digits = two_ASCII_digits[digits];
257	buffer[`0`] = ASCII_digits[`0`];
258	buffer[`1`] = ASCII_digits[`1`];
259	buffer += `2`;
260	sublt1_000_000:
261	u -= digits * `1000000`; // 1,000,000
262	lt1_000_000:
263	digits = u / `10000`; // 10,000
264	ASCII_digits = two_ASCII_digits[digits];
265	buffer[`0`] = ASCII_digits[`0`];
266	buffer[`1`] = ASCII_digits[`1`];
267	buffer += `2`;
268	sublt10_000:
269	u -= digits * `10000`; // 10,000
270	lt10_000:
271	digits = u / `100`;
272	ASCII_digits = two_ASCII_digits[digits];
273	buffer[`0`] = ASCII_digits[`0`];
274	buffer[`1`] = ASCII_digits[`1`];
275	buffer += `2`;
276	sublt100:
277	u -= digits * `100`;
278	lt100:
279	digits = u;
280	ASCII_digits = two_ASCII_digits[digits];
281	buffer[`0`] = ASCII_digits[`0`];
282	buffer[`1`] = ASCII_digits[`1`];
283	buffer += `2`;
284	done:
285	*buffer = `0`;
286	return buffer;
287	}
288
289	if (u < `100`) {
290	digits = u;
291	if (u >= `10`) goto lt100;
292	*buffer++ = `'0'` + digits;
293	goto done;
294	}
295	if (u < `10000`) { // 10,000
296	if (u >= `1000`) goto lt10_000;
297	digits = u / `100`;
298	*buffer++ = `'0'` + digits;
299	goto sublt100;
300	}
301	if (u < `1000000`) { // 1,000,000
302	if (u >= `100000`) goto lt1_000_000;
303	digits = u / `10000`; // 10,000
304	*buffer++ = `'0'` + digits;
305	goto sublt10_000;
306	}
307	if (u < `100000000`) { // 100,000,000
308	if (u >= `10000000`) goto lt100_000_000;
309	digits = u / `1000000`; // 1,000,000
310	*buffer++ = `'0'` + digits;
311	goto sublt1_000_000;
312	}
313	// we already know that u < 1,000,000,000
314	digits = u / `100000000`; // 100,000,000
315	*buffer++ = `'0'` + digits;
316	goto sublt100_000_000;
317	}
318
319	char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
320	int digits;
321	const char *ASCII_digits = NULL;
322
323	uint32 u = static_cast<uint32>(u64);
324	if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
325
326	uint64 top_11_digits = u64 / `1000000000`;
327	buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
328	u = u64 - (top_11_digits * `1000000000`);
329
330	digits = u / `10000000`; // 10,000,000
331	DCHECK_LT(digits, `100`);
332	ASCII_digits = two_ASCII_digits[digits];
333	buffer[`0`] = ASCII_digits[`0`];
334	buffer[`1`] = ASCII_digits[`1`];
335	buffer += `2`;
336	u -= digits * `10000000`; // 10,000,000
337	digits = u / `100000`; // 100,000
338	ASCII_digits = two_ASCII_digits[digits];
339	buffer[`0`] = ASCII_digits[`0`];
340	buffer[`1`] = ASCII_digits[`1`];
341	buffer += `2`;
342	u -= digits * `100000`; // 100,000
343	digits = u / `1000`; // 1,000
344	ASCII_digits = two_ASCII_digits[digits];
345	buffer[`0`] = ASCII_digits[`0`];
346	buffer[`1`] = ASCII_digits[`1`];
347	buffer += `2`;
348	u -= digits * `1000`; // 1,000
349	digits = u / `10`;
350	ASCII_digits = two_ASCII_digits[digits];
351	buffer[`0`] = ASCII_digits[`0`];
352	buffer[`1`] = ASCII_digits[`1`];
353	buffer += `2`;
354	u -= digits * `10`;
355	digits = u;
356	*buffer++ = `'0'` + digits;
357	*buffer = `0`;
358	return buffer;
359	}
360
361	char* FastInt64ToBufferLeft(int64 i, char* buffer) {
362	uint64 u = i;
363	if (i < `0`) {
364	*buffer++ = `'-'`;
365	u = -i;
366	}
367	return FastUInt64ToBufferLeft(u, buffer);
368	}
369
370	static inline void PutTwoDigits(int i, char* p) {
371	DCHECK_GE(i, `0`);
372	DCHECK_LT(i, `100`);
373	p[`0`] = two_ASCII_digits[i][`0`];
374	p[`1`] = two_ASCII_digits[i][`1`];
375	}
376
377	char* FastTimeToBuffer(time_t s, char* buffer) {
378	if (s == `0`) {
379	time(&s);
380	}
381
382	struct tm tm;
383	if (gmtime_r(&s, &tm) == NULL) {
384	// Error message must fit in 30-char buffer.
385	memcpy(buffer, "Invalid:", sizeof("Invalid:"));
386	FastInt64ToBufferLeft(s, buffer+strlen(buffer));
387	return buffer;
388	}
389
390	// strftime format: "%a, %d %b %Y %H:%M:%S GMT",
391	// but strftime does locale stuff which we do not want
392	// plus strftime takes > 10x the time of hard code
393
394	const char* weekday_name = "Xxx";
395	switch (tm.tm_wday) {
396	default: { DCHECK(false); } break;
397	case `0`: weekday_name = "Sun"; break;
398	case `1`: weekday_name = "Mon"; break;
399	case `2`: weekday_name = "Tue"; break;
400	case `3`: weekday_name = "Wed"; break;
401	case `4`: weekday_name = "Thu"; break;
402	case `5`: weekday_name = "Fri"; break;
403	case `6`: weekday_name = "Sat"; break;
404	}
405
406	const char* month_name = "Xxx";
407	switch (tm.tm_mon) {
408	default: { DCHECK(false); } break;
409	case `0`: month_name = "Jan"; break;
410	case `1`: month_name = "Feb"; break;
411	case `2`: month_name = "Mar"; break;
412	case `3`: month_name = "Apr"; break;
413	case `4`: month_name = "May"; break;
414	case `5`: month_name = "Jun"; break;
415	case `6`: month_name = "Jul"; break;
416	case `7`: month_name = "Aug"; break;
417	case `8`: month_name = "Sep"; break;
418	case `9`: month_name = "Oct"; break;
419	case `10`: month_name = "Nov"; break;
420	case `11`: month_name = "Dec"; break;
421	}
422
423	// Write out the buffer.
424
425	memcpy(buffer+`0`, weekday_name, `3`);
426	buffer[`3`] = `','`;
427	buffer[`4`] = `' '`;
428
429	PutTwoDigits(tm.tm_mday, buffer+`5`);
430	buffer[`7`] = `' '`;
431
432	memcpy(buffer+`8`, month_name, `3`);
433	buffer[`11`] = `' '`;
434
435	int32 year = tm.tm_year + `1900`;
436	PutTwoDigits(year/`100`, buffer+`12`);
437	PutTwoDigits(year%`100`, buffer+`14`);
438	buffer[`16`] = `' '`;
439
440	PutTwoDigits(tm.tm_hour, buffer+`17`);
441	buffer[`19`] = `':'`;
442
443	PutTwoDigits(tm.tm_min, buffer+`20`);
444	buffer[`22`] = `':'`;
445
446	PutTwoDigits(tm.tm_sec, buffer+`23`);
447
448	// includes ending NUL
449	memcpy(buffer+`25`, " GMT", `5`);
450
451	return buffer;
452	}
453
454	// ----------------------------------------------------------------------
455	// ParseLeadingUInt64Value
456	// ParseLeadingInt64Value
457	// ParseLeadingHex64Value
458	// A simple parser for long long values. Returns the parsed value if a
459	// valid integer is found; else returns deflt
460	// UInt64 and Int64 cannot handle decimal numbers with leading 0s.
461	// --------------------------------------------------------------------
462	uint64 ParseLeadingUInt64Value(const char *str, uint64 deflt) {
463	char *error = NULL;
464	const uint64 value = strtoull(str, &error, `0`);
465	return (error == str) ? deflt : value;
466	}
467
468	int64 ParseLeadingInt64Value(const char *str, int64 deflt) {
469	char *error = NULL;
470	const int64 value = strtoll(str, &error, `0`);
471	return (error == str) ? deflt : value;
472	}
473
474	uint64 ParseLeadingHex64Value(const char *str, uint64 deflt) {
475	char *error = NULL;
476	const uint64 value = strtoull(str, &error, `16`);
477	return (error == str) ? deflt : value;
478	}
479
480	// ----------------------------------------------------------------------
481	// ParseLeadingDec64Value
482	// ParseLeadingUDec64Value
483	// A simple parser for [u]int64 values. Returns the parsed value
484	// if a valid value is found; else returns deflt
485	// The string passed in is treated as 10 based.
486	// This can handle strings with leading 0s.
487	// --------------------------------------------------------------------
488
489	int64 ParseLeadingDec64Value(const char *str, int64 deflt) {
490	char *error = NULL;
491	const int64 value = strtoll(str, &error, `10`);
492	return (error == str) ? deflt : value;
493	}
494
495	uint64 ParseLeadingUDec64Value(const char *str, uint64 deflt) {
496	char *error = NULL;
497	const uint64 value = strtoull(str, &error, `10`);
498	return (error == str) ? deflt : value;
499	}
500
501	bool DictionaryParse(const string& encoded_str,
502	vector<pair<string, string> >* items) {
503	vector<string> entries;
504	SplitStringUsing(encoded_str, ",", &entries);
505	for (int i = `0`; i < entries.size(); ++i) {
506	vector<string> fields;
507	SplitStringAllowEmpty(entries [i], ":", &fields);
508	if (fields.size() != `2`) // parsing error
509	return false;
510	items->push_back(make_pair(fields [`0`], fields [`1`]));
511	}
512	return true;
513	}
514

Browse the source code of Aerospike/modules/s2-geometry-library/geometry/strings/strutil.cc