strutil.cc source code [Velox/build/_deps/protobuf-src/src/google/protobuf/stubs/strutil.cc]

1	// Protocol Buffers - Google's data interchange format
2	// Copyright 2008 Google Inc. All rights reserved.
3	// https://developers.google.com/protocol-buffers/
4	//
5	// Redistribution and use in source and binary forms, with or without
6	// modification, are permitted provided that the following conditions are
7	// met:
8	//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
18	//
19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31	// from google3/strings/strutil.cc
32
33	#include <google/protobuf/stubs/strutil.h>
34
35	#include <errno.h>
36	#include <float.h> // FLT_DIG and DBL_DIG
37	#include <limits.h>
38	#include <stdio.h>
39	#include <cmath>
40	#include <iterator>
41	#include <limits>
42
43	#include <google/protobuf/stubs/logging.h>
44	#include <google/protobuf/stubs/stl_util.h>
45
46	#ifdef _WIN32
47	// MSVC has only _snprintf, not snprintf.
48	//
49	// MinGW has both snprintf and _snprintf, but they appear to be different
50	// functions. The former is buggy. When invoked like so:
51	// char buffer[32];
//     snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
53	// it prints "1.23000e+10". This is plainly wrong: %g should never print
54	// trailing zeros after the decimal point. For some reason this bug only
55	// occurs with some input values, not all. In any case, _snprintf does the
56	// right thing, so we use it.
57	#define snprintf _snprintf
58	#endif
59
60	namespace google {
61	namespace protobuf {
62
63	// These are defined as macros on some platforms. #undef them so that we can
64	// redefine them.
65	#undef isxdigit
66	#undef isprint
67
68	// The definitions of these in ctype.h change based on locale. Since our
69	// string manipulation is all in relation to the protocol buffer and C++
70	// languages, we always want to use the C locale. So, we re-define these
71	// exactly as we want them.
// Locale-independent hex-digit test: true only for the ASCII characters
// '0'-'9', 'a'-'f' and 'A'-'F'.
inline bool isxdigit(char c) {
  return ('0' <= c && c <= '9') ||
         ('a' <= c && c <= 'f') ||
         ('A' <= c && c <= 'F');
}
77
// Locale-independent printability test: true for characters in the ASCII
// range 0x20 (space) through 0x7E ('~'), inclusive.
inline bool isprint(char c) {
  return c >= 0x20 && c <= 0x7E;
}
81
// ----------------------------------------------------------------------
// ReplaceCharacters
//    Replaces any occurrence of the character 'remove' (or the characters
//    in 'remove') with the character 'replacewith'.
//
//    The scan uses strpbrk() on s->c_str(), so it stops at the first NUL
//    byte: characters after an embedded '\0' in *s are left untouched.
// ----------------------------------------------------------------------
void ReplaceCharacters(std::string *s, const char *remove, char replacewith) {
  const char *str_start = s->c_str();
  const char *str = str_start;
  for (str = strpbrk(str, remove);
       str != nullptr;
       str = strpbrk(str + 1, remove)) {
    // 'str' points into the string's own buffer; convert it to an index.
    (*s)[str - str_start] = replacewith;
  }
}
96
97	void StripWhitespace(std::string *str) {
98	int str_length = str->length();
99
100	// Strip off leading whitespace.
101	int first = `0`;
102	while (first < str_length && ascii_isspace(c: str->at(n: first))) {
103	++first;
104	}
105	// If entire string is white space.
106	if (first == str_length) {
107	str->clear();
108	return;
109	}
110	if (first > `0`) {
111	str->erase(pos: `0`, n: first);
112	str_length -= first;
113	}
114
115	// Strip off trailing whitespace.
116	int last = str_length - `1`;
117	while (last >= `0` && ascii_isspace(c: str->at(n: last))) {
118	--last;
119	}
120	if (last != (str_length - `1`) && last >= `0`) {
121	str->erase(pos: last + `1`, n: std::string::npos);
122	}
123	}
124
// ----------------------------------------------------------------------
// StringReplace()
//    Replace the "old" pattern with the "new" pattern in a string,
//    and append the result to "res".  If replace_all is false,
//    it only replaces the first instance of "old."
//
//    If "oldsub" is empty, "s" is appended to "res" unchanged.
// ----------------------------------------------------------------------

void StringReplace(const std::string &s, const std::string &oldsub,
                   const std::string &newsub, bool replace_all,
                   std::string *res) {
  if (oldsub.empty()) {
    res->append(s);  // if empty, append the given string.
    return;
  }

  std::string::size_type start_pos = 0;
  std::string::size_type pos;
  do {
    pos = s.find(oldsub, start_pos);
    if (pos == std::string::npos) {
      break;
    }
    // Copy the text before the match, then the replacement.
    res->append(s, start_pos, pos - start_pos);
    res->append(newsub);
    start_pos = pos + oldsub.size();  // start searching again after the "old"
  } while (replace_all);
  // Copy whatever follows the last match (or all of "s" if none matched).
  res->append(s, start_pos, s.length() - start_pos);
}
153
154	// ----------------------------------------------------------------------
155	// StringReplace()
156	// Give me a string and two patterns "old" and "new", and I replace
157	// the first instance of "old" in the string with "new", if it
158	// exists. If "global" is true; call this repeatedly until it
159	// fails. RETURN a new string, regardless of whether the replacement
160	// happened or not.
161	// ----------------------------------------------------------------------
162
163	std::string StringReplace(const std::string &s, const std::string &oldsub,
164	const std::string &newsub, bool replace_all) {
165	std::string ret;
166	StringReplace(s, oldsub, newsub, replace_all, res: &ret);
167	return ret;
168	}
169
170	// ----------------------------------------------------------------------
171	// SplitStringUsing()
172	// Split a string using a character delimiter. Append the components
173	// to 'result'.
174	//
175	// Note: For multi-character delimiters, this routine will split on ANY* of*
176	// the characters in the string, not the entire string as a single delimiter.
177	// ----------------------------------------------------------------------
178	template <typename ITR>
179	static inline void SplitStringToIteratorUsing(StringPiece full,
180	const char *delim, ITR &result) {
181	// Optimize the common case where delim is a single character.
182	if (delim[`0`] != `'\0'` && delim[`1`] == `'\0'`) {
183	char c = delim[`0`];
184	const char* p = full.data();
185	const char* end = p + full.size();
186	while (p != end) {
187	if (*p == c) {
188	++p;
189	} else {
190	const char* start = p;
191	while (++p != end && *p != c);
192	*result++ = std::string (start, p - start);
193	}
194	}
195	return;
196	}
197
198	std::string::size_type begin_index, end_index;
199	begin_index = full.find_first_not_of(s: delim);
200	while (begin_index != std::string::npos) {
201	end_index = full.find_first_of(s: delim, pos: begin_index);
202	if (end_index == std::string::npos) {
203	*result++ = std::string (full.substr(pos: begin_index));
204	return;
205	}
206	*result++ =
207	std::string (full.substr(pos: begin_index, n: (end_index - begin_index)));
208	begin_index = full.find_first_not_of(s: delim, pos: end_index);
209	}
210	}
211
212	void SplitStringUsing(StringPiece full, const char *delim,
213	std::vector<std::string> *result) {
214	std::back_insert_iterator<std::vector<std::string> > it(*result);
215	SplitStringToIteratorUsing(full, delim, result&: it);
216	}
217
218	// Split a string using a character delimiter. Append the components
219	// to 'result'. If there are consecutive delimiters, this function
220	// will return corresponding empty strings. The string is split into
221	// at most the specified number of pieces greedily. This means that the
222	// last piece may possibly be split further. To split into as many pieces
223	// as possible, specify 0 as the number of pieces.
224	//
225	// If "full" is the empty string, yields an empty string as the only value.
226	//
227	// If "pieces" is negative for some reason, it returns the whole string
228	// ----------------------------------------------------------------------
229	template <typename ITR>
230	static inline void SplitStringToIteratorAllowEmpty(StringPiece full,
231	const char *delim,
232	int pieces, ITR &result) {
233	std::string::size_type begin_index, end_index;
234	begin_index = `0`;
235
236	for (int i = `0`; (i < pieces-`1`) \|\| (pieces == `0`); i++) {
237	end_index = full.find_first_of(s: delim, pos: begin_index);
238	if (end_index == std::string::npos) {
239	*result++ = std::string (full.substr(pos: begin_index));
240	return;
241	}
242	*result++ =
243	std::string (full.substr(pos: begin_index, n: (end_index - begin_index)));
244	begin_index = end_index + `1`;
245	}
246	*result++ = std::string (full.substr(pos: begin_index));
247	}
248
249	void SplitStringAllowEmpty(StringPiece full, const char *delim,
250	std::vector<std::string> *result) {
251	std::back_insert_iterator<std::vector<std::string> > it(*result);
252	SplitStringToIteratorAllowEmpty(full, delim, pieces: `0`, result&: it);
253	}
254
255	// ----------------------------------------------------------------------
256	// JoinStrings()
257	// This merges a vector of string components with delim inserted
258	// as separaters between components.
259	//
260	// ----------------------------------------------------------------------
261	template <class ITERATOR>
262	static void JoinStringsIterator(const ITERATOR &start, const ITERATOR &end,
263	const char delim, std::string result) {
264	GOOGLE_CHECK(result != nullptr);
265	result->clear();
266	int delim_length = strlen(s: delim);
267
268	// Precompute resulting length so we can reserve() memory in one shot.
269	int length = `0`;
270	for (ITERATOR iter = start; iter != end; ++iter) {
271	if (iter != start) {
272	length += delim_length;
273	}
274	length += iter->size();
275	}
276	result->reserve(res_arg: length);
277
278	// Now combine everything.
279	for (ITERATOR iter = start; iter != end; ++iter) {
280	if (iter != start) {
281	result->append(s: delim, n: delim_length);
282	}
283	result->append(iter->data(), iter->size());
284	}
285	}
286
287	void JoinStrings(const std::vector<std::string> &components, const char *delim,
288	std::string *result) {
289	JoinStringsIterator(start: components.begin(), end: components.end(), delim, result);
290	}
291
292	// ----------------------------------------------------------------------
293	// UnescapeCEscapeSequences()
294	// This does all the unescaping that C does: \ooo, \r, \n, etc
295	// Returns length of resulting string.
296	// The implementation of \x parses any positive number of hex digits,
297	// but it is an error if the value requires more than 8 bits, and the
298	// result is truncated to 8 bits.
299	//
300	// The second call stores its errors in a supplied string vector.
301	// If the string vector pointer is nullptr, it reports the errors with LOG().
302	// ----------------------------------------------------------------------
303
// True when 'c' is one of the characters '0'..'7'. Note that the argument is
// evaluated twice.
#define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))
305
// Protocol buffers doesn't ever care about errors, but I don't want to remove
// the code.
// The VECTOR argument is ignored, and the condition passed to GOOGLE_LOG_IF
// is the constant false.
#define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
309
310	int UnescapeCEscapeSequences(const char* source, char* dest) {
311	return UnescapeCEscapeSequences(source, dest, errors: nullptr);
312	}
313
314	int UnescapeCEscapeSequences(const char source, char* *dest,
315	std::vector<std::string> *errors) {
316	GOOGLE_DCHECK(errors == nullptr) << "Error reporting not implemented.";
317
318	char* d = dest;
319	const char* p = source;
320
321	// Small optimization for case where source = dest and there's no escaping
322	while ( p == d && p != `'\0'` && p != `'\\'` )
323	p++, d++;
324
325	while (*p != `'\0'`) {
326	if (*p != `'\\'`) {
327	d++ = p++;
328	} else {
329	switch ( ++p ) { // skip past the '\\'*
330	case `'\0'`:
331	LOG_STRING(ERROR, errors) << "String cannot end with \\";
332	*d = `'\0'`;
333	return d - dest; // we're done with p
334	case `'a'`: d++ = `'\a'`; break*;
335	case `'b'`: d++ = `'\b'`; break*;
336	case `'f'`: d++ = `'\f'`; break*;
337	case `'n'`: d++ = `'\n'`; break*;
338	case `'r'`: d++ = `'\r'`; break*;
339	case `'t'`: d++ = `'\t'`; break*;
340	case `'v'`: d++ = `'\v'`; break*;
341	case `'\\'`: d++ = `'\\'`; break*;
342	case `'?'`: d++ = `'\?'`; break; // \? Who knew?*
343	case `'\''`: d++ = `'\''`; break*;
344	case `'"'`: d++ = `'\"'`; break*;
345	case `'0'`: case `'1'`: case `'2'`: case `'3'`: // octal digit: 1 to 3 digits
346	case `'4'`: case `'5'`: case `'6'`: case `'7'`: {
347	char ch = *p - `'0'`;
348	if ( IS_OCTAL_DIGIT(p[`1`]) )
349	ch = ch * `8` + *++p - `'0'`;
350	if ( IS_OCTAL_DIGIT(p[`1`]) ) // safe (and easy) to do this twice
351	ch = ch * `8` + ++p - `'0'`; // now points at last digit*
352	*d++ = ch;
353	break;
354	}
355	case `'x'`: case `'X'`: {
356	if (!isxdigit(c: p[`1`])) {
357	if (p[`1`] == `'\0'`) {
358	LOG_STRING(ERROR, errors) << "String cannot end with \\x";
359	} else {
360	LOG_STRING(ERROR, errors) <<
361	"\\x cannot be followed by non-hex digit: \\" << *p << p[`1`];
362	}
363	break;
364	}
365	unsigned int ch = `0`;
366	const char *hex_start = p;
367	while (isxdigit(c: p[`1`])) // arbitrarily many hex digits
368	ch = (ch << `4`) + hex_digit_to_int(c: *++p);
369	if (ch > `0xFF`)
370	LOG_STRING(ERROR, errors)
371	<< "Value of "
372	<< "\\" << std::string (hex_start, p + `1` - hex_start)
373	<< " exceeds 8 bits";
374	*d++ = ch;
375	break;
376	}
377	#if 0 // TODO(kenton): Support \u and \U? Requires runetochar().
378	case `'u'`: {
379	// \uhhhh => convert 4 hex digits to UTF-8
380	char32 rune = `0`;
381	const char *hex_start = p;
382	for (int i = `0`; i < `4`; ++i) {
383	if (isxdigit(p[`1`])) { // Look one char ahead.
384	rune = (rune << `4`) + hex_digit_to_int(++p); // Advance p.*
385	} else {
386	LOG_STRING(ERROR, errors)
387	<< "\\u must be followed by 4 hex digits: \\"
388	<< std::string(hex_start, p+`1`-hex_start);
389	break;
390	}
391	}
392	d += runetochar(d, &rune);
393	break;
394	}
395	case `'U'`: {
396	// \Uhhhhhhhh => convert 8 hex digits to UTF-8
397	char32 rune = `0`;
398	const char *hex_start = p;
399	for (int i = `0`; i < `8`; ++i) {
400	if (isxdigit(p[`1`])) { // Look one char ahead.
401	// Don't change rune until we're sure this
402	// is within the Unicode limit, but do advance p.
403	char32 newrune = (rune << `4`) + hex_digit_to_int(*++p);
404	if (newrune > `0x10FFFF`) {
405	LOG_STRING(ERROR, errors)
406	<< "Value of \\"
407	<< std::string(hex_start, p + `1` - hex_start)
408	<< " exceeds Unicode limit (0x10FFFF)";
409	break;
410	} else {
411	rune = newrune;
412	}
413	} else {
414	LOG_STRING(ERROR, errors)
415	<< "\\U must be followed by 8 hex digits: \\"
416	<< std::string(hex_start, p+`1`-hex_start);
417	break;
418	}
419	}
420	d += runetochar(d, &rune);
421	break;
422	}
423	#endif
424	default:
425	LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
426	}
427	p++; // read past letter we escaped
428	}
429	}
430	*d = `'\0'`;
431	return d - dest;
432	}
433
434	// ----------------------------------------------------------------------
435	// UnescapeCEscapeString()
436	// This does the same thing as UnescapeCEscapeSequences, but creates
437	// a new string. The caller does not need to worry about allocating
438	// a dest buffer. This should be used for non performance critical
439	// tasks such as printing debug messages. It is safe for src and dest
440	// to be the same.
441	//
442	// The second call stores its errors in a supplied string vector.
443	// If the string vector pointer is nullptr, it reports the errors with LOG().
444	//
445	// In the first and second calls, the length of dest is returned. In the
446	// the third call, the new string is returned.
447	// ----------------------------------------------------------------------
448	int UnescapeCEscapeString(const std::string &src, std::string *dest) {
449	return UnescapeCEscapeString(src, dest, errors: nullptr);
450	}
451
452	int UnescapeCEscapeString(const std::string &src, std::string *dest,
453	std::vector<std::string> *errors) {
454	std::unique_ptr<char[]> unescaped(new char[src.size() + `1`]);
455	int len = UnescapeCEscapeSequences(source: src.c_str(), dest: unescaped.get(), errors);
456	GOOGLE_CHECK(dest);
457	dest->assign(s: unescaped.get(), n: len);
458	return len;
459	}
460
461	std::string UnescapeCEscapeString(const std::string &src) {
462	std::unique_ptr<char[]> unescaped(new char[src.size() + `1`]);
463	int len = UnescapeCEscapeSequences(source: src.c_str(), dest: unescaped.get(), errors: nullptr);
464	return std::string (unescaped.get(), len);
465	}
466
// ----------------------------------------------------------------------
// CEscapeString()
// CHexEscapeString()
//    Copies 'src' to 'dest', escaping dangerous characters using
//    C-style escape sequences. This is very useful for preparing query
//    flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
//    hexadecimal rather than octal sequences.
//    Returns the number of bytes written to 'dest' (not including the \0)
//    or -1 if there was insufficient space.
//
//    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
//    When 'utf8_safe' is true, bytes >= 0x80 are copied through unescaped.
// ----------------------------------------------------------------------
int CEscapeInternal(const char* src, int src_len, char* dest,
                    int dest_len, bool use_hex, bool utf8_safe) {
  static const char kHexDigits[] = "0123456789abcdef";
  const char* src_end = src + src_len;
  int used = 0;
  bool last_hex_escape = false;  // true if last output char was \xNN

  for (; src < src_end; src++) {
    if (dest_len - used < 2)   // Need space for two letter escape
      return -1;

    bool is_hex_escape = false;
    switch (*src) {
      case '\n': dest[used++] = '\\'; dest[used++] = 'n';  break;
      case '\r': dest[used++] = '\\'; dest[used++] = 'r';  break;
      case '\t': dest[used++] = '\\'; dest[used++] = 't';  break;
      case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
      case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
      case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
      default:
        // Note that if we emit \xNN and the src character after that is a hex
        // digit then that digit must be escaped too to prevent it being
        // interpreted as part of the character code by C.
        if ((!utf8_safe || static_cast<uint8_t>(*src) < 0x80) &&
            (!isprint(*src) ||
             (last_hex_escape && isxdigit(*src)))) {
          if (dest_len - used < 4)  // need space for 4 letter escape
            return -1;
          // Emit exactly four characters ("\xNN" or "\NNN") by hand. A
          // printf-style call would also store a terminating NUL, i.e. a
          // fifth byte that the check above does not account for.
          const uint8_t byte = static_cast<uint8_t>(*src);
          dest[used++] = '\\';
          if (use_hex) {
            dest[used++] = 'x';
            dest[used++] = kHexDigits[byte >> 4];
            dest[used++] = kHexDigits[byte & 0xF];
          } else {
            dest[used++] = static_cast<char>('0' + (byte >> 6));
            dest[used++] = static_cast<char>('0' + ((byte >> 3) & 7));
            dest[used++] = static_cast<char>('0' + (byte & 7));
          }
          is_hex_escape = use_hex;
        } else {
          dest[used++] = *src;
        }
        break;
    }
    last_hex_escape = is_hex_escape;
  }

  if (dest_len - used < 1)   // make sure that there is room for \0
    return -1;

  dest[used] = '\0';   // doesn't count towards return value though
  return used;
}
523
524	// Calculates the length of the C-style escaped version of 'src'.
525	// Assumes that non-printable characters are escaped using octal sequences, and
526	// that UTF-8 bytes are not handled specially.
527	static inline size_t CEscapedLength(StringPiece src) {
528	static char c_escaped_len[`256`] = {
529	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `2`, `2`, `4`, `4`, `2`, `4`, `4`, // \t, \n, \r
530	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
531	`1`, `1`, `2`, `1`, `1`, `1`, `1`, `2`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // ", '
532	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // '0'..'9'
533	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 'A'..'O'
534	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `2`, `1`, `1`, `1`, // 'P'..'Z', '\'
535	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, // 'a'..'o'
536	`1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `1`, `4`, // 'p'..'z', DEL
537	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
538	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
539	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
540	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
541	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
542	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
543	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
544	`4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`, `4`,
545	};
546
547	size_t escaped_len = `0`;
548	for (StringPiece::size_type i = `0`; i < src.size(); ++i) {
549	unsigned char c = static_cast<unsigned char>(src [i]);
550	escaped_len += c_escaped_len[c];
551	}
552	return escaped_len;
553	}
554
555	// ----------------------------------------------------------------------
556	// Escapes 'src' using C-style escape sequences, and appends the escaped string
557	// to 'dest'. This version is faster than calling CEscapeInternal as it computes
558	// the required space using a lookup table, and also does not do any special
559	// handling for Hex or UTF-8 characters.
560	// ----------------------------------------------------------------------
561	void CEscapeAndAppend(StringPiece src, std::string *dest) {
562	size_t escaped_len = CEscapedLength(src);
563	if (escaped_len == src.size()) {
564	dest->append(s: src.data(), n: src.size());
565	return;
566	}
567
568	size_t cur_dest_len = dest->size();
569	dest->resize(n: cur_dest_len + escaped_len);
570	char* append_ptr = &(*dest)[cur_dest_len];
571
572	for (StringPiece::size_type i = `0`; i < src.size(); ++i) {
573	unsigned char c = static_cast<unsigned char>(src [i]);
574	switch (c) {
575	case `'\n'`: append_ptr++ = `'\\'`; append_ptr++ = `'n'`; break;
576	case `'\r'`: append_ptr++ = `'\\'`; append_ptr++ = `'r'`; break;
577	case `'\t'`: append_ptr++ = `'\\'`; append_ptr++ = `'t'`; break;
578	case `'\"'`: append_ptr++ = `'\\'`; append_ptr++ = `'\"'`; break;
579	case `'\''`: append_ptr++ = `'\\'`; append_ptr++ = `'\''`; break;
580	case `'\\'`: append_ptr++ = `'\\'`; append_ptr++ = `'\\'`; break;
581	default:
582	if (!isprint(c)) {
583	*append_ptr++ = `'\\'`;
584	*append_ptr++ = `'0'` + c / `64`;
585	*append_ptr++ = `'0'` + (c % `64`) / `8`;
586	*append_ptr++ = `'0'` + c % `8`;
587	} else {
588	*append_ptr++ = c;
589	}
590	break;
591	}
592	}
593	}
594
595	std::string CEscape(const std::string &src) {
596	std::string dest;
597	CEscapeAndAppend(src, dest: &dest);
598	return dest;
599	}
600
601	namespace strings {
602
603	std::string Utf8SafeCEscape(const std::string &src) {
604	const int dest_length = src.size() * `4` + `1`; // Maximum possible expansion
605	std::unique_ptr<char[]> dest(new char[dest_length]);
606	const int len = CEscapeInternal(src: src.data(), src_len: src.size(),
607	dest: dest.get(), dest_len: dest_length, use_hex: false, utf8_safe: true);
608	GOOGLE_DCHECK_GE(len, `0`);
609	return std::string (dest.get(), len);
610	}
611
612	std::string CHexEscape(const std::string &src) {
613	const int dest_length = src.size() * `4` + `1`; // Maximum possible expansion
614	std::unique_ptr<char[]> dest(new char[dest_length]);
615	const int len = CEscapeInternal(src: src.data(), src_len: src.size(),
616	dest: dest.get(), dest_len: dest_length, use_hex: true, utf8_safe: false);
617	GOOGLE_DCHECK_GE(len, `0`);
618	return std::string (dest.get(), len);
619	}
620
621	} // namespace strings
622
// ----------------------------------------------------------------------
// strto32_adaptor()
// strtou32_adaptor()
//    Implementation of strto[u]l replacements that have identical
//    overflow and underflow characteristics for both ILP-32 and LP-64
//    platforms, including errno preservation in error-free calls.
// ----------------------------------------------------------------------

int32_t strto32_adaptor(const char *nptr, char **endptr, int base) {
  const int saved_errno = errno;
  errno = 0;
  const long result = strtol(nptr, endptr, base);
  if (errno == ERANGE && result == LONG_MIN) {
    return std::numeric_limits<int32_t>::min();
  } else if (errno == ERANGE && result == LONG_MAX) {
    return std::numeric_limits<int32_t>::max();
  } else if (errno == 0 && result < std::numeric_limits<int32_t>::min()) {
    // Fits in a 64-bit long but not in int32: report it as a range error.
    errno = ERANGE;
    return std::numeric_limits<int32_t>::min();
  } else if (errno == 0 && result > std::numeric_limits<int32_t>::max()) {
    errno = ERANGE;
    return std::numeric_limits<int32_t>::max();
  }
  // No error occurred: put back whatever errno held before the call.
  if (errno == 0)
    errno = saved_errno;
  return static_cast<int32_t>(result);
}
650
// Unsigned counterpart of strto32_adaptor(): wraps strtoul(), clamps results
// above the uint32 maximum to that maximum with errno = ERANGE, and restores
// the caller's errno when no error occurred.
uint32_t strtou32_adaptor(const char *nptr, char **endptr, int base) {
  const int saved_errno = errno;
  errno = 0;
  const unsigned long result = strtoul(nptr, endptr, base);
  if (errno == ERANGE && result == ULONG_MAX) {
    return std::numeric_limits<uint32_t>::max();
  } else if (errno == 0 && result > std::numeric_limits<uint32_t>::max()) {
    // Fits in a 64-bit unsigned long but not in uint32.
    errno = ERANGE;
    return std::numeric_limits<uint32_t>::max();
  }
  if (errno == 0)
    errno = saved_errno;
  return static_cast<uint32_t>(result);
}
665
// Trims leading and trailing ' ' characters from *text and consumes one
// optional leading '+' or '-'. On success, *text holds what remains (the
// digits), *negative_ptr tells whether the sign was '-', and true is returned.
// Returns false, leaving *text unchanged, when nothing remains after trimming
// or after the sign; *negative_ptr may already have been written in the
// latter case.
inline bool safe_parse_sign(std::string *text /*inout*/,
                            bool *negative_ptr /*output*/) {
  const char* start = text->data();
  const char* end = start + text->size();

  // Consume whitespace.
  while (start < end && (start[0] == ' ')) {
    ++start;
  }
  while (start < end && (end[-1] == ' ')) {
    --end;
  }
  if (start >= end) {
    return false;
  }

  // Consume sign.
  *negative_ptr = (start[0] == '-');
  if (*negative_ptr || start[0] == '+') {
    ++start;
    if (start >= end) {
      return false;
    }
  }
  *text = text->substr(start - text->data(), end - start);
  return true;
}
693
// Parses 'text' as an unsigned sequence of base-10 digits into *value_p.
// Returns true when every character is a digit and the value fits in IntType
// (an empty 'text' yields 0). On failure returns false and stores either the
// value accumulated so far (invalid character) or the maximum of IntType
// (overflow).
template <typename IntType>
bool safe_parse_positive_int(std::string text, IntType *value_p) {
  int base = 10;
  IntType value = 0;
  const IntType vmax = std::numeric_limits<IntType>::max();
  assert(vmax > 0);
  assert(vmax >= base);
  const IntType vmax_over_base = vmax / base;
  const char* start = text.data();
  const char* end = start + text.size();
  // loop over digits
  for (; start < end; ++start) {
    unsigned char c = static_cast<unsigned char>(start[0]);
    int digit = c - '0';
    if (digit >= base || digit < 0) {
      *value_p = value;
      return false;
    }
    // Check both the multiplication and the addition before doing them so
    // the accumulator never overflows.
    if (value > vmax_over_base) {
      *value_p = vmax;
      return false;
    }
    value *= base;
    if (value > vmax - digit) {
      *value_p = vmax;
      return false;
    }
    value += digit;
  }
  *value_p = value;
  return true;
}
726
// Parses 'text' (base-10 digits, with the '-' sign already removed) as a
// negative number into *value_p, accumulating downwards so that the minimum
// of IntType can be represented. Returns true when every character is a digit
// and the value fits. On failure returns false and stores either the value
// accumulated so far (invalid character) or the minimum of IntType
// (underflow).
template <typename IntType>
bool safe_parse_negative_int(const std::string &text, IntType *value_p) {
  int base = 10;
  IntType value = 0;
  const IntType vmin = std::numeric_limits<IntType>::min();
  assert(vmin < 0);
  assert(vmin <= 0 - base);
  IntType vmin_over_base = vmin / base;
  // 2003 c++ standard [expr.mul]
  // "... the sign of the remainder is implementation-defined."
  // Although (vmin/base)*base + vmin%base is always vmin.
  // 2011 c++ standard tightens the spec but we cannot rely on it.
  if (vmin % base > 0) {
    vmin_over_base += 1;
  }
  const char* start = text.data();
  const char* end = start + text.size();
  // loop over digits
  for (; start < end; ++start) {
    unsigned char c = static_cast<unsigned char>(start[0]);
    int digit = c - '0';
    if (digit >= base || digit < 0) {
      *value_p = value;
      return false;
    }
    if (value < vmin_over_base) {
      *value_p = vmin;
      return false;
    }
    value *= base;
    if (value < vmin + digit) {
      *value_p = vmin;
      return false;
    }
    value -= digit;
  }
  *value_p = value;
  return true;
}
766
767	template <typename IntType>
768	bool safe_int_internal(std::string text, IntType *value_p) {
769	*value_p = `0`;
770	bool negative;
771	if (!safe_parse_sign(text: &text, negative_ptr: &negative)) {
772	return false;
773	}
774	if (!negative) {
775	return safe_parse_positive_int(text, value_p);
776	} else {
777	return safe_parse_negative_int(text, value_p);
778	}
779	}
780
781	template <typename IntType>
782	bool safe_uint_internal(std::string text, IntType *value_p) {
783	*value_p = `0`;
784	bool negative;
785	if (!safe_parse_sign(text: &text, negative_ptr: &negative) \|\| negative) {
786	return false;
787	}
788	return safe_parse_positive_int(text, value_p);
789	}
790
791	// ----------------------------------------------------------------------
792	// FastIntToBuffer()
793	// FastInt64ToBuffer()
794	// FastHexToBuffer()
795	// FastHex64ToBuffer()
796	// FastHex32ToBuffer()
797	// ----------------------------------------------------------------------
798
// Offset into buffer where FastInt64ToBuffer places the end of string
// null character.  Also used by FastInt64ToBufferLeft.
static const int kFastInt64ToBufferOffset = 21;

// Writes the decimal form of 'i' right-aligned into 'buffer', ending with a
// NUL at buffer[kFastInt64ToBufferOffset], and returns a pointer to the first
// character written (somewhere inside 'buffer'). 'buffer' must therefore hold
// at least kFastInt64ToBufferOffset + 1 bytes.
char *FastInt64ToBuffer(int64_t i, char* buffer) {
  // We could collapse the positive and negative sections, but that
  // would be slightly slower for positive numbers...
  // 22 bytes is enough to store -2**64, -18446744073709551616.
  char* p = buffer + kFastInt64ToBufferOffset;
  *p-- = '\0';
  if (i >= 0) {
    do {
      *p-- = '0' + i % 10;
      i /= 10;
    } while (i > 0);
    return p + 1;
  } else {
    // On different platforms, % and / have different behaviors for
    // negative numbers, so we need to jump through hoops to make sure
    // we don't divide negative numbers.
    if (i > -10) {
      i = -i;
      *p-- = '0' + i;
      *p = '-';
      return p;
    } else {
      // Make sure we aren't at MIN_INT, in which case we can't say i = -i
      i = i + 10;
      i = -i;
      *p-- = '0' + i % 10;
      // Undo what we did a moment ago
      i = i / 10 + 1;
      do {
        *p-- = '0' + i % 10;
        i /= 10;
      } while (i > 0);
      *p = '-';
      return p;
    }
  }
}
840
// Offset into buffer where FastInt32ToBuffer places the end of string
// null character.  Also used by FastInt32ToBufferLeft
static const int kFastInt32ToBufferOffset = 11;

// Yes, this is a duplicate of FastInt64ToBuffer.  But, we need this for the
// compiler to generate 32 bit arithmetic instructions.  It's much faster, at
// least with 32 bit binaries.
//
// Writes the decimal form of 'i' right-aligned into 'buffer', ending with a
// NUL at buffer[kFastInt32ToBufferOffset], and returns a pointer to the first
// character written. 'buffer' must hold at least
// kFastInt32ToBufferOffset + 1 bytes.
char *FastInt32ToBuffer(int32_t i, char* buffer) {
  // We could collapse the positive and negative sections, but that
  // would be slightly slower for positive numbers...
  // 12 bytes is enough to store -2**32, -4294967296.
  char* p = buffer + kFastInt32ToBufferOffset;
  *p-- = '\0';
  if (i >= 0) {
    do {
      *p-- = '0' + i % 10;
      i /= 10;
    } while (i > 0);
    return p + 1;
  } else {
    // On different platforms, % and / have different behaviors for
    // negative numbers, so we need to jump through hoops to make sure
    // we don't divide negative numbers.
    if (i > -10) {
      i = -i;
      *p-- = '0' + i;
      *p = '-';
      return p;
    } else {
      // Make sure we aren't at MIN_INT, in which case we can't say i = -i
      i = i + 10;
      i = -i;
      *p-- = '0' + i % 10;
      // Undo what we did a moment ago
      i = i / 10 + 1;
      do {
        *p-- = '0' + i % 10;
        i /= 10;
      } while (i > 0);
      *p = '-';
      return p;
    }
  }
}
885
886	char FastHexToBuffer(int* i, char* buffer) {
887	GOOGLE_CHECK(i >= `0`) << "FastHexToBuffer() wants non-negative integers, not " << i;
888
889	static const char *hexdigits = "0123456789abcdef";
890	char *p = buffer + `21`;
891	*p-- = `'\0'`;
892	do {
893	p-- = hexdigits[i & `15`]; // mod by 16*
894	i >>= `4`; // divide by 16
895	} while (i > `0`);
896	return p + `1`;
897	}
898
// Writes exactly `num_byte` lowercase hex digits of `value` (zero-padded on
// the left, least-significant digits kept) to the start of `buffer`, followed
// by a NUL at buffer[num_byte].  `buffer` must provide num_byte + 1 bytes.
// Returns `buffer`.
char* InternalFastHexToBuffer(uint64_t value, char* buffer, int num_byte) {
  static const char* hexdigits = "0123456789abcdef";
  buffer[num_byte] = '\0';
  // Fill from the last digit backwards, consuming four bits per digit.
  for (int i = num_byte - 1; i >= 0; i--) {
#ifdef _M_X64
    // MSVC x64 platform has a bug optimizing the uint32(value) in the #else
    // block. Given that the uint32 cast was to improve performance on 32-bit
    // platforms, we use 64-bit '&' directly.
    buffer[i] = hexdigits[value & 0xf];
#else
    buffer[i] = hexdigits[uint32_t(value) & 0xf];
#endif
    value >>= 4;
  }
  return buffer;
}
915
916	char FastHex64ToBuffer(uint64_t value, char** buffer) {
917	return InternalFastHexToBuffer(value, buffer, num_byte: `16`);
918	}
919
920	char FastHex32ToBuffer(uint32_t value, char** buffer) {
921	return InternalFastHexToBuffer(value, buffer, num_byte: `8`);
922	}
923
// ----------------------------------------------------------------------
// FastInt32ToBufferLeft()
// FastUInt32ToBufferLeft()
// FastInt64ToBufferLeft()
// FastUInt64ToBufferLeft()
//
// Like the Fast*ToBuffer() functions above, these are intended for speed.
// Unlike the Fast*ToBuffer() functions, however, these functions write
// their output to the beginning of the buffer (hence the name, as the
// output is left-aligned).  The caller is responsible for ensuring that
// the buffer has enough space to hold the output.
//
// Returns a pointer to the end of the string (i.e. the null character
// terminating the string).
// ----------------------------------------------------------------------
939
// Lookup table mapping each integer n in [0, 99] to its two-character,
// zero-padded decimal representation: two_ASCII_digits[n] == {tens, ones}.
// The entries are NOT NUL-terminated.
static const char two_ASCII_digits[100][2] = {
  {'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
  {'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
  {'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
  {'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
  {'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
  {'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
  {'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
  {'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
  {'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
  {'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
  {'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
  {'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
  {'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
  {'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
  {'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
  {'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
  {'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
  {'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
  {'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
  {'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
};
962
963	char* FastUInt32ToBufferLeft(uint32_t u, char* buffer) {
964	uint32_t digits;
965	const char ASCII_digits = nullptr*;
966	// The idea of this implementation is to trim the number of divides to as few
967	// as possible by using multiplication and subtraction rather than mod (%),
968	// and by outputting two digits at a time rather than one.
969	// The huge-number case is first, in the hopes that the compiler will output
970	// that case in one branch-free block of code, and only output conditional
971	// branches into it from below.
972	if (u >= `1000000000`) { // >= 1,000,000,000
973	digits = u / `100000000`; // 100,000,000
974	ASCII_digits = two_ASCII_digits[digits];
975	buffer[`0`] = ASCII_digits[`0`];
976	buffer[`1`] = ASCII_digits[`1`];
977	buffer += `2`;
978	sublt100_000_000:
979	u -= digits * `100000000`; // 100,000,000
980	lt100_000_000:
981	digits = u / `1000000`; // 1,000,000
982	ASCII_digits = two_ASCII_digits[digits];
983	buffer[`0`] = ASCII_digits[`0`];
984	buffer[`1`] = ASCII_digits[`1`];
985	buffer += `2`;
986	sublt1_000_000:
987	u -= digits * `1000000`; // 1,000,000
988	lt1_000_000:
989	digits = u / `10000`; // 10,000
990	ASCII_digits = two_ASCII_digits[digits];
991	buffer[`0`] = ASCII_digits[`0`];
992	buffer[`1`] = ASCII_digits[`1`];
993	buffer += `2`;
994	sublt10_000:
995	u -= digits * `10000`; // 10,000
996	lt10_000:
997	digits = u / `100`;
998	ASCII_digits = two_ASCII_digits[digits];
999	buffer[`0`] = ASCII_digits[`0`];
1000	buffer[`1`] = ASCII_digits[`1`];
1001	buffer += `2`;
1002	sublt100:
1003	u -= digits * `100`;
1004	lt100:
1005	digits = u;
1006	ASCII_digits = two_ASCII_digits[digits];
1007	buffer[`0`] = ASCII_digits[`0`];
1008	buffer[`1`] = ASCII_digits[`1`];
1009	buffer += `2`;
1010	done:
1011	*buffer = `0`;
1012	return buffer;
1013	}
1014
1015	if (u < `100`) {
1016	digits = u;
1017	if (u >= `10`) goto lt100;
1018	*buffer++ = `'0'` + digits;
1019	goto done;
1020	}
1021	if (u < `10000`) { // 10,000
1022	if (u >= `1000`) goto lt10_000;
1023	digits = u / `100`;
1024	*buffer++ = `'0'` + digits;
1025	goto sublt100;
1026	}
1027	if (u < `1000000`) { // 1,000,000
1028	if (u >= `100000`) goto lt1_000_000;
1029	digits = u / `10000`; // 10,000
1030	*buffer++ = `'0'` + digits;
1031	goto sublt10_000;
1032	}
1033	if (u < `100000000`) { // 100,000,000
1034	if (u >= `10000000`) goto lt100_000_000;
1035	digits = u / `1000000`; // 1,000,000
1036	*buffer++ = `'0'` + digits;
1037	goto sublt1_000_000;
1038	}
1039	// we already know that u < 1,000,000,000
1040	digits = u / `100000000`; // 100,000,000
1041	*buffer++ = `'0'` + digits;
1042	goto sublt100_000_000;
1043	}
1044
1045	char* FastInt32ToBufferLeft(int32_t i, char* buffer) {
1046	uint32_t u = `0`;
1047	if (i < `0`) {
1048	*buffer++ = `'-'`;
1049	u -= i;
1050	} else {
1051	u = i;
1052	}
1053	return FastUInt32ToBufferLeft(u, buffer);
1054	}
1055
1056	char* FastUInt64ToBufferLeft(uint64_t u64, char* buffer) {
1057	int digits;
1058	const char ASCII_digits = nullptr*;
1059
1060	uint32_t u = static_cast<uint32_t>(u64);
1061	if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
1062
1063	uint64_t top_11_digits = u64 / `1000000000`;
1064	buffer = FastUInt64ToBufferLeft(u64: top_11_digits, buffer);
1065	u = u64 - (top_11_digits * `1000000000`);
1066
1067	digits = u / `10000000`; // 10,000,000
1068	GOOGLE_DCHECK_LT(digits, `100`);
1069	ASCII_digits = two_ASCII_digits[digits];
1070	buffer[`0`] = ASCII_digits[`0`];
1071	buffer[`1`] = ASCII_digits[`1`];
1072	buffer += `2`;
1073	u -= digits * `10000000`; // 10,000,000
1074	digits = u / `100000`; // 100,000
1075	ASCII_digits = two_ASCII_digits[digits];
1076	buffer[`0`] = ASCII_digits[`0`];
1077	buffer[`1`] = ASCII_digits[`1`];
1078	buffer += `2`;
1079	u -= digits * `100000`; // 100,000
1080	digits = u / `1000`; // 1,000
1081	ASCII_digits = two_ASCII_digits[digits];
1082	buffer[`0`] = ASCII_digits[`0`];
1083	buffer[`1`] = ASCII_digits[`1`];
1084	buffer += `2`;
1085	u -= digits * `1000`; // 1,000
1086	digits = u / `10`;
1087	ASCII_digits = two_ASCII_digits[digits];
1088	buffer[`0`] = ASCII_digits[`0`];
1089	buffer[`1`] = ASCII_digits[`1`];
1090	buffer += `2`;
1091	u -= digits * `10`;
1092	digits = u;
1093	*buffer++ = `'0'` + digits;
1094	*buffer = `0`;
1095	return buffer;
1096	}
1097
1098	char* FastInt64ToBufferLeft(int64_t i, char* buffer) {
1099	uint64_t u = `0`;
1100	if (i < `0`) {
1101	*buffer++ = `'-'`;
1102	u -= i;
1103	} else {
1104	u = i;
1105	}
1106	return FastUInt64ToBufferLeft(u64: u, buffer);
1107	}
1108
1109	// ----------------------------------------------------------------------
1110	// SimpleItoa()
1111	// Description: converts an integer to a string.
1112	//
1113	// Return value: string
1114	// ----------------------------------------------------------------------
1115
1116	std::string SimpleItoa(int i) {
1117	char buffer[kFastToBufferSize];
1118	return (sizeof(i) == `4`) ?
1119	FastInt32ToBuffer(i, buffer) :
1120	FastInt64ToBuffer(i, buffer);
1121	}
1122
1123	std::string SimpleItoa(unsigned int i) {
1124	char buffer[kFastToBufferSize];
1125	return std::string (buffer, (sizeof(i) == `4`)
1126	? FastUInt32ToBufferLeft(u: i, buffer)
1127	: FastUInt64ToBufferLeft(u64: i, buffer));
1128	}
1129
1130	std::string SimpleItoa(long i) {
1131	char buffer[kFastToBufferSize];
1132	return (sizeof(i) == `4`) ?
1133	FastInt32ToBuffer(i, buffer) :
1134	FastInt64ToBuffer(i, buffer);
1135	}
1136
1137	std::string SimpleItoa(unsigned long i) {
1138	char buffer[kFastToBufferSize];
1139	return std::string (buffer, (sizeof(i) == `4`)
1140	? FastUInt32ToBufferLeft(u: i, buffer)
1141	: FastUInt64ToBufferLeft(u64: i, buffer));
1142	}
1143
1144	std::string SimpleItoa(long long i) {
1145	char buffer[kFastToBufferSize];
1146	return (sizeof(i) == `4`) ?
1147	FastInt32ToBuffer(i, buffer) :
1148	FastInt64ToBuffer(i, buffer);
1149	}
1150
1151	std::string SimpleItoa(unsigned long long i) {
1152	char buffer[kFastToBufferSize];
1153	return std::string (buffer, (sizeof(i) == `4`)
1154	? FastUInt32ToBufferLeft(u: i, buffer)
1155	: FastUInt64ToBufferLeft(u64: i, buffer));
1156	}
1157
1158	// ----------------------------------------------------------------------
1159	// SimpleDtoa()
1160	// SimpleFtoa()
1161	// DoubleToBuffer()
1162	// FloatToBuffer()
1163	// We want to print the value without losing precision, but we also do
1164	// not want to print more digits than necessary. This turns out to be
1165	// trickier than it sounds. Numbers like 0.2 cannot be represented
1166	// exactly in binary. If we print 0.2 with a very large precision,
1167	// e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
1168	// On the other hand, if we set the precision too low, we lose
1169	// significant digits when printing numbers that actually need them.
1170	// It turns out there is no precision value that does the right thing
1171	// for all numbers.
1172	//
1173	// Our strategy is to first try printing with a precision that is never
1174	// over-precise, then parse the result with strtod() to see if it
1175	// matches. If not, we print again with a precision that will always
1176	// give a precise result, but may use more digits than necessary.
1177	//
1178	// An arguably better strategy would be to use the algorithm described
1179	// in "How to Print Floating-Point Numbers Accurately" by Steele &
1180	// White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
1181	// however, that the following implementation is about as fast as
1182	// DMG's code. Furthermore, DMG's code locks mutexes, which means it
1183	// will not scale well on multi-core machines. DMG's code is slightly
1184	// more accurate (in that it will never use more digits than
1185	// necessary), but this is probably irrelevant for most users.
1186	//
1187	// Rob Pike and Ken Thompson also have an implementation of dtoa() in
1188	// third_party/fmt/fltfmt.cc. Their implementation is similar to this
1189	// one in that it makes guesses and then uses strtod() to check them.
1190	// Their implementation is faster because they use their own code to
1191	// generate the digits in the first place rather than use snprintf(),
1192	// thus avoiding format string parsing overhead. However, this makes
1193	// it considerably more complicated than the following implementation,
1194	// and it is embedded in a larger library. If speed turns out to be
1195	// an issue, we could re-implement this in terms of their
1196	// implementation.
1197	// ----------------------------------------------------------------------
1198
1199	std::string SimpleDtoa(double value) {
1200	char buffer[kDoubleToBufferSize];
1201	return DoubleToBuffer(i: value, buffer);
1202	}
1203
1204	std::string SimpleFtoa(float value) {
1205	char buffer[kFloatToBufferSize];
1206	return FloatToBuffer(i: value, buffer);
1207	}
1208
// Returns true if `c` is a decimal digit, an exponent marker ('e'/'E') or a
// sign ('+'/'-').  The radix character is deliberately NOT included, so the
// first character rejected in a formatted number is the radix.
static inline bool IsValidFloatChar(char c) {
  return ('0' <= c && c <= '9') ||
         c == 'e' || c == 'E' ||
         c == '+' || c == '-';
}
1214
1215	void DelocalizeRadix(char* buffer) {
1216	// Fast check: if the buffer has a normal decimal point, assume no
1217	// translation is needed.
1218	if (strchr(s: buffer, c: `'.'`) != nullptr) return;
1219
1220	// Find the first unknown character.
1221	while (IsValidFloatChar(c: *buffer)) ++buffer;
1222
1223	if (*buffer == `'\0'`) {
1224	// No radix character found.
1225	return;
1226	}
1227
1228	// We are now pointing at the locale-specific radix character. Replace it
1229	// with '.'.
1230	*buffer = `'.'`;
1231	++buffer;
1232
1233	if (!IsValidFloatChar(c: buffer) && buffer != `'\0'`) {
1234	// It appears the radix was a multi-byte character. We need to remove the
1235	// extra bytes.
1236	char* target = buffer;
1237	do { ++buffer; } while (!IsValidFloatChar(c: buffer) && buffer != `'\0'`);
1238	memmove(dest: target, src: buffer, n: strlen(s: buffer) + `1`);
1239	}
1240	}
1241
1242	char* DoubleToBuffer(double value, char* buffer) {
1243	// DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
1244	// platforms these days. Just in case some system exists where DBL_DIG
1245	// is significantly larger -- and risks overflowing our buffer -- we have
1246	// this assert.
1247	static_assert(DBL_DIG < `20`, "DBL_DIG_is_too_big");
1248
1249	if (value == std::numeric_limits<double>::infinity()) {
1250	strcpy(dest: buffer, src: "inf");
1251	return buffer;
1252	} else if (value == -std::numeric_limits<double>::infinity()) {
1253	strcpy(dest: buffer, src: "-inf");
1254	return buffer;
1255	} else if (std::isnan(x: value)) {
1256	strcpy(dest: buffer, src: "nan");
1257	return buffer;
1258	}
1259
1260	int snprintf_result =
1261	snprintf(s: buffer, maxlen: kDoubleToBufferSize, format: "%.*g", DBL_DIG, value);
1262
1263	// The snprintf should never overflow because the buffer is significantly
1264	// larger than the precision we asked for.
1265	GOOGLE_DCHECK(snprintf_result > `0` && snprintf_result < kDoubleToBufferSize);
1266
1267	// We need to make parsed_value volatile in order to force the compiler to
1268	// write it out to the stack. Otherwise, it may keep the value in a
1269	// register, and if it does that, it may keep it as a long double instead
1270	// of a double. This long double may have extra bits that make it compare
1271	// unequal to "value" even though it would be exactly equal if it were
1272	// truncated to a double.
1273	volatile double parsed_value = internal::NoLocaleStrtod(str: buffer, endptr: nullptr);
1274	if (parsed_value != value) {
1275	snprintf_result =
1276	snprintf(s: buffer, maxlen: kDoubleToBufferSize, format: "%.*g", DBL_DIG + `2`, value);
1277
1278	// Should never overflow; see above.
1279	GOOGLE_DCHECK(snprintf_result > `0` && snprintf_result < kDoubleToBufferSize);
1280	}
1281
1282	DelocalizeRadix(buffer);
1283	return buffer;
1284	}
1285
1286	static int memcasecmp(const char s1, const* char *s2, size_t len) {
1287	const unsigned char us1 = reinterpret_cast<const* unsigned char *>(s1);
1288	const unsigned char us2 = reinterpret_cast<const* unsigned char *>(s2);
1289
1290	for (size_t i = `0`; i < len; i++) {
1291	const int diff =
1292	static_cast<int>(static_cast<unsigned char>(ascii_tolower(c: us1[i]))) -
1293	static_cast<int>(static_cast<unsigned char>(ascii_tolower(c: us2[i])));
1294	if (diff != `0`) return diff;
1295	}
1296	return `0`;
1297	}
1298
1299	inline bool CaseEqual(StringPiece s1, StringPiece s2) {
1300	if (s1.size() != s2.size()) return false;
1301	return memcasecmp(s1: s1.data(), s2: s2.data(), len: s1.size()) == `0`;
1302	}
1303
1304	bool safe_strtob(StringPiece str, bool* value) {
1305	GOOGLE_CHECK(value != nullptr) << "nullptr output boolean given.";
1306	if (CaseEqual(s1: str, s2: "true") \|\| CaseEqual(s1: str, s2: "t") \|\|
1307	CaseEqual(s1: str, s2: "yes") \|\| CaseEqual(s1: str, s2: "y") \|\|
1308	CaseEqual(s1: str, s2: "1")) {
1309	value = true*;
1310	return true;
1311	}
1312	if (CaseEqual(s1: str, s2: "false") \|\| CaseEqual(s1: str, s2: "f") \|\|
1313	CaseEqual(s1: str, s2: "no") \|\| CaseEqual(s1: str, s2: "n") \|\|
1314	CaseEqual(s1: str, s2: "0")) {
1315	value = false*;
1316	return true;
1317	}
1318	return false;
1319	}
1320
// Parses a float from the NUL-terminated string `str` into *value.
// Returns true only if `str` is non-empty, the whole string was consumed by
// the parse, and the conversion did not set errno.
// *value is written even when the function returns false.
bool safe_strtof(const char* str, float* value) {
  char* endptr;
  errno = 0;  // errno only gets set on errors
#if defined(_WIN32) || defined (__hpux)  // has no strtof()
  *value = internal::NoLocaleStrtod(str, &endptr);
#else
  *value = strtof(str, &endptr);
#endif
  // Dereference both pointers: the checks are "input is not empty" and
  // "parsing stopped at the terminating NUL", not pointer null tests.
  return *str != 0 && *endptr == 0 && errno == 0;
}
1331
1332	bool safe_strtod(const char* str, double* value) {
1333	char* endptr;
1334	*value = internal::NoLocaleStrtod(str, endptr: &endptr);
1335	if (endptr != str) {
1336	while (ascii_isspace(c: *endptr)) ++endptr;
1337	}
1338	// Ignore range errors from strtod. The values it
1339	// returns on underflow and overflow are the right
1340	// fallback in a robust setting.
1341	return str != `'\0'` && endptr == `'\0'`;
1342	}
1343
1344	bool safe_strto32(const std::string &str, int32_t *value) {
1345	return safe_int_internal(text: str, value_p: value);
1346	}
1347
1348	bool safe_strtou32(const std::string &str, uint32_t *value) {
1349	return safe_uint_internal(text: str, value_p: value);
1350	}
1351
1352	bool safe_strto64(const std::string &str, int64_t *value) {
1353	return safe_int_internal(text: str, value_p: value);
1354	}
1355
1356	bool safe_strtou64(const std::string &str, uint64_t *value) {
1357	return safe_uint_internal(text: str, value_p: value);
1358	}
1359
1360	char* FloatToBuffer(float value, char* buffer) {
1361	// FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
1362	// platforms these days. Just in case some system exists where FLT_DIG
1363	// is significantly larger -- and risks overflowing our buffer -- we have
1364	// this assert.
1365	static_assert(FLT_DIG < `10`, "FLT_DIG_is_too_big");
1366
1367	if (value == std::numeric_limits<double>::infinity()) {
1368	strcpy(dest: buffer, src: "inf");
1369	return buffer;
1370	} else if (value == -std::numeric_limits<double>::infinity()) {
1371	strcpy(dest: buffer, src: "-inf");
1372	return buffer;
1373	} else if (std::isnan(x: value)) {
1374	strcpy(dest: buffer, src: "nan");
1375	return buffer;
1376	}
1377
1378	int snprintf_result =
1379	snprintf(s: buffer, maxlen: kFloatToBufferSize, format: "%.*g", FLT_DIG, value);
1380
1381	// The snprintf should never overflow because the buffer is significantly
1382	// larger than the precision we asked for.
1383	GOOGLE_DCHECK(snprintf_result > `0` && snprintf_result < kFloatToBufferSize);
1384
1385	float parsed_value;
1386	if (!safe_strtof(str: buffer, value: &parsed_value) \|\| parsed_value != value) {
1387	snprintf_result =
1388	snprintf(s: buffer, maxlen: kFloatToBufferSize, format: "%.*g", FLT_DIG + `3`, value);
1389
1390	// Should never overflow; see above.
1391	GOOGLE_DCHECK(snprintf_result > `0` && snprintf_result < kFloatToBufferSize);
1392	}
1393
1394	DelocalizeRadix(buffer);
1395	return buffer;
1396	}
1397
1398	namespace strings {
1399
1400	AlphaNum::AlphaNum(strings::Hex hex) {
1401	char *const end = &digits[kFastToBufferSize];
1402	char *writer = end;
1403	uint64_t value = hex.value;
1404	uint64_t width = hex.spec;
1405	// We accomplish minimum width by OR'ing in 0x10000 to the user's value,
1406	// where 0x10000 is the smallest hex number that is as wide as the user
1407	// asked for.
1408	uint64_t mask = (static_cast<uint64_t>(`1`) << ((width - `1`) * `4`)) \| value;
1409	static const char hexdigits[] = "0123456789abcdef";
1410	do {
1411	*--writer = hexdigits[value & `0xF`];
1412	value >>= `4`;
1413	mask >>= `4`;
1414	} while (mask != `0`);
1415	piece_data_ = writer;
1416	piece_size_ = end - writer;
1417	}
1418
1419	} // namespace strings
1420
1421	// ----------------------------------------------------------------------
1422	// StrCat()
1423	// This merges the given strings or integers, with no delimiter. This
1424	// is designed to be the fastest possible way to construct a string out
1425	// of a mix of raw C strings, C++ strings, and integer values.
1426	// ----------------------------------------------------------------------
1427
1428	// Append is merely a version of memcpy that returns the address of the byte
1429	// after the area just overwritten. It comes in multiple flavors to minimize
1430	// call overhead.
1431	static char Append1(char* out, const* AlphaNum &x) {
1432	if (x.size() > `0`) {
1433	memcpy(dest: out, src: x.data(), n: x.size());
1434	out += x.size();
1435	}
1436	return out;
1437	}
1438
1439	static char Append2(char* out, const* AlphaNum &x1, const AlphaNum &x2) {
1440	if (x1.size() > `0`) {
1441	memcpy(dest: out, src: x1.data(), n: x1.size());
1442	out += x1.size();
1443	}
1444	if (x2.size() > `0`) {
1445	memcpy(dest: out, src: x2.data(), n: x2.size());
1446	out += x2.size();
1447	}
1448	return out;
1449	}
1450
1451	static char Append4(char* out, const* AlphaNum &x1, const AlphaNum &x2,
1452	const AlphaNum &x3, const AlphaNum &x4) {
1453	if (x1.size() > `0`) {
1454	memcpy(dest: out, src: x1.data(), n: x1.size());
1455	out += x1.size();
1456	}
1457	if (x2.size() > `0`) {
1458	memcpy(dest: out, src: x2.data(), n: x2.size());
1459	out += x2.size();
1460	}
1461	if (x3.size() > `0`) {
1462	memcpy(dest: out, src: x3.data(), n: x3.size());
1463	out += x3.size();
1464	}
1465	if (x4.size() > `0`) {
1466	memcpy(dest: out, src: x4.data(), n: x4.size());
1467	out += x4.size();
1468	}
1469	return out;
1470	}
1471
1472	std::string StrCat(const AlphaNum &a, const AlphaNum &b) {
1473	std::string result;
1474	result.resize(n: a.size() + b.size());
1475	char *const begin = &*result.begin();
1476	char *out = Append2(out: begin, x1: a, x2: b);
1477	GOOGLE_DCHECK_EQ(out, begin + result.size());
1478	return result;
1479	}
1480
1481	std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
1482	std::string result;
1483	result.resize(n: a.size() + b.size() + c.size());
1484	char *const begin = &*result.begin();
1485	char *out = Append2(out: begin, x1: a, x2: b);
1486	out = Append1(out, x: c);
1487	GOOGLE_DCHECK_EQ(out, begin + result.size());
1488	return result;
1489	}
1490
1491	std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1492	const AlphaNum &d) {
1493	std::string result;
1494	result.resize(n: a.size() + b.size() + c.size() + d.size());
1495	char *const begin = &*result.begin();
1496	char *out = Append4(out: begin, x1: a, x2: b, x3: c, x4: d);
1497	GOOGLE_DCHECK_EQ(out, begin + result.size());
1498	return result;
1499	}
1500
1501	std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1502	const AlphaNum &d, const AlphaNum &e) {
1503	std::string result;
1504	result.resize(n: a.size() + b.size() + c.size() + d.size() + e.size());
1505	char *const begin = &*result.begin();
1506	char *out = Append4(out: begin, x1: a, x2: b, x3: c, x4: d);
1507	out = Append1(out, x: e);
1508	GOOGLE_DCHECK_EQ(out, begin + result.size());
1509	return result;
1510	}
1511
1512	std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1513	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {
1514	std::string result;
1515	result.resize(n: a.size() + b.size() + c.size() + d.size() + e.size() +
1516	f.size());
1517	char *const begin = &*result.begin();
1518	char *out = Append4(out: begin, x1: a, x2: b, x3: c, x4: d);
1519	out = Append2(out, x1: e, x2: f);
1520	GOOGLE_DCHECK_EQ(out, begin + result.size());
1521	return result;
1522	}
1523
1524	std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1525	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1526	const AlphaNum &g) {
1527	std::string result;
1528	result.resize(n: a.size() + b.size() + c.size() + d.size() + e.size() +
1529	f.size() + g.size());
1530	char *const begin = &*result.begin();
1531	char *out = Append4(out: begin, x1: a, x2: b, x3: c, x4: d);
1532	out = Append2(out, x1: e, x2: f);
1533	out = Append1(out, x: g);
1534	GOOGLE_DCHECK_EQ(out, begin + result.size());
1535	return result;
1536	}
1537
1538	std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1539	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1540	const AlphaNum &g, const AlphaNum &h) {
1541	std::string result;
1542	result.resize(n: a.size() + b.size() + c.size() + d.size() + e.size() +
1543	f.size() + g.size() + h.size());
1544	char *const begin = &*result.begin();
1545	char *out = Append4(out: begin, x1: a, x2: b, x3: c, x4: d);
1546	out = Append4(out, x1: e, x2: f, x3: g, x4: h);
1547	GOOGLE_DCHECK_EQ(out, begin + result.size());
1548	return result;
1549	}
1550
1551	std::string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
1552	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
1553	const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {
1554	std::string result;
1555	result.resize(n: a.size() + b.size() + c.size() + d.size() + e.size() +
1556	f.size() + g.size() + h.size() + i.size());
1557	char *const begin = &*result.begin();
1558	char *out = Append4(out: begin, x1: a, x2: b, x3: c, x4: d);
1559	out = Append4(out, x1: e, x2: f, x3: g, x4: h);
1560	out = Append1(out, x: i);
1561	GOOGLE_DCHECK_EQ(out, begin + result.size());
1562	return result;
1563	}
1564
// It's possible to call StrAppend with a char * pointer that is partway into
// the string we're appending to.  However the results of this are random.
// Therefore, check for this in debug mode.  Use unsigned math so we only have
// to do one comparison.
//
// The pointer difference is cast to uintptr_t, so a `src` that starts before
// `dest` wraps around to a very large value and also satisfies the check.
#define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \
    GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \
                     uintptr_t((dest).size()))
1572
1573	void StrAppend(std::string result, const* AlphaNum &a) {
1574	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1575	result->append(s: a.data(), n: a.size());
1576	}
1577
1578	void StrAppend(std::string result, const* AlphaNum &a, const AlphaNum &b) {
1579	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1580	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1581	std::string::size_type old_size = result->size();
1582	result->resize(n: old_size + a.size() + b.size());
1583	char *const begin = &*result->begin();
1584	char *out = Append2(out: begin + old_size, x1: a, x2: b);
1585	GOOGLE_DCHECK_EQ(out, begin + result->size());
1586	}
1587
1588	void StrAppend(std::string result, const* AlphaNum &a, const AlphaNum &b,
1589	const AlphaNum &c) {
1590	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1591	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1592	GOOGLE_DCHECK_NO_OVERLAP(*result, c);
1593	std::string::size_type old_size = result->size();
1594	result->resize(n: old_size + a.size() + b.size() + c.size());
1595	char *const begin = &*result->begin();
1596	char *out = Append2(out: begin + old_size, x1: a, x2: b);
1597	out = Append1(out, x: c);
1598	GOOGLE_DCHECK_EQ(out, begin + result->size());
1599	}
1600
1601	void StrAppend(std::string result, const* AlphaNum &a, const AlphaNum &b,
1602	const AlphaNum &c, const AlphaNum &d) {
1603	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
1604	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
1605	GOOGLE_DCHECK_NO_OVERLAP(*result, c);
1606	GOOGLE_DCHECK_NO_OVERLAP(*result, d);
1607	std::string::size_type old_size = result->size();
1608	result->resize(n: old_size + a.size() + b.size() + c.size() + d.size());
1609	char *const begin = &*result->begin();
1610	char *out = Append4(out: begin + old_size, x1: a, x2: b, x3: c, x4: d);
1611	GOOGLE_DCHECK_EQ(out, begin + result->size());
1612	}
1613
1614	int GlobalReplaceSubstring(const std::string &substring,
1615	const std::string &replacement, std::string *s) {
1616	GOOGLE_CHECK(s != nullptr);
1617	if (s->empty() \|\| substring.empty())
1618	return `0`;
1619	std::string tmp;
1620	int num_replacements = `0`;
1621	int pos = `0`;
1622	for (StringPiece::size_type match_pos =
1623	s->find(s: substring.data(), pos: pos, n: substring.length());
1624	match_pos != std::string::npos; pos = match_pos + substring.length(),
1625	match_pos = s->find(s: substring.data(), pos: pos,
1626	n: substring.length())) {
1627	++num_replacements;
1628	// Append the original content before the match.
1629	tmp.append(str: *s, pos: pos, n: match_pos - pos);
1630	// Append the replacement for the match.
1631	tmp.append(first: replacement.begin(), last: replacement.end());
1632	}
1633	// Append the content after the last match. If no replacements were made, the
1634	// original string is left untouched.
1635	if (num_replacements > `0`) {
1636	tmp.append(str: *s, pos: pos, n: s->length() - pos);
1637	s->swap(s&: tmp);
1638	}
1639	return num_replacements;
1640	}
1641
// Returns the number of characters produced by base64-encoding `input_len`
// bytes.  With `do_padding` the result counts the trailing '=' characters
// (so it is always a multiple of 4); without it, a partial final group
// contributes only its 2 or 3 data characters.
int CalculateBase64EscapedLen(int input_len, bool do_padding) {
  // Base64 encodes three bytes of input at a time. If the input is not
  // divisible by three, we pad as appropriate.
  //
  // (from http://tools.ietf.org/html/rfc3548)
  // Special processing is performed if fewer than 24 bits are available
  // at the end of the data being encoded.  A full encoding quantum is
  // always completed at the end of a quantity.  When fewer than 24 input
  // bits are available in an input group, zero bits are added (on the
  // right) to form an integral number of 6-bit groups.  Padding at the
  // end of the data is performed using the '=' character.  Since all base
  // 64 input is an integral number of octets, only the following cases
  // can arise:


  // Base64 encodes each three bytes of input into four bytes of output.
  int len = (input_len / 3) * 4;

  if (input_len % 3 == 0) {
    // (from http://tools.ietf.org/html/rfc3548)
    // (1) the final quantum of encoding input is an integral multiple of 24
    // bits; here, the final unit of encoded output will be an integral
    // multiple of 4 characters with no "=" padding,
  } else if (input_len % 3 == 1) {
    // (from http://tools.ietf.org/html/rfc3548)
    // (2) the final quantum of encoding input is exactly 8 bits; here, the
    // final unit of encoded output will be two characters followed by two
    // "=" padding characters, or
    len += 2;
    if (do_padding) {
      len += 2;
    }
  } else {  // (input_len % 3 == 2)
    // (from http://tools.ietf.org/html/rfc3548)
    // (3) the final quantum of encoding input is exactly 16 bits; here, the
    // final unit of encoded output will be three characters followed by one
    // "=" padding character.
    len += 3;
    if (do_padding) {
      len += 1;
    }
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
1688
1689	// Base64Escape does padding, so this calculation includes padding.
1690	int CalculateBase64EscapedLen(int input_len) {
1691	return CalculateBase64EscapedLen(input_len, do_padding: true);
1692	}
1693
1694	// ----------------------------------------------------------------------
1695	// int Base64Unescape() - base64 decoder
1696	// int Base64Escape() - base64 encoder
1697	// int WebSafeBase64Unescape() - Google's variation of base64 decoder
1698	// int WebSafeBase64Escape() - Google's variation of base64 encoder
1699	//
1700	// Check out
1701	// http://tools.ietf.org/html/rfc2045 for formal description, but what we
1702	// care about is that...
1703	// Take the encoded stuff in groups of 4 characters and turn each
1704	// character into a code 0 to 63 thus:
1705	// A-Z map to 0 to 25
1706	// a-z map to 26 to 51
1707	// 0-9 map to 52 to 61
1708	// +(- for WebSafe) maps to 62
1709	// /(_ for WebSafe) maps to 63
1710	// There will be four numbers, all less than 64 which can be represented
1711	// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
1712	// Arrange the 6 digit binary numbers into three bytes as such:
1713	// aaaaaabb bbbbcccc ccdddddd
1714	// Equals signs (one or two) are used at the end of the encoded block to
1715	// indicate that the text was not an integer multiple of three bytes long.
1716	// ----------------------------------------------------------------------
1717
1718	int Base64UnescapeInternal(const char src_param, int* szsrc,
1719	char dest, int* szdest,
1720	const signed char* unbase64) {
1721	static const char kPad64Equals = `'='`;
1722	static const char kPad64Dot = `'.'`;
1723
1724	int decode = `0`;
1725	int destidx = `0`;
1726	int state = `0`;
1727	unsigned int ch = `0`;
1728	unsigned int temp = `0`;
1729
1730	// If "char" is signed by default, using src as an array index results in*
1731	// accessing negative array elements. Treat the input as a pointer to
1732	// unsigned char to avoid this.
1733	const unsigned char src = reinterpret_cast<const* unsigned char*>(src_param);
1734
1735	// The GET_INPUT macro gets the next input character, skipping
1736	// over any whitespace, and stopping when we reach the end of the
1737	// string or when we read any non-data character. The arguments are
1738	// an arbitrary identifier (used as a label for goto) and the number
1739	// of data bytes that must remain in the input to avoid aborting the
1740	// loop.
1741	#define GET_INPUT(label, remain) \
1742	label: \
1743	--szsrc; \
1744	ch = *src++; \
1745	decode = unbase64[ch]; \
1746	if (decode < 0) { \
1747	if (ascii_isspace(ch) && szsrc >= remain) \
1748	goto label; \
1749	state = 4 - remain; \
1750	break; \
1751	}
1752
1753	// if dest is null, we're just checking to see if it's legal input
1754	// rather than producing output. (I suspect this could just be done
1755	// with a regexp...). We duplicate the loop so this test can be
1756	// outside it instead of in every iteration.
1757
1758	if (dest) {
1759	// This loop consumes 4 input bytes and produces 3 output bytes
1760	// per iteration. We can't know at the start that there is enough
1761	// data left in the string for a full iteration, so the loop may
1762	// break out in the middle; if so 'state' will be set to the
1763	// number of input bytes read.
1764
1765	while (szsrc >= `4`) {
1766	// We'll start by optimistically assuming that the next four
1767	// bytes of the string (src[0..3]) are four good data bytes
1768	// (that is, no nulls, whitespace, padding chars, or illegal
1769	// chars). We need to test src[0..2] for nulls individually
1770	// before constructing temp to preserve the property that we
1771	// never read past a null in the string (no matter how long
1772	// szsrc claims the string is).
1773
1774	if (!src[`0`] \|\| !src[`1`] \|\| !src[`2`] \|\|
1775	(temp = ((unsigned(unbase64[src[`0`]]) << `18`) \|
1776	(unsigned(unbase64[src[`1`]]) << `12`) \|
1777	(unsigned(unbase64[src[`2`]]) << `6`) \|
1778	(unsigned(unbase64[src[`3`]])))) & `0x80000000`) {
1779	// Iff any of those four characters was bad (null, illegal,
1780	// whitespace, padding), then temp's high bit will be set
1781	// (because unbase64[] is -1 for all bad characters).
1782	//
1783	// We'll back up and resort to the slower decoder, which knows
1784	// how to handle those cases.
1785
1786	GET_INPUT(first, `4`);
1787	temp = decode;
1788	GET_INPUT(second, `3`);
1789	temp = (temp << `6`) \| decode;
1790	GET_INPUT(third, `2`);
1791	temp = (temp << `6`) \| decode;
1792	GET_INPUT(fourth, `1`);
1793	temp = (temp << `6`) \| decode;
1794	} else {
1795	// We really did have four good data bytes, so advance four
1796	// characters in the string.
1797
1798	szsrc -= `4`;
1799	src += `4`;
1800	decode = -`1`;
1801	ch = `'\0'`;
1802	}
1803
1804	// temp has 24 bits of input, so write that out as three bytes.
1805
1806	if (destidx+`3` > szdest) return -`1`;
1807	dest[destidx+`2`] = temp;
1808	temp >>= `8`;
1809	dest[destidx+`1`] = temp;
1810	temp >>= `8`;
1811	dest[destidx] = temp;
1812	destidx += `3`;
1813	}
1814	} else {
1815	while (szsrc >= `4`) {
1816	if (!src[`0`] \|\| !src[`1`] \|\| !src[`2`] \|\|
1817	(temp = ((unsigned(unbase64[src[`0`]]) << `18`) \|
1818	(unsigned(unbase64[src[`1`]]) << `12`) \|
1819	(unsigned(unbase64[src[`2`]]) << `6`) \|
1820	(unsigned(unbase64[src[`3`]])))) & `0x80000000`) {
1821	GET_INPUT(first_no_dest, `4`);
1822	GET_INPUT(second_no_dest, `3`);
1823	GET_INPUT(third_no_dest, `2`);
1824	GET_INPUT(fourth_no_dest, `1`);
1825	} else {
1826	szsrc -= `4`;
1827	src += `4`;
1828	decode = -`1`;
1829	ch = `'\0'`;
1830	}
1831	destidx += `3`;
1832	}
1833	}
1834
1835	#undef GET_INPUT
1836
1837	// if the loop terminated because we read a bad character, return
1838	// now.
1839	if (decode < `0` && ch != `'\0'` &&
1840	ch != kPad64Equals && ch != kPad64Dot && !ascii_isspace(c: ch))
1841	return -`1`;
1842
1843	if (ch == kPad64Equals \|\| ch == kPad64Dot) {
1844	// if we stopped by hitting an '=' or '.', un-read that character -- we'll
1845	// look at it again when we count to check for the proper number of
1846	// equals signs at the end.
1847	++szsrc;
1848	--src;
1849	} else {
1850	// This loop consumes 1 input byte per iteration. It's used to
1851	// clean up the 0-3 input bytes remaining when the first, faster
1852	// loop finishes. 'temp' contains the data from 'state' input
1853	// characters read by the first loop.
1854	while (szsrc > `0`) {
1855	--szsrc;
1856	ch = *src++;
1857	decode = unbase64[ch];
1858	if (decode < `0`) {
1859	if (ascii_isspace(c: ch)) {
1860	continue;
1861	} else if (ch == `'\0'`) {
1862	break;
1863	} else if (ch == kPad64Equals \|\| ch == kPad64Dot) {
1864	// back up one character; we'll read it again when we check
1865	// for the correct number of pad characters at the end.
1866	++szsrc;
1867	--src;
1868	break;
1869	} else {
1870	return -`1`;
1871	}
1872	}
1873
1874	// Each input character gives us six bits of output.
1875	temp = (temp << `6`) \| decode;
1876	++state;
1877	if (state == `4`) {
1878	// If we've accumulated 24 bits of output, write that out as
1879	// three bytes.
1880	if (dest) {
1881	if (destidx+`3` > szdest) return -`1`;
1882	dest[destidx+`2`] = temp;
1883	temp >>= `8`;
1884	dest[destidx+`1`] = temp;
1885	temp >>= `8`;
1886	dest[destidx] = temp;
1887	}
1888	destidx += `3`;
1889	state = `0`;
1890	temp = `0`;
1891	}
1892	}
1893	}
1894
1895	// Process the leftover data contained in 'temp' at the end of the input.
1896	int expected_equals = `0`;
1897	switch (state) {
1898	case `0`:
1899	// Nothing left over; output is a multiple of 3 bytes.
1900	break;
1901
1902	case `1`:
1903	// Bad input; we have 6 bits left over.
1904	return -`1`;
1905
1906	case `2`:
1907	// Produce one more output byte from the 12 input bits we have left.
1908	if (dest) {
1909	if (destidx+`1` > szdest) return -`1`;
1910	temp >>= `4`;
1911	dest[destidx] = temp;
1912	}
1913	++destidx;
1914	expected_equals = `2`;
1915	break;
1916
1917	case `3`:
1918	// Produce two more output bytes from the 18 input bits we have left.
1919	if (dest) {
1920	if (destidx+`2` > szdest) return -`1`;
1921	temp >>= `2`;
1922	dest[destidx+`1`] = temp;
1923	temp >>= `8`;
1924	dest[destidx] = temp;
1925	}
1926	destidx += `2`;
1927	expected_equals = `1`;
1928	break;
1929
1930	default:
1931	// state should have no other values at this point.
1932	GOOGLE_LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
1933	}
1934
1935	// The remainder of the string should be all whitespace, mixed with
1936	// exactly 0 equals signs, or exactly 'expected_equals' equals
1937	// signs. (Always accepting 0 equals signs is a google extension
1938	// not covered in the RFC, as is accepting dot as the pad character.)
1939
1940	int equals = `0`;
1941	while (szsrc > `0` && *src) {
1942	if (src == kPad64Equals \|\| src == kPad64Dot)
1943	++equals;
1944	else if (!ascii_isspace(c: *src))
1945	return -`1`;
1946	--szsrc;
1947	++src;
1948	}
1949
1950	return (equals == `0` \|\| equals == expected_equals) ? destidx : -`1`;
1951	}
1952
1953	// The arrays below were generated by the following code
1954	// #include <sys/time.h>
1955	// #include <stdlib.h>
1956	// #include <string.h>
1957	// #include <stdio.h>
1958	// main()
1959	// {
1960	// static const char Base64[] =
1961	// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1962	// const char* pos;
1963	// int idx, i, j;
1964	// printf(" ");
1965	// for (i = 0; i < 255; i += 8) {
1966	// for (j = i; j < i + 8; j++) {
1967	// pos = strchr(Base64, j);
1968	// if ((pos == nullptr) \|\| (j == 0))
1969	// idx = -1;
1970	// else
1971	// idx = pos - Base64;
1972	// if (idx == -1)
1973	// printf(" %2d, ", idx);
1974	// else
1975	// printf(" %2d/*%c*/,", idx, j);
1976	// }
1977	// printf("\n ");
1978	// }
1979	// }
1980	//
1981	// where the value of "Base64[]" was replaced by one of the base-64 conversion
1982	// tables from the functions below.
// Reverse lookup for the standard base64 alphabet: indexed by byte value,
// yields the 6-bit code (0-63) of a data character and -1 for every other
// byte. Laid out 16 entries per row; the comment gives the row's first index.
static const signed char kUnBase64[] = {
  // 0x00
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x10
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x20: '+' (0x2B) -> 62, '/' (0x2F) -> 63
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
  // 0x30: '0'..'9' -> 52..61
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
  // 0x40: 'A'..'O' -> 0..14
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  // 0x50: 'P'..'Z' -> 15..25
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
  // 0x60: 'a'..'o' -> 26..40
  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  // 0x70: 'p'..'z' -> 41..51
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
  // 0x80
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x90
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xA0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xB0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xC0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xD0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xE0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xF0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
// Reverse lookup for the web-safe base64 alphabet ('-' and '_' take the
// places of '+' and '/'): indexed by byte value, yields the 6-bit code (0-63)
// of a data character and -1 for every other byte. Laid out 16 entries per
// row; the comment gives the row's first index.
static const signed char kUnWebSafeBase64[] = {
  // 0x00
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x10
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x20: '-' (0x2D) -> 62
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
  // 0x30: '0'..'9' -> 52..61
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
  // 0x40: 'A'..'O' -> 0..14
  -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
  // 0x50: 'P'..'Z' -> 15..25, '_' (0x5F) -> 63
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
  // 0x60: 'a'..'o' -> 26..40
  -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
  // 0x70: 'p'..'z' -> 41..51
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
  // 0x80
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0x90
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xA0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xB0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xC0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xD0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xE0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
  // 0xF0
  -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
2051
2052	int WebSafeBase64Unescape(const char src, int* szsrc, char dest, int* szdest) {
2053	return Base64UnescapeInternal(src_param: src, szsrc, dest, szdest, unbase64: kUnWebSafeBase64);
2054	}
2055
2056	static bool Base64UnescapeInternal(const char src, int* slen, std::string *dest,
2057	const signed char *unbase64) {
2058	// Determine the size of the output string. Base64 encodes every 3 bytes into
2059	// 4 characters. any leftover chars are added directly for good measure.
2060	// This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
2061	const int dest_len = `3` * (slen / `4`) + (slen % `4`);
2062
2063	dest->resize(n: dest_len);
2064
2065	// We are getting the destination buffer by getting the beginning of the
2066	// string and converting it into a char .*
2067	const int len = Base64UnescapeInternal(src_param: src, szsrc: slen, dest: string_as_array(str: dest),
2068	szdest: dest_len, unbase64);
2069	if (len < `0`) {
2070	dest->clear();
2071	return false;
2072	}
2073
2074	// could be shorter if there was padding
2075	GOOGLE_DCHECK_LE(len, dest_len);
2076	dest->erase(pos: len);
2077
2078	return true;
2079	}
2080
2081	bool Base64Unescape(StringPiece src, std::string *dest) {
2082	return Base64UnescapeInternal(src: src.data(), slen: src.size(), dest, unbase64: kUnBase64);
2083	}
2084
2085	bool WebSafeBase64Unescape(StringPiece src, std::string *dest) {
2086	return Base64UnescapeInternal(src: src.data(), slen: src.size(), dest, unbase64: kUnWebSafeBase64);
2087	}
2088
// Base64-encodes src[0, szsrc) into dest[0, szdest).
//
// @param src         input bytes
// @param szsrc       number of input bytes
// @param dest        output buffer; no NUL terminator is written
// @param szdest      capacity of the output buffer
// @param base64      64-character alphabet, indexed by 6-bit value
// @param do_padding  when true, a final partial group is padded with '=' to
//                    a full four characters
// @return number of characters written, or 0 if szsrc <= 0 or the output
//         buffer is too small. When 0 is returned because the final partial
//         group (or its padding) did not fit, the preceding complete groups
//         have already been stored in dest.
int Base64EscapeInternal(const unsigned char* src, int szsrc,
                         char* dest, int szdest, const char* base64,
                         bool do_padding) {
  static const char kPad64 = '=';

  if (szsrc <= 0) return 0;

  // Unpadded output needs at least 4/3 of the input size. The products are
  // formed in 64 bits so that very large sizes cannot overflow `int`.
  if (static_cast<int64_t>(szsrc) * 4 > static_cast<int64_t>(szdest) * 3) {
    return 0;
  }

  const unsigned char* cur_src = src;
  char* cur_dest = dest;

  // Three input bytes encode to four output characters. All complete groups
  // are guaranteed to fit by the capacity check above, so this loop needs no
  // per-iteration bounds test. Bytes are combined individually so nothing
  // outside the input range is ever read.
  for (int groups = szsrc / 3; groups > 0; --groups) {
    const uint32_t in = (static_cast<uint32_t>(cur_src[0]) << 16) |
                        (static_cast<uint32_t>(cur_src[1]) << 8) |
                        static_cast<uint32_t>(cur_src[2]);
    cur_dest[0] = base64[in >> 18];
    cur_dest[1] = base64[(in >> 12) & 0x3F];
    cur_dest[2] = base64[(in >> 6) & 0x3F];
    cur_dest[3] = base64[in & 0x3F];
    cur_dest += 4;
    cur_src += 3;
  }

  // Space still available after the complete groups.
  int room = szdest - static_cast<int>(cur_dest - dest);

  // Encode the 0, 1 or 2 trailing bytes.
  switch (szsrc % 3) {
    case 1: {
      // One byte left: two characters, plus (optionally) two pad characters
      // to round out the four-character group.
      if ((room -= 2) < 0) return 0;
      const uint32_t in = cur_src[0];
      cur_dest[0] = base64[in >> 2];
      cur_dest[1] = base64[(in & 0x3) << 4];
      cur_dest += 2;
      if (do_padding) {
        if ((room -= 2) < 0) return 0;
        cur_dest[0] = kPad64;
        cur_dest[1] = kPad64;
        cur_dest += 2;
      }
      break;
    }
    case 2: {
      // Two bytes left: three characters, plus (optionally) one pad
      // character to round out the four-character group.
      if ((room -= 3) < 0) return 0;
      const uint32_t in = (static_cast<uint32_t>(cur_src[0]) << 8) |
                          static_cast<uint32_t>(cur_src[1]);
      cur_dest[0] = base64[in >> 10];
      cur_dest[1] = base64[(in >> 4) & 0x3F];
      cur_dest[2] = base64[(in & 0xF) << 2];
      cur_dest += 3;
      if (do_padding) {
        if ((room -= 1) < 0) return 0;
        cur_dest[0] = kPad64;
        cur_dest += 1;
      }
      break;
    }
    default:
      // Input length was a multiple of three: nothing left to encode.
      break;
  }
  return static_cast<int>(cur_dest - dest);
}
2188
// Encoding alphabets, indexed by 6-bit value. The two differ only in the
// characters used for the values 62 and 63.
static const char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";

static const char kWebSafeBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "-_";
2194
2195	int Base64Escape(const unsigned char src, int* szsrc, char dest, int* szdest) {
2196	return Base64EscapeInternal(src, szsrc, dest, szdest, base64: kBase64Chars, do_padding: true);
2197	}
2198	int WebSafeBase64Escape(const unsigned char src, int* szsrc, char *dest,
2199	int szdest, bool do_padding) {
2200	return Base64EscapeInternal(src, szsrc, dest, szdest,
2201	base64: kWebSafeBase64Chars, do_padding);
2202	}
2203
2204	void Base64EscapeInternal(const unsigned char src, int* szsrc,
2205	std::string dest, bool* do_padding,
2206	const char *base64_chars) {
2207	const int calc_escaped_size =
2208	CalculateBase64EscapedLen(input_len: szsrc, do_padding);
2209	dest->resize(n: calc_escaped_size);
2210	const int escaped_len = Base64EscapeInternal(src, szsrc,
2211	dest: string_as_array(str: dest),
2212	szdest: dest->size(),
2213	base64: base64_chars,
2214	do_padding);
2215	GOOGLE_DCHECK_EQ(calc_escaped_size, escaped_len);
2216	dest->erase(pos: escaped_len);
2217	}
2218
2219	void Base64Escape(const unsigned char src, int* szsrc, std::string *dest,
2220	bool do_padding) {
2221	Base64EscapeInternal(src, szsrc, dest, do_padding, base64_chars: kBase64Chars);
2222	}
2223
2224	void WebSafeBase64Escape(const unsigned char src, int* szsrc, std::string *dest,
2225	bool do_padding) {
2226	Base64EscapeInternal(src, szsrc, dest, do_padding, base64_chars: kWebSafeBase64Chars);
2227	}
2228
2229	void Base64Escape(StringPiece src, std::string *dest) {
2230	Base64Escape(src: reinterpret_cast<const unsigned char*>(src.data()),
2231	szsrc: src.size(), dest, do_padding: true);
2232	}
2233
2234	void WebSafeBase64Escape(StringPiece src, std::string *dest) {
2235	WebSafeBase64Escape(src: reinterpret_cast<const unsigned char*>(src.data()),
2236	szsrc: src.size(), dest, do_padding: false);
2237	}
2238
2239	void WebSafeBase64EscapeWithPadding(StringPiece src, std::string *dest) {
2240	WebSafeBase64Escape(src: reinterpret_cast<const unsigned char*>(src.data()),
2241	szsrc: src.size(), dest, do_padding: true);
2242	}
2243
// Writes `code_point` to `output` as a UTF-8 sequence of 1 to 4 bytes and
// returns the number of bytes written. No terminator is appended, and no
// external dependencies are used.
int EncodeAsUTF8Char(uint32_t code_point, char* output) {
  if (code_point <= 0x7f) {
    // 0xxxxxxx
    output[0] = static_cast<char>(code_point);
    return 1;
  }
  if (code_point <= 0x07ff) {
    // 110xxxxx 10xxxxxx
    output[0] = static_cast<char>(0xc0 | (code_point >> 6));
    output[1] = static_cast<char>(0x80 | (code_point & 0x3f));
    return 2;
  }
  if (code_point <= 0xffff) {
    // 1110xxxx 10xxxxxx 10xxxxxx
    output[0] = static_cast<char>(0xe0 | (code_point >> 12));
    output[1] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
    output[2] = static_cast<char>(0x80 | (code_point & 0x3f));
    return 3;
  }
  // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
  // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
  // normally only defined up to there as well; only the low 21 bits of
  // larger values are encoded.
  output[0] = static_cast<char>(0xf0 | ((code_point >> 18) & 0x07));
  output[1] = static_cast<char>(0x80 | ((code_point >> 12) & 0x3f));
  output[2] = static_cast<char>(0x80 | ((code_point >> 6) & 0x3f));
  output[3] = static_cast<char>(0x80 | (code_point & 0x3f));
  return 4;
}
2277
// UTF-8 sequence length keyed by the first byte of the sequence. Laid out 16
// entries per row; the comment gives the byte range each group covers.
// Bytes 0x80-0xBF and 0xF5-0xFF map to 1.
static const unsigned char kUTF8LenTbl[256] = {
    // 0x00 - 0x7F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0x80 - 0xBF
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    // 0xC0 - 0xDF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    // 0xE0 - 0xEF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xF0 - 0xFF
    4, 4, 4, 4, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};

// Returns the length in bytes of the UTF-8 character that starts at `src`,
// judged from its first byte alone, or 0 when `len` is 0.
int UTF8FirstLetterNumBytes(const char* src, int len) {
  if (len == 0) {
    return 0;
  }
  const unsigned char lead = static_cast<unsigned char>(*src);
  return kUTF8LenTbl[lead];
}
2301
2302	// ----------------------------------------------------------------------
2303	// CleanStringLineEndings()
2304	// Clean up a multi-line string to conform to Unix line endings.
2305	// Reads from src and appends to dst, so usually dst should be empty.
2306	//
2307	// If there is no line ending at the end of a non-empty string, it can
2308	// be added automatically.
2309	//
2310	// Four different types of input are correctly handled:
2311	//
2312	// - Unix/Linux files: line ending is LF: pass through unchanged
2313	//
2314	// - DOS/Windows files: line ending is CRLF: convert to LF
2315	//
2316	// - Legacy Mac files: line ending is CR: convert to LF
2317	//
2318	// - Garbled files: random line endings: convert gracefully
2319	// lonely CR, lonely LF, CRLF: convert to LF
2320	//
2321	// @param src The multi-line string to convert
2322	// @param dst The converted string is appended to this string
2323	// @param auto_end_last_line Automatically terminate the last line
2324	//
2325	// Limitations:
2326	//
2327	// This does not do the right thing for CRCRLF files created by
2328	// broken programs that do another Unix->DOS conversion on files
2329	// that are already in CRLF format. For this, a two-pass approach
2330	// brute-force would be needed that
2331	//
2332	// (1) determines the presence of LF (first one is ok)
2333	// (2) if yes, removes any CR, else convert every CR to LF
2334
2335	void CleanStringLineEndings(const std::string &src, std::string *dst,
2336	bool auto_end_last_line) {
2337	if (dst->empty()) {
2338	dst->append(str: src);
2339	CleanStringLineEndings(str: dst, auto_end_last_line);
2340	} else {
2341	std::string tmp = src;
2342	CleanStringLineEndings(str: &tmp, auto_end_last_line);
2343	dst->append(str: tmp);
2344	}
2345	}
2346
// Normalizes the line endings of `*str` in place: CRLF and lone CR both
// become LF, and LF is kept as is. If `auto_end_last_line` is true and the
// resulting non-empty text does not end in LF, one is appended.
void CleanStringLineEndings(std::string* str, bool auto_end_last_line) {
  const ptrdiff_t len = str->size();
  char* const buf = &(*str)[0];

  // A word with the byte 0x01 repeated in each of its eight bytes; used by
  // the "does any byte hold a value below N" test. For background see
  // http://graphics.stanford.edu/~seander/bithacks.html#HasLessInWord
  const uint64_t kByteOnes = ~0ULL / 255;

  ptrdiff_t write_pos = 0;   // next position to store output at
  bool pending_cr = false;   // a CR was read and its LF is not yet emitted
  ptrdiff_t read_pos = 0;

  while (read_pos < len) {
    if (!pending_cr && read_pos + 8 < len) {
      // Fast path: examine eight bytes at once. If none of them is below
      // '\r' + 1, the word holds neither '\n' (0x0a) nor '\r' (0x0d) and can
      // be moved as a unit.
      uint64_t word;
      memcpy(&word, buf + read_pos, sizeof(word));
      const bool has_small_byte =
          ((word - kByteOnes * ('\r' + 1)) & ~word & (kByteOnes * 128)) != 0;
      if (!has_small_byte) {
        if (write_pos != read_pos) {
          memcpy(buf + write_pos, &word, sizeof(word));
        }
        read_pos += 8;
        write_pos += 8;
        continue;
      }
    }

    const char c = buf[read_pos];
    if (c == '\r') {
      // A second CR in a row ends the line opened by the first one.
      if (pending_cr) buf[write_pos++] = '\n';
      pending_cr = true;
    } else if (c == '\n') {
      // Covers both a plain LF and the LF that completes a CRLF pair.
      if (read_pos != write_pos) buf[write_pos] = '\n';
      ++write_pos;
      pending_cr = false;
    } else {
      // An ordinary character after a lone CR: emit the LF first.
      if (pending_cr) buf[write_pos++] = '\n';
      pending_cr = false;
      if (read_pos != write_pos) buf[write_pos] = c;
      ++write_pos;
    }
    ++read_pos;
  }

  const bool needs_final_lf =
      pending_cr ||
      (auto_end_last_line && write_pos > 0 && buf[write_pos - 1] != '\n');
  if (needs_final_lf) {
    str->resize(write_pos + 1);
    (*str)[write_pos] = '\n';
  } else if (write_pos < len) {
    str->resize(write_pos);
  }
}
2406
2407	namespace internal {
2408
2409	// ----------------------------------------------------------------------
2410	// NoLocaleStrtod()
2411	// This code will make you cry.
2412	// ----------------------------------------------------------------------
2413
2414	namespace {
2415
2416	// Returns a string identical to input except that the character pointed to*
2417	// by radix_pos (which should be '.') is replaced with the locale-specific
2418	// radix character.
2419	std::string LocalizeRadix(const char input, const* char *radix_pos) {
2420	// Determine the locale-specific radix character by calling sprintf() to
2421	// print the number 1.5, then stripping off the digits. As far as I can
2422	// tell, this is the only portable, thread-safe way to get the C library
2423	// to divuldge the locale's radix character. No, localeconv() is NOT
2424	// thread-safe.
2425	char temp[`16`];
2426	int size = snprintf(s: temp, maxlen: sizeof(temp), format: "%.1f", `1.5`);
2427	GOOGLE_CHECK_EQ(temp[`0`], `'1'`);
2428	GOOGLE_CHECK_EQ(temp[size - `1`], `'5'`);
2429	GOOGLE_CHECK_LE(size, `6`);
2430
2431	// Now replace the '.' in the input with it.
2432	std::string result;
2433	result.reserve(res_arg: strlen(s: input) + size - `3`);
2434	result.append(first: input, last: radix_pos);
2435	result.append(s: temp + `1`, n: size - `2`);
2436	result.append(s: radix_pos + `1`);
2437	return result;
2438	}
2439
2440	} // namespace
2441
2442	double NoLocaleStrtod(const char str, char* **endptr) {
2443	// We cannot simply set the locale to "C" temporarily with setlocale()
2444	// as this is not thread-safe. Instead, we try to parse in the current
2445	// locale first. If parsing stops at a '.' character, then this is a
2446	// pretty good hint that we're actually in some other locale in which
2447	// '.' is not the radix character.
2448
2449	char *temp_endptr;
2450	double result = strtod(nptr: str, endptr: &temp_endptr);
2451	if (endptr != NULL) *endptr = temp_endptr;
2452	if (temp_endptr != `'.'`) return* result;
2453
2454	// Parsing halted on a '.'. Perhaps we're in a different locale? Let's
2455	// try to replace the '.' with a locale-specific radix character and
2456	// try again.
2457	std::string localized = LocalizeRadix(input: str, radix_pos: temp_endptr);
2458	const char *localized_cstr = localized.c_str();
2459	char *localized_endptr;
2460	result = strtod(nptr: localized_cstr, endptr: &localized_endptr);
2461	if ((localized_endptr - localized_cstr) > (temp_endptr - str)) {
2462	// This attempt got further, so replacing the decimal must have helped.
2463	// Update endptr to point at the right location.
2464	if (endptr != NULL) {
2465	// size_diff is non-zero if the localized radix has multiple bytes.
2466	int size_diff = localized.size() - strlen(s: str);
2467	// const_cast is necessary to match the strtod() interface.
2468	endptr = const_cast<char* *>(
2469	str + (localized_endptr - localized_cstr - size_diff));
2470	}
2471	}
2472
2473	return result;
2474	}
2475
2476	} // namespace internal
2477
2478	} // namespace protobuf
2479	} // namespace google
2480

Browse the source code of Velox/build/_deps/protobuf-src/src/google/protobuf/stubs/strutil.cc