escaping.cc source code [Abseil/strings/escaping.cc]

1	// Copyright 2017 The Abseil Authors.
2	//
3	// Licensed under the Apache License, Version 2.0 (the "License");
4	// you may not use this file except in compliance with the License.
5	// You may obtain a copy of the License at
6	//
7	// https://www.apache.org/licenses/LICENSE-2.0
8	//
9	// Unless required by applicable law or agreed to in writing, software
10	// distributed under the License is distributed on an "AS IS" BASIS,
11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12	// See the License for the specific language governing permissions and
13	// limitations under the License.
14
15	#include "absl/strings/escaping.h"
16
17	#include <algorithm>
18	#include <cassert>
19	#include <cstdint>
20	#include <cstring>
21	#include <iterator>
22	#include <limits>
23	#include <string>
24
25	#include "absl/base/internal/endian.h"
26	#include "absl/base/internal/raw_logging.h"
27	#include "absl/base/internal/unaligned_access.h"
28	#include "absl/strings/internal/char_map.h"
29	#include "absl/strings/internal/resize_uninitialized.h"
30	#include "absl/strings/internal/utf8.h"
31	#include "absl/strings/str_cat.h"
32	#include "absl/strings/str_join.h"
33	#include "absl/strings/string_view.h"
34
35	namespace absl {
36	namespace {
37
// Digit conversion.

// Lowercase hexadecimal digits, indexed by nibble value (0-15).
constexpr char kHexChar[] = "0123456789abcdef";

// Two-character lowercase hex spelling of every byte value. The text for byte
// `b` occupies kHexTable[2 * b] and kHexTable[2 * b + 1]. The array holds 256
// two-character entries plus the terminating NUL, hence 513 bytes.
constexpr char kHexTable[513] =
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";

// Value for the leave_nulls_escaped argument to CUnescapeInternal(): escape
// sequences that denote the NUL character are unescaped like any other
// sequence instead of being copied through in escaped form.
constexpr bool kUnescapeNulls = false;
61
// Returns true iff `c` is one of the ASCII octal digits '0' through '7'.
inline bool is_octal_digit(char c) { return ('0' <= c) && (c <= '7'); }
63
64	inline int hex_digit_to_int(char c) {
65	static_assert(`'0'` == `0x30` && `'A'` == `0x41` && `'a'` == `0x61`,
66	"Character set must be ASCII.");
67	assert(absl::ascii_isxdigit(c));
68	int x = static_cast<unsigned char>(c);
69	if (x > `'9'`) {
70	x += `9`;
71	}
72	return x & `0xf`;
73	}
74
75	inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) {
76	if (c >= `0xD800` && c <= `0xDFFF`) {
77	if (error) {
78	*error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\",
79	src);
80	}
81	return true;
82	}
83	return false;
84	}
85
86	// ----------------------------------------------------------------------
87	// CUnescapeInternal()
88	// Implements both CUnescape() and CUnescapeForNullTerminatedString().
89	//
90	// Unescapes C escape sequences and is the reverse of CEscape().
91	//
92	// If 'source' is valid, stores the unescaped string and its size in
93	// 'dest' and 'dest_len' respectively, and returns true. Otherwise
94	// returns false and optionally stores the error description in
95	// 'error'. Set 'error' to nullptr to disable error reporting.
96	//
97	// 'dest' should point to a buffer that is at least as big as 'source'.
98	// 'source' and 'dest' may be the same.
99	//
100	// NOTE: any changes to this function must also be reflected in the older
101	// UnescapeCEscapeSequences().
102	// ----------------------------------------------------------------------
103	bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
104	char* dest, ptrdiff_t* dest_len, std::string* error) {
105	char* d = dest;
106	const char* p = source.data();
107	const char* end = p + source.size();
108	const char* last_byte = end - `1`;
109
110	// Small optimization for case where source = dest and there's no escaping
111	while (p == d && p < end && *p != `'\\'`) p++, d++;
112
113	while (p < end) {
114	if (*p != `'\\'`) {
115	d++ = p++;
116	} else {
117	if (++p > last_byte) { // skip past the '\\'
118	if (error) *error = "String cannot end with \\";
119	return false;
120	}
121	switch (*p) {
122	case `'a'`: d++ = `'\a'`; break*;
123	case `'b'`: d++ = `'\b'`; break*;
124	case `'f'`: d++ = `'\f'`; break*;
125	case `'n'`: d++ = `'\n'`; break*;
126	case `'r'`: d++ = `'\r'`; break*;
127	case `'t'`: d++ = `'\t'`; break*;
128	case `'v'`: d++ = `'\v'`; break*;
129	case `'\\'`: d++ = `'\\'`; break*;
130	case `'?'`: d++ = `'\?'`; break; // \? Who knew?*
131	case `'\''`: d++ = `'\''`; break*;
132	case `'"'`: d++ = `'\"'`; break*;
133	case `'0'`:
134	case `'1'`:
135	case `'2'`:
136	case `'3'`:
137	case `'4'`:
138	case `'5'`:
139	case `'6'`:
140	case `'7'`: {
141	// octal digit: 1 to 3 digits
142	const char* octal_start = p;
143	unsigned int ch = *p - `'0'`;
144	if (p < last_byte && is_octal_digit(p[`1`])) ch = ch * `8` + *++p - `'0'`;
145	if (p < last_byte && is_octal_digit(p[`1`]))
146	ch = ch * `8` + ++p - `'0'`; // now points at last digit*
147	if (ch > `0xff`) {
148	if (error) {
149	*error = "Value of \\" +
150	std::string (octal_start, p + `1` - octal_start) +
151	" exceeds 0xff";
152	}
153	return false;
154	}
155	if ((ch == `0`) && leave_nulls_escaped) {
156	// Copy the escape sequence for the null character
157	const ptrdiff_t octal_size = p + `1` - octal_start;
158	*d++ = `'\\'`;
159	memcpy(d, octal_start, octal_size);
160	d += octal_size;
161	break;
162	}
163	*d++ = ch;
164	break;
165	}
166	case `'x'`:
167	case `'X'`: {
168	if (p >= last_byte) {
169	if (error) *error = "String cannot end with \\x";
170	return false;
171	} else if (!absl::ascii_isxdigit(p[`1`])) {
172	if (error) *error = "\\x cannot be followed by a non-hex digit";
173	return false;
174	}
175	unsigned int ch = `0`;
176	const char* hex_start = p;
177	while (p < last_byte && absl::ascii_isxdigit(p[`1`]))
178	// Arbitrarily many hex digits
179	ch = (ch << `4`) + hex_digit_to_int(*++p);
180	if (ch > `0xFF`) {
181	if (error) {
182	*error = "Value of \\" +
183	std::string (hex_start, p + `1` - hex_start) +
184	" exceeds 0xff";
185	}
186	return false;
187	}
188	if ((ch == `0`) && leave_nulls_escaped) {
189	// Copy the escape sequence for the null character
190	const ptrdiff_t hex_size = p + `1` - hex_start;
191	*d++ = `'\\'`;
192	memcpy(d, hex_start, hex_size);
193	d += hex_size;
194	break;
195	}
196	*d++ = ch;
197	break;
198	}
199	case `'u'`: {
200	// \uhhhh => convert 4 hex digits to UTF-8
201	char32_t rune = `0`;
202	const char* hex_start = p;
203	if (p + `4` >= end) {
204	if (error) {
205	*error = "\\u must be followed by 4 hex digits: \\" +
206	std::string (hex_start, p + `1` - hex_start);
207	}
208	return false;
209	}
210	for (int i = `0`; i < `4`; ++i) {
211	// Look one char ahead.
212	if (absl::ascii_isxdigit(p[`1`])) {
213	rune = (rune << `4`) + hex_digit_to_int(++p); // Advance p.*
214	} else {
215	if (error) {
216	*error = "\\u must be followed by 4 hex digits: \\" +
217	std::string (hex_start, p + `1` - hex_start);
218	}
219	return false;
220	}
221	}
222	if ((rune == `0`) && leave_nulls_escaped) {
223	// Copy the escape sequence for the null character
224	*d++ = `'\\'`;
225	memcpy(d, hex_start, `5`); // u0000
226	d += `5`;
227	break;
228	}
229	if (IsSurrogate(rune, absl::string_view (hex_start, `5`), error)) {
230	return false;
231	}
232	d += strings_internal::EncodeUTF8Char(d, rune);
233	break;
234	}
235	case `'U'`: {
236	// \Uhhhhhhhh => convert 8 hex digits to UTF-8
237	char32_t rune = `0`;
238	const char* hex_start = p;
239	if (p + `8` >= end) {
240	if (error) {
241	*error = "\\U must be followed by 8 hex digits: \\" +
242	std::string (hex_start, p + `1` - hex_start);
243	}
244	return false;
245	}
246	for (int i = `0`; i < `8`; ++i) {
247	// Look one char ahead.
248	if (absl::ascii_isxdigit(p[`1`])) {
249	// Don't change rune until we're sure this
250	// is within the Unicode limit, but do advance p.
251	uint32_t newrune = (rune << `4`) + hex_digit_to_int(*++p);
252	if (newrune > `0x10FFFF`) {
253	if (error) {
254	*error = "Value of \\" +
255	std::string (hex_start, p + `1` - hex_start) +
256	" exceeds Unicode limit (0x10FFFF)";
257	}
258	return false;
259	} else {
260	rune = newrune;
261	}
262	} else {
263	if (error) {
264	*error = "\\U must be followed by 8 hex digits: \\" +
265	std::string (hex_start, p + `1` - hex_start);
266	}
267	return false;
268	}
269	}
270	if ((rune == `0`) && leave_nulls_escaped) {
271	// Copy the escape sequence for the null character
272	*d++ = `'\\'`;
273	memcpy(d, hex_start, `9`); // U00000000
274	d += `9`;
275	break;
276	}
277	if (IsSurrogate(rune, absl::string_view (hex_start, `9`), error)) {
278	return false;
279	}
280	d += strings_internal::EncodeUTF8Char(d, rune);
281	break;
282	}
283	default: {
284	if (error) error = std::string ("Unknown escape sequence: \\") + p;
285	return false;
286	}
287	}
288	p++; // read past letter we escaped
289	}
290	}
291	*dest_len = d - dest;
292	return true;
293	}
294
295	// ----------------------------------------------------------------------
296	// CUnescapeInternal()
297	//
298	// Same as above but uses a std::string for output. 'source' and 'dest'
299	// may be the same.
300	// ----------------------------------------------------------------------
301	bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped,
302	std::string* dest, std::string* error) {
303	strings_internal::STLStringResizeUninitialized(dest, source.size());
304
305	ptrdiff_t dest_size;
306	if (!CUnescapeInternal(source,
307	leave_nulls_escaped,
308	&(*dest)[`0`],
309	&dest_size,
310	error)) {
311	return false;
312	}
313	dest->erase(dest_size);
314	return true;
315	}
316
317	// ----------------------------------------------------------------------
318	// CEscape()
319	// CHexEscape()
320	// Utf8SafeCEscape()
321	// Utf8SafeCHexEscape()
322	// Escapes 'src' using C-style escape sequences. This is useful for
323	// preparing query flags. The 'Hex' version uses hexadecimal rather than
324	// octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes.
325	//
326	// Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint().
327	// ----------------------------------------------------------------------
328	std::string CEscapeInternal(absl::string_view src, bool use_hex,
329	bool utf8_safe) {
330	std::string dest;
331	bool last_hex_escape = false; // true if last output char was \xNN.
332
333	for (unsigned char c : src) {
334	bool is_hex_escape = false;
335	switch (c) {
336	case `'\n'`: dest.append("\\" "n"); break;
337	case `'\r'`: dest.append("\\" "r"); break;
338	case `'\t'`: dest.append("\\" "t"); break;
339	case `'\"'`: dest.append("\\" "\""); break;
340	case `'\''`: dest.append("\\" "'"); break;
341	case `'\\'`: dest.append("\\" "\\"); break;
342	default:
343	// Note that if we emit \xNN and the src character after that is a hex
344	// digit then that digit must be escaped too to prevent it being
345	// interpreted as part of the character code by C.
346	if ((!utf8_safe \|\| c < `0x80`) &&
347	(!absl::ascii_isprint(c) \|\|
348	(last_hex_escape && absl::ascii_isxdigit(c)))) {
349	if (use_hex) {
350	dest.append("\\" "x");
351	dest.push_back(kHexChar[c / `16`]);
352	dest.push_back(kHexChar[c % `16`]);
353	is_hex_escape = true;
354	} else {
355	dest.append("\\");
356	dest.push_back(kHexChar[c / `64`]);
357	dest.push_back(kHexChar[(c % `64`) / `8`]);
358	dest.push_back(kHexChar[c % `8`]);
359	}
360	} else {
361	dest.push_back(c);
362	break;
363	}
364	}
365	last_hex_escape = is_hex_escape;
366	}
367
368	return dest;
369	}
370
// Number of output characters each byte value produces when C-escaped with
// octal sequences: 1 for bytes copied verbatim, 2 for the two-character
// escapes (\t, \n, \r, \", \', \\) and 4 for a backslash followed by three
// octal digits.
/* clang-format off */
constexpr char c_escaped_len[256] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t, \n, \r
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // ", '
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
/* clang-format on */
391
392	// Calculates the length of the C-style escaped version of 'src'.
393	// Assumes that non-printable characters are escaped using octal sequences, and
394	// that UTF-8 bytes are not handled specially.
395	inline size_t CEscapedLength(absl::string_view src) {
396	size_t escaped_len = `0`;
397	for (unsigned char c : src) escaped_len += c_escaped_len[c];
398	return escaped_len;
399	}
400
401	void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
402	size_t escaped_len = CEscapedLength(src);
403	if (escaped_len == src.size()) {
404	dest->append(src.data(), src.size());
405	return;
406	}
407
408	size_t cur_dest_len = dest->size();
409	strings_internal::STLStringResizeUninitialized(dest,
410	cur_dest_len + escaped_len);
411	char* append_ptr = &(*dest)[cur_dest_len];
412
413	for (unsigned char c : src) {
414	int char_len = c_escaped_len[c];
415	if (char_len == `1`) {
416	*append_ptr++ = c;
417	} else if (char_len == `2`) {
418	switch (c) {
419	case `'\n'`:
420	*append_ptr++ = `'\\'`;
421	*append_ptr++ = `'n'`;
422	break;
423	case `'\r'`:
424	*append_ptr++ = `'\\'`;
425	*append_ptr++ = `'r'`;
426	break;
427	case `'\t'`:
428	*append_ptr++ = `'\\'`;
429	*append_ptr++ = `'t'`;
430	break;
431	case `'\"'`:
432	*append_ptr++ = `'\\'`;
433	*append_ptr++ = `'\"'`;
434	break;
435	case `'\''`:
436	*append_ptr++ = `'\\'`;
437	*append_ptr++ = `'\''`;
438	break;
439	case `'\\'`:
440	*append_ptr++ = `'\\'`;
441	*append_ptr++ = `'\\'`;
442	break;
443	}
444	} else {
445	*append_ptr++ = `'\\'`;
446	*append_ptr++ = `'0'` + c / `64`;
447	*append_ptr++ = `'0'` + (c % `64`) / `8`;
448	*append_ptr++ = `'0'` + c % `8`;
449	}
450	}
451	}
452
453	bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
454	size_t szdest, const signed char* unbase64,
455	size_t* len) {
456	static const char kPad64Equals = `'='`;
457	static const char kPad64Dot = `'.'`;
458
459	size_t destidx = `0`;
460	int decode = `0`;
461	int state = `0`;
462	unsigned int ch = `0`;
463	unsigned int temp = `0`;
464
465	// If "char" is signed by default, using src as an array index results in*
466	// accessing negative array elements. Treat the input as a pointer to
467	// unsigned char to avoid this.
468	const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);
469
470	// The GET_INPUT macro gets the next input character, skipping
471	// over any whitespace, and stopping when we reach the end of the
472	// std::string or when we read any non-data character. The arguments are
473	// an arbitrary identifier (used as a label for goto) and the number
474	// of data bytes that must remain in the input to avoid aborting the
475	// loop.
476	#define GET_INPUT(label, remain) \
477	label: \
478	--szsrc; \
479	ch = *src++; \
480	decode = unbase64[ch]; \
481	if (decode < 0) { \
482	if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
483	state = 4 - remain; \
484	break; \
485	}
486
487	// if dest is null, we're just checking to see if it's legal input
488	// rather than producing output. (I suspect this could just be done
489	// with a regexp...). We duplicate the loop so this test can be
490	// outside it instead of in every iteration.
491
492	if (dest) {
493	// This loop consumes 4 input bytes and produces 3 output bytes
494	// per iteration. We can't know at the start that there is enough
495	// data left in the std::string for a full iteration, so the loop may
496	// break out in the middle; if so 'state' will be set to the
497	// number of input bytes read.
498
499	while (szsrc >= `4`) {
500	// We'll start by optimistically assuming that the next four
501	// bytes of the std::string (src[0..3]) are four good data bytes
502	// (that is, no nulls, whitespace, padding chars, or illegal
503	// chars). We need to test src[0..2] for nulls individually
504	// before constructing temp to preserve the property that we
505	// never read past a null in the std::string (no matter how long
506	// szsrc claims the std::string is).
507
508	if (!src[`0`] \|\| !src[`1`] \|\| !src[`2`] \|\|
509	((temp = ((unsigned(unbase64[src[`0`]]) << `18`) \|
510	(unsigned(unbase64[src[`1`]]) << `12`) \|
511	(unsigned(unbase64[src[`2`]]) << `6`) \|
512	(unsigned(unbase64[src[`3`]])))) &
513	`0x80000000`)) {
514	// Iff any of those four characters was bad (null, illegal,
515	// whitespace, padding), then temp's high bit will be set
516	// (because unbase64[] is -1 for all bad characters).
517	//
518	// We'll back up and resort to the slower decoder, which knows
519	// how to handle those cases.
520
521	GET_INPUT(first, `4`);
522	temp = decode;
523	GET_INPUT(second, `3`);
524	temp = (temp << `6`) \| decode;
525	GET_INPUT(third, `2`);
526	temp = (temp << `6`) \| decode;
527	GET_INPUT(fourth, `1`);
528	temp = (temp << `6`) \| decode;
529	} else {
530	// We really did have four good data bytes, so advance four
531	// characters in the std::string.
532
533	szsrc -= `4`;
534	src += `4`;
535	}
536
537	// temp has 24 bits of input, so write that out as three bytes.
538
539	if (destidx + `3` > szdest) return false;
540	dest[destidx + `2`] = temp;
541	temp >>= `8`;
542	dest[destidx + `1`] = temp;
543	temp >>= `8`;
544	dest[destidx] = temp;
545	destidx += `3`;
546	}
547	} else {
548	while (szsrc >= `4`) {
549	if (!src[`0`] \|\| !src[`1`] \|\| !src[`2`] \|\|
550	((temp = ((unsigned(unbase64[src[`0`]]) << `18`) \|
551	(unsigned(unbase64[src[`1`]]) << `12`) \|
552	(unsigned(unbase64[src[`2`]]) << `6`) \|
553	(unsigned(unbase64[src[`3`]])))) &
554	`0x80000000`)) {
555	GET_INPUT(first_no_dest, `4`);
556	GET_INPUT(second_no_dest, `3`);
557	GET_INPUT(third_no_dest, `2`);
558	GET_INPUT(fourth_no_dest, `1`);
559	} else {
560	szsrc -= `4`;
561	src += `4`;
562	}
563	destidx += `3`;
564	}
565	}
566
567	#undef GET_INPUT
568
569	// if the loop terminated because we read a bad character, return
570	// now.
571	if (decode < `0` && ch != kPad64Equals && ch != kPad64Dot &&
572	!absl::ascii_isspace(ch))
573	return false;
574
575	if (ch == kPad64Equals \|\| ch == kPad64Dot) {
576	// if we stopped by hitting an '=' or '.', un-read that character -- we'll
577	// look at it again when we count to check for the proper number of
578	// equals signs at the end.
579	++szsrc;
580	--src;
581	} else {
582	// This loop consumes 1 input byte per iteration. It's used to
583	// clean up the 0-3 input bytes remaining when the first, faster
584	// loop finishes. 'temp' contains the data from 'state' input
585	// characters read by the first loop.
586	while (szsrc > `0`) {
587	--szsrc;
588	ch = *src++;
589	decode = unbase64[ch];
590	if (decode < `0`) {
591	if (absl::ascii_isspace(ch)) {
592	continue;
593	} else if (ch == kPad64Equals \|\| ch == kPad64Dot) {
594	// back up one character; we'll read it again when we check
595	// for the correct number of pad characters at the end.
596	++szsrc;
597	--src;
598	break;
599	} else {
600	return false;
601	}
602	}
603
604	// Each input character gives us six bits of output.
605	temp = (temp << `6`) \| decode;
606	++state;
607	if (state == `4`) {
608	// If we've accumulated 24 bits of output, write that out as
609	// three bytes.
610	if (dest) {
611	if (destidx + `3` > szdest) return false;
612	dest[destidx + `2`] = temp;
613	temp >>= `8`;
614	dest[destidx + `1`] = temp;
615	temp >>= `8`;
616	dest[destidx] = temp;
617	}
618	destidx += `3`;
619	state = `0`;
620	temp = `0`;
621	}
622	}
623	}
624
625	// Process the leftover data contained in 'temp' at the end of the input.
626	int expected_equals = `0`;
627	switch (state) {
628	case `0`:
629	// Nothing left over; output is a multiple of 3 bytes.
630	break;
631
632	case `1`:
633	// Bad input; we have 6 bits left over.
634	return false;
635
636	case `2`:
637	// Produce one more output byte from the 12 input bits we have left.
638	if (dest) {
639	if (destidx + `1` > szdest) return false;
640	temp >>= `4`;
641	dest[destidx] = temp;
642	}
643	++destidx;
644	expected_equals = `2`;
645	break;
646
647	case `3`:
648	// Produce two more output bytes from the 18 input bits we have left.
649	if (dest) {
650	if (destidx + `2` > szdest) return false;
651	temp >>= `2`;
652	dest[destidx + `1`] = temp;
653	temp >>= `8`;
654	dest[destidx] = temp;
655	}
656	destidx += `2`;
657	expected_equals = `1`;
658	break;
659
660	default:
661	// state should have no other values at this point.
662	ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
663	state);
664	}
665
666	// The remainder of the std::string should be all whitespace, mixed with
667	// exactly 0 equals signs, or exactly 'expected_equals' equals
668	// signs. (Always accepting 0 equals signs is an Abseil extension
669	// not covered in the RFC, as is accepting dot as the pad character.)
670
671	int equals = `0`;
672	while (szsrc > `0`) {
673	if (src == kPad64Equals \|\| src == kPad64Dot)
674	++equals;
675	else if (!absl::ascii_isspace(*src))
676	return false;
677	--szsrc;
678	++src;
679	}
680
681	const bool ok = (equals == `0` \|\| equals == expected_equals);
682	if (ok) *len = destidx;
683	return ok;
684	}
685
// Decode tables for Base64UnescapeInternal(): each is indexed by unsigned byte
// value and holds that character's 6-bit value, or -1 if the character is not
// part of the alphabet.
//
// The arrays below were generated by the following code
// #include <sys/time.h>
// #include <stdlib.h>
// #include <string.h>
// main()
// {
//   static const char Base64[] =
//     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
//   char* pos;
//   int idx, i, j;
//   printf("    ");
//   for (i = 0; i < 255; i += 8) {
//     for (j = i; j < i + 8; j++) {
//       pos = strchr(Base64, j);
//       if ((pos == nullptr) || (j == 0))
//         idx = -1;
//       else
//         idx = pos - Base64;
//       if (idx == -1)
//         printf(" %2d,     ", idx);
//       else
//         printf(" %2d/*%c*/,", idx, j);
//     }
//     printf("\n    ");
//   }
// }
//
// where the value of "Base64[]" was replaced by one of the base-64 conversion
// tables from the functions below.
/* clang-format off */
constexpr signed char kUnBase64[] = {
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
    52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
    60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
    -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
     7/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
    15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
    23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
    -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
    33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
    41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
    49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};

constexpr signed char kUnWebSafeBase64[] = {
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      62/*-*/, -1,      -1,
    52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
    60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
    -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
     7/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
    15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
    23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      63/*_*/,
    -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
    33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
    41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
    49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
    -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
/* clang-format on */
786
// Returns the number of characters needed to base64-encode 'input_len' bytes.
// When 'do_padding' is true, the count includes the '=' characters that round
// a final partial group up to four characters.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  // Base64 encodes three bytes of input at a time. If the input is not
  // divisible by three, we pad as appropriate.
  //
  // (from https://tools.ietf.org/html/rfc3548)
  // Special processing is performed if fewer than 24 bits are available
  // at the end of the data being encoded.  A full encoding quantum is
  // always completed at the end of a quantity.  When fewer than 24 input
  // bits are available in an input group, zero bits are added (on the
  // right) to form an integral number of 6-bit groups.  Padding at the
  // end of the data is performed using the '=' character.  Since all base
  // 64 input is an integral number of octets, only the following cases
  // can arise:

  // Base64 encodes each three bytes of input into four bytes of output.
  size_t len = (input_len / 3) * 4;

  if (input_len % 3 == 0) {
    // (from https://tools.ietf.org/html/rfc3548)
    // (1) the final quantum of encoding input is an integral multiple of 24
    // bits; here, the final unit of encoded output will be an integral
    // multiple of 4 characters with no "=" padding,
  } else if (input_len % 3 == 1) {
    // (from https://tools.ietf.org/html/rfc3548)
    // (2) the final quantum of encoding input is exactly 8 bits; here, the
    // final unit of encoded output will be two characters followed by two
    // "=" padding characters, or
    len += 2;
    if (do_padding) {
      len += 2;
    }
  } else {  // (input_len % 3 == 2)
    // (from https://tools.ietf.org/html/rfc3548)
    // (3) the final quantum of encoding input is exactly 16 bits; here, the
    // final unit of encoded output will be three characters followed by one
    // "=" padding character.
    len += 3;
    if (do_padding) {
      len += 1;
    }
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
832
833	size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
834	size_t szdest, const char* base64,
835	bool do_padding) {
836	static const char kPad64 = `'='`;
837
838	if (szsrc * `4` > szdest * `3`) return `0`;
839
840	char* cur_dest = dest;
841	const unsigned char* cur_src = src;
842
843	char* const limit_dest = dest + szdest;
844	const unsigned char* const limit_src = src + szsrc;
845
846	// Three bytes of data encodes to four characters of cyphertext.
847	// So we can pump through three-byte chunks atomically.
848	if (szsrc >= `3`) { // "limit_src - 3" is UB if szsrc < 3.
849	while (cur_src < limit_src - `3`) { // While we have >= 32 bits.
850	uint32_t in = absl::big_endian::Load32(cur_src) >> `8`;
851
852	cur_dest[`0`] = base64[in >> `18`];
853	in &= `0x3FFFF`;
854	cur_dest[`1`] = base64[in >> `12`];
855	in &= `0xFFF`;
856	cur_dest[`2`] = base64[in >> `6`];
857	in &= `0x3F`;
858	cur_dest[`3`] = base64[in];
859
860	cur_dest += `4`;
861	cur_src += `3`;
862	}
863	}
864	// To save time, we didn't update szdest or szsrc in the loop. So do it now.
865	szdest = limit_dest - cur_dest;
866	szsrc = limit_src - cur_src;
867
868	/ now deal with the tail (<=3 bytes) /
869	switch (szsrc) {
870	case `0`:
871	// Nothing left; nothing more to do.
872	break;
873	case `1`: {
874	// One byte left: this encodes to two characters, and (optionally)
875	// two pad characters to round out the four-character cypherblock.
876	if (szdest < `2`) return `0`;
877	uint32_t in = cur_src[`0`];
878	cur_dest[`0`] = base64[in >> `2`];
879	in &= `0x3`;
880	cur_dest[`1`] = base64[in << `4`];
881	cur_dest += `2`;
882	szdest -= `2`;
883	if (do_padding) {
884	if (szdest < `2`) return `0`;
885	cur_dest[`0`] = kPad64;
886	cur_dest[`1`] = kPad64;
887	cur_dest += `2`;
888	szdest -= `2`;
889	}
890	break;
891	}
892	case `2`: {
893	// Two bytes left: this encodes to three characters, and (optionally)
894	// one pad character to round out the four-character cypherblock.
895	if (szdest < `3`) return `0`;
896	uint32_t in = absl::big_endian::Load16(cur_src);
897	cur_dest[`0`] = base64[in >> `10`];
898	in &= `0x3FF`;
899	cur_dest[`1`] = base64[in >> `4`];
900	in &= `0x00F`;
901	cur_dest[`2`] = base64[in << `2`];
902	cur_dest += `3`;
903	szdest -= `3`;
904	if (do_padding) {
905	if (szdest < `1`) return `0`;
906	cur_dest[`0`] = kPad64;
907	cur_dest += `1`;
908	szdest -= `1`;
909	}
910	break;
911	}
912	case `3`: {
913	// Three bytes left: same as in the big loop above. We can't do this in
914	// the loop because the loop above always reads 4 bytes, and the fourth
915	// byte is past the end of the input.
916	if (szdest < `4`) return `0`;
917	uint32_t in = (cur_src[`0`] << `16`) + absl::big_endian::Load16(cur_src + `1`);
918	cur_dest[`0`] = base64[in >> `18`];
919	in &= `0x3FFFF`;
920	cur_dest[`1`] = base64[in >> `12`];
921	in &= `0xFFF`;
922	cur_dest[`2`] = base64[in >> `6`];
923	in &= `0x3F`;
924	cur_dest[`3`] = base64[in];
925	cur_dest += `4`;
926	szdest -= `4`;
927	break;
928	}
929	default:
930	// Should not be reached: blocks of 4 bytes are handled
931	// in the while loop before this switch statement.
932	ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
933	break;
934	}
935	return (cur_dest - dest);
936	}
937
// Standard base64 alphabet: the character at index i encodes the 6-bit
// value i. Values 62 and 63 are '+' and '/'.
constexpr char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"  // values  0..25
    "abcdefghijklmnopqrstuvwxyz"  // values 26..51
    "0123456789"                  // values 52..61
    "+/";                         // values 62..63
940
// Web-safe base64 alphabet: identical to the standard alphabet except that
// values 62 and 63 are '-' and '_' instead of '+' and '/'.
constexpr char kWebSafeBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"  // values  0..25
    "abcdefghijklmnopqrstuvwxyz"  // values 26..51
    "0123456789"                  // values 52..61
    "-_";                         // values 62..63
943
944	template <typename String>
945	void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
946	bool do_padding, const char* base64_chars) {
947	const size_t calc_escaped_size =
948	CalculateBase64EscapedLenInternal(szsrc, do_padding);
949	strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size);
950
951	const size_t escaped_len = Base64EscapeInternal(
952	src, szsrc, &(*dest)[`0`], dest->size(), base64_chars, do_padding);
953	assert(calc_escaped_size == escaped_len);
954	dest->erase(escaped_len);
955	}
956
957	template <typename String>
958	bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
959	const signed char* unbase64) {
960	// Determine the size of the output std::string. Base64 encodes every 3 bytes into
961	// 4 characters. any leftover chars are added directly for good measure.
962	// This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
963	const size_t dest_len = `3` * (slen / `4`) + (slen % `4`);
964
965	strings_internal::STLStringResizeUninitialized(dest, dest_len);
966
967	// We are getting the destination buffer by getting the beginning of the
968	// std::string and converting it into a char .*
969	size_t len;
970	const bool ok =
971	Base64UnescapeInternal(src, slen, &(*dest)[`0`], dest_len, unbase64, &len);
972	if (!ok) {
973	dest->clear();
974	return false;
975	}
976
977	// could be shorter if there was padding
978	assert(len <= dest_len);
979	dest->erase(len);
980
981	return true;
982	}
983
// Maps a character code (0..255) to the numeric value of the hex digit it
// represents. '0'..'9' map to 0..9; 'A'..'F' and 'a'..'f' map to 10..15.
// Every other character maps to 0. Each row below covers 16 consecutive
// character codes, starting at the offset shown on the right.
/* clang-format off */
constexpr char kHexValue[256] = {
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x00
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x10
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x20
    0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  0,  0,  0,  0,  0,  // 0x30: '0'..'9'
    0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x40: 'A'..'F'
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x50
    0, 10, 11, 12, 13, 14, 15,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x60: 'a'..'f'
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x70
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x80
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0x90
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xA0
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xB0
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xC0
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xD0
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  // 0xE0
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   // 0xF0
};
/* clang-format on */
1004
1005	// This is a templated function so that T can be either a char*
1006	// or a string. This works because we use the [] operator to access
1007	// individual characters at a time.
1008	template <typename T>
1009	void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) {
1010	for (int i = `0`; i < num; i++) {
1011	to[i] = (kHexValue[from[i * `2`] & `0xFF`] << `4`) +
1012	(kHexValue[from[i * `2` + `1`] & `0xFF`]);
1013	}
1014	}
1015
1016	// This is a templated function so that T can be either a char or a*
1017	// std::string.
1018	template <typename T>
1019	void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) {
1020	auto dest_ptr = &dest[`0`];
1021	for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += `2`) {
1022	const char* hex_p = &kHexTable[src_ptr `2`];
1023	std::copy(hex_p, hex_p + `2`, dest_ptr);
1024	}
1025	}
1026
1027	} // namespace
1028
1029	// ----------------------------------------------------------------------
1030	// CUnescape()
1031	//
1032	// See CUnescapeInternal() for implementation details.
1033	// ----------------------------------------------------------------------
1034	bool CUnescape(absl::string_view source, std::string* dest,
1035	std::string* error) {
1036	return CUnescapeInternal(source, kUnescapeNulls, dest, error);
1037	}
1038
1039	std::string CEscape(absl::string_view src) {
1040	std::string dest;
1041	CEscapeAndAppendInternal(src, &dest);
1042	return dest;
1043	}
1044
1045	std::string CHexEscape(absl::string_view src) {
1046	return CEscapeInternal(src, true, false);
1047	}
1048
1049	std::string Utf8SafeCEscape(absl::string_view src) {
1050	return CEscapeInternal(src, false, true);
1051	}
1052
1053	std::string Utf8SafeCHexEscape(absl::string_view src) {
1054	return CEscapeInternal(src, true, true);
1055	}
1056
1057	// ----------------------------------------------------------------------
1058	// Base64Unescape() - base64 decoder
1059	// Base64Escape() - base64 encoder
1060	// WebSafeBase64Unescape() - Google's variation of base64 decoder
1061	// WebSafeBase64Escape() - Google's variation of base64 encoder
1062	//
1063	// Check out
1064	// http://tools.ietf.org/html/rfc2045 for formal description, but what we
1065	// care about is that...
1066	// Take the encoded stuff in groups of 4 characters and turn each
1067	// character into a code 0 to 63 thus:
1068	// A-Z map to 0 to 25
1069	// a-z map to 26 to 51
1070	// 0-9 map to 52 to 61
1071	// +(- for WebSafe) maps to 62
1072	// /(_ for WebSafe) maps to 63
1073	// There will be four numbers, all less than 64 which can be represented
1074	// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
1075	// Arrange the 6 digit binary numbers into three bytes as such:
1076	// aaaaaabb bbbbcccc ccdddddd
1077	// Equals signs (one or two) are used at the end of the encoded block to
1078	// indicate that the text was not an integer multiple of three bytes long.
1079	// ----------------------------------------------------------------------
1080
1081	bool Base64Unescape(absl::string_view src, std::string* dest) {
1082	return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
1083	}
1084
1085	bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) {
1086	return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
1087	}
1088
1089	void Base64Escape(absl::string_view src, std::string* dest) {
1090	Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
1091	src.size(), dest, true, kBase64Chars);
1092	}
1093
1094	void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
1095	Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
1096	src.size(), dest, false, kWebSafeBase64Chars);
1097	}
1098
1099	std::string Base64Escape(absl::string_view src) {
1100	std::string dest;
1101	Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
1102	src.size(), &dest, true, kBase64Chars);
1103	return dest;
1104	}
1105
1106	std::string WebSafeBase64Escape(absl::string_view src) {
1107	std::string dest;
1108	Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()),
1109	src.size(), &dest, false, kWebSafeBase64Chars);
1110	return dest;
1111	}
1112
1113	std::string HexStringToBytes(absl::string_view from) {
1114	std::string result;
1115	const auto num = from.size() / `2`;
1116	strings_internal::STLStringResizeUninitialized(&result, num);
1117	absl::HexStringToBytesInternal<std::string&>(from.data(), result, num);
1118	return result;
1119	}
1120
1121	std::string BytesToHexString(absl::string_view from) {
1122	std::string result;
1123	strings_internal::STLStringResizeUninitialized(&result, `2` * from.size());
1124	absl::BytesToHexStringInternal<std::string&>(
1125	reinterpret_cast<const unsigned char*>(from.data()), result, from.size());
1126	return result;
1127	}
1128
1129	} // namespace absl
1130

Browse the source code of Abseil/strings/escaping.cc