1 | // Copyright 2017 The Abseil Authors. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | // you may not use this file except in compliance with the License. |
5 | // You may obtain a copy of the License at |
6 | // |
7 | // https://www.apache.org/licenses/LICENSE-2.0 |
8 | // |
9 | // Unless required by applicable law or agreed to in writing, software |
10 | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | // See the License for the specific language governing permissions and |
13 | // limitations under the License. |
14 | |
15 | #include "absl/strings/escaping.h" |
16 | |
17 | #include <algorithm> |
18 | #include <cassert> |
19 | #include <cstdint> |
20 | #include <cstring> |
21 | #include <iterator> |
22 | #include <limits> |
23 | #include <string> |
24 | |
25 | #include "absl/base/internal/endian.h" |
26 | #include "absl/base/internal/raw_logging.h" |
27 | #include "absl/base/internal/unaligned_access.h" |
28 | #include "absl/strings/internal/char_map.h" |
29 | #include "absl/strings/internal/resize_uninitialized.h" |
30 | #include "absl/strings/internal/utf8.h" |
31 | #include "absl/strings/str_cat.h" |
32 | #include "absl/strings/str_join.h" |
33 | #include "absl/strings/string_view.h" |
34 | |
35 | namespace absl { |
36 | namespace { |
37 | |
// Digit conversion.
// Lowercase hexadecimal digits, indexed by their numeric value (0-15).
constexpr char kHexChar[] = "0123456789abcdef";

// The two-character lowercase hex spelling of every byte value, concatenated
// in increasing order: the digits for byte value b are kHexTable[2 * b] and
// kHexTable[2 * b + 1]. The declared size is 256 * 2 characters plus the
// string literal's terminating NUL.
constexpr char kHexTable[513] =
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff";

// Value for the leave_nulls_escaped argument of CUnescapeInternal(): with
// 'false', an escape sequence that denotes NUL is decoded to a real '\0' byte
// instead of being copied through in its escaped form.
constexpr bool kUnescapeNulls = false;
61 | |
// Returns true iff `c` is one of the octal digits '0' through '7'.
inline bool is_octal_digit(char c) {
  return c >= '0' && c <= '7';
}
63 | |
64 | inline int hex_digit_to_int(char c) { |
65 | static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, |
66 | "Character set must be ASCII." ); |
67 | assert(absl::ascii_isxdigit(c)); |
68 | int x = static_cast<unsigned char>(c); |
69 | if (x > '9') { |
70 | x += 9; |
71 | } |
72 | return x & 0xf; |
73 | } |
74 | |
75 | inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) { |
76 | if (c >= 0xD800 && c <= 0xDFFF) { |
77 | if (error) { |
78 | *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\" , |
79 | src); |
80 | } |
81 | return true; |
82 | } |
83 | return false; |
84 | } |
85 | |
86 | // ---------------------------------------------------------------------- |
87 | // CUnescapeInternal() |
88 | // Implements both CUnescape() and CUnescapeForNullTerminatedString(). |
89 | // |
90 | // Unescapes C escape sequences and is the reverse of CEscape(). |
91 | // |
92 | // If 'source' is valid, stores the unescaped string and its size in |
93 | // 'dest' and 'dest_len' respectively, and returns true. Otherwise |
94 | // returns false and optionally stores the error description in |
95 | // 'error'. Set 'error' to nullptr to disable error reporting. |
96 | // |
97 | // 'dest' should point to a buffer that is at least as big as 'source'. |
98 | // 'source' and 'dest' may be the same. |
99 | // |
100 | // NOTE: any changes to this function must also be reflected in the older |
101 | // UnescapeCEscapeSequences(). |
102 | // ---------------------------------------------------------------------- |
103 | bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, |
104 | char* dest, ptrdiff_t* dest_len, std::string* error) { |
105 | char* d = dest; |
106 | const char* p = source.data(); |
107 | const char* end = p + source.size(); |
108 | const char* last_byte = end - 1; |
109 | |
110 | // Small optimization for case where source = dest and there's no escaping |
111 | while (p == d && p < end && *p != '\\') p++, d++; |
112 | |
113 | while (p < end) { |
114 | if (*p != '\\') { |
115 | *d++ = *p++; |
116 | } else { |
117 | if (++p > last_byte) { // skip past the '\\' |
118 | if (error) *error = "String cannot end with \\" ; |
119 | return false; |
120 | } |
121 | switch (*p) { |
122 | case 'a': *d++ = '\a'; break; |
123 | case 'b': *d++ = '\b'; break; |
124 | case 'f': *d++ = '\f'; break; |
125 | case 'n': *d++ = '\n'; break; |
126 | case 'r': *d++ = '\r'; break; |
127 | case 't': *d++ = '\t'; break; |
128 | case 'v': *d++ = '\v'; break; |
129 | case '\\': *d++ = '\\'; break; |
130 | case '?': *d++ = '\?'; break; // \? Who knew? |
131 | case '\'': *d++ = '\''; break; |
132 | case '"': *d++ = '\"'; break; |
133 | case '0': |
134 | case '1': |
135 | case '2': |
136 | case '3': |
137 | case '4': |
138 | case '5': |
139 | case '6': |
140 | case '7': { |
141 | // octal digit: 1 to 3 digits |
142 | const char* octal_start = p; |
143 | unsigned int ch = *p - '0'; |
144 | if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0'; |
145 | if (p < last_byte && is_octal_digit(p[1])) |
146 | ch = ch * 8 + *++p - '0'; // now points at last digit |
147 | if (ch > 0xff) { |
148 | if (error) { |
149 | *error = "Value of \\" + |
150 | std::string(octal_start, p + 1 - octal_start) + |
151 | " exceeds 0xff" ; |
152 | } |
153 | return false; |
154 | } |
155 | if ((ch == 0) && leave_nulls_escaped) { |
156 | // Copy the escape sequence for the null character |
157 | const ptrdiff_t octal_size = p + 1 - octal_start; |
158 | *d++ = '\\'; |
159 | memcpy(d, octal_start, octal_size); |
160 | d += octal_size; |
161 | break; |
162 | } |
163 | *d++ = ch; |
164 | break; |
165 | } |
166 | case 'x': |
167 | case 'X': { |
168 | if (p >= last_byte) { |
169 | if (error) *error = "String cannot end with \\x" ; |
170 | return false; |
171 | } else if (!absl::ascii_isxdigit(p[1])) { |
172 | if (error) *error = "\\x cannot be followed by a non-hex digit" ; |
173 | return false; |
174 | } |
175 | unsigned int ch = 0; |
176 | const char* hex_start = p; |
177 | while (p < last_byte && absl::ascii_isxdigit(p[1])) |
178 | // Arbitrarily many hex digits |
179 | ch = (ch << 4) + hex_digit_to_int(*++p); |
180 | if (ch > 0xFF) { |
181 | if (error) { |
182 | *error = "Value of \\" + |
183 | std::string(hex_start, p + 1 - hex_start) + |
184 | " exceeds 0xff" ; |
185 | } |
186 | return false; |
187 | } |
188 | if ((ch == 0) && leave_nulls_escaped) { |
189 | // Copy the escape sequence for the null character |
190 | const ptrdiff_t hex_size = p + 1 - hex_start; |
191 | *d++ = '\\'; |
192 | memcpy(d, hex_start, hex_size); |
193 | d += hex_size; |
194 | break; |
195 | } |
196 | *d++ = ch; |
197 | break; |
198 | } |
199 | case 'u': { |
200 | // \uhhhh => convert 4 hex digits to UTF-8 |
201 | char32_t rune = 0; |
202 | const char* hex_start = p; |
203 | if (p + 4 >= end) { |
204 | if (error) { |
205 | *error = "\\u must be followed by 4 hex digits: \\" + |
206 | std::string(hex_start, p + 1 - hex_start); |
207 | } |
208 | return false; |
209 | } |
210 | for (int i = 0; i < 4; ++i) { |
211 | // Look one char ahead. |
212 | if (absl::ascii_isxdigit(p[1])) { |
213 | rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p. |
214 | } else { |
215 | if (error) { |
216 | *error = "\\u must be followed by 4 hex digits: \\" + |
217 | std::string(hex_start, p + 1 - hex_start); |
218 | } |
219 | return false; |
220 | } |
221 | } |
222 | if ((rune == 0) && leave_nulls_escaped) { |
223 | // Copy the escape sequence for the null character |
224 | *d++ = '\\'; |
225 | memcpy(d, hex_start, 5); // u0000 |
226 | d += 5; |
227 | break; |
228 | } |
229 | if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) { |
230 | return false; |
231 | } |
232 | d += strings_internal::EncodeUTF8Char(d, rune); |
233 | break; |
234 | } |
235 | case 'U': { |
236 | // \Uhhhhhhhh => convert 8 hex digits to UTF-8 |
237 | char32_t rune = 0; |
238 | const char* hex_start = p; |
239 | if (p + 8 >= end) { |
240 | if (error) { |
241 | *error = "\\U must be followed by 8 hex digits: \\" + |
242 | std::string(hex_start, p + 1 - hex_start); |
243 | } |
244 | return false; |
245 | } |
246 | for (int i = 0; i < 8; ++i) { |
247 | // Look one char ahead. |
248 | if (absl::ascii_isxdigit(p[1])) { |
249 | // Don't change rune until we're sure this |
250 | // is within the Unicode limit, but do advance p. |
251 | uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p); |
252 | if (newrune > 0x10FFFF) { |
253 | if (error) { |
254 | *error = "Value of \\" + |
255 | std::string(hex_start, p + 1 - hex_start) + |
256 | " exceeds Unicode limit (0x10FFFF)" ; |
257 | } |
258 | return false; |
259 | } else { |
260 | rune = newrune; |
261 | } |
262 | } else { |
263 | if (error) { |
264 | *error = "\\U must be followed by 8 hex digits: \\" + |
265 | std::string(hex_start, p + 1 - hex_start); |
266 | } |
267 | return false; |
268 | } |
269 | } |
270 | if ((rune == 0) && leave_nulls_escaped) { |
271 | // Copy the escape sequence for the null character |
272 | *d++ = '\\'; |
273 | memcpy(d, hex_start, 9); // U00000000 |
274 | d += 9; |
275 | break; |
276 | } |
277 | if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) { |
278 | return false; |
279 | } |
280 | d += strings_internal::EncodeUTF8Char(d, rune); |
281 | break; |
282 | } |
283 | default: { |
284 | if (error) *error = std::string("Unknown escape sequence: \\" ) + *p; |
285 | return false; |
286 | } |
287 | } |
288 | p++; // read past letter we escaped |
289 | } |
290 | } |
291 | *dest_len = d - dest; |
292 | return true; |
293 | } |
294 | |
295 | // ---------------------------------------------------------------------- |
296 | // CUnescapeInternal() |
297 | // |
298 | // Same as above but uses a std::string for output. 'source' and 'dest' |
299 | // may be the same. |
300 | // ---------------------------------------------------------------------- |
301 | bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, |
302 | std::string* dest, std::string* error) { |
303 | strings_internal::STLStringResizeUninitialized(dest, source.size()); |
304 | |
305 | ptrdiff_t dest_size; |
306 | if (!CUnescapeInternal(source, |
307 | leave_nulls_escaped, |
308 | &(*dest)[0], |
309 | &dest_size, |
310 | error)) { |
311 | return false; |
312 | } |
313 | dest->erase(dest_size); |
314 | return true; |
315 | } |
316 | |
317 | // ---------------------------------------------------------------------- |
318 | // CEscape() |
319 | // CHexEscape() |
320 | // Utf8SafeCEscape() |
321 | // Utf8SafeCHexEscape() |
322 | // Escapes 'src' using C-style escape sequences. This is useful for |
323 | // preparing query flags. The 'Hex' version uses hexadecimal rather than |
324 | // octal sequences. The 'Utf8Safe' version does not touch UTF-8 bytes. |
325 | // |
326 | // Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint(). |
327 | // ---------------------------------------------------------------------- |
328 | std::string CEscapeInternal(absl::string_view src, bool use_hex, |
329 | bool utf8_safe) { |
330 | std::string dest; |
331 | bool last_hex_escape = false; // true if last output char was \xNN. |
332 | |
333 | for (unsigned char c : src) { |
334 | bool is_hex_escape = false; |
335 | switch (c) { |
336 | case '\n': dest.append("\\" "n" ); break; |
337 | case '\r': dest.append("\\" "r" ); break; |
338 | case '\t': dest.append("\\" "t" ); break; |
339 | case '\"': dest.append("\\" "\"" ); break; |
340 | case '\'': dest.append("\\" "'" ); break; |
341 | case '\\': dest.append("\\" "\\" ); break; |
342 | default: |
343 | // Note that if we emit \xNN and the src character after that is a hex |
344 | // digit then that digit must be escaped too to prevent it being |
345 | // interpreted as part of the character code by C. |
346 | if ((!utf8_safe || c < 0x80) && |
347 | (!absl::ascii_isprint(c) || |
348 | (last_hex_escape && absl::ascii_isxdigit(c)))) { |
349 | if (use_hex) { |
350 | dest.append("\\" "x" ); |
351 | dest.push_back(kHexChar[c / 16]); |
352 | dest.push_back(kHexChar[c % 16]); |
353 | is_hex_escape = true; |
354 | } else { |
355 | dest.append("\\" ); |
356 | dest.push_back(kHexChar[c / 64]); |
357 | dest.push_back(kHexChar[(c % 64) / 8]); |
358 | dest.push_back(kHexChar[c % 8]); |
359 | } |
360 | } else { |
361 | dest.push_back(c); |
362 | break; |
363 | } |
364 | } |
365 | last_hex_escape = is_hex_escape; |
366 | } |
367 | |
368 | return dest; |
369 | } |
370 | |
// Indexed by byte value: the number of characters that byte occupies once
// C-escaped with octal sequences. 1 means the byte is copied unchanged, 2
// means a two-character escape (\t, \n, \r, \", \', \\), and 4 means a
// backslash followed by three octal digits.
/* clang-format off */
constexpr char c_escaped_len[256] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t, \n, \r
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // ", '
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
/* clang-format on */
391 | |
392 | // Calculates the length of the C-style escaped version of 'src'. |
393 | // Assumes that non-printable characters are escaped using octal sequences, and |
394 | // that UTF-8 bytes are not handled specially. |
395 | inline size_t CEscapedLength(absl::string_view src) { |
396 | size_t escaped_len = 0; |
397 | for (unsigned char c : src) escaped_len += c_escaped_len[c]; |
398 | return escaped_len; |
399 | } |
400 | |
401 | void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { |
402 | size_t escaped_len = CEscapedLength(src); |
403 | if (escaped_len == src.size()) { |
404 | dest->append(src.data(), src.size()); |
405 | return; |
406 | } |
407 | |
408 | size_t cur_dest_len = dest->size(); |
409 | strings_internal::STLStringResizeUninitialized(dest, |
410 | cur_dest_len + escaped_len); |
411 | char* append_ptr = &(*dest)[cur_dest_len]; |
412 | |
413 | for (unsigned char c : src) { |
414 | int char_len = c_escaped_len[c]; |
415 | if (char_len == 1) { |
416 | *append_ptr++ = c; |
417 | } else if (char_len == 2) { |
418 | switch (c) { |
419 | case '\n': |
420 | *append_ptr++ = '\\'; |
421 | *append_ptr++ = 'n'; |
422 | break; |
423 | case '\r': |
424 | *append_ptr++ = '\\'; |
425 | *append_ptr++ = 'r'; |
426 | break; |
427 | case '\t': |
428 | *append_ptr++ = '\\'; |
429 | *append_ptr++ = 't'; |
430 | break; |
431 | case '\"': |
432 | *append_ptr++ = '\\'; |
433 | *append_ptr++ = '\"'; |
434 | break; |
435 | case '\'': |
436 | *append_ptr++ = '\\'; |
437 | *append_ptr++ = '\''; |
438 | break; |
439 | case '\\': |
440 | *append_ptr++ = '\\'; |
441 | *append_ptr++ = '\\'; |
442 | break; |
443 | } |
444 | } else { |
445 | *append_ptr++ = '\\'; |
446 | *append_ptr++ = '0' + c / 64; |
447 | *append_ptr++ = '0' + (c % 64) / 8; |
448 | *append_ptr++ = '0' + c % 8; |
449 | } |
450 | } |
451 | } |
452 | |
// Decodes the 'szsrc' bytes at 'src_param' as Base64, using 'unbase64' to map
// each input byte to its 6-bit value (negative entries mark non-data bytes).
//
// Arguments:
//   dest, szdest  Output buffer and its capacity. If 'dest' is null, nothing
//                 is written and the input is only validated and measured.
//   len           On success receives the number of decoded bytes (or the
//                 number that would have been produced when 'dest' is null).
//                 Left untouched on failure.
//
// ASCII whitespace in the input is skipped. The data may be followed by '='
// or '.' pad characters: either none at all, or exactly as many as the
// leftover group calls for (2 after two trailing data characters, 1 after
// three).
//
// Returns false if the input contains any other non-data byte, if a single
// data character is left over, if the pad count is wrong, or if the decoded
// output does not fit in 'szdest' bytes.
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
                            size_t szdest, const signed char* unbase64,
                            size_t* len) {
  static const char kPad64Equals = '=';
  static const char kPad64Dot = '.';

  size_t destidx = 0;     // number of output bytes produced so far
  int decode = 0;         // table value of the most recently read byte
  int state = 0;          // data characters held in 'temp' (0..3)
  unsigned int ch = 0;    // most recently read input byte
  unsigned int temp = 0;  // accumulated 6-bit groups not yet written out

  // If "char" is signed by default, using *src as an array index results in
  // accessing negative array elements. Treat the input as a pointer to
  // unsigned char to avoid this.
  const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);

  // The GET_INPUT macro gets the next input character, skipping
  // over any whitespace, and stopping when we reach the end of the
  // input or when we read any non-data character.  The arguments are
  // an arbitrary identifier (used as a label for goto) and the number
  // of data bytes that must remain in the input to avoid aborting the
  // loop.
  //
  // Note that the 'break' inside the macro leaves the enclosing while loop,
  // after recording in 'state' how many data characters were read.
#define GET_INPUT(label, remain)                                \
  label:                                                        \
  --szsrc;                                                      \
  ch = *src++;                                                  \
  decode = unbase64[ch];                                        \
  if (decode < 0) {                                             \
    if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
    state = 4 - remain;                                         \
    break;                                                      \
  }

  // if dest is null, we're just checking to see if it's legal input
  // rather than producing output.  (I suspect this could just be done
  // with a regexp...).  We duplicate the loop so this test can be
  // outside it instead of in every iteration.

  if (dest) {
    // This loop consumes 4 input bytes and produces 3 output bytes
    // per iteration.  We can't know at the start that there is enough
    // data left in the input for a full iteration, so the loop may
    // break out in the middle; if so 'state' will be set to the
    // number of input bytes read.

    while (szsrc >= 4) {
      // We'll start by optimistically assuming that the next four
      // bytes of the input (src[0..3]) are four good data bytes
      // (that is, no nulls, whitespace, padding chars, or illegal
      // chars).  We need to test src[0..2] for nulls individually
      // before constructing temp to preserve the property that we
      // never read past a null in the input (no matter how long
      // szsrc claims the input is).

      if (!src[0] || !src[1] || !src[2] ||
          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
                    (unsigned(unbase64[src[1]]) << 12) |
                    (unsigned(unbase64[src[2]]) << 6) |
                    (unsigned(unbase64[src[3]])))) &
           0x80000000)) {
        // Iff any of those four characters was bad (null, illegal,
        // whitespace, padding), then temp's high bit will be set
        // (because unbase64[] is -1 for all bad characters).
        //
        // We'll back up and resort to the slower decoder, which knows
        // how to handle those cases.

        GET_INPUT(first, 4);
        temp = decode;
        GET_INPUT(second, 3);
        temp = (temp << 6) | decode;
        GET_INPUT(third, 2);
        temp = (temp << 6) | decode;
        GET_INPUT(fourth, 1);
        temp = (temp << 6) | decode;
      } else {
        // We really did have four good data bytes, so advance four
        // characters in the input.

        szsrc -= 4;
        src += 4;
      }

      // temp has 24 bits of input, so write that out as three bytes.

      if (destidx + 3 > szdest) return false;
      dest[destidx + 2] = temp;
      temp >>= 8;
      dest[destidx + 1] = temp;
      temp >>= 8;
      dest[destidx] = temp;
      destidx += 3;
    }
  } else {
    // Same loop as above, minus the stores: only the input is validated and
    // the output length is counted.
    while (szsrc >= 4) {
      if (!src[0] || !src[1] || !src[2] ||
          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
                    (unsigned(unbase64[src[1]]) << 12) |
                    (unsigned(unbase64[src[2]]) << 6) |
                    (unsigned(unbase64[src[3]])))) &
           0x80000000)) {
        GET_INPUT(first_no_dest, 4);
        GET_INPUT(second_no_dest, 3);
        GET_INPUT(third_no_dest, 2);
        GET_INPUT(fourth_no_dest, 1);
      } else {
        szsrc -= 4;
        src += 4;
      }
      destidx += 3;
    }
  }

#undef GET_INPUT

  // if the loop terminated because we read a bad character, return
  // now.
  if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
      !absl::ascii_isspace(ch))
    return false;

  if (ch == kPad64Equals || ch == kPad64Dot) {
    // if we stopped by hitting an '=' or '.', un-read that character -- we'll
    // look at it again when we count to check for the proper number of
    // equals signs at the end.
    ++szsrc;
    --src;
  } else {
    // This loop consumes 1 input byte per iteration.  It's used to
    // clean up the 0-3 input bytes remaining when the first, faster
    // loop finishes.  'temp' contains the data from 'state' input
    // characters read by the first loop.
    while (szsrc > 0) {
      --szsrc;
      ch = *src++;
      decode = unbase64[ch];
      if (decode < 0) {
        if (absl::ascii_isspace(ch)) {
          continue;
        } else if (ch == kPad64Equals || ch == kPad64Dot) {
          // back up one character; we'll read it again when we check
          // for the correct number of pad characters at the end.
          ++szsrc;
          --src;
          break;
        } else {
          return false;
        }
      }

      // Each input character gives us six bits of output.
      temp = (temp << 6) | decode;
      ++state;
      if (state == 4) {
        // If we've accumulated 24 bits of output, write that out as
        // three bytes.
        if (dest) {
          if (destidx + 3 > szdest) return false;
          dest[destidx + 2] = temp;
          temp >>= 8;
          dest[destidx + 1] = temp;
          temp >>= 8;
          dest[destidx] = temp;
        }
        destidx += 3;
        state = 0;
        temp = 0;
      }
    }
  }

  // Process the leftover data contained in 'temp' at the end of the input.
  int expected_equals = 0;
  switch (state) {
    case 0:
      // Nothing left over; output is a multiple of 3 bytes.
      break;

    case 1:
      // Bad input; we have 6 bits left over.
      return false;

    case 2:
      // Produce one more output byte from the 12 input bits we have left.
      if (dest) {
        if (destidx + 1 > szdest) return false;
        temp >>= 4;
        dest[destidx] = temp;
      }
      ++destidx;
      expected_equals = 2;
      break;

    case 3:
      // Produce two more output bytes from the 18 input bits we have left.
      if (dest) {
        if (destidx + 2 > szdest) return false;
        temp >>= 2;
        dest[destidx + 1] = temp;
        temp >>= 8;
        dest[destidx] = temp;
      }
      destidx += 2;
      expected_equals = 1;
      break;

    default:
      // state should have no other values at this point.
      ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d",
                   state);
  }

  // The remainder of the input should be all whitespace, mixed with
  // exactly 0 equals signs, or exactly 'expected_equals' equals
  // signs.  (Always accepting 0 equals signs is an Abseil extension
  // not covered in the RFC, as is accepting dot as the pad character.)

  int equals = 0;
  while (szsrc > 0) {
    if (*src == kPad64Equals || *src == kPad64Dot)
      ++equals;
    else if (!absl::ascii_isspace(*src))
      return false;
    --szsrc;
    ++src;
  }

  const bool ok = (equals == 0 || equals == expected_equals);
  if (ok) *len = destidx;
  return ok;
}
685 | |
686 | // The arrays below were generated by the following code |
687 | // #include <sys/time.h> |
688 | // #include <stdlib.h> |
689 | // #include <string.h> |
690 | // main() |
691 | // { |
692 | // static const char Base64[] = |
693 | // "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
694 | // char* pos; |
695 | // int idx, i, j; |
696 | // printf(" "); |
697 | // for (i = 0; i < 255; i += 8) { |
698 | // for (j = i; j < i + 8; j++) { |
699 | // pos = strchr(Base64, j); |
700 | // if ((pos == nullptr) || (j == 0)) |
701 | // idx = -1; |
702 | // else |
703 | // idx = pos - Base64; |
704 | // if (idx == -1) |
705 | // printf(" %2d, ", idx); |
706 | // else |
707 | // printf(" %2d/*%c*/,", idx, j); |
708 | // } |
709 | // printf("\n "); |
710 | // } |
711 | // } |
712 | // |
713 | // where the value of "Base64[]" was replaced by one of the base-64 conversion |
714 | // tables from the functions below. |
715 | /* clang-format off */ |
// Reverse lookup for the standard Base64 alphabet ("A-Z a-z 0-9 + /"):
// indexed by byte value, each entry is that character's 6-bit value, or -1
// if the byte is not part of the alphabet.
constexpr signed char kUnBase64[] = {
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */,
    52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
    60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
    -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
    07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
    15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
    23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1,
    -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
    33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
    41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
    49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1
};
750 | |
// Reverse lookup for the web-safe Base64 alphabet ("A-Z a-z 0-9 - _"):
// indexed by byte value, each entry is that character's 6-bit value, or -1
// if the byte is not part of the alphabet.
constexpr signed char kUnWebSafeBase64[] = {
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, 62/*-*/, -1, -1,
    52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
    60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1,
    -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/,
    07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
    15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
    23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, 63/*_*/,
    -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
    33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
    41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
    49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1
};
785 | /* clang-format on */ |
786 | |
// Returns the number of characters needed to Base64-encode 'input_len' bytes,
// with or without trailing "=" padding.
//
// Every complete group of three input bytes becomes four output characters.
// Per https://tools.ietf.org/html/rfc3548, the final group is one of:
//   (1) empty (input is a multiple of 24 bits): nothing more is emitted;
//   (2) exactly 8 bits: two characters, plus two "=" when padding;
//   (3) exactly 16 bits: three characters, plus one "=" when padding.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  const size_t full_groups = input_len / 3;
  const size_t tail_bytes = input_len % 3;

  size_t len = full_groups * 4;
  switch (tail_bytes) {
    case 1:
      len += do_padding ? 4 : 2;
      break;
    case 2:
      len += do_padding ? 4 : 3;
      break;
    default:
      // No partial group.
      break;
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
832 | |
833 | size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, |
834 | size_t szdest, const char* base64, |
835 | bool do_padding) { |
836 | static const char kPad64 = '='; |
837 | |
838 | if (szsrc * 4 > szdest * 3) return 0; |
839 | |
840 | char* cur_dest = dest; |
841 | const unsigned char* cur_src = src; |
842 | |
843 | char* const limit_dest = dest + szdest; |
844 | const unsigned char* const limit_src = src + szsrc; |
845 | |
846 | // Three bytes of data encodes to four characters of cyphertext. |
847 | // So we can pump through three-byte chunks atomically. |
848 | if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3. |
849 | while (cur_src < limit_src - 3) { // While we have >= 32 bits. |
850 | uint32_t in = absl::big_endian::Load32(cur_src) >> 8; |
851 | |
852 | cur_dest[0] = base64[in >> 18]; |
853 | in &= 0x3FFFF; |
854 | cur_dest[1] = base64[in >> 12]; |
855 | in &= 0xFFF; |
856 | cur_dest[2] = base64[in >> 6]; |
857 | in &= 0x3F; |
858 | cur_dest[3] = base64[in]; |
859 | |
860 | cur_dest += 4; |
861 | cur_src += 3; |
862 | } |
863 | } |
864 | // To save time, we didn't update szdest or szsrc in the loop. So do it now. |
865 | szdest = limit_dest - cur_dest; |
866 | szsrc = limit_src - cur_src; |
867 | |
868 | /* now deal with the tail (<=3 bytes) */ |
869 | switch (szsrc) { |
870 | case 0: |
871 | // Nothing left; nothing more to do. |
872 | break; |
873 | case 1: { |
874 | // One byte left: this encodes to two characters, and (optionally) |
875 | // two pad characters to round out the four-character cypherblock. |
876 | if (szdest < 2) return 0; |
877 | uint32_t in = cur_src[0]; |
878 | cur_dest[0] = base64[in >> 2]; |
879 | in &= 0x3; |
880 | cur_dest[1] = base64[in << 4]; |
881 | cur_dest += 2; |
882 | szdest -= 2; |
883 | if (do_padding) { |
884 | if (szdest < 2) return 0; |
885 | cur_dest[0] = kPad64; |
886 | cur_dest[1] = kPad64; |
887 | cur_dest += 2; |
888 | szdest -= 2; |
889 | } |
890 | break; |
891 | } |
892 | case 2: { |
893 | // Two bytes left: this encodes to three characters, and (optionally) |
894 | // one pad character to round out the four-character cypherblock. |
895 | if (szdest < 3) return 0; |
896 | uint32_t in = absl::big_endian::Load16(cur_src); |
897 | cur_dest[0] = base64[in >> 10]; |
898 | in &= 0x3FF; |
899 | cur_dest[1] = base64[in >> 4]; |
900 | in &= 0x00F; |
901 | cur_dest[2] = base64[in << 2]; |
902 | cur_dest += 3; |
903 | szdest -= 3; |
904 | if (do_padding) { |
905 | if (szdest < 1) return 0; |
906 | cur_dest[0] = kPad64; |
907 | cur_dest += 1; |
908 | szdest -= 1; |
909 | } |
910 | break; |
911 | } |
912 | case 3: { |
913 | // Three bytes left: same as in the big loop above. We can't do this in |
914 | // the loop because the loop above always reads 4 bytes, and the fourth |
915 | // byte is past the end of the input. |
916 | if (szdest < 4) return 0; |
917 | uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); |
918 | cur_dest[0] = base64[in >> 18]; |
919 | in &= 0x3FFFF; |
920 | cur_dest[1] = base64[in >> 12]; |
921 | in &= 0xFFF; |
922 | cur_dest[2] = base64[in >> 6]; |
923 | in &= 0x3F; |
924 | cur_dest[3] = base64[in]; |
925 | cur_dest += 4; |
926 | szdest -= 4; |
927 | break; |
928 | } |
929 | default: |
930 | // Should not be reached: blocks of 4 bytes are handled |
931 | // in the while loop before this switch statement. |
932 | ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu" , szsrc); |
933 | break; |
934 | } |
935 | return (cur_dest - dest); |
936 | } |
937 | |
// Standard base64 alphabet: 'A'-'Z', 'a'-'z', '0'-'9', then '+' and '/'.
// The character at index i is the encoding of the 6-bit value i.
constexpr char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Web-safe variant of the alphabet: identical except that the last two
// entries are '-' and '_' instead of '+' and '/'.
constexpr char kWebSafeBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
943 | |
944 | template <typename String> |
945 | void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, |
946 | bool do_padding, const char* base64_chars) { |
947 | const size_t calc_escaped_size = |
948 | CalculateBase64EscapedLenInternal(szsrc, do_padding); |
949 | strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size); |
950 | |
951 | const size_t escaped_len = Base64EscapeInternal( |
952 | src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); |
953 | assert(calc_escaped_size == escaped_len); |
954 | dest->erase(escaped_len); |
955 | } |
956 | |
957 | template <typename String> |
958 | bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, |
959 | const signed char* unbase64) { |
960 | // Determine the size of the output std::string. Base64 encodes every 3 bytes into |
961 | // 4 characters. any leftover chars are added directly for good measure. |
962 | // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 |
963 | const size_t dest_len = 3 * (slen / 4) + (slen % 4); |
964 | |
965 | strings_internal::STLStringResizeUninitialized(dest, dest_len); |
966 | |
967 | // We are getting the destination buffer by getting the beginning of the |
968 | // std::string and converting it into a char *. |
969 | size_t len; |
970 | const bool ok = |
971 | Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); |
972 | if (!ok) { |
973 | dest->clear(); |
974 | return false; |
975 | } |
976 | |
977 | // could be shorter if there was padding |
978 | assert(len <= dest_len); |
979 | dest->erase(len); |
980 | |
981 | return true; |
982 | } |
983 | |
/* clang-format off */
// Lookup table indexed by a character value (0..255) that yields the numeric
// value of a hexadecimal digit: '0'-'9' map to 0-9, and both 'A'-'F' and
// 'a'-'f' map to 10-15. Every other index holds 0, so a lookup alone cannot
// tell a non-hex character apart from the digit '0'.
constexpr char kHexValue[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,  // '0'..'9'
    0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 'A'..'F'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 'a'..'f'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* clang-format on */
1004 | |
1005 | // This is a templated function so that T can be either a char* |
1006 | // or a string. This works because we use the [] operator to access |
1007 | // individual characters at a time. |
1008 | template <typename T> |
1009 | void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { |
1010 | for (int i = 0; i < num; i++) { |
1011 | to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) + |
1012 | (kHexValue[from[i * 2 + 1] & 0xFF]); |
1013 | } |
1014 | } |
1015 | |
1016 | // This is a templated function so that T can be either a char* or a |
1017 | // std::string. |
1018 | template <typename T> |
1019 | void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { |
1020 | auto dest_ptr = &dest[0]; |
1021 | for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { |
1022 | const char* hex_p = &kHexTable[*src_ptr * 2]; |
1023 | std::copy(hex_p, hex_p + 2, dest_ptr); |
1024 | } |
1025 | } |
1026 | |
1027 | } // namespace |
1028 | |
1029 | // ---------------------------------------------------------------------- |
1030 | // CUnescape() |
1031 | // |
1032 | // See CUnescapeInternal() for implementation details. |
1033 | // ---------------------------------------------------------------------- |
1034 | bool CUnescape(absl::string_view source, std::string* dest, |
1035 | std::string* error) { |
1036 | return CUnescapeInternal(source, kUnescapeNulls, dest, error); |
1037 | } |
1038 | |
1039 | std::string CEscape(absl::string_view src) { |
1040 | std::string dest; |
1041 | CEscapeAndAppendInternal(src, &dest); |
1042 | return dest; |
1043 | } |
1044 | |
1045 | std::string CHexEscape(absl::string_view src) { |
1046 | return CEscapeInternal(src, true, false); |
1047 | } |
1048 | |
1049 | std::string Utf8SafeCEscape(absl::string_view src) { |
1050 | return CEscapeInternal(src, false, true); |
1051 | } |
1052 | |
1053 | std::string Utf8SafeCHexEscape(absl::string_view src) { |
1054 | return CEscapeInternal(src, true, true); |
1055 | } |
1056 | |
1057 | // ---------------------------------------------------------------------- |
1058 | // Base64Unescape() - base64 decoder |
1059 | // Base64Escape() - base64 encoder |
1060 | // WebSafeBase64Unescape() - Google's variation of base64 decoder |
1061 | // WebSafeBase64Escape() - Google's variation of base64 encoder |
1062 | // |
1063 | // Check out |
1064 | // http://tools.ietf.org/html/rfc2045 for formal description, but what we |
1065 | // care about is that... |
1066 | // Take the encoded stuff in groups of 4 characters and turn each |
1067 | // character into a code 0 to 63 thus: |
1068 | // A-Z map to 0 to 25 |
1069 | // a-z map to 26 to 51 |
1070 | // 0-9 map to 52 to 61 |
1071 | // +(- for WebSafe) maps to 62 |
1072 | // /(_ for WebSafe) maps to 63 |
1073 | // There will be four numbers, all less than 64 which can be represented |
1074 | // by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). |
1075 | // Arrange the 6 digit binary numbers into three bytes as such: |
1076 | // aaaaaabb bbbbcccc ccdddddd |
1077 | // Equals signs (one or two) are used at the end of the encoded block to |
1078 | // indicate that the text was not an integer multiple of three bytes long. |
1079 | // ---------------------------------------------------------------------- |
1080 | |
1081 | bool Base64Unescape(absl::string_view src, std::string* dest) { |
1082 | return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); |
1083 | } |
1084 | |
1085 | bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { |
1086 | return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64); |
1087 | } |
1088 | |
1089 | void Base64Escape(absl::string_view src, std::string* dest) { |
1090 | Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), |
1091 | src.size(), dest, true, kBase64Chars); |
1092 | } |
1093 | |
1094 | void WebSafeBase64Escape(absl::string_view src, std::string* dest) { |
1095 | Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), |
1096 | src.size(), dest, false, kWebSafeBase64Chars); |
1097 | } |
1098 | |
1099 | std::string Base64Escape(absl::string_view src) { |
1100 | std::string dest; |
1101 | Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), |
1102 | src.size(), &dest, true, kBase64Chars); |
1103 | return dest; |
1104 | } |
1105 | |
1106 | std::string WebSafeBase64Escape(absl::string_view src) { |
1107 | std::string dest; |
1108 | Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), |
1109 | src.size(), &dest, false, kWebSafeBase64Chars); |
1110 | return dest; |
1111 | } |
1112 | |
1113 | std::string HexStringToBytes(absl::string_view from) { |
1114 | std::string result; |
1115 | const auto num = from.size() / 2; |
1116 | strings_internal::STLStringResizeUninitialized(&result, num); |
1117 | absl::HexStringToBytesInternal<std::string&>(from.data(), result, num); |
1118 | return result; |
1119 | } |
1120 | |
1121 | std::string BytesToHexString(absl::string_view from) { |
1122 | std::string result; |
1123 | strings_internal::STLStringResizeUninitialized(&result, 2 * from.size()); |
1124 | absl::BytesToHexStringInternal<std::string&>( |
1125 | reinterpret_cast<const unsigned char*>(from.data()), result, from.size()); |
1126 | return result; |
1127 | } |
1128 | |
1129 | } // namespace absl |
1130 | |