| 1 | // Copyright 2017 The Abseil Authors. | 
| 2 | // | 
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
| 4 | // you may not use this file except in compliance with the License. | 
| 5 | // You may obtain a copy of the License at | 
| 6 | // | 
| 7 | //      https://www.apache.org/licenses/LICENSE-2.0 | 
| 8 | // | 
| 9 | // Unless required by applicable law or agreed to in writing, software | 
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, | 
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
| 12 | // See the License for the specific language governing permissions and | 
| 13 | // limitations under the License. | 
| 14 |  | 
| 15 | #include "absl/strings/escaping.h" | 
| 16 |  | 
| 17 | #include <algorithm> | 
| 18 | #include <cassert> | 
| 19 | #include <cstdint> | 
| 20 | #include <cstring> | 
| 21 | #include <iterator> | 
| 22 | #include <limits> | 
| 23 | #include <string> | 
| 24 |  | 
| 25 | #include "absl/base/internal/endian.h" | 
| 26 | #include "absl/base/internal/raw_logging.h" | 
| 27 | #include "absl/base/internal/unaligned_access.h" | 
| 28 | #include "absl/strings/internal/char_map.h" | 
| 29 | #include "absl/strings/internal/resize_uninitialized.h" | 
| 30 | #include "absl/strings/internal/utf8.h" | 
| 31 | #include "absl/strings/str_cat.h" | 
| 32 | #include "absl/strings/str_join.h" | 
| 33 | #include "absl/strings/string_view.h" | 
| 34 |  | 
| 35 | namespace absl { | 
| 36 | namespace { | 
| 37 |  | 
// Digit conversion.
// Lowercase digit characters indexed by digit value (0-15). CEscapeInternal()
// reads it for hex digits and, using only indices 0-7, for octal digits.
constexpr char kHexChar[] = "0123456789abcdef" ;
| 40 |  | 
// Two-character lowercase hex spelling of every byte value, packed back to
// back: the digits for value v are kHexTable[2 * v] and kHexTable[2 * v + 1].
// 256 * 2 characters plus the terminating NUL give the declared size of 513.
// NOTE(review): no use of this table is visible in this part of the file.
constexpr char kHexTable[513] =
    "000102030405060708090a0b0c0d0e0f"
    "101112131415161718191a1b1c1d1e1f"
    "202122232425262728292a2b2c2d2e2f"
    "303132333435363738393a3b3c3d3e3f"
    "404142434445464748494a4b4c4d4e4f"
    "505152535455565758595a5b5c5d5e5f"
    "606162636465666768696a6b6c6d6e6f"
    "707172737475767778797a7b7c7d7e7f"
    "808182838485868788898a8b8c8d8e8f"
    "909192939495969798999a9b9c9d9e9f"
    "a0a1a2a3a4a5a6a7a8a9aaabacadaeaf"
    "b0b1b2b3b4b5b6b7b8b9babbbcbdbebf"
    "c0c1c2c3c4c5c6c7c8c9cacbcccdcecf"
    "d0d1d2d3d4d5d6d7d8d9dadbdcdddedf"
    "e0e1e2e3e4e5e6e7e8e9eaebecedeeef"
    "f0f1f2f3f4f5f6f7f8f9fafbfcfdfeff" ;
| 58 |  | 
// Value for the leave_nulls_escaped argument to CUnescapeInternal(). Passing
// false there means an escape sequence that denotes NUL (e.g. \0 or \x00) is
// decoded to a real NUL byte instead of being copied through still escaped.
constexpr bool kUnescapeNulls = false;
| 61 |  | 
// Returns true iff `c` is one of the characters '0' through '7'.
inline bool is_octal_digit(char c) {
  return c >= '0' && c <= '7';
}
| 63 |  | 
| 64 | inline int hex_digit_to_int(char c) { | 
| 65 |   static_assert('0' == 0x30 && 'A' == 0x41 && 'a' == 0x61, | 
| 66 |                 "Character set must be ASCII." ); | 
| 67 |   assert(absl::ascii_isxdigit(c)); | 
| 68 |   int x = static_cast<unsigned char>(c); | 
| 69 |   if (x > '9') { | 
| 70 |     x += 9; | 
| 71 |   } | 
| 72 |   return x & 0xf; | 
| 73 | } | 
| 74 |  | 
| 75 | inline bool IsSurrogate(char32_t c, absl::string_view src, std::string* error) { | 
| 76 |   if (c >= 0xD800 && c <= 0xDFFF) { | 
| 77 |     if (error) { | 
| 78 |       *error = absl::StrCat("invalid surrogate character (0xD800-DFFF): \\" , | 
| 79 |                             src); | 
| 80 |     } | 
| 81 |     return true; | 
| 82 |   } | 
| 83 |   return false; | 
| 84 | } | 
| 85 |  | 
| 86 | // ---------------------------------------------------------------------- | 
| 87 | // CUnescapeInternal() | 
| 88 | //    Implements both CUnescape() and CUnescapeForNullTerminatedString(). | 
| 89 | // | 
| 90 | //    Unescapes C escape sequences and is the reverse of CEscape(). | 
| 91 | // | 
| 92 | //    If 'source' is valid, stores the unescaped string and its size in | 
| 93 | //    'dest' and 'dest_len' respectively, and returns true. Otherwise | 
| 94 | //    returns false and optionally stores the error description in | 
| 95 | //    'error'. Set 'error' to nullptr to disable error reporting. | 
| 96 | // | 
| 97 | //    'dest' should point to a buffer that is at least as big as 'source'. | 
| 98 | //    'source' and 'dest' may be the same. | 
| 99 | // | 
| 100 | //     NOTE: any changes to this function must also be reflected in the older | 
| 101 | //     UnescapeCEscapeSequences(). | 
| 102 | // ---------------------------------------------------------------------- | 
| 103 | bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, | 
| 104 |                        char* dest, ptrdiff_t* dest_len, std::string* error) { | 
| 105 |   char* d = dest; | 
| 106 |   const char* p = source.data(); | 
| 107 |   const char* end = p + source.size(); | 
| 108 |   const char* last_byte = end - 1; | 
| 109 |  | 
| 110 |   // Small optimization for case where source = dest and there's no escaping | 
| 111 |   while (p == d && p < end && *p != '\\') p++, d++; | 
| 112 |  | 
| 113 |   while (p < end) { | 
| 114 |     if (*p != '\\') { | 
| 115 |       *d++ = *p++; | 
| 116 |     } else { | 
| 117 |       if (++p > last_byte) {  // skip past the '\\' | 
| 118 |         if (error) *error = "String cannot end with \\" ; | 
| 119 |         return false; | 
| 120 |       } | 
| 121 |       switch (*p) { | 
| 122 |         case 'a':  *d++ = '\a';  break; | 
| 123 |         case 'b':  *d++ = '\b';  break; | 
| 124 |         case 'f':  *d++ = '\f';  break; | 
| 125 |         case 'n':  *d++ = '\n';  break; | 
| 126 |         case 'r':  *d++ = '\r';  break; | 
| 127 |         case 't':  *d++ = '\t';  break; | 
| 128 |         case 'v':  *d++ = '\v';  break; | 
| 129 |         case '\\': *d++ = '\\';  break; | 
| 130 |         case '?':  *d++ = '\?';  break;    // \?  Who knew? | 
| 131 |         case '\'': *d++ = '\'';  break; | 
| 132 |         case '"':  *d++ = '\"';  break; | 
| 133 |         case '0': | 
| 134 |         case '1': | 
| 135 |         case '2': | 
| 136 |         case '3': | 
| 137 |         case '4': | 
| 138 |         case '5': | 
| 139 |         case '6': | 
| 140 |         case '7': { | 
| 141 |           // octal digit: 1 to 3 digits | 
| 142 |           const char* octal_start = p; | 
| 143 |           unsigned int ch = *p - '0'; | 
| 144 |           if (p < last_byte && is_octal_digit(p[1])) ch = ch * 8 + *++p - '0'; | 
| 145 |           if (p < last_byte && is_octal_digit(p[1])) | 
| 146 |             ch = ch * 8 + *++p - '0';      // now points at last digit | 
| 147 |           if (ch > 0xff) { | 
| 148 |             if (error) { | 
| 149 |               *error = "Value of \\"  + | 
| 150 |                        std::string(octal_start, p + 1 - octal_start) + | 
| 151 |                        " exceeds 0xff" ; | 
| 152 |             } | 
| 153 |             return false; | 
| 154 |           } | 
| 155 |           if ((ch == 0) && leave_nulls_escaped) { | 
| 156 |             // Copy the escape sequence for the null character | 
| 157 |             const ptrdiff_t octal_size = p + 1 - octal_start; | 
| 158 |             *d++ = '\\'; | 
| 159 |             memcpy(d, octal_start, octal_size); | 
| 160 |             d += octal_size; | 
| 161 |             break; | 
| 162 |           } | 
| 163 |           *d++ = ch; | 
| 164 |           break; | 
| 165 |         } | 
| 166 |         case 'x': | 
| 167 |         case 'X': { | 
| 168 |           if (p >= last_byte) { | 
| 169 |             if (error) *error = "String cannot end with \\x" ; | 
| 170 |             return false; | 
| 171 |           } else if (!absl::ascii_isxdigit(p[1])) { | 
| 172 |             if (error) *error = "\\x cannot be followed by a non-hex digit" ; | 
| 173 |             return false; | 
| 174 |           } | 
| 175 |           unsigned int ch = 0; | 
| 176 |           const char* hex_start = p; | 
| 177 |           while (p < last_byte && absl::ascii_isxdigit(p[1])) | 
| 178 |             // Arbitrarily many hex digits | 
| 179 |             ch = (ch << 4) + hex_digit_to_int(*++p); | 
| 180 |           if (ch > 0xFF) { | 
| 181 |             if (error) { | 
| 182 |               *error = "Value of \\"  + | 
| 183 |                        std::string(hex_start, p + 1 - hex_start) + | 
| 184 |                        " exceeds 0xff" ; | 
| 185 |             } | 
| 186 |             return false; | 
| 187 |           } | 
| 188 |           if ((ch == 0) && leave_nulls_escaped) { | 
| 189 |             // Copy the escape sequence for the null character | 
| 190 |             const ptrdiff_t hex_size = p + 1 - hex_start; | 
| 191 |             *d++ = '\\'; | 
| 192 |             memcpy(d, hex_start, hex_size); | 
| 193 |             d += hex_size; | 
| 194 |             break; | 
| 195 |           } | 
| 196 |           *d++ = ch; | 
| 197 |           break; | 
| 198 |         } | 
| 199 |         case 'u': { | 
| 200 |           // \uhhhh => convert 4 hex digits to UTF-8 | 
| 201 |           char32_t rune = 0; | 
| 202 |           const char* hex_start = p; | 
| 203 |           if (p + 4 >= end) { | 
| 204 |             if (error) { | 
| 205 |               *error = "\\u must be followed by 4 hex digits: \\"  + | 
| 206 |                        std::string(hex_start, p + 1 - hex_start); | 
| 207 |             } | 
| 208 |             return false; | 
| 209 |           } | 
| 210 |           for (int i = 0; i < 4; ++i) { | 
| 211 |             // Look one char ahead. | 
| 212 |             if (absl::ascii_isxdigit(p[1])) { | 
| 213 |               rune = (rune << 4) + hex_digit_to_int(*++p);  // Advance p. | 
| 214 |             } else { | 
| 215 |               if (error) { | 
| 216 |                 *error = "\\u must be followed by 4 hex digits: \\"  + | 
| 217 |                          std::string(hex_start, p + 1 - hex_start); | 
| 218 |               } | 
| 219 |               return false; | 
| 220 |             } | 
| 221 |           } | 
| 222 |           if ((rune == 0) && leave_nulls_escaped) { | 
| 223 |             // Copy the escape sequence for the null character | 
| 224 |             *d++ = '\\'; | 
| 225 |             memcpy(d, hex_start, 5);  // u0000 | 
| 226 |             d += 5; | 
| 227 |             break; | 
| 228 |           } | 
| 229 |           if (IsSurrogate(rune, absl::string_view(hex_start, 5), error)) { | 
| 230 |             return false; | 
| 231 |           } | 
| 232 |           d += strings_internal::EncodeUTF8Char(d, rune); | 
| 233 |           break; | 
| 234 |         } | 
| 235 |         case 'U': { | 
| 236 |           // \Uhhhhhhhh => convert 8 hex digits to UTF-8 | 
| 237 |           char32_t rune = 0; | 
| 238 |           const char* hex_start = p; | 
| 239 |           if (p + 8 >= end) { | 
| 240 |             if (error) { | 
| 241 |               *error = "\\U must be followed by 8 hex digits: \\"  + | 
| 242 |                        std::string(hex_start, p + 1 - hex_start); | 
| 243 |             } | 
| 244 |             return false; | 
| 245 |           } | 
| 246 |           for (int i = 0; i < 8; ++i) { | 
| 247 |             // Look one char ahead. | 
| 248 |             if (absl::ascii_isxdigit(p[1])) { | 
| 249 |               // Don't change rune until we're sure this | 
| 250 |               // is within the Unicode limit, but do advance p. | 
| 251 |               uint32_t newrune = (rune << 4) + hex_digit_to_int(*++p); | 
| 252 |               if (newrune > 0x10FFFF) { | 
| 253 |                 if (error) { | 
| 254 |                   *error = "Value of \\"  + | 
| 255 |                            std::string(hex_start, p + 1 - hex_start) + | 
| 256 |                            " exceeds Unicode limit (0x10FFFF)" ; | 
| 257 |                 } | 
| 258 |                 return false; | 
| 259 |               } else { | 
| 260 |                 rune = newrune; | 
| 261 |               } | 
| 262 |             } else { | 
| 263 |               if (error) { | 
| 264 |                 *error = "\\U must be followed by 8 hex digits: \\"  + | 
| 265 |                          std::string(hex_start, p + 1 - hex_start); | 
| 266 |               } | 
| 267 |               return false; | 
| 268 |             } | 
| 269 |           } | 
| 270 |           if ((rune == 0) && leave_nulls_escaped) { | 
| 271 |             // Copy the escape sequence for the null character | 
| 272 |             *d++ = '\\'; | 
| 273 |             memcpy(d, hex_start, 9);  // U00000000 | 
| 274 |             d += 9; | 
| 275 |             break; | 
| 276 |           } | 
| 277 |           if (IsSurrogate(rune, absl::string_view(hex_start, 9), error)) { | 
| 278 |             return false; | 
| 279 |           } | 
| 280 |           d += strings_internal::EncodeUTF8Char(d, rune); | 
| 281 |           break; | 
| 282 |         } | 
| 283 |         default: { | 
| 284 |           if (error) *error = std::string("Unknown escape sequence: \\" ) + *p; | 
| 285 |           return false; | 
| 286 |         } | 
| 287 |       } | 
| 288 |       p++;                                 // read past letter we escaped | 
| 289 |     } | 
| 290 |   } | 
| 291 |   *dest_len = d - dest; | 
| 292 |   return true; | 
| 293 | } | 
| 294 |  | 
| 295 | // ---------------------------------------------------------------------- | 
| 296 | // CUnescapeInternal() | 
| 297 | // | 
| 298 | //    Same as above but uses a std::string for output. 'source' and 'dest' | 
| 299 | //    may be the same. | 
| 300 | // ---------------------------------------------------------------------- | 
| 301 | bool CUnescapeInternal(absl::string_view source, bool leave_nulls_escaped, | 
| 302 |                        std::string* dest, std::string* error) { | 
| 303 |   strings_internal::STLStringResizeUninitialized(dest, source.size()); | 
| 304 |  | 
| 305 |   ptrdiff_t dest_size; | 
| 306 |   if (!CUnescapeInternal(source, | 
| 307 |                          leave_nulls_escaped, | 
| 308 |                          &(*dest)[0], | 
| 309 |                          &dest_size, | 
| 310 |                          error)) { | 
| 311 |     return false; | 
| 312 |   } | 
| 313 |   dest->erase(dest_size); | 
| 314 |   return true; | 
| 315 | } | 
| 316 |  | 
| 317 | // ---------------------------------------------------------------------- | 
| 318 | // CEscape() | 
| 319 | // CHexEscape() | 
| 320 | // Utf8SafeCEscape() | 
| 321 | // Utf8SafeCHexEscape() | 
| 322 | //    Escapes 'src' using C-style escape sequences.  This is useful for | 
| 323 | //    preparing query flags.  The 'Hex' version uses hexadecimal rather than | 
| 324 | //    octal sequences.  The 'Utf8Safe' version does not touch UTF-8 bytes. | 
| 325 | // | 
| 326 | //    Escaped chars: \n, \r, \t, ", ', \, and !absl::ascii_isprint(). | 
| 327 | // ---------------------------------------------------------------------- | 
| 328 | std::string CEscapeInternal(absl::string_view src, bool use_hex, | 
| 329 |                             bool utf8_safe) { | 
| 330 |   std::string dest; | 
| 331 |   bool last_hex_escape = false;  // true if last output char was \xNN. | 
| 332 |  | 
| 333 |   for (unsigned char c : src) { | 
| 334 |     bool is_hex_escape = false; | 
| 335 |     switch (c) { | 
| 336 |       case '\n': dest.append("\\"  "n" ); break; | 
| 337 |       case '\r': dest.append("\\"  "r" ); break; | 
| 338 |       case '\t': dest.append("\\"  "t" ); break; | 
| 339 |       case '\"': dest.append("\\"  "\"" ); break; | 
| 340 |       case '\'': dest.append("\\"  "'" ); break; | 
| 341 |       case '\\': dest.append("\\"  "\\" ); break; | 
| 342 |       default: | 
| 343 |         // Note that if we emit \xNN and the src character after that is a hex | 
| 344 |         // digit then that digit must be escaped too to prevent it being | 
| 345 |         // interpreted as part of the character code by C. | 
| 346 |         if ((!utf8_safe || c < 0x80) && | 
| 347 |             (!absl::ascii_isprint(c) || | 
| 348 |              (last_hex_escape && absl::ascii_isxdigit(c)))) { | 
| 349 |           if (use_hex) { | 
| 350 |             dest.append("\\"  "x" ); | 
| 351 |             dest.push_back(kHexChar[c / 16]); | 
| 352 |             dest.push_back(kHexChar[c % 16]); | 
| 353 |             is_hex_escape = true; | 
| 354 |           } else { | 
| 355 |             dest.append("\\" ); | 
| 356 |             dest.push_back(kHexChar[c / 64]); | 
| 357 |             dest.push_back(kHexChar[(c % 64) / 8]); | 
| 358 |             dest.push_back(kHexChar[c % 8]); | 
| 359 |           } | 
| 360 |         } else { | 
| 361 |           dest.push_back(c); | 
| 362 |           break; | 
| 363 |         } | 
| 364 |     } | 
| 365 |     last_hex_escape = is_hex_escape; | 
| 366 |   } | 
| 367 |  | 
| 368 |   return dest; | 
| 369 | } | 
| 370 |  | 
/* clang-format off */
// Length, in characters, of the C-escaped form of each byte value as written
// by CEscapeAndAppendInternal(): 1 = byte copied unchanged, 2 = backslash plus
// one character (\t, \n, \r, \", \', \\), 4 = backslash plus three octal
// digits. Indexed by the byte value (0-255).
constexpr char c_escaped_len[256] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4,  // \t, \n, \r
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1,  // ", '
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // '0'..'9'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'A'..'O'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1,  // 'P'..'Z', '\'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 'a'..'o'
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4,  // 'p'..'z', DEL
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
};
/* clang-format on */
| 391 |  | 
| 392 | // Calculates the length of the C-style escaped version of 'src'. | 
| 393 | // Assumes that non-printable characters are escaped using octal sequences, and | 
| 394 | // that UTF-8 bytes are not handled specially. | 
| 395 | inline size_t CEscapedLength(absl::string_view src) { | 
| 396 |   size_t escaped_len = 0; | 
| 397 |   for (unsigned char c : src) escaped_len += c_escaped_len[c]; | 
| 398 |   return escaped_len; | 
| 399 | } | 
| 400 |  | 
| 401 | void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { | 
| 402 |   size_t escaped_len = CEscapedLength(src); | 
| 403 |   if (escaped_len == src.size()) { | 
| 404 |     dest->append(src.data(), src.size()); | 
| 405 |     return; | 
| 406 |   } | 
| 407 |  | 
| 408 |   size_t cur_dest_len = dest->size(); | 
| 409 |   strings_internal::STLStringResizeUninitialized(dest, | 
| 410 |                                                  cur_dest_len + escaped_len); | 
| 411 |   char* append_ptr = &(*dest)[cur_dest_len]; | 
| 412 |  | 
| 413 |   for (unsigned char c : src) { | 
| 414 |     int char_len = c_escaped_len[c]; | 
| 415 |     if (char_len == 1) { | 
| 416 |       *append_ptr++ = c; | 
| 417 |     } else if (char_len == 2) { | 
| 418 |       switch (c) { | 
| 419 |         case '\n': | 
| 420 |           *append_ptr++ = '\\'; | 
| 421 |           *append_ptr++ = 'n'; | 
| 422 |           break; | 
| 423 |         case '\r': | 
| 424 |           *append_ptr++ = '\\'; | 
| 425 |           *append_ptr++ = 'r'; | 
| 426 |           break; | 
| 427 |         case '\t': | 
| 428 |           *append_ptr++ = '\\'; | 
| 429 |           *append_ptr++ = 't'; | 
| 430 |           break; | 
| 431 |         case '\"': | 
| 432 |           *append_ptr++ = '\\'; | 
| 433 |           *append_ptr++ = '\"'; | 
| 434 |           break; | 
| 435 |         case '\'': | 
| 436 |           *append_ptr++ = '\\'; | 
| 437 |           *append_ptr++ = '\''; | 
| 438 |           break; | 
| 439 |         case '\\': | 
| 440 |           *append_ptr++ = '\\'; | 
| 441 |           *append_ptr++ = '\\'; | 
| 442 |           break; | 
| 443 |       } | 
| 444 |     } else { | 
| 445 |       *append_ptr++ = '\\'; | 
| 446 |       *append_ptr++ = '0' + c / 64; | 
| 447 |       *append_ptr++ = '0' + (c % 64) / 8; | 
| 448 |       *append_ptr++ = '0' + c % 8; | 
| 449 |     } | 
| 450 |   } | 
| 451 | } | 
| 452 |  | 
// Decodes base64 text, or merely validates and measures it when 'dest' is
// null.
//
// Parameters:
//   src_param, szsrc  the encoded input and its length in bytes.
//   dest, szdest      output buffer and its capacity in bytes. If 'dest' is
//                     null nothing is written and 'szdest' is not consulted.
//   unbase64          lookup table indexed by input byte value; a negative
//                     entry marks a byte that is not a data character.
//   len               on success receives the number of decoded bytes (the
//                     number that would have been produced when 'dest' is
//                     null). Not written on failure.
//
// Whitespace (per absl::ascii_isspace) is skipped. Padding may use '=' or
// '.', and is accepted when either no pad characters or exactly the expected
// number of them are present.
//
// Returns false if the input contains a character that is neither data,
// whitespace nor padding; if the decoded data would not fit in 'szdest'
// bytes; if exactly one data character is left over at the end; or if the
// number of pad characters is wrong.
bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
                            size_t szdest, const signed char* unbase64,
                            size_t* len) {
  static const char kPad64Equals = '=';
  static const char kPad64Dot = '.';

  size_t destidx = 0;     // number of output bytes produced (or counted)
  int decode = 0;         // unbase64[] value of the most recent input byte
  int state = 0;          // data characters accumulated in 'temp' (0-3)
  unsigned int ch = 0;    // most recent input byte
  unsigned int temp = 0;  // bit accumulator, 6 bits per data character

  // If "char" is signed by default, using *src as an array index results in
  // accessing negative array elements. Treat the input as a pointer to
  // unsigned char to avoid this.
  const unsigned char* src = reinterpret_cast<const unsigned char*>(src_param);

  // The GET_INPUT macro gets the next input character, skipping
  // over any whitespace, and stopping when we reach the end of the
  // string or when we read any non-data character.  The arguments are
  // an arbitrary identifier (used as a label for goto) and the number
  // of data bytes that must remain in the input to avoid aborting the
  // loop.  When it stops, it records in 'state' how many data characters
  // of the current group were already read (4 - remain) and breaks out of
  // the enclosing while loop.
#define GET_INPUT(label, remain)                                \
  label:                                                        \
  --szsrc;                                                      \
  ch = *src++;                                                  \
  decode = unbase64[ch];                                        \
  if (decode < 0) {                                             \
    if (absl::ascii_isspace(ch) && szsrc >= remain) goto label; \
    state = 4 - remain;                                         \
    break;                                                      \
  }

  // if dest is null, we're just checking to see if it's legal input
  // rather than producing output.  (I suspect this could just be done
  // with a regexp...).  We duplicate the loop so this test can be
  // outside it instead of in every iteration.

  if (dest) {
    // This loop consumes 4 input bytes and produces 3 output bytes
    // per iteration.  We can't know at the start that there is enough
    // data left in the string for a full iteration, so the loop may
    // break out in the middle; if so 'state' will be set to the
    // number of input bytes read.

    while (szsrc >= 4) {
      // We'll start by optimistically assuming that the next four
      // bytes of the string (src[0..3]) are four good data bytes
      // (that is, no nulls, whitespace, padding chars, or illegal
      // chars).  We need to test src[0..2] for nulls individually
      // before constructing temp to preserve the property that we
      // never read past a null in the string (no matter how long
      // szsrc claims the string is).

      if (!src[0] || !src[1] || !src[2] ||
          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
                    (unsigned(unbase64[src[1]]) << 12) |
                    (unsigned(unbase64[src[2]]) << 6) |
                    (unsigned(unbase64[src[3]])))) &
           0x80000000)) {
        // Iff any of those four characters was bad (null, illegal,
        // whitespace, padding), then temp's high bit will be set
        // (because unbase64[] is -1 for all bad characters).
        //
        // We'll back up and resort to the slower decoder, which knows
        // how to handle those cases.

        GET_INPUT(first, 4);
        temp = decode;
        GET_INPUT(second, 3);
        temp = (temp << 6) | decode;
        GET_INPUT(third, 2);
        temp = (temp << 6) | decode;
        GET_INPUT(fourth, 1);
        temp = (temp << 6) | decode;
      } else {
        // We really did have four good data bytes, so advance four
        // characters in the string.

        szsrc -= 4;
        src += 4;
      }

      // temp has 24 bits of input, so write that out as three bytes.

      if (destidx + 3 > szdest) return false;
      dest[destidx + 2] = temp;
      temp >>= 8;
      dest[destidx + 1] = temp;
      temp >>= 8;
      dest[destidx] = temp;
      destidx += 3;
    }
  } else {
    // Same loop as above, but only counting output bytes.
    while (szsrc >= 4) {
      if (!src[0] || !src[1] || !src[2] ||
          ((temp = ((unsigned(unbase64[src[0]]) << 18) |
                    (unsigned(unbase64[src[1]]) << 12) |
                    (unsigned(unbase64[src[2]]) << 6) |
                    (unsigned(unbase64[src[3]])))) &
           0x80000000)) {
        GET_INPUT(first_no_dest, 4);
        GET_INPUT(second_no_dest, 3);
        GET_INPUT(third_no_dest, 2);
        GET_INPUT(fourth_no_dest, 1);
      } else {
        szsrc -= 4;
        src += 4;
      }
      destidx += 3;
    }
  }

#undef GET_INPUT

  // if the loop terminated because we read a bad character, return
  // now.
  if (decode < 0 && ch != kPad64Equals && ch != kPad64Dot &&
      !absl::ascii_isspace(ch))
    return false;

  if (ch == kPad64Equals || ch == kPad64Dot) {
    // if we stopped by hitting an '=' or '.', un-read that character -- we'll
    // look at it again when we count to check for the proper number of
    // equals signs at the end.
    ++szsrc;
    --src;
  } else {
    // This loop consumes 1 input byte per iteration.  It's used to
    // clean up the 0-3 input bytes remaining when the first, faster
    // loop finishes.  'temp' contains the data from 'state' input
    // characters read by the first loop.
    while (szsrc > 0) {
      --szsrc;
      ch = *src++;
      decode = unbase64[ch];
      if (decode < 0) {
        if (absl::ascii_isspace(ch)) {
          continue;
        } else if (ch == kPad64Equals || ch == kPad64Dot) {
          // back up one character; we'll read it again when we check
          // for the correct number of pad characters at the end.
          ++szsrc;
          --src;
          break;
        } else {
          return false;
        }
      }

      // Each input character gives us six bits of output.
      temp = (temp << 6) | decode;
      ++state;
      if (state == 4) {
        // If we've accumulated 24 bits of output, write that out as
        // three bytes.
        if (dest) {
          if (destidx + 3 > szdest) return false;
          dest[destidx + 2] = temp;
          temp >>= 8;
          dest[destidx + 1] = temp;
          temp >>= 8;
          dest[destidx] = temp;
        }
        destidx += 3;
        state = 0;
        temp = 0;
      }
    }
  }

  // Process the leftover data contained in 'temp' at the end of the input.
  int expected_equals = 0;
  switch (state) {
    case 0:
      // Nothing left over; output is a multiple of 3 bytes.
      break;

    case 1:
      // Bad input; we have 6 bits left over.
      return false;

    case 2:
      // Produce one more output byte from the 12 input bits we have left.
      if (dest) {
        if (destidx + 1 > szdest) return false;
        temp >>= 4;
        dest[destidx] = temp;
      }
      ++destidx;
      expected_equals = 2;
      break;

    case 3:
      // Produce two more output bytes from the 18 input bits we have left.
      if (dest) {
        if (destidx + 2 > szdest) return false;
        temp >>= 2;
        dest[destidx + 1] = temp;
        temp >>= 8;
        dest[destidx] = temp;
      }
      destidx += 2;
      expected_equals = 1;
      break;

    default:
      // state should have no other values at this point.
      ABSL_RAW_LOG(FATAL, "This can't happen; base64 decoder state = %d" ,
                   state);
  }

  // The remainder of the string should be all whitespace, mixed with
  // exactly 0 equals signs, or exactly 'expected_equals' equals
  // signs.  (Always accepting 0 equals signs is an Abseil extension
  // not covered in the RFC, as is accepting dot as the pad character.)

  int equals = 0;
  while (szsrc > 0) {
    if (*src == kPad64Equals || *src == kPad64Dot)
      ++equals;
    else if (!absl::ascii_isspace(*src))
      return false;
    --szsrc;
    ++src;
  }

  const bool ok = (equals == 0 || equals == expected_equals);
  if (ok) *len = destidx;
  return ok;
}
| 685 |  | 
| 686 | // The arrays below were generated by the following code | 
| 687 | // #include <sys/time.h> | 
| 688 | // #include <stdlib.h> | 
| 689 | // #include <string.h> | 
| 690 | // main() | 
| 691 | // { | 
| 692 | //   static const char Base64[] = | 
| 693 | //     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; | 
| 694 | //   char* pos; | 
| 695 | //   int idx, i, j; | 
| 696 | //   printf("    "); | 
| 697 | //   for (i = 0; i < 255; i += 8) { | 
| 698 | //     for (j = i; j < i + 8; j++) { | 
| 699 | //       pos = strchr(Base64, j); | 
| 700 | //       if ((pos == nullptr) || (j == 0)) | 
| 701 | //         idx = -1; | 
| 702 | //       else | 
| 703 | //         idx = pos - Base64; | 
| 704 | //       if (idx == -1) | 
| 705 | //         printf(" %2d,     ", idx); | 
| 706 | //       else | 
| 707 | //         printf(" %2d/*%c*/,", idx, j); | 
| 708 | //     } | 
| 709 | //     printf("\n    "); | 
| 710 | //   } | 
| 711 | // } | 
| 712 | // | 
| 713 | // where the value of "Base64[]" was replaced by one of the base-64 conversion | 
| 714 | // tables from the functions below. | 
| 715 | /* clang-format off */ | 
// Reverse lookup for the standard base64 alphabet.  The table has one entry
// per possible byte value (256 in total): the entry at a character's code is
// that character's 6-bit value, and every byte outside the alphabet maps to
// -1.  Each row below covers 16 consecutive byte values.
constexpr signed char kUnBase64[] = {
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F: '+' -> 62, '/' -> 63
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
    // 0x30 - 0x3F: '0'..'9' -> 52..61
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4F: 'A'..'O' -> 0..14
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    // 0x50 - 0x5F: 'P'..'Z' -> 15..25
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
    // 0x60 - 0x6F: 'a'..'o' -> 26..40
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    // 0x70 - 0x7F: 'p'..'z' -> 41..51
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    // 0x80 - 0xFF: nothing above ASCII belongs to the alphabet.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
| 750 |  | 
// Reverse lookup for the web-safe base64 alphabet, which uses '-' and '_'
// where the standard alphabet uses '+' and '/'.  The table has one entry per
// possible byte value (256 in total): the entry at a character's code is that
// character's 6-bit value, and every byte outside the alphabet maps to -1.
// Each row below covers 16 consecutive byte values.
constexpr signed char kUnWebSafeBase64[] = {
    // 0x00 - 0x0F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x10 - 0x1F
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    // 0x20 - 0x2F: '-' -> 62
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1,
    // 0x30 - 0x3F: '0'..'9' -> 52..61
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    // 0x40 - 0x4F: 'A'..'O' -> 0..14
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    // 0x50 - 0x5F: 'P'..'Z' -> 15..25, '_' -> 63
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
    // 0x60 - 0x6F: 'a'..'o' -> 26..40
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    // 0x70 - 0x7F: 'p'..'z' -> 41..51
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    // 0x80 - 0xFF: nothing above ASCII belongs to the alphabet.
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
| 785 | /* clang-format on */ | 
| 786 |  | 
// Returns the number of characters produced by base64-encoding `input_len`
// bytes, with or without trailing '=' padding.
//
// Every complete 3-byte group becomes 4 output characters.  Per RFC 3548
// (https://tools.ietf.org/html/rfc3548), the final, possibly short, group is
// handled as follows:
//   * 0 bytes left over: nothing more is emitted;
//   * 1 byte (8 bits) left over: two characters, plus two '=' when padding;
//   * 2 bytes (16 bits) left over: three characters, plus one '=' when
//     padding.
size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
  const size_t whole_groups = input_len / 3;
  const size_t leftover_bytes = input_len % 3;

  size_t len = whole_groups * 4;
  switch (leftover_bytes) {
    case 1:
      // Two data characters, optionally rounded up to a full quantum.
      len += do_padding ? 4 : 2;
      break;
    case 2:
      // Three data characters, optionally rounded up to a full quantum.
      len += do_padding ? 4 : 3;
      break;
    default:
      // The input is an exact multiple of three bytes.
      break;
  }

  assert(len >= input_len);  // make sure we didn't overflow
  return len;
}
| 832 |  | 
| 833 | size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, | 
| 834 |                             size_t szdest, const char* base64, | 
| 835 |                             bool do_padding) { | 
| 836 |   static const char kPad64 = '='; | 
| 837 |  | 
| 838 |   if (szsrc * 4 > szdest * 3) return 0; | 
| 839 |  | 
| 840 |   char* cur_dest = dest; | 
| 841 |   const unsigned char* cur_src = src; | 
| 842 |  | 
| 843 |   char* const limit_dest = dest + szdest; | 
| 844 |   const unsigned char* const limit_src = src + szsrc; | 
| 845 |  | 
| 846 |   // Three bytes of data encodes to four characters of cyphertext. | 
| 847 |   // So we can pump through three-byte chunks atomically. | 
| 848 |   if (szsrc >= 3) {  // "limit_src - 3" is UB if szsrc < 3. | 
| 849 |     while (cur_src < limit_src - 3) {  // While we have >= 32 bits. | 
| 850 |       uint32_t in = absl::big_endian::Load32(cur_src) >> 8; | 
| 851 |  | 
| 852 |       cur_dest[0] = base64[in >> 18]; | 
| 853 |       in &= 0x3FFFF; | 
| 854 |       cur_dest[1] = base64[in >> 12]; | 
| 855 |       in &= 0xFFF; | 
| 856 |       cur_dest[2] = base64[in >> 6]; | 
| 857 |       in &= 0x3F; | 
| 858 |       cur_dest[3] = base64[in]; | 
| 859 |  | 
| 860 |       cur_dest += 4; | 
| 861 |       cur_src += 3; | 
| 862 |     } | 
| 863 |   } | 
| 864 |   // To save time, we didn't update szdest or szsrc in the loop.  So do it now. | 
| 865 |   szdest = limit_dest - cur_dest; | 
| 866 |   szsrc = limit_src - cur_src; | 
| 867 |  | 
| 868 |   /* now deal with the tail (<=3 bytes) */ | 
| 869 |   switch (szsrc) { | 
| 870 |     case 0: | 
| 871 |       // Nothing left; nothing more to do. | 
| 872 |       break; | 
| 873 |     case 1: { | 
| 874 |       // One byte left: this encodes to two characters, and (optionally) | 
| 875 |       // two pad characters to round out the four-character cypherblock. | 
| 876 |       if (szdest < 2) return 0; | 
| 877 |       uint32_t in = cur_src[0]; | 
| 878 |       cur_dest[0] = base64[in >> 2]; | 
| 879 |       in &= 0x3; | 
| 880 |       cur_dest[1] = base64[in << 4]; | 
| 881 |       cur_dest += 2; | 
| 882 |       szdest -= 2; | 
| 883 |       if (do_padding) { | 
| 884 |         if (szdest < 2) return 0; | 
| 885 |         cur_dest[0] = kPad64; | 
| 886 |         cur_dest[1] = kPad64; | 
| 887 |         cur_dest += 2; | 
| 888 |         szdest -= 2; | 
| 889 |       } | 
| 890 |       break; | 
| 891 |     } | 
| 892 |     case 2: { | 
| 893 |       // Two bytes left: this encodes to three characters, and (optionally) | 
| 894 |       // one pad character to round out the four-character cypherblock. | 
| 895 |       if (szdest < 3) return 0; | 
| 896 |       uint32_t in = absl::big_endian::Load16(cur_src); | 
| 897 |       cur_dest[0] = base64[in >> 10]; | 
| 898 |       in &= 0x3FF; | 
| 899 |       cur_dest[1] = base64[in >> 4]; | 
| 900 |       in &= 0x00F; | 
| 901 |       cur_dest[2] = base64[in << 2]; | 
| 902 |       cur_dest += 3; | 
| 903 |       szdest -= 3; | 
| 904 |       if (do_padding) { | 
| 905 |         if (szdest < 1) return 0; | 
| 906 |         cur_dest[0] = kPad64; | 
| 907 |         cur_dest += 1; | 
| 908 |         szdest -= 1; | 
| 909 |       } | 
| 910 |       break; | 
| 911 |     } | 
| 912 |     case 3: { | 
| 913 |       // Three bytes left: same as in the big loop above.  We can't do this in | 
| 914 |       // the loop because the loop above always reads 4 bytes, and the fourth | 
| 915 |       // byte is past the end of the input. | 
| 916 |       if (szdest < 4) return 0; | 
| 917 |       uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1); | 
| 918 |       cur_dest[0] = base64[in >> 18]; | 
| 919 |       in &= 0x3FFFF; | 
| 920 |       cur_dest[1] = base64[in >> 12]; | 
| 921 |       in &= 0xFFF; | 
| 922 |       cur_dest[2] = base64[in >> 6]; | 
| 923 |       in &= 0x3F; | 
| 924 |       cur_dest[3] = base64[in]; | 
| 925 |       cur_dest += 4; | 
| 926 |       szdest -= 4; | 
| 927 |       break; | 
| 928 |     } | 
| 929 |     default: | 
| 930 |       // Should not be reached: blocks of 4 bytes are handled | 
| 931 |       // in the while loop before this switch statement. | 
| 932 |       ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu" , szsrc); | 
| 933 |       break; | 
| 934 |   } | 
| 935 |   return (cur_dest - dest); | 
| 936 | } | 
| 937 |  | 
// Standard base64 alphabet: the character at index i encodes the 6-bit
// value i.  Written as three adjacent literals (upper case, lower case,
// digits plus '+' and '/') that the compiler concatenates into one string.
constexpr char kBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
| 940 |  | 
// Web-safe base64 alphabet: identical to the standard one except that the
// values 62 and 63 are encoded as '-' and '_'.  Written as three adjacent
// literals that the compiler concatenates into one string.
constexpr char kWebSafeBase64Chars[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789-_";
| 943 |  | 
| 944 | template <typename String> | 
| 945 | void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest, | 
| 946 |                           bool do_padding, const char* base64_chars) { | 
| 947 |   const size_t calc_escaped_size = | 
| 948 |       CalculateBase64EscapedLenInternal(szsrc, do_padding); | 
| 949 |   strings_internal::STLStringResizeUninitialized(dest, calc_escaped_size); | 
| 950 |  | 
| 951 |   const size_t escaped_len = Base64EscapeInternal( | 
| 952 |       src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding); | 
| 953 |   assert(calc_escaped_size == escaped_len); | 
| 954 |   dest->erase(escaped_len); | 
| 955 | } | 
| 956 |  | 
| 957 | template <typename String> | 
| 958 | bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, | 
| 959 |                             const signed char* unbase64) { | 
| 960 |   // Determine the size of the output std::string.  Base64 encodes every 3 bytes into | 
| 961 |   // 4 characters.  any leftover chars are added directly for good measure. | 
| 962 |   // This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548 | 
| 963 |   const size_t dest_len = 3 * (slen / 4) + (slen % 4); | 
| 964 |  | 
| 965 |   strings_internal::STLStringResizeUninitialized(dest, dest_len); | 
| 966 |  | 
| 967 |   // We are getting the destination buffer by getting the beginning of the | 
| 968 |   // std::string and converting it into a char *. | 
| 969 |   size_t len; | 
| 970 |   const bool ok = | 
| 971 |       Base64UnescapeInternal(src, slen, &(*dest)[0], dest_len, unbase64, &len); | 
| 972 |   if (!ok) { | 
| 973 |     dest->clear(); | 
| 974 |     return false; | 
| 975 |   } | 
| 976 |  | 
| 977 |   // could be shorter if there was padding | 
| 978 |   assert(len <= dest_len); | 
| 979 |   dest->erase(len); | 
| 980 |  | 
| 981 |   return true; | 
| 982 | } | 
| 983 |  | 
| 984 | /* clang-format off */ | 
// Numeric value of each byte when read as a hexadecimal digit.  '0'..'9' map
// to 0..9 and both 'A'..'F' and 'a'..'f' map to 10..15; every other byte maps
// to 0.  Each row covers 16 consecutive byte values.
constexpr char kHexValue[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0x00
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0x10
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0x20
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0,         // 0x30: '0'..'9'
    0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 0x40: 'A'..'F'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0x50
    0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0,   // 0x60: 'a'..'f'
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0x70
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0x80
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0x90
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0xA0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0xB0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0xC0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0xD0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // 0xE0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0          // 0xF0
};
| 1003 | /* clang-format on */ | 
| 1004 |  | 
| 1005 | // This is a templated function so that T can be either a char* | 
| 1006 | // or a string.  This works because we use the [] operator to access | 
| 1007 | // individual characters at a time. | 
| 1008 | template <typename T> | 
| 1009 | void HexStringToBytesInternal(const char* from, T to, ptrdiff_t num) { | 
| 1010 |   for (int i = 0; i < num; i++) { | 
| 1011 |     to[i] = (kHexValue[from[i * 2] & 0xFF] << 4) + | 
| 1012 |             (kHexValue[from[i * 2 + 1] & 0xFF]); | 
| 1013 |   } | 
| 1014 | } | 
| 1015 |  | 
| 1016 | // This is a templated function so that T can be either a char* or a | 
| 1017 | // std::string. | 
| 1018 | template <typename T> | 
| 1019 | void BytesToHexStringInternal(const unsigned char* src, T dest, ptrdiff_t num) { | 
| 1020 |   auto dest_ptr = &dest[0]; | 
| 1021 |   for (auto src_ptr = src; src_ptr != (src + num); ++src_ptr, dest_ptr += 2) { | 
| 1022 |     const char* hex_p = &kHexTable[*src_ptr * 2]; | 
| 1023 |     std::copy(hex_p, hex_p + 2, dest_ptr); | 
| 1024 |   } | 
| 1025 | } | 
| 1026 |  | 
| 1027 | }  // namespace | 
| 1028 |  | 
| 1029 | // ---------------------------------------------------------------------- | 
| 1030 | // CUnescape() | 
| 1031 | // | 
| 1032 | // See CUnescapeInternal() for implementation details. | 
| 1033 | // ---------------------------------------------------------------------- | 
| 1034 | bool CUnescape(absl::string_view source, std::string* dest, | 
| 1035 |                std::string* error) { | 
| 1036 |   return CUnescapeInternal(source, kUnescapeNulls, dest, error); | 
| 1037 | } | 
| 1038 |  | 
| 1039 | std::string CEscape(absl::string_view src) { | 
| 1040 |   std::string dest; | 
| 1041 |   CEscapeAndAppendInternal(src, &dest); | 
| 1042 |   return dest; | 
| 1043 | } | 
| 1044 |  | 
| 1045 | std::string CHexEscape(absl::string_view src) { | 
| 1046 |   return CEscapeInternal(src, true, false); | 
| 1047 | } | 
| 1048 |  | 
| 1049 | std::string Utf8SafeCEscape(absl::string_view src) { | 
| 1050 |   return CEscapeInternal(src, false, true); | 
| 1051 | } | 
| 1052 |  | 
| 1053 | std::string Utf8SafeCHexEscape(absl::string_view src) { | 
| 1054 |   return CEscapeInternal(src, true, true); | 
| 1055 | } | 
| 1056 |  | 
| 1057 | // ---------------------------------------------------------------------- | 
| 1058 | // Base64Unescape() - base64 decoder | 
| 1059 | // Base64Escape() - base64 encoder | 
| 1060 | // WebSafeBase64Unescape() - Google's variation of base64 decoder | 
| 1061 | // WebSafeBase64Escape() - Google's variation of base64 encoder | 
| 1062 | // | 
| 1063 | // Check out | 
| 1064 | // http://tools.ietf.org/html/rfc2045 for formal description, but what we | 
| 1065 | // care about is that... | 
| 1066 | //   Take the encoded stuff in groups of 4 characters and turn each | 
| 1067 | //   character into a code 0 to 63 thus: | 
| 1068 | //           A-Z map to 0 to 25 | 
| 1069 | //           a-z map to 26 to 51 | 
| 1070 | //           0-9 map to 52 to 61 | 
| 1071 | //           +(- for WebSafe) maps to 62 | 
| 1072 | //           /(_ for WebSafe) maps to 63 | 
| 1073 | //   There will be four numbers, all less than 64 which can be represented | 
| 1074 | //   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). | 
| 1075 | //   Arrange the 6 digit binary numbers into three bytes as such: | 
| 1076 | //   aaaaaabb bbbbcccc ccdddddd | 
| 1077 | //   Equals signs (one or two) are used at the end of the encoded block to | 
| 1078 | //   indicate that the text was not an integer multiple of three bytes long. | 
| 1079 | // ---------------------------------------------------------------------- | 
| 1080 |  | 
| 1081 | bool Base64Unescape(absl::string_view src, std::string* dest) { | 
| 1082 |   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); | 
| 1083 | } | 
| 1084 |  | 
| 1085 | bool WebSafeBase64Unescape(absl::string_view src, std::string* dest) { | 
| 1086 |   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64); | 
| 1087 | } | 
| 1088 |  | 
| 1089 | void Base64Escape(absl::string_view src, std::string* dest) { | 
| 1090 |   Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), | 
| 1091 |                        src.size(), dest, true, kBase64Chars); | 
| 1092 | } | 
| 1093 |  | 
| 1094 | void WebSafeBase64Escape(absl::string_view src, std::string* dest) { | 
| 1095 |   Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), | 
| 1096 |                        src.size(), dest, false, kWebSafeBase64Chars); | 
| 1097 | } | 
| 1098 |  | 
| 1099 | std::string Base64Escape(absl::string_view src) { | 
| 1100 |   std::string dest; | 
| 1101 |   Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), | 
| 1102 |                        src.size(), &dest, true, kBase64Chars); | 
| 1103 |   return dest; | 
| 1104 | } | 
| 1105 |  | 
| 1106 | std::string WebSafeBase64Escape(absl::string_view src) { | 
| 1107 |   std::string dest; | 
| 1108 |   Base64EscapeInternal(reinterpret_cast<const unsigned char*>(src.data()), | 
| 1109 |                        src.size(), &dest, false, kWebSafeBase64Chars); | 
| 1110 |   return dest; | 
| 1111 | } | 
| 1112 |  | 
| 1113 | std::string HexStringToBytes(absl::string_view from) { | 
| 1114 |   std::string result; | 
| 1115 |   const auto num = from.size() / 2; | 
| 1116 |   strings_internal::STLStringResizeUninitialized(&result, num); | 
| 1117 |   absl::HexStringToBytesInternal<std::string&>(from.data(), result, num); | 
| 1118 |   return result; | 
| 1119 | } | 
| 1120 |  | 
| 1121 | std::string BytesToHexString(absl::string_view from) { | 
| 1122 |   std::string result; | 
| 1123 |   strings_internal::STLStringResizeUninitialized(&result, 2 * from.size()); | 
| 1124 |   absl::BytesToHexStringInternal<std::string&>( | 
| 1125 |       reinterpret_cast<const unsigned char*>(from.data()), result, from.size()); | 
| 1126 |   return result; | 
| 1127 | } | 
| 1128 |  | 
| 1129 | }  // namespace absl | 
| 1130 |  |