| 1 | /* |
| 2 | * Copyright 2010-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"). |
| 5 | * You may not use this file except in compliance with the License. |
| 6 | * A copy of the License is located at |
| 7 | * |
| 8 | * http://aws.amazon.com/apache2.0 |
| 9 | * |
| 10 | * or in the "license" file accompanying this file. This file is distributed |
| 11 | * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
| 12 | * express or implied. See the License for the specific language governing |
| 13 | * permissions and limitations under the License. |
| 14 | */ |
| 15 | |
#include <aws/common/encoding.h>

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>
| 20 | |
#ifdef USE_SIMD_ENCODING
/* SIMD-accelerated routines; only declared here, their definitions are not in this file. */
bool aws_common_private_has_avx2(void);
size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len);
void aws_common_private_base64_encode_sse41(const unsigned char *in, unsigned char *out, size_t len);
#else
/*
 * Without USE_SIMD_ENCODING the pure-C encoder/decoder below is always used.
 * aws_common_private_has_avx2() is hard-wired to false, so the two SSE4.1 entry
 * points are never expected to run; they exist only so that the call sites still
 * compile and link.
 */
static inline bool aws_common_private_has_avx2(void) {
    return false;
}

static inline size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len) {
    (void)in;
    (void)out;
    (void)len;
    AWS_ASSERT(false);
    /* Not expected to be reached; (size_t)-1 is the value aws_base64_decode treats as failure. */
    return (size_t)-1;
}

static inline void aws_common_private_base64_encode_sse41(const unsigned char *in, unsigned char *out, size_t len) {
    (void)in;
    (void)out;
    (void)len;
    AWS_ASSERT(false);
}
#endif
| 48 | |
/* Lowercase hex digits, indexed by nibble value (0-15). */
static const uint8_t HEX_CHARS[] = "0123456789abcdef";

/* Value stored in BASE64_DECODING_TABLE for the '=' padding character. */
static const uint8_t BASE64_SENTIANAL_VALUE = 0xff;
/* Base64 alphabet, indexed by 6-bit value (0-63). */
static const uint8_t BASE64_ENCODING_TABLE[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
| 53 | |
/*
 * Maps every possible input byte to its 6-bit Base64 value.
 *   - 0xDD marks a byte that is not part of the Base64 alphabet.
 *   - 255 (the entry for '=') is the padding sentinel; it equals BASE64_SENTIANAL_VALUE.
 *   - NOTE(review): entry 0 (the NUL byte) is 64, which is neither of the two markers above,
 *     so s_base64_get_decoded_value accepts a NUL byte as decodable - confirm this is intended.
 * If you have to do byte counting for any reason, there are 16 bytes per row. Reformatting is
 * turned off to make sure this stays as 16 bytes per line.
 */
/* clang-format off */
static const uint8_t BASE64_DECODING_TABLE[256] = {
    64, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 62, 0xDD, 0xDD, 0xDD, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0xDD, 0xDD, 0xDD, 255, 0xDD, 0xDD,
    0xDD, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD};
/* clang-format on */
| 75 | |
| 76 | int aws_hex_compute_encoded_len(size_t to_encode_len, size_t *encoded_length) { |
| 77 | AWS_ASSERT(encoded_length); |
| 78 | |
| 79 | size_t temp = (to_encode_len << 1) + 1; |
| 80 | |
| 81 | if (AWS_UNLIKELY(temp < to_encode_len)) { |
| 82 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
| 83 | } |
| 84 | |
| 85 | *encoded_length = temp; |
| 86 | |
| 87 | return AWS_OP_SUCCESS; |
| 88 | } |
| 89 | |
| 90 | int aws_hex_encode(const struct aws_byte_cursor *AWS_RESTRICT to_encode, struct aws_byte_buf *AWS_RESTRICT output) { |
| 91 | AWS_PRECONDITION(aws_byte_cursor_is_valid(to_encode)); |
| 92 | AWS_PRECONDITION(aws_byte_buf_is_valid(output)); |
| 93 | |
| 94 | size_t encoded_len = 0; |
| 95 | |
| 96 | if (AWS_UNLIKELY(aws_hex_compute_encoded_len(to_encode->len, &encoded_len))) { |
| 97 | return AWS_OP_ERR; |
| 98 | } |
| 99 | |
| 100 | if (AWS_UNLIKELY(output->capacity < encoded_len)) { |
| 101 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
| 102 | } |
| 103 | |
| 104 | size_t written = 0; |
| 105 | for (size_t i = 0; i < to_encode->len; ++i) { |
| 106 | |
| 107 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] >> 4 & 0x0f]; |
| 108 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] & 0x0f]; |
| 109 | } |
| 110 | |
| 111 | output->buffer[written] = '\0'; |
| 112 | output->len = encoded_len; |
| 113 | |
| 114 | return AWS_OP_SUCCESS; |
| 115 | } |
| 116 | |
| 117 | int aws_hex_encode_append_dynamic( |
| 118 | const struct aws_byte_cursor *AWS_RESTRICT to_encode, |
| 119 | struct aws_byte_buf *AWS_RESTRICT output) { |
| 120 | AWS_ASSERT(to_encode->ptr); |
| 121 | AWS_ASSERT(aws_byte_buf_is_valid(output)); |
| 122 | |
| 123 | size_t encoded_len = 0; |
| 124 | if (AWS_UNLIKELY(aws_add_size_checked(to_encode->len, to_encode->len, &encoded_len))) { |
| 125 | return AWS_OP_ERR; |
| 126 | } |
| 127 | |
| 128 | if (AWS_UNLIKELY(aws_byte_buf_reserve_relative(output, encoded_len))) { |
| 129 | return AWS_OP_ERR; |
| 130 | } |
| 131 | |
| 132 | size_t written = output->len; |
| 133 | for (size_t i = 0; i < to_encode->len; ++i) { |
| 134 | |
| 135 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] >> 4 & 0x0f]; |
| 136 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] & 0x0f]; |
| 137 | } |
| 138 | |
| 139 | output->len += encoded_len; |
| 140 | |
| 141 | return AWS_OP_SUCCESS; |
| 142 | } |
| 143 | |
| 144 | static int s_hex_decode_char_to_int(char character, uint8_t *int_val) { |
| 145 | if (character >= 'a' && character <= 'f') { |
| 146 | *int_val = (uint8_t)(10 + (character - 'a')); |
| 147 | return 0; |
| 148 | } |
| 149 | |
| 150 | if (character >= 'A' && character <= 'F') { |
| 151 | *int_val = (uint8_t)(10 + (character - 'A')); |
| 152 | return 0; |
| 153 | } |
| 154 | |
| 155 | if (character >= '0' && character <= '9') { |
| 156 | *int_val = (uint8_t)(character - '0'); |
| 157 | return 0; |
| 158 | } |
| 159 | |
| 160 | return AWS_OP_ERR; |
| 161 | } |
| 162 | |
| 163 | int aws_hex_compute_decoded_len(size_t to_decode_len, size_t *decoded_len) { |
| 164 | AWS_ASSERT(decoded_len); |
| 165 | |
| 166 | size_t temp = (to_decode_len + 1); |
| 167 | |
| 168 | if (AWS_UNLIKELY(temp < to_decode_len)) { |
| 169 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
| 170 | } |
| 171 | |
| 172 | *decoded_len = temp >> 1; |
| 173 | return AWS_OP_SUCCESS; |
| 174 | } |
| 175 | |
| 176 | int aws_hex_decode(const struct aws_byte_cursor *AWS_RESTRICT to_decode, struct aws_byte_buf *AWS_RESTRICT output) { |
| 177 | AWS_PRECONDITION(aws_byte_cursor_is_valid(to_decode)); |
| 178 | AWS_PRECONDITION(aws_byte_buf_is_valid(output)); |
| 179 | |
| 180 | size_t decoded_length = 0; |
| 181 | |
| 182 | if (AWS_UNLIKELY(aws_hex_compute_decoded_len(to_decode->len, &decoded_length))) { |
| 183 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
| 184 | } |
| 185 | |
| 186 | if (AWS_UNLIKELY(output->capacity < decoded_length)) { |
| 187 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
| 188 | } |
| 189 | |
| 190 | size_t written = 0; |
| 191 | size_t i = 0; |
| 192 | uint8_t high_value = 0; |
| 193 | uint8_t low_value = 0; |
| 194 | |
| 195 | /* if the buffer isn't even, prepend a 0 to the buffer. */ |
| 196 | if (AWS_UNLIKELY(to_decode->len & 0x01)) { |
| 197 | i = 1; |
| 198 | if (s_hex_decode_char_to_int(to_decode->ptr[0], &low_value)) { |
| 199 | return aws_raise_error(AWS_ERROR_INVALID_HEX_STR); |
| 200 | } |
| 201 | |
| 202 | output->buffer[written++] = low_value; |
| 203 | } |
| 204 | |
| 205 | for (; i < to_decode->len; i += 2) { |
| 206 | if (AWS_UNLIKELY( |
| 207 | s_hex_decode_char_to_int(to_decode->ptr[i], &high_value) || |
| 208 | s_hex_decode_char_to_int(to_decode->ptr[i + 1], &low_value))) { |
| 209 | return aws_raise_error(AWS_ERROR_INVALID_HEX_STR); |
| 210 | } |
| 211 | |
| 212 | uint8_t value = (uint8_t)(high_value << 4); |
| 213 | value |= low_value; |
| 214 | output->buffer[written++] = value; |
| 215 | } |
| 216 | |
| 217 | output->len = decoded_length; |
| 218 | |
| 219 | return AWS_OP_SUCCESS; |
| 220 | } |
| 221 | |
| 222 | int aws_base64_compute_encoded_len(size_t to_encode_len, size_t *encoded_len) { |
| 223 | AWS_ASSERT(encoded_len); |
| 224 | |
| 225 | size_t tmp = to_encode_len + 2; |
| 226 | |
| 227 | if (AWS_UNLIKELY(tmp < to_encode_len)) { |
| 228 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
| 229 | } |
| 230 | |
| 231 | tmp /= 3; |
| 232 | size_t overflow_check = tmp; |
| 233 | tmp = 4 * tmp + 1; /* plus one for the NULL terminator */ |
| 234 | |
| 235 | if (AWS_UNLIKELY(tmp < overflow_check)) { |
| 236 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
| 237 | } |
| 238 | |
| 239 | *encoded_len = tmp; |
| 240 | |
| 241 | return AWS_OP_SUCCESS; |
| 242 | } |
| 243 | |
| 244 | int aws_base64_compute_decoded_len(const struct aws_byte_cursor *AWS_RESTRICT to_decode, size_t *decoded_len) { |
| 245 | AWS_ASSERT(to_decode); |
| 246 | AWS_ASSERT(decoded_len); |
| 247 | |
| 248 | const size_t len = to_decode->len; |
| 249 | const uint8_t *input = to_decode->ptr; |
| 250 | |
| 251 | if (len == 0) { |
| 252 | *decoded_len = 0; |
| 253 | return AWS_OP_SUCCESS; |
| 254 | } |
| 255 | |
| 256 | if (AWS_UNLIKELY(len & 0x03)) { |
| 257 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
| 258 | } |
| 259 | |
| 260 | size_t tmp = len * 3; |
| 261 | |
| 262 | if (AWS_UNLIKELY(tmp < len)) { |
| 263 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
| 264 | } |
| 265 | |
| 266 | size_t padding = 0; |
| 267 | |
| 268 | if (len >= 2 && input[len - 1] == '=' && input[len - 2] == '=') { /*last two chars are = */ |
| 269 | padding = 2; |
| 270 | } else if (input[len - 1] == '=') { /*last char is = */ |
| 271 | padding = 1; |
| 272 | } |
| 273 | |
| 274 | *decoded_len = (tmp / 4 - padding); |
| 275 | return AWS_OP_SUCCESS; |
| 276 | } |
| 277 | |
| 278 | int aws_base64_encode(const struct aws_byte_cursor *AWS_RESTRICT to_encode, struct aws_byte_buf *AWS_RESTRICT output) { |
| 279 | AWS_ASSERT(to_encode->ptr); |
| 280 | AWS_ASSERT(output->buffer); |
| 281 | |
| 282 | size_t terminated_length = 0; |
| 283 | size_t encoded_length = 0; |
| 284 | if (AWS_UNLIKELY(aws_base64_compute_encoded_len(to_encode->len, &terminated_length))) { |
| 285 | return AWS_OP_ERR; |
| 286 | } |
| 287 | |
| 288 | size_t needed_capacity = 0; |
| 289 | if (AWS_UNLIKELY(aws_add_size_checked(output->len, terminated_length, &needed_capacity))) { |
| 290 | return AWS_OP_ERR; |
| 291 | } |
| 292 | |
| 293 | if (AWS_UNLIKELY(output->capacity < needed_capacity)) { |
| 294 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
| 295 | } |
| 296 | |
| 297 | /* |
| 298 | * For convenience to standard C functions expecting a null-terminated |
| 299 | * string, the output is terminated. As the encoding itself can be used in |
| 300 | * various ways, however, its length should never account for that byte. |
| 301 | */ |
| 302 | encoded_length = (terminated_length - 1); |
| 303 | |
| 304 | if (aws_common_private_has_avx2()) { |
| 305 | aws_common_private_base64_encode_sse41(to_encode->ptr, output->buffer + output->len, to_encode->len); |
| 306 | output->buffer[output->len + encoded_length] = 0; |
| 307 | output->len += encoded_length; |
| 308 | return AWS_OP_SUCCESS; |
| 309 | } |
| 310 | |
| 311 | size_t buffer_length = to_encode->len; |
| 312 | size_t block_count = (buffer_length + 2) / 3; |
| 313 | size_t remainder_count = (buffer_length % 3); |
| 314 | size_t str_index = output->len; |
| 315 | |
| 316 | for (size_t i = 0; i < to_encode->len; i += 3) { |
| 317 | uint32_t block = to_encode->ptr[i]; |
| 318 | |
| 319 | block <<= 8; |
| 320 | if (AWS_LIKELY(i + 1 < buffer_length)) { |
| 321 | block = block | to_encode->ptr[i + 1]; |
| 322 | } |
| 323 | |
| 324 | block <<= 8; |
| 325 | if (AWS_LIKELY(i + 2 < to_encode->len)) { |
| 326 | block = block | to_encode->ptr[i + 2]; |
| 327 | } |
| 328 | |
| 329 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 18) & 0x3F]; |
| 330 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 12) & 0x3F]; |
| 331 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 6) & 0x3F]; |
| 332 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[block & 0x3F]; |
| 333 | } |
| 334 | |
| 335 | if (remainder_count > 0) { |
| 336 | output->buffer[output->len + block_count * 4 - 1] = '='; |
| 337 | if (remainder_count == 1) { |
| 338 | output->buffer[output->len + block_count * 4 - 2] = '='; |
| 339 | } |
| 340 | } |
| 341 | |
| 342 | /* it's a string add the null terminator. */ |
| 343 | output->buffer[output->len + encoded_length] = 0; |
| 344 | |
| 345 | output->len += encoded_length; |
| 346 | |
| 347 | return AWS_OP_SUCCESS; |
| 348 | } |
| 349 | |
| 350 | static inline int s_base64_get_decoded_value(unsigned char to_decode, uint8_t *value, int8_t allow_sentinal) { |
| 351 | |
| 352 | uint8_t decode_value = BASE64_DECODING_TABLE[(size_t)to_decode]; |
| 353 | if (decode_value != 0xDD && (decode_value != BASE64_SENTIANAL_VALUE || allow_sentinal)) { |
| 354 | *value = decode_value; |
| 355 | return AWS_OP_SUCCESS; |
| 356 | } |
| 357 | |
| 358 | return AWS_OP_ERR; |
| 359 | } |
| 360 | |
| 361 | int aws_base64_decode(const struct aws_byte_cursor *AWS_RESTRICT to_decode, struct aws_byte_buf *AWS_RESTRICT output) { |
| 362 | size_t decoded_length = 0; |
| 363 | |
| 364 | if (AWS_UNLIKELY(aws_base64_compute_decoded_len(to_decode, &decoded_length))) { |
| 365 | return AWS_OP_ERR; |
| 366 | } |
| 367 | |
| 368 | if (output->capacity < decoded_length) { |
| 369 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
| 370 | } |
| 371 | |
| 372 | if (aws_common_private_has_avx2()) { |
| 373 | size_t result = aws_common_private_base64_decode_sse41(to_decode->ptr, output->buffer, to_decode->len); |
| 374 | if (result == -1) { |
| 375 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
| 376 | } |
| 377 | |
| 378 | output->len = result; |
| 379 | return AWS_OP_SUCCESS; |
| 380 | } |
| 381 | |
| 382 | int64_t block_count = (int64_t)to_decode->len / 4; |
| 383 | size_t string_index = 0; |
| 384 | uint8_t value1 = 0, value2 = 0, value3 = 0, value4 = 0; |
| 385 | int64_t buffer_index = 0; |
| 386 | |
| 387 | for (int64_t i = 0; i < block_count - 1; ++i) { |
| 388 | if (AWS_UNLIKELY( |
| 389 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value1, 0) || |
| 390 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value2, 0) || |
| 391 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value3, 0) || |
| 392 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value4, 0))) { |
| 393 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
| 394 | } |
| 395 | |
| 396 | buffer_index = i * 3; |
| 397 | output->buffer[buffer_index++] = (uint8_t)((value1 << 2) | ((value2 >> 4) & 0x03)); |
| 398 | output->buffer[buffer_index++] = (uint8_t)(((value2 << 4) & 0xF0) | ((value3 >> 2) & 0x0F)); |
| 399 | output->buffer[buffer_index] = (uint8_t)((value3 & 0x03) << 6 | value4); |
| 400 | } |
| 401 | |
| 402 | buffer_index = (block_count - 1) * 3; |
| 403 | |
| 404 | if (buffer_index >= 0) { |
| 405 | if (s_base64_get_decoded_value(to_decode->ptr[string_index++], &value1, 0) || |
| 406 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value2, 0) || |
| 407 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value3, 1) || |
| 408 | s_base64_get_decoded_value(to_decode->ptr[string_index], &value4, 1)) { |
| 409 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
| 410 | } |
| 411 | |
| 412 | output->buffer[buffer_index++] = (uint8_t)((value1 << 2) | ((value2 >> 4) & 0x03)); |
| 413 | |
| 414 | if (value3 != BASE64_SENTIANAL_VALUE) { |
| 415 | output->buffer[buffer_index++] = (uint8_t)(((value2 << 4) & 0xF0) | ((value3 >> 2) & 0x0F)); |
| 416 | if (value4 != BASE64_SENTIANAL_VALUE) { |
| 417 | output->buffer[buffer_index] = (uint8_t)((value3 & 0x03) << 6 | value4); |
| 418 | } |
| 419 | } |
| 420 | } |
| 421 | output->len = decoded_length; |
| 422 | return AWS_OP_SUCCESS; |
| 423 | } |
| 424 | |