1/*
2 * Copyright 2010-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License").
5 * You may not use this file except in compliance with the License.
6 * A copy of the License is located at
7 *
8 * http://aws.amazon.com/apache2.0
9 *
10 * or in the "license" file accompanying this file. This file is distributed
11 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
12 * express or implied. See the License for the specific language governing
13 * permissions and limitations under the License.
14 */
15
16#include <aws/common/encoding.h>
17
18#include <ctype.h>
19#include <stdlib.h>
20
#ifdef USE_SIMD_ENCODING
/* SIMD codec entry points. Only their prototypes appear here. */
bool aws_common_private_has_avx2(void);
void aws_common_private_base64_encode_sse41(const unsigned char *in, unsigned char *out, size_t len);
size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len);
#else
/*
 * USE_SIMD_ENCODING is not defined, so the SIMD codec is not available in this build.
 * aws_common_private_has_avx2() is hard-wired to false, which makes callers take the pure-C
 * code paths. The encode/decode stubs below should therefore never run; they exist only so
 * that the call sites still resolve, and they assert if they are ever reached.
 */
static inline bool aws_common_private_has_avx2(void) {
    return false;
}

static inline void aws_common_private_base64_encode_sse41(const unsigned char *in, unsigned char *out, size_t len) {
    (void)len;
    (void)out;
    (void)in;
    AWS_ASSERT(false);
}

static inline size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len) {
    (void)len;
    (void)out;
    (void)in;
    AWS_ASSERT(false);
    return (size_t)-1; /* unreachable */
}
#endif
48
/* Lowercase hexadecimal digits, indexed by nibble value (0-15). */
static const uint8_t *HEX_CHARS = (const uint8_t *)"0123456789abcdef";

/* Value stored in BASE64_DECODING_TABLE for the '=' padding character. */
static const uint8_t BASE64_SENTIANAL_VALUE = 0xFF;

/* Base64 alphabet, indexed by 6-bit value (0-63). The array also holds the literal's trailing NUL. */
static const uint8_t BASE64_ENCODING_TABLE[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                               "abcdefghijklmnopqrstuvwxyz"
                                               "0123456789"
                                               "+/";
53
/*
 * Maps an input byte to its 6-bit base64 value:
 *   - 0..63 : the byte is a character of the base64 alphabet,
 *   - 255   : the byte is the '=' padding character,
 *   - 0xDD  : the byte is not valid base64 input.
 * Byte 0 (NUL) is marked invalid like every other non-alphabet byte; any entry that is neither
 * 0xDD nor 255 is treated as a decodable value by the lookup code, so no other marker may be used.
 *
 * If you have to do byte counting for any reason, there are 16 bytes per row. Reformatting is
 * turned off to make sure this stays as 16 bytes per line.
 */
/* clang-format off */
static const uint8_t BASE64_DECODING_TABLE[256] = {
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 62,   0xDD, 0xDD, 0xDD, 63,
    52,   53,   54,   55,   56,   57,   58,   59,   60,   61,   0xDD, 0xDD, 0xDD, 255,  0xDD, 0xDD,
    0xDD, 0,    1,    2,    3,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,   14,
    15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25,   0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40,
    41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,   0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD};
/* clang-format on */
75
76int aws_hex_compute_encoded_len(size_t to_encode_len, size_t *encoded_length) {
77 AWS_ASSERT(encoded_length);
78
79 size_t temp = (to_encode_len << 1) + 1;
80
81 if (AWS_UNLIKELY(temp < to_encode_len)) {
82 return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
83 }
84
85 *encoded_length = temp;
86
87 return AWS_OP_SUCCESS;
88}
89
90int aws_hex_encode(const struct aws_byte_cursor *AWS_RESTRICT to_encode, struct aws_byte_buf *AWS_RESTRICT output) {
91 AWS_PRECONDITION(aws_byte_cursor_is_valid(to_encode));
92 AWS_PRECONDITION(aws_byte_buf_is_valid(output));
93
94 size_t encoded_len = 0;
95
96 if (AWS_UNLIKELY(aws_hex_compute_encoded_len(to_encode->len, &encoded_len))) {
97 return AWS_OP_ERR;
98 }
99
100 if (AWS_UNLIKELY(output->capacity < encoded_len)) {
101 return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
102 }
103
104 size_t written = 0;
105 for (size_t i = 0; i < to_encode->len; ++i) {
106
107 output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] >> 4 & 0x0f];
108 output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] & 0x0f];
109 }
110
111 output->buffer[written] = '\0';
112 output->len = encoded_len;
113
114 return AWS_OP_SUCCESS;
115}
116
117int aws_hex_encode_append_dynamic(
118 const struct aws_byte_cursor *AWS_RESTRICT to_encode,
119 struct aws_byte_buf *AWS_RESTRICT output) {
120 AWS_ASSERT(to_encode->ptr);
121 AWS_ASSERT(aws_byte_buf_is_valid(output));
122
123 size_t encoded_len = 0;
124 if (AWS_UNLIKELY(aws_add_size_checked(to_encode->len, to_encode->len, &encoded_len))) {
125 return AWS_OP_ERR;
126 }
127
128 if (AWS_UNLIKELY(aws_byte_buf_reserve_relative(output, encoded_len))) {
129 return AWS_OP_ERR;
130 }
131
132 size_t written = output->len;
133 for (size_t i = 0; i < to_encode->len; ++i) {
134
135 output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] >> 4 & 0x0f];
136 output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] & 0x0f];
137 }
138
139 output->len += encoded_len;
140
141 return AWS_OP_SUCCESS;
142}
143
144static int s_hex_decode_char_to_int(char character, uint8_t *int_val) {
145 if (character >= 'a' && character <= 'f') {
146 *int_val = (uint8_t)(10 + (character - 'a'));
147 return 0;
148 }
149
150 if (character >= 'A' && character <= 'F') {
151 *int_val = (uint8_t)(10 + (character - 'A'));
152 return 0;
153 }
154
155 if (character >= '0' && character <= '9') {
156 *int_val = (uint8_t)(character - '0');
157 return 0;
158 }
159
160 return AWS_OP_ERR;
161}
162
163int aws_hex_compute_decoded_len(size_t to_decode_len, size_t *decoded_len) {
164 AWS_ASSERT(decoded_len);
165
166 size_t temp = (to_decode_len + 1);
167
168 if (AWS_UNLIKELY(temp < to_decode_len)) {
169 return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
170 }
171
172 *decoded_len = temp >> 1;
173 return AWS_OP_SUCCESS;
174}
175
176int aws_hex_decode(const struct aws_byte_cursor *AWS_RESTRICT to_decode, struct aws_byte_buf *AWS_RESTRICT output) {
177 AWS_PRECONDITION(aws_byte_cursor_is_valid(to_decode));
178 AWS_PRECONDITION(aws_byte_buf_is_valid(output));
179
180 size_t decoded_length = 0;
181
182 if (AWS_UNLIKELY(aws_hex_compute_decoded_len(to_decode->len, &decoded_length))) {
183 return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
184 }
185
186 if (AWS_UNLIKELY(output->capacity < decoded_length)) {
187 return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
188 }
189
190 size_t written = 0;
191 size_t i = 0;
192 uint8_t high_value = 0;
193 uint8_t low_value = 0;
194
195 /* if the buffer isn't even, prepend a 0 to the buffer. */
196 if (AWS_UNLIKELY(to_decode->len & 0x01)) {
197 i = 1;
198 if (s_hex_decode_char_to_int(to_decode->ptr[0], &low_value)) {
199 return aws_raise_error(AWS_ERROR_INVALID_HEX_STR);
200 }
201
202 output->buffer[written++] = low_value;
203 }
204
205 for (; i < to_decode->len; i += 2) {
206 if (AWS_UNLIKELY(
207 s_hex_decode_char_to_int(to_decode->ptr[i], &high_value) ||
208 s_hex_decode_char_to_int(to_decode->ptr[i + 1], &low_value))) {
209 return aws_raise_error(AWS_ERROR_INVALID_HEX_STR);
210 }
211
212 uint8_t value = (uint8_t)(high_value << 4);
213 value |= low_value;
214 output->buffer[written++] = value;
215 }
216
217 output->len = decoded_length;
218
219 return AWS_OP_SUCCESS;
220}
221
222int aws_base64_compute_encoded_len(size_t to_encode_len, size_t *encoded_len) {
223 AWS_ASSERT(encoded_len);
224
225 size_t tmp = to_encode_len + 2;
226
227 if (AWS_UNLIKELY(tmp < to_encode_len)) {
228 return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
229 }
230
231 tmp /= 3;
232 size_t overflow_check = tmp;
233 tmp = 4 * tmp + 1; /* plus one for the NULL terminator */
234
235 if (AWS_UNLIKELY(tmp < overflow_check)) {
236 return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
237 }
238
239 *encoded_len = tmp;
240
241 return AWS_OP_SUCCESS;
242}
243
244int aws_base64_compute_decoded_len(const struct aws_byte_cursor *AWS_RESTRICT to_decode, size_t *decoded_len) {
245 AWS_ASSERT(to_decode);
246 AWS_ASSERT(decoded_len);
247
248 const size_t len = to_decode->len;
249 const uint8_t *input = to_decode->ptr;
250
251 if (len == 0) {
252 *decoded_len = 0;
253 return AWS_OP_SUCCESS;
254 }
255
256 if (AWS_UNLIKELY(len & 0x03)) {
257 return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR);
258 }
259
260 size_t tmp = len * 3;
261
262 if (AWS_UNLIKELY(tmp < len)) {
263 return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED);
264 }
265
266 size_t padding = 0;
267
268 if (len >= 2 && input[len - 1] == '=' && input[len - 2] == '=') { /*last two chars are = */
269 padding = 2;
270 } else if (input[len - 1] == '=') { /*last char is = */
271 padding = 1;
272 }
273
274 *decoded_len = (tmp / 4 - padding);
275 return AWS_OP_SUCCESS;
276}
277
278int aws_base64_encode(const struct aws_byte_cursor *AWS_RESTRICT to_encode, struct aws_byte_buf *AWS_RESTRICT output) {
279 AWS_ASSERT(to_encode->ptr);
280 AWS_ASSERT(output->buffer);
281
282 size_t terminated_length = 0;
283 size_t encoded_length = 0;
284 if (AWS_UNLIKELY(aws_base64_compute_encoded_len(to_encode->len, &terminated_length))) {
285 return AWS_OP_ERR;
286 }
287
288 size_t needed_capacity = 0;
289 if (AWS_UNLIKELY(aws_add_size_checked(output->len, terminated_length, &needed_capacity))) {
290 return AWS_OP_ERR;
291 }
292
293 if (AWS_UNLIKELY(output->capacity < needed_capacity)) {
294 return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
295 }
296
297 /*
298 * For convenience to standard C functions expecting a null-terminated
299 * string, the output is terminated. As the encoding itself can be used in
300 * various ways, however, its length should never account for that byte.
301 */
302 encoded_length = (terminated_length - 1);
303
304 if (aws_common_private_has_avx2()) {
305 aws_common_private_base64_encode_sse41(to_encode->ptr, output->buffer + output->len, to_encode->len);
306 output->buffer[output->len + encoded_length] = 0;
307 output->len += encoded_length;
308 return AWS_OP_SUCCESS;
309 }
310
311 size_t buffer_length = to_encode->len;
312 size_t block_count = (buffer_length + 2) / 3;
313 size_t remainder_count = (buffer_length % 3);
314 size_t str_index = output->len;
315
316 for (size_t i = 0; i < to_encode->len; i += 3) {
317 uint32_t block = to_encode->ptr[i];
318
319 block <<= 8;
320 if (AWS_LIKELY(i + 1 < buffer_length)) {
321 block = block | to_encode->ptr[i + 1];
322 }
323
324 block <<= 8;
325 if (AWS_LIKELY(i + 2 < to_encode->len)) {
326 block = block | to_encode->ptr[i + 2];
327 }
328
329 output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 18) & 0x3F];
330 output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 12) & 0x3F];
331 output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 6) & 0x3F];
332 output->buffer[str_index++] = BASE64_ENCODING_TABLE[block & 0x3F];
333 }
334
335 if (remainder_count > 0) {
336 output->buffer[output->len + block_count * 4 - 1] = '=';
337 if (remainder_count == 1) {
338 output->buffer[output->len + block_count * 4 - 2] = '=';
339 }
340 }
341
342 /* it's a string add the null terminator. */
343 output->buffer[output->len + encoded_length] = 0;
344
345 output->len += encoded_length;
346
347 return AWS_OP_SUCCESS;
348}
349
350static inline int s_base64_get_decoded_value(unsigned char to_decode, uint8_t *value, int8_t allow_sentinal) {
351
352 uint8_t decode_value = BASE64_DECODING_TABLE[(size_t)to_decode];
353 if (decode_value != 0xDD && (decode_value != BASE64_SENTIANAL_VALUE || allow_sentinal)) {
354 *value = decode_value;
355 return AWS_OP_SUCCESS;
356 }
357
358 return AWS_OP_ERR;
359}
360
361int aws_base64_decode(const struct aws_byte_cursor *AWS_RESTRICT to_decode, struct aws_byte_buf *AWS_RESTRICT output) {
362 size_t decoded_length = 0;
363
364 if (AWS_UNLIKELY(aws_base64_compute_decoded_len(to_decode, &decoded_length))) {
365 return AWS_OP_ERR;
366 }
367
368 if (output->capacity < decoded_length) {
369 return aws_raise_error(AWS_ERROR_SHORT_BUFFER);
370 }
371
372 if (aws_common_private_has_avx2()) {
373 size_t result = aws_common_private_base64_decode_sse41(to_decode->ptr, output->buffer, to_decode->len);
374 if (result == -1) {
375 return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR);
376 }
377
378 output->len = result;
379 return AWS_OP_SUCCESS;
380 }
381
382 int64_t block_count = (int64_t)to_decode->len / 4;
383 size_t string_index = 0;
384 uint8_t value1 = 0, value2 = 0, value3 = 0, value4 = 0;
385 int64_t buffer_index = 0;
386
387 for (int64_t i = 0; i < block_count - 1; ++i) {
388 if (AWS_UNLIKELY(
389 s_base64_get_decoded_value(to_decode->ptr[string_index++], &value1, 0) ||
390 s_base64_get_decoded_value(to_decode->ptr[string_index++], &value2, 0) ||
391 s_base64_get_decoded_value(to_decode->ptr[string_index++], &value3, 0) ||
392 s_base64_get_decoded_value(to_decode->ptr[string_index++], &value4, 0))) {
393 return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR);
394 }
395
396 buffer_index = i * 3;
397 output->buffer[buffer_index++] = (uint8_t)((value1 << 2) | ((value2 >> 4) & 0x03));
398 output->buffer[buffer_index++] = (uint8_t)(((value2 << 4) & 0xF0) | ((value3 >> 2) & 0x0F));
399 output->buffer[buffer_index] = (uint8_t)((value3 & 0x03) << 6 | value4);
400 }
401
402 buffer_index = (block_count - 1) * 3;
403
404 if (buffer_index >= 0) {
405 if (s_base64_get_decoded_value(to_decode->ptr[string_index++], &value1, 0) ||
406 s_base64_get_decoded_value(to_decode->ptr[string_index++], &value2, 0) ||
407 s_base64_get_decoded_value(to_decode->ptr[string_index++], &value3, 1) ||
408 s_base64_get_decoded_value(to_decode->ptr[string_index], &value4, 1)) {
409 return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR);
410 }
411
412 output->buffer[buffer_index++] = (uint8_t)((value1 << 2) | ((value2 >> 4) & 0x03));
413
414 if (value3 != BASE64_SENTIANAL_VALUE) {
415 output->buffer[buffer_index++] = (uint8_t)(((value2 << 4) & 0xF0) | ((value3 >> 2) & 0x0F));
416 if (value4 != BASE64_SENTIANAL_VALUE) {
417 output->buffer[buffer_index] = (uint8_t)((value3 & 0x03) << 6 | value4);
418 }
419 }
420 }
421 output->len = decoded_length;
422 return AWS_OP_SUCCESS;
423}
424