1 | /* |
2 | * Copyright 2010-2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. |
3 | * |
4 | * Licensed under the Apache License, Version 2.0 (the "License"). |
5 | * You may not use this file except in compliance with the License. |
6 | * A copy of the License is located at |
7 | * |
8 | * http://aws.amazon.com/apache2.0 |
9 | * |
10 | * or in the "license" file accompanying this file. This file is distributed |
11 | * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either |
12 | * express or implied. See the License for the specific language governing |
13 | * permissions and limitations under the License. |
14 | */ |
15 | |
#include <aws/common/encoding.h>

#include <ctype.h>
#include <stdint.h>
#include <stdlib.h>
20 | |
21 | #ifdef USE_SIMD_ENCODING |
22 | size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len); |
23 | void aws_common_private_base64_encode_sse41(const unsigned char *in, unsigned char *out, size_t len); |
24 | bool aws_common_private_has_avx2(void); |
25 | #else |
/*
 * When SIMD compilation is unavailable (USE_SIMD_ENCODING is not defined), we use these stubs to fall back to
 * the pure-C encoder and decoder. Since we force aws_common_private_has_avx2 to return false, the encode and
 * decode functions should not be called - but we must provide them anyway to avoid link errors.
 */
/*
 * Stand-in for the SIMD base64 decoder in builds without USE_SIMD_ENCODING.
 * It is not expected to be called (aws_common_private_has_avx2 returns false
 * in this configuration), so it asserts and reports (size_t)-1.
 */
static inline size_t aws_common_private_base64_decode_sse41(const unsigned char *in, unsigned char *out, size_t len) {
    /* Reference every parameter so unused-parameter warnings stay quiet. */
    (void)len;
    (void)out;
    (void)in;

    AWS_ASSERT(false);

    const size_t failure = (size_t)-1; /* unreachable */
    return failure;
}
/*
 * Stand-in for the SIMD base64 encoder in builds without USE_SIMD_ENCODING.
 * It is not expected to be called (aws_common_private_has_avx2 returns false
 * in this configuration), so it only asserts.
 */
static inline void aws_common_private_base64_encode_sse41(const unsigned char *in, unsigned char *out, size_t len) {
    /* Reference every parameter so unused-parameter warnings stay quiet. */
    (void)len;
    (void)out;
    (void)in;

    AWS_ASSERT(false);
}
/*
 * Capability probe used to choose between the SIMD and pure-C code paths.
 * This variant is compiled when USE_SIMD_ENCODING is not defined and always
 * answers false, which keeps callers on the pure-C implementation.
 */
static inline bool aws_common_private_has_avx2(void) {
    const bool simd_compiled_in = false;
    return simd_compiled_in;
}
47 | #endif |
48 | |
/*
 * Lowercase hexadecimal digits, indexed by nibble value (0-15).
 * Declared as a const array (not a pointer to a literal) so the symbol itself
 * cannot be reassigned, matching BASE64_ENCODING_TABLE below.
 */
static const uint8_t HEX_CHARS[] = "0123456789abcdef";

/* Value that BASE64_DECODING_TABLE holds for the '=' padding character. */
static const uint8_t BASE64_SENTIANAL_VALUE = 0xff;

/* Base64 alphabet, indexed by 6-bit value (0-63). */
static const uint8_t BASE64_ENCODING_TABLE[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
53 | |
/*
 * Maps every possible input byte to its 6-bit base64 value.
 *   - 0xDD marks a byte that is not part of the base64 alphabet.
 *   - 255 (the value of BASE64_SENTIANAL_VALUE) marks the '=' padding character.
 * NUL (index 0) is not a base64 character, so it is 0xDD like every other
 * non-alphabet byte; a value above 63 would not fit in six bits and would
 * corrupt the decoded output.
 *
 * If you have to do byte counting for any reason, there are 16 bytes per row.
 * Reformatting is turned off to make sure this stays as 16 bytes per line.
 */
/* clang-format off */
static const uint8_t BASE64_DECODING_TABLE[256] = {
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 62,   0xDD, 0xDD, 0xDD, 63,
    52,   53,   54,   55,   56,   57,   58,   59,   60,   61,   0xDD, 0xDD, 0xDD, 255,  0xDD, 0xDD,
    0xDD, 0,    1,    2,    3,    4,    5,    6,    7,    8,    9,    10,   11,   12,   13,   14,
    15,   16,   17,   18,   19,   20,   21,   22,   23,   24,   25,   0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 26,   27,   28,   29,   30,   31,   32,   33,   34,   35,   36,   37,   38,   39,   40,
    41,   42,   43,   44,   45,   46,   47,   48,   49,   50,   51,   0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD,
    0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD, 0xDD};
/* clang-format on */
75 | |
76 | int aws_hex_compute_encoded_len(size_t to_encode_len, size_t *encoded_length) { |
77 | AWS_ASSERT(encoded_length); |
78 | |
79 | size_t temp = (to_encode_len << 1) + 1; |
80 | |
81 | if (AWS_UNLIKELY(temp < to_encode_len)) { |
82 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
83 | } |
84 | |
85 | *encoded_length = temp; |
86 | |
87 | return AWS_OP_SUCCESS; |
88 | } |
89 | |
90 | int aws_hex_encode(const struct aws_byte_cursor *AWS_RESTRICT to_encode, struct aws_byte_buf *AWS_RESTRICT output) { |
91 | AWS_PRECONDITION(aws_byte_cursor_is_valid(to_encode)); |
92 | AWS_PRECONDITION(aws_byte_buf_is_valid(output)); |
93 | |
94 | size_t encoded_len = 0; |
95 | |
96 | if (AWS_UNLIKELY(aws_hex_compute_encoded_len(to_encode->len, &encoded_len))) { |
97 | return AWS_OP_ERR; |
98 | } |
99 | |
100 | if (AWS_UNLIKELY(output->capacity < encoded_len)) { |
101 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
102 | } |
103 | |
104 | size_t written = 0; |
105 | for (size_t i = 0; i < to_encode->len; ++i) { |
106 | |
107 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] >> 4 & 0x0f]; |
108 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] & 0x0f]; |
109 | } |
110 | |
111 | output->buffer[written] = '\0'; |
112 | output->len = encoded_len; |
113 | |
114 | return AWS_OP_SUCCESS; |
115 | } |
116 | |
117 | int aws_hex_encode_append_dynamic( |
118 | const struct aws_byte_cursor *AWS_RESTRICT to_encode, |
119 | struct aws_byte_buf *AWS_RESTRICT output) { |
120 | AWS_ASSERT(to_encode->ptr); |
121 | AWS_ASSERT(aws_byte_buf_is_valid(output)); |
122 | |
123 | size_t encoded_len = 0; |
124 | if (AWS_UNLIKELY(aws_add_size_checked(to_encode->len, to_encode->len, &encoded_len))) { |
125 | return AWS_OP_ERR; |
126 | } |
127 | |
128 | if (AWS_UNLIKELY(aws_byte_buf_reserve_relative(output, encoded_len))) { |
129 | return AWS_OP_ERR; |
130 | } |
131 | |
132 | size_t written = output->len; |
133 | for (size_t i = 0; i < to_encode->len; ++i) { |
134 | |
135 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] >> 4 & 0x0f]; |
136 | output->buffer[written++] = HEX_CHARS[to_encode->ptr[i] & 0x0f]; |
137 | } |
138 | |
139 | output->len += encoded_len; |
140 | |
141 | return AWS_OP_SUCCESS; |
142 | } |
143 | |
144 | static int s_hex_decode_char_to_int(char character, uint8_t *int_val) { |
145 | if (character >= 'a' && character <= 'f') { |
146 | *int_val = (uint8_t)(10 + (character - 'a')); |
147 | return 0; |
148 | } |
149 | |
150 | if (character >= 'A' && character <= 'F') { |
151 | *int_val = (uint8_t)(10 + (character - 'A')); |
152 | return 0; |
153 | } |
154 | |
155 | if (character >= '0' && character <= '9') { |
156 | *int_val = (uint8_t)(character - '0'); |
157 | return 0; |
158 | } |
159 | |
160 | return AWS_OP_ERR; |
161 | } |
162 | |
163 | int aws_hex_compute_decoded_len(size_t to_decode_len, size_t *decoded_len) { |
164 | AWS_ASSERT(decoded_len); |
165 | |
166 | size_t temp = (to_decode_len + 1); |
167 | |
168 | if (AWS_UNLIKELY(temp < to_decode_len)) { |
169 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
170 | } |
171 | |
172 | *decoded_len = temp >> 1; |
173 | return AWS_OP_SUCCESS; |
174 | } |
175 | |
176 | int aws_hex_decode(const struct aws_byte_cursor *AWS_RESTRICT to_decode, struct aws_byte_buf *AWS_RESTRICT output) { |
177 | AWS_PRECONDITION(aws_byte_cursor_is_valid(to_decode)); |
178 | AWS_PRECONDITION(aws_byte_buf_is_valid(output)); |
179 | |
180 | size_t decoded_length = 0; |
181 | |
182 | if (AWS_UNLIKELY(aws_hex_compute_decoded_len(to_decode->len, &decoded_length))) { |
183 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
184 | } |
185 | |
186 | if (AWS_UNLIKELY(output->capacity < decoded_length)) { |
187 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
188 | } |
189 | |
190 | size_t written = 0; |
191 | size_t i = 0; |
192 | uint8_t high_value = 0; |
193 | uint8_t low_value = 0; |
194 | |
195 | /* if the buffer isn't even, prepend a 0 to the buffer. */ |
196 | if (AWS_UNLIKELY(to_decode->len & 0x01)) { |
197 | i = 1; |
198 | if (s_hex_decode_char_to_int(to_decode->ptr[0], &low_value)) { |
199 | return aws_raise_error(AWS_ERROR_INVALID_HEX_STR); |
200 | } |
201 | |
202 | output->buffer[written++] = low_value; |
203 | } |
204 | |
205 | for (; i < to_decode->len; i += 2) { |
206 | if (AWS_UNLIKELY( |
207 | s_hex_decode_char_to_int(to_decode->ptr[i], &high_value) || |
208 | s_hex_decode_char_to_int(to_decode->ptr[i + 1], &low_value))) { |
209 | return aws_raise_error(AWS_ERROR_INVALID_HEX_STR); |
210 | } |
211 | |
212 | uint8_t value = (uint8_t)(high_value << 4); |
213 | value |= low_value; |
214 | output->buffer[written++] = value; |
215 | } |
216 | |
217 | output->len = decoded_length; |
218 | |
219 | return AWS_OP_SUCCESS; |
220 | } |
221 | |
222 | int aws_base64_compute_encoded_len(size_t to_encode_len, size_t *encoded_len) { |
223 | AWS_ASSERT(encoded_len); |
224 | |
225 | size_t tmp = to_encode_len + 2; |
226 | |
227 | if (AWS_UNLIKELY(tmp < to_encode_len)) { |
228 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
229 | } |
230 | |
231 | tmp /= 3; |
232 | size_t overflow_check = tmp; |
233 | tmp = 4 * tmp + 1; /* plus one for the NULL terminator */ |
234 | |
235 | if (AWS_UNLIKELY(tmp < overflow_check)) { |
236 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
237 | } |
238 | |
239 | *encoded_len = tmp; |
240 | |
241 | return AWS_OP_SUCCESS; |
242 | } |
243 | |
244 | int aws_base64_compute_decoded_len(const struct aws_byte_cursor *AWS_RESTRICT to_decode, size_t *decoded_len) { |
245 | AWS_ASSERT(to_decode); |
246 | AWS_ASSERT(decoded_len); |
247 | |
248 | const size_t len = to_decode->len; |
249 | const uint8_t *input = to_decode->ptr; |
250 | |
251 | if (len == 0) { |
252 | *decoded_len = 0; |
253 | return AWS_OP_SUCCESS; |
254 | } |
255 | |
256 | if (AWS_UNLIKELY(len & 0x03)) { |
257 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
258 | } |
259 | |
260 | size_t tmp = len * 3; |
261 | |
262 | if (AWS_UNLIKELY(tmp < len)) { |
263 | return aws_raise_error(AWS_ERROR_OVERFLOW_DETECTED); |
264 | } |
265 | |
266 | size_t padding = 0; |
267 | |
268 | if (len >= 2 && input[len - 1] == '=' && input[len - 2] == '=') { /*last two chars are = */ |
269 | padding = 2; |
270 | } else if (input[len - 1] == '=') { /*last char is = */ |
271 | padding = 1; |
272 | } |
273 | |
274 | *decoded_len = (tmp / 4 - padding); |
275 | return AWS_OP_SUCCESS; |
276 | } |
277 | |
278 | int aws_base64_encode(const struct aws_byte_cursor *AWS_RESTRICT to_encode, struct aws_byte_buf *AWS_RESTRICT output) { |
279 | AWS_ASSERT(to_encode->ptr); |
280 | AWS_ASSERT(output->buffer); |
281 | |
282 | size_t terminated_length = 0; |
283 | size_t encoded_length = 0; |
284 | if (AWS_UNLIKELY(aws_base64_compute_encoded_len(to_encode->len, &terminated_length))) { |
285 | return AWS_OP_ERR; |
286 | } |
287 | |
288 | size_t needed_capacity = 0; |
289 | if (AWS_UNLIKELY(aws_add_size_checked(output->len, terminated_length, &needed_capacity))) { |
290 | return AWS_OP_ERR; |
291 | } |
292 | |
293 | if (AWS_UNLIKELY(output->capacity < needed_capacity)) { |
294 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
295 | } |
296 | |
297 | /* |
298 | * For convenience to standard C functions expecting a null-terminated |
299 | * string, the output is terminated. As the encoding itself can be used in |
300 | * various ways, however, its length should never account for that byte. |
301 | */ |
302 | encoded_length = (terminated_length - 1); |
303 | |
304 | if (aws_common_private_has_avx2()) { |
305 | aws_common_private_base64_encode_sse41(to_encode->ptr, output->buffer + output->len, to_encode->len); |
306 | output->buffer[output->len + encoded_length] = 0; |
307 | output->len += encoded_length; |
308 | return AWS_OP_SUCCESS; |
309 | } |
310 | |
311 | size_t buffer_length = to_encode->len; |
312 | size_t block_count = (buffer_length + 2) / 3; |
313 | size_t remainder_count = (buffer_length % 3); |
314 | size_t str_index = output->len; |
315 | |
316 | for (size_t i = 0; i < to_encode->len; i += 3) { |
317 | uint32_t block = to_encode->ptr[i]; |
318 | |
319 | block <<= 8; |
320 | if (AWS_LIKELY(i + 1 < buffer_length)) { |
321 | block = block | to_encode->ptr[i + 1]; |
322 | } |
323 | |
324 | block <<= 8; |
325 | if (AWS_LIKELY(i + 2 < to_encode->len)) { |
326 | block = block | to_encode->ptr[i + 2]; |
327 | } |
328 | |
329 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 18) & 0x3F]; |
330 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 12) & 0x3F]; |
331 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[(block >> 6) & 0x3F]; |
332 | output->buffer[str_index++] = BASE64_ENCODING_TABLE[block & 0x3F]; |
333 | } |
334 | |
335 | if (remainder_count > 0) { |
336 | output->buffer[output->len + block_count * 4 - 1] = '='; |
337 | if (remainder_count == 1) { |
338 | output->buffer[output->len + block_count * 4 - 2] = '='; |
339 | } |
340 | } |
341 | |
342 | /* it's a string add the null terminator. */ |
343 | output->buffer[output->len + encoded_length] = 0; |
344 | |
345 | output->len += encoded_length; |
346 | |
347 | return AWS_OP_SUCCESS; |
348 | } |
349 | |
350 | static inline int s_base64_get_decoded_value(unsigned char to_decode, uint8_t *value, int8_t allow_sentinal) { |
351 | |
352 | uint8_t decode_value = BASE64_DECODING_TABLE[(size_t)to_decode]; |
353 | if (decode_value != 0xDD && (decode_value != BASE64_SENTIANAL_VALUE || allow_sentinal)) { |
354 | *value = decode_value; |
355 | return AWS_OP_SUCCESS; |
356 | } |
357 | |
358 | return AWS_OP_ERR; |
359 | } |
360 | |
361 | int aws_base64_decode(const struct aws_byte_cursor *AWS_RESTRICT to_decode, struct aws_byte_buf *AWS_RESTRICT output) { |
362 | size_t decoded_length = 0; |
363 | |
364 | if (AWS_UNLIKELY(aws_base64_compute_decoded_len(to_decode, &decoded_length))) { |
365 | return AWS_OP_ERR; |
366 | } |
367 | |
368 | if (output->capacity < decoded_length) { |
369 | return aws_raise_error(AWS_ERROR_SHORT_BUFFER); |
370 | } |
371 | |
372 | if (aws_common_private_has_avx2()) { |
373 | size_t result = aws_common_private_base64_decode_sse41(to_decode->ptr, output->buffer, to_decode->len); |
374 | if (result == -1) { |
375 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
376 | } |
377 | |
378 | output->len = result; |
379 | return AWS_OP_SUCCESS; |
380 | } |
381 | |
382 | int64_t block_count = (int64_t)to_decode->len / 4; |
383 | size_t string_index = 0; |
384 | uint8_t value1 = 0, value2 = 0, value3 = 0, value4 = 0; |
385 | int64_t buffer_index = 0; |
386 | |
387 | for (int64_t i = 0; i < block_count - 1; ++i) { |
388 | if (AWS_UNLIKELY( |
389 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value1, 0) || |
390 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value2, 0) || |
391 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value3, 0) || |
392 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value4, 0))) { |
393 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
394 | } |
395 | |
396 | buffer_index = i * 3; |
397 | output->buffer[buffer_index++] = (uint8_t)((value1 << 2) | ((value2 >> 4) & 0x03)); |
398 | output->buffer[buffer_index++] = (uint8_t)(((value2 << 4) & 0xF0) | ((value3 >> 2) & 0x0F)); |
399 | output->buffer[buffer_index] = (uint8_t)((value3 & 0x03) << 6 | value4); |
400 | } |
401 | |
402 | buffer_index = (block_count - 1) * 3; |
403 | |
404 | if (buffer_index >= 0) { |
405 | if (s_base64_get_decoded_value(to_decode->ptr[string_index++], &value1, 0) || |
406 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value2, 0) || |
407 | s_base64_get_decoded_value(to_decode->ptr[string_index++], &value3, 1) || |
408 | s_base64_get_decoded_value(to_decode->ptr[string_index], &value4, 1)) { |
409 | return aws_raise_error(AWS_ERROR_INVALID_BASE64_STR); |
410 | } |
411 | |
412 | output->buffer[buffer_index++] = (uint8_t)((value1 << 2) | ((value2 >> 4) & 0x03)); |
413 | |
414 | if (value3 != BASE64_SENTIANAL_VALUE) { |
415 | output->buffer[buffer_index++] = (uint8_t)(((value2 << 4) & 0xF0) | ((value3 >> 2) & 0x0F)); |
416 | if (value4 != BASE64_SENTIANAL_VALUE) { |
417 | output->buffer[buffer_index] = (uint8_t)((value3 & 0x03) << 6 | value4); |
418 | } |
419 | } |
420 | } |
421 | output->len = decoded_length; |
422 | return AWS_OP_SUCCESS; |
423 | } |
424 | |