1 | #include <stdint.h> |
2 | #include <stddef.h> |
3 | #include <string.h> |
4 | |
5 | #include "../../../include/libbase64.h" |
6 | #include "../../codecs.h" |
7 | |
// Feature gate for the NEON64 codec. BASE64_USE_NEON64 is defined only when
// the target is AArch64, the compiler advertises NEON (either spelling of
// the feature macro is accepted), and HAVE_NEON64 evaluates to nonzero.
// NOTE(review): HAVE_NEON64 is presumably supplied by the build
// configuration; it is not defined in this file.
#if defined(__aarch64__)
#  if HAVE_NEON64 && (defined(__ARM_NEON) || defined(__ARM_NEON__))
#    define BASE64_USE_NEON64
#  endif
#endif
13 | |
14 | #ifdef BASE64_USE_NEON64 |
15 | #include <arm_neon.h> |
16 | |
17 | static inline uint8x16x4_t |
18 | load_64byte_table (const uint8_t *p) |
19 | { |
20 | #if defined(__GNUC__) && !defined(__clang__) |
21 | // As of October 2016, GCC does not support the 'vld1q_u8_x4()' intrinsic. |
22 | uint8x16x4_t ret; |
23 | ret.val[0] = vld1q_u8(p + 0); |
24 | ret.val[1] = vld1q_u8(p + 16); |
25 | ret.val[2] = vld1q_u8(p + 32); |
26 | ret.val[3] = vld1q_u8(p + 48); |
27 | return ret; |
28 | #else |
29 | return vld1q_u8_x4(p); |
30 | #endif |
31 | } |
32 | |
33 | #include "../generic/32/dec_loop.c" |
34 | #include "../generic/64/enc_loop.c" |
35 | #include "dec_loop.c" |
36 | #include "enc_reshuffle.c" |
37 | #include "enc_loop.c" |
38 | |
39 | #endif // BASE64_USE_NEON64 |
40 | |
// The stride of these 64-bit NEON functions is so large (48 bytes when
// encoding, 64 bytes when decoding) that we also inline the generic codec
// loops (64-bit encode, 32-bit decode) to stay performant on smaller inputs.
44 | |
45 | BASE64_ENC_FUNCTION(neon64) |
46 | { |
47 | #ifdef BASE64_USE_NEON64 |
48 | #include "../generic/enc_head.c" |
49 | enc_loop_neon64(&s, &slen, &o, &olen); |
50 | enc_loop_generic_64(&s, &slen, &o, &olen); |
51 | #include "../generic/enc_tail.c" |
52 | #else |
53 | BASE64_ENC_STUB |
54 | #endif |
55 | } |
56 | |
57 | BASE64_DEC_FUNCTION(neon64) |
58 | { |
59 | #ifdef BASE64_USE_NEON64 |
60 | #include "../generic/dec_head.c" |
61 | dec_loop_neon64(&s, &slen, &o, &olen); |
62 | dec_loop_generic_32(&s, &slen, &o, &olen); |
63 | #include "../generic/dec_tail.c" |
64 | #else |
65 | BASE64_DEC_STUB |
66 | #endif |
67 | } |
68 | |