1 | #include <stdint.h> |
2 | #include <stddef.h> |
3 | #include <string.h> |
4 | |
5 | #include "../../../include/libbase64.h" |
6 | #include "../../codecs.h" |
7 | |
// Decide whether the NEON32 code paths get compiled. BASE64_USE_NEON32 is
// defined only when all of the following hold:
//   - the compiler defines __arm__;
//   - the compiler defines either spelling of the NEON feature macro;
//   - HAVE_NEON32 evaluates to nonzero (an undefined HAVE_NEON32 counts as
//     zero inside #if; it is presumably supplied by the build configuration).
// The nesting is deliberate: HAVE_NEON32 is only looked at under __arm__.
#if defined(__arm__)
#  if HAVE_NEON32 && (defined(__ARM_NEON) || defined(__ARM_NEON__))
#    define BASE64_USE_NEON32
#  endif
#endif
13 | |
14 | #ifdef BASE64_USE_NEON32 |
15 | #include <arm_neon.h> |
16 | |
// Emulation of the NEON64 `vqtbl1q_u8` intrinsic for NEON32, which only
// offers 64-bit wide lookups into a 128-bit table.
//
// lut:     128-bit lookup table.
// indices: 128 bits of byte indices into `lut`.
//
// Returns the 128-bit vector formed by looking up the low and the high half
// of `indices` separately in the same table and joining the two results.
static inline uint8x16_t
vqtbl1q_u8 (const uint8x16_t lut, const uint8x16_t indices)
{
	// vtbl2_u8 wants the 128-bit table as a pair of 64-bit halves.
	const uint8x8x2_t table = { { vget_low_u8(lut), vget_high_u8(lut) } };

	// One 64-bit wide lookup per half of the index vector.
	const uint8x8_t looked_up_lo = vtbl2_u8(table, vget_low_u8(indices));
	const uint8x8_t looked_up_hi = vtbl2_u8(table, vget_high_u8(indices));

	// Stitch the halves back together in their original order.
	return vcombine_u8(looked_up_lo, looked_up_hi);
}
33 | |
34 | #include "../generic/32/dec_loop.c" |
35 | #include "../generic/32/enc_loop.c" |
36 | #include "dec_loop.c" |
37 | #include "enc_reshuffle.c" |
38 | #include "enc_translate.c" |
39 | #include "enc_loop.c" |
40 | |
41 | #endif // BASE64_USE_NEON32 |
42 | |
43 | // Stride size is so large on these NEON 32-bit functions |
44 | // (48 bytes encode, 32 bytes decode) that we inline the |
45 | // uint32 codec to stay performant on smaller inputs. |
46 | |
47 | BASE64_ENC_FUNCTION(neon32) |
48 | { |
49 | #ifdef BASE64_USE_NEON32 |
50 | #include "../generic/enc_head.c" |
51 | enc_loop_neon32(&s, &slen, &o, &olen); |
52 | enc_loop_generic_32(&s, &slen, &o, &olen); |
53 | #include "../generic/enc_tail.c" |
54 | #else |
55 | BASE64_ENC_STUB |
56 | #endif |
57 | } |
58 | |
59 | BASE64_DEC_FUNCTION(neon32) |
60 | { |
61 | #ifdef BASE64_USE_NEON32 |
62 | #include "../generic/dec_head.c" |
63 | dec_loop_neon32(&s, &slen, &o, &olen); |
64 | dec_loop_generic_32(&s, &slen, &o, &olen); |
65 | #include "../generic/dec_tail.c" |
66 | #else |
67 | BASE64_DEC_STUB |
68 | #endif |
69 | } |
70 | |