1#include <stdint.h>
2#include <stddef.h>
3#include <string.h>
4
5#include "../../../include/libbase64.h"
6#include "../../codecs.h"
7
8#ifdef __arm__
9# if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && HAVE_NEON32
10# define BASE64_USE_NEON32
11# endif
12#endif
13
14#ifdef BASE64_USE_NEON32
15#include <arm_neon.h>
16
static inline uint8x16_t
vqtbl1q_u8 (const uint8x16_t lut, const uint8x16_t indices)
{
	// Stand-in for the NEON64 intrinsic of the same name. On NEON32 a
	// table lookup only yields 64 bits at a time, so the 128-bit table is
	// presented as a pair of 64-bit halves and queried twice: once with
	// the low half of the indices and once with the high half.
	uint8x8x2_t table;

	table.val[0] = vget_low_u8(lut);
	table.val[1] = vget_high_u8(lut);

	// Glue the two 64-bit lookup results back into one 128-bit vector,
	// low half first.
	return vcombine_u8(
		vtbl2_u8(table, vget_low_u8(indices)),
		vtbl2_u8(table, vget_high_u8(indices)));
}
33
34#include "../generic/32/dec_loop.c"
35#include "../generic/32/enc_loop.c"
36#include "dec_loop.c"
37#include "enc_reshuffle.c"
38#include "enc_translate.c"
39#include "enc_loop.c"
40
41#endif // BASE64_USE_NEON32
42
43// Stride size is so large on these NEON 32-bit functions
44// (48 bytes encode, 32 bytes decode) that we inline the
45// uint32 codec to stay performant on smaller inputs.
46
47BASE64_ENC_FUNCTION(neon32)
48{
49#ifdef BASE64_USE_NEON32
50 #include "../generic/enc_head.c"
51 enc_loop_neon32(&s, &slen, &o, &olen);
52 enc_loop_generic_32(&s, &slen, &o, &olen);
53 #include "../generic/enc_tail.c"
54#else
55 BASE64_ENC_STUB
56#endif
57}
58
59BASE64_DEC_FUNCTION(neon32)
60{
61#ifdef BASE64_USE_NEON32
62 #include "../generic/dec_head.c"
63 dec_loop_neon32(&s, &slen, &o, &olen);
64 dec_loop_generic_32(&s, &slen, &o, &olen);
65 #include "../generic/dec_tail.c"
66#else
67 BASE64_DEC_STUB
68#endif
69}
70