1 | static inline void |
2 | enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o) |
3 | { |
4 | // Load input: |
5 | __m128i str = _mm_loadu_si128((__m128i *) *s); |
6 | |
7 | // Reshuffle: |
8 | str = enc_reshuffle(str); |
9 | |
10 | // Translate reshuffled bytes to the Base64 alphabet: |
11 | str = enc_translate(str); |
12 | |
13 | // Store: |
14 | _mm_storeu_si128((__m128i *) *o, str); |
15 | |
16 | *s += 12; |
17 | *o += 16; |
18 | } |
19 | |
// Run enc_loop_ssse3_inner() over as many 12-byte input blocks as can be
// processed safely, advancing *s and *o and updating the length counters.
//
//   s, slen: input cursor and number of input bytes still to be consumed.
//   o, olen: output cursor and number of output bytes produced so far.
//
// Does nothing when fewer than 16 input bytes are available. Otherwise
// *slen is decreased by 12 and *olen increased by 16 for every block
// handled; whatever remains in *slen is left for the caller.
static inline void
enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// A single round already loads 16 bytes, so anything shorter than
	// that cannot be handled here:
	if (*slen < 16) {
		return;
	}

	// Each round consumes 12 bytes but loads 16. Keep 4 bytes in reserve
	// so that the load of the final round stays within the input buffer.
	// Thanks to the guard above this is always at least 1.
	size_t pending = (*slen - 4) / 12;

	// Book the whole batch up front; the loops below only move pointers.
	*olen += pending * 16;	// 16 bytes produced per round
	*slen -= pending * 12;	// 12 bytes consumed per round

	// Manually unrolled: drain in groups of eight for as long as
	// possible...
	while (pending >= 8) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		pending -= 8;
	}

	// ...then mop up the remaining 0..7 rounds as at most one group of
	// four, one group of two and one single round.
	if (pending >= 4) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		pending -= 4;
	}

	if (pending >= 2) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		pending -= 2;
	}

	if (pending >= 1) {
		enc_loop_ssse3_inner(s, o);
	}
}
68 | |