1static inline void
2enc_loop_ssse3_inner (const uint8_t **s, uint8_t **o)
3{
4 // Load input:
5 __m128i str = _mm_loadu_si128((__m128i *) *s);
6
7 // Reshuffle:
8 str = enc_reshuffle(str);
9
10 // Translate reshuffled bytes to the Base64 alphabet:
11 str = enc_translate(str);
12
13 // Store:
14 _mm_storeu_si128((__m128i *) *o, str);
15
16 *s += 12;
17 *o += 16;
18}
19
// Encode as many 12-byte input blocks as can be handled safely, using
// enc_loop_ssse3_inner() for each block.
//
// s:    in/out input cursor; advanced by 12 bytes per round.
// slen: in/out count of remaining input bytes; reduced by 12 per round.
// o:    in/out output cursor; advanced by 16 bytes per round.
// olen: in/out count of output bytes; increased by 16 per round.
//
// Does nothing when fewer than 16 input bytes are available.
static inline void
enc_loop_ssse3 (const uint8_t **s, size_t *slen, uint8_t **o, size_t *olen)
{
	// A single round already loads 16 bytes, so anything shorter cannot
	// be handled here:
	if (*slen < 16) {
		return;
	}

	// Each round consumes 12 bytes but loads 16. Keep at least 4 bytes
	// in reserve after the last round, so that the final load does not
	// read beyond the bounds of the input buffer:
	size_t todo = (*slen - 4) / 12;

	// Do the length bookkeeping up front; the rounds below only move the
	// cursors:
	*slen -= todo * 12;	// 12 bytes consumed per round
	*olen += todo * 16;	// 16 bytes produced per round

	// Bulk of the work, in unrolled groups of eight rounds:
	while (todo >= 8) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		todo -= 8;
	}

	// Fewer than eight rounds remain; finish with at most one group of
	// four, one group of two, and one single round:
	if (todo >= 4) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		todo -= 4;
	}
	if (todo >= 2) {
		enc_loop_ssse3_inner(s, o);
		enc_loop_ssse3_inner(s, o);
		todo -= 2;
	}
	if (todo > 0) {
		enc_loop_ssse3_inner(s, o);
	}
}
68