| 1 | /* chunkset_neon.c -- NEON inline functions to copy small data chunks. |
| 2 | * For conditions of distribution and use, see copyright notice in zlib.h |
| 3 | */ |
| 4 | |
| 5 | #ifdef ARM_NEON_CHUNKSET |
| 6 | #ifdef _M_ARM64 |
| 7 | # include <arm64_neon.h> |
| 8 | #else |
| 9 | # include <arm_neon.h> |
| 10 | #endif |
| 11 | #include "../../zbuild.h" |
| 12 | #include "../../zutil.h" |
| 13 | |
| 14 | typedef uint8x16_t chunk_t; |
| 15 | |
| 16 | #define CHUNK_SIZE 16 |
| 17 | |
/* Flags for the 1-, 2-, 4- and 8-byte chunkmemset helpers defined in this file.
 * NOTE(review): presumably tested by chunkset_tpl.h to select these
 * specialisations -- confirm against the template. */
#define HAVE_CHUNKMEMSET_1
#define HAVE_CHUNKMEMSET_2
#define HAVE_CHUNKMEMSET_4
#define HAVE_CHUNKMEMSET_8
| 22 | |
| 23 | static inline void chunkmemset_1(uint8_t *from, chunk_t *chunk) { |
| 24 | *chunk = vld1q_dup_u8(from); |
| 25 | } |
| 26 | |
| 27 | static inline void chunkmemset_2(uint8_t *from, chunk_t *chunk) { |
| 28 | uint16_t tmp; |
| 29 | memcpy(dest: &tmp, src: from, n: 2); |
| 30 | *chunk = vreinterpretq_u8_u16(p0: vdupq_n_u16(p0: tmp)); |
| 31 | } |
| 32 | |
| 33 | static inline void chunkmemset_4(uint8_t *from, chunk_t *chunk) { |
| 34 | uint32_t tmp; |
| 35 | memcpy(dest: &tmp, src: from, n: 4); |
| 36 | *chunk = vreinterpretq_u8_u32(p0: vdupq_n_u32(p0: tmp)); |
| 37 | } |
| 38 | |
| 39 | static inline void chunkmemset_8(uint8_t *from, chunk_t *chunk) { |
| 40 | uint64_t tmp; |
| 41 | memcpy(dest: &tmp, src: from, n: 8); |
| 42 | *chunk = vreinterpretq_u8_u64(p0: vdupq_n_u64(p0: tmp)); |
| 43 | } |
| 44 | |
/* NEON-suffixed names for the generic chunk routines.
 * NOTE(review): presumably consumed by chunkset_tpl.h (included at the end of
 * this file) to name the functions it defines -- confirm against the template. */
#define CHUNKSIZE        chunksize_neon
#define CHUNKCOPY        chunkcopy_neon
#define CHUNKCOPY_SAFE   chunkcopy_safe_neon
#define CHUNKUNROLL      chunkunroll_neon
#define CHUNKMEMSET      chunkmemset_neon
#define CHUNKMEMSET_SAFE chunkmemset_safe_neon
| 51 | |
| 52 | static inline void loadchunk(uint8_t const *s, chunk_t *chunk) { |
| 53 | *chunk = vld1q_u8(s); |
| 54 | } |
| 55 | |
| 56 | static inline void storechunk(uint8_t *out, chunk_t *chunk) { |
| 57 | vst1q_u8(out, *chunk); |
| 58 | } |
| 59 | |
| 60 | #include "chunkset_tpl.h" |
| 61 | |
| 62 | #endif |
| 63 | |