1/* slide_neon.c -- Optimized hash table shifting for ARM with support for NEON instructions
2 * Copyright (C) 2017-2020 Mika T. Lindqvist
3 *
4 * Authors:
5 * Mika T. Lindqvist <postmaster@raasu.org>
6 * Jun He <jun.he@arm.com>
7 *
8 * For conditions of distribution and use, see copyright notice in zlib.h
9 */
10
11#if defined(ARM_NEON_SLIDEHASH)
12#ifdef _M_ARM64
13# include <arm64_neon.h>
14#else
15# include <arm_neon.h>
16#endif
17#include "../../zbuild.h"
18#include "../../deflate.h"
19
20/* SIMD version of hash_chain rebase */
21static inline void slide_hash_chain(Pos *table, unsigned int entries, uint16_t window_size) {
22 Z_REGISTER uint16x8_t v, *p;
23 Z_REGISTER size_t n;
24
25 size_t size = entries*sizeof(table[0]);
26 Assert((size % sizeof(uint16x8_t) * 8 == 0), "hash table size err");
27
28 Assert(sizeof(Pos) == 2, "Wrong Pos size");
29 v = vdupq_n_u16(p0: window_size);
30
31 p = (uint16x8_t *)table;
32 n = size / (sizeof(uint16x8_t) * 8);
33 do {
34 p[0] = vqsubq_u16(p0: p[0], p1: v);
35 p[1] = vqsubq_u16(p0: p[1], p1: v);
36 p[2] = vqsubq_u16(p0: p[2], p1: v);
37 p[3] = vqsubq_u16(p0: p[3], p1: v);
38 p[4] = vqsubq_u16(p0: p[4], p1: v);
39 p[5] = vqsubq_u16(p0: p[5], p1: v);
40 p[6] = vqsubq_u16(p0: p[6], p1: v);
41 p[7] = vqsubq_u16(p0: p[7], p1: v);
42 p += 8;
43 } while (--n);
44}
45
46Z_INTERNAL void slide_hash_neon(deflate_state *s) {
47 unsigned int wsize = s->w_size;
48
49 slide_hash_chain(table: s->head, HASH_SIZE, window_size: wsize);
50 slide_hash_chain(table: s->prev, entries: wsize, window_size: wsize);
51}
52#endif
53