1 | /* |
2 | * SSE optimized hash slide |
3 | * |
4 | * Copyright (C) 2017 Intel Corporation |
5 | * Authors: |
6 | * Arjan van de Ven <arjan@linux.intel.com> |
7 | * Jim Kukunas <james.t.kukunas@linux.intel.com> |
8 | * |
9 | * For conditions of distribution and use, see copyright notice in zlib.h |
10 | */ |
11 | #include "../../zbuild.h" |
12 | #include "../../deflate.h" |
13 | |
14 | #include <immintrin.h> |
15 | |
16 | ZLIB_INTERNAL void slide_hash_sse2(deflate_state *s) { |
17 | Pos *p; |
18 | unsigned n; |
19 | unsigned wsize = s->w_size; |
20 | const __m128i xmm_wsize = _mm_set1_epi16(s->w_size); |
21 | |
22 | n = s->hash_size; |
23 | p = &s->head[n] - 8; |
24 | do { |
25 | __m128i value, result; |
26 | |
27 | value = _mm_loadu_si128((__m128i *)p); |
28 | result= _mm_subs_epu16(value, xmm_wsize); |
29 | _mm_storeu_si128((__m128i *)p, result); |
30 | p -= 8; |
31 | n -= 8; |
32 | } while (n > 0); |
33 | |
34 | n = wsize; |
35 | p = &s->prev[n] - 8; |
36 | do { |
37 | __m128i value, result; |
38 | |
39 | value = _mm_loadu_si128((__m128i *)p); |
40 | result= _mm_subs_epu16(value, xmm_wsize); |
41 | _mm_storeu_si128((__m128i *)p, result); |
42 | |
43 | p -= 8; |
44 | n -= 8; |
45 | } while (n > 0); |
46 | } |
47 | |