/*
 * SSE optimized hash slide
 *
 * Copyright (C) 2017 Intel Corporation
 * Authors:
 * Arjan van de Ven <arjan@linux.intel.com>
 * Jim Kukunas <james.t.kukunas@linux.intel.com>
 *
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
#include "../../zbuild.h"
#include "../../deflate.h"

#include <immintrin.h>

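/*
 * slide_hash_sse2() -- when the deflate window slides forward by w_size
 * bytes, every position stored in the head[] and prev[] hash tables must
 * be reduced by w_size. Entries that would go negative (positions now
 * outside the window) are clamped to zero by the unsigned saturating
 * subtraction below. Eight 16-bit entries are updated per iteration,
 * so the loops assume hash_size and w_size are multiples of eight.
 */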
ZLIB_INTERNAL void slide_hash_sse2(deflate_state *s) {
    Pos *p;
    unsigned n;
    unsigned wsize = s->w_size;
    const __m128i xmm_wsize = _mm_set1_epi16((short)s->w_size);

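    /* Slide the hash-chain heads: walk head[] backwards, 8 entries at a time. */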
    n = s->hash_size;
    p = &s->head[n] - 8;
    do {
        __m128i value, result;

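        /* Load 8 positions, subtract wsize with unsigned saturation
         * (entries smaller than wsize clamp to zero), and store back. */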
        value = _mm_loadu_si128((__m128i *)p);
        result = _mm_subs_epu16(value, xmm_wsize);
        _mm_storeu_si128((__m128i *)p, result);
        p -= 8;
        n -= 8;
    } while (n > 0);

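    /* Slide the prev[] chain links over the window with the same saturating update. */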
    n = wsize;
    p = &s->prev[n] - 8;
    do {
        __m128i value, result;

        value = _mm_loadu_si128((__m128i *)p);
        result = _mm_subs_epu16(value, xmm_wsize);
        _mm_storeu_si128((__m128i *)p, result);

        p -= 8;
        n -= 8;
    } while (n > 0);
}