1/* insert_string.h
2 *
3 * Copyright 2019 The Chromium Authors. All rights reserved.
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the Chromium source repository LICENSE file.
6 */
7
/* INLINE is the spelling of the inline specifier used by the helpers in this
 * header: the standard keyword everywhere except under MSVC, which gets its
 * __inline extension keyword instead.
 */
#if !defined(_MSC_VER)
#define INLINE inline
#else
#define INLINE __inline
#endif
13
#include <string.h>

#include "cpu_features.h"
15
// clang-format off
/* Pick the hardware CRC instruction used by the SIMD string hasher.
 *
 * When CRC32_SIMD_SSE42_PCLMUL or CRC32_ARMV8_CRC32 is set by the build, two
 * macros are defined:
 *   _cpu_crc32_u32       the 32-bit CRC intrinsic to call
 *   TARGET_CPU_WITH_CRC  function attribute that enables the instruction for
 *                        a single function (empty on x86 compilers other
 *                        than clang/GCC)
 * When neither build flag is set, neither macro is defined.
 */
#ifdef CRC32_SIMD_SSE42_PCLMUL
  #include <smmintrin.h>  /* Required to make MSVC bot build pass. */

  #define _cpu_crc32_u32 _mm_crc32_u32

  #if !defined(__clang__) && !defined(__GNUC__)
    #define TARGET_CPU_WITH_CRC
  #else
    #define TARGET_CPU_WITH_CRC __attribute__((target("sse4.2")))
  #endif

#elif defined(CRC32_ARMV8_CRC32)
  /* Under clang, map the ACLE intrinsic name onto the compiler builtin. */
  #ifdef __clang__
    #define __crc32cw __builtin_arm_crc32cw
  #endif

  #define _cpu_crc32_u32 __crc32cw

  #ifndef __aarch64__
    #define TARGET_CPU_WITH_CRC __attribute__((target("armv8-a,crc")))
  #else  /* defined(__aarch64__) */
    #define TARGET_CPU_WITH_CRC __attribute__((target("crc")))
  #endif

#endif
// clang-format on
43
44#if defined(TARGET_CPU_WITH_CRC)
45
46TARGET_CPU_WITH_CRC
47local INLINE Pos insert_string_simd(deflate_state* const s, const Pos str) {
48 Pos ret;
49 unsigned *ip, val, h = 0;
50
51 ip = (unsigned*)&s->window[str];
52 val = *ip;
53
54 if (s->level >= 6)
55 val &= 0xFFFFFF;
56
57 /* Unlike the case of data integrity checks for GZIP format where the
58 * polynomial used is defined (https://tools.ietf.org/html/rfc1952#page-11),
59 * here it is just a hash function for the hash table used while
60 * performing compression.
61 */
62 h = _cpu_crc32_u32(h, val);
63
64 ret = s->head[h & s->hash_mask];
65 s->head[h & s->hash_mask] = str;
66 s->prev[str & s->w_mask] = ret;
67 return ret;
68}
69
70#endif // TARGET_CPU_WITH_CRC
71
/* ===========================================================================
 * Update a hash value with the given input byte
 * IN  assertion: all calls to UPDATE_HASH are made with consecutive input
 *    characters, so that a running hash key can be computed from the previous
 *    key instead of complete recalculation each time.
 *
 * s is a pointer expression to a state with hash_shift and hash_mask members,
 * h is the lvalue holding the running hash (updated in place) and c is the
 * next input byte. Every argument is parenthesized in the expansion so that
 * expressions such as &state can be passed safely; s and h each appear twice,
 * so they must be free of side effects.
 */
#define UPDATE_HASH(s, h, c) \
  ((h) = (((h) << (s)->hash_shift) ^ (c)) & (s)->hash_mask)
79
80/* ===========================================================================
81 * Insert string str in the dictionary and set match_head to the previous head
82 * of the hash chain (the most recent string with same hash key). Return
83 * the previous length of the hash chain.
84 * If this file is compiled with -DFASTEST, the compression level is forced
85 * to 1, and no hash chains are maintained.
86 * IN assertion: all calls to INSERT_STRING are made with consecutive input
87 * characters and the first MIN_MATCH bytes of str are valid (except for
88 * the last MIN_MATCH-1 bytes of the input file).
89 */
90local INLINE Pos insert_string_c(deflate_state* const s, const Pos str) {
91 Pos ret;
92
93 UPDATE_HASH(s, s->ins_h, s->window[(str) + (MIN_MATCH - 1)]);
94#ifdef FASTEST
95 ret = s->head[s->ins_h];
96#else
97 ret = s->prev[str & s->w_mask] = s->head[s->ins_h];
98#endif
99 s->head[s->ins_h] = str;
100
101 return ret;
102}
103
104local INLINE Pos insert_string(deflate_state* const s, const Pos str) {
105/* insert_string_simd string dictionary insertion: this SIMD symbol hashing
106 * significantly improves data compression speed.
107 *
108 * Note: the generated compressed output is a valid DEFLATE stream but will
109 * differ from vanilla zlib output ...
110 */
111#if defined(CHROMIUM_ZLIB_NO_CASTAGNOLI)
112/* ... so this build-time option can used to disable the SIMD symbol hasher
113 * if matching vanilla zlib DEFLATE output is required.
114 */ (;) /* FALLTHOUGH */
115#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_SIMD_SSE42_PCLMUL)
116 if (x86_cpu_enable_simd)
117 return insert_string_simd(s, str);
118#elif defined(TARGET_CPU_WITH_CRC) && defined(CRC32_ARMV8_CRC32)
119 if (arm_cpu_enable_crc32)
120 return insert_string_simd(s, str);
121#endif
122 return insert_string_c(s, str);
123}
124