1/* functable.c -- Choose relevant optimized functions at runtime
2 * Copyright (C) 2017 Hans Kristian Rosbach
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "zbuild.h"
7#include "zendian.h"
8#include "deflate.h"
9#include "deflate_p.h"
10
11#include "functable.h"
/* insert_string: optional optimized variants.
 * The build macros below make at most one of these declarations visible. */
#if defined(X86_SSE42_CRC_HASH)
extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count);
#elif defined(ARM_ACLE_CRC_HASH)
extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count);
#endif
18
/* fill_window: optional optimized variants.
 * X86_SSE2 takes precedence; ARM_GETAUXVAL is only considered without it. */
#if defined(X86_SSE2)
extern void fill_window_sse(deflate_state *s);
#elif defined(ARM_GETAUXVAL)
extern void fill_window_arm(deflate_state *s);
#endif
25
/* slide_hash: optional optimized variant, declared only when X86_SSE2 is set */
#if defined(X86_SSE2)
extern void slide_hash_sse2(deflate_state *s);
#endif
30
/* adler32: the C implementation is always declared; the NEON variant only
 * when the compiler targets NEON and ARM_NEON_ADLER32 is set. */
extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
#if defined(ARM_NEON_ADLER32) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
#endif
36
37/* CRC32 */
38ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t);
39
40#ifdef DYNAMIC_CRC_TABLE
41extern volatile int crc_table_empty;
42extern void make_crc_table(void);
43#endif
44
45#ifdef __ARM_FEATURE_CRC32
46extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t);
47#endif
48
49#if BYTE_ORDER == LITTLE_ENDIAN
50extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t);
51#elif BYTE_ORDER == BIG_ENDIAN
52extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t);
53#endif
54
55
56/* stub definitions */
57ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count);
58ZLIB_INTERNAL void fill_window_stub(deflate_state *s);
59ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len);
60ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len);
61ZLIB_INTERNAL void slide_hash_stub(deflate_state *s);
62
63/* functable init */
64ZLIB_INTERNAL __thread struct functable_s functable = {
65 fill_window_stub,
66 insert_string_stub,
67 adler32_stub,
68 crc32_stub,
69 slide_hash_stub
70 };
71
72
73/* stub functions */
74ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count) {
75 // Initialize default
76 functable.insert_string=&insert_string_c;
77
78 #ifdef X86_SSE42_CRC_HASH
79 if (x86_cpu_has_sse42)
80 functable.insert_string=&insert_string_sse;
81 #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
82 if (arm_cpu_has_crc32)
83 functable.insert_string=&insert_string_acle;
84 #endif
85
86 return functable.insert_string(s, str, count);
87}
88
89ZLIB_INTERNAL void fill_window_stub(deflate_state *s) {
90 // Initialize default
91 functable.fill_window=&fill_window_c;
92
93 #if defined(X86_SSE2)
94 # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
95 if (x86_cpu_has_sse2)
96 # endif
97 functable.fill_window=&fill_window_sse;
98 #elif defined(ARM_GETAUXVAL)
99 functable.fill_window=&fill_window_arm;
100 #endif
101
102 functable.fill_window(s);
103}
104
105ZLIB_INTERNAL void slide_hash_stub(deflate_state *s) {
106 // Initialize default
107 functable.slide_hash=&slide_hash_c;
108
109 #ifdef X86_SSE2
110 # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2)
111 if (x86_cpu_has_sse2)
112 # endif
113 functable.slide_hash=&slide_hash_sse2;
114 #endif
115
116 functable.slide_hash(s);
117}
118
119ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) {
120 // Initialize default
121 functable.adler32=&adler32_c;
122
123 #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32)
124 if (arm_cpu_has_neon)
125 functable.adler32=&adler32_neon;
126 #endif
127
128 return functable.adler32(adler, buf, len);
129}
130
131ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) {
132
133
134 Assert(sizeof(uint64_t) >= sizeof(size_t),
135 "crc32_z takes size_t but internally we have a uint64_t len");
136/* return a function pointer for optimized arches here after a capability test */
137
138#ifdef DYNAMIC_CRC_TABLE
139 if (crc_table_empty)
140 make_crc_table();
141#endif /* DYNAMIC_CRC_TABLE */
142
143 if (sizeof(void *) == sizeof(ptrdiff_t)) {
144#if BYTE_ORDER == LITTLE_ENDIAN
145 functable.crc32=crc32_little;
146# if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH)
147 if (arm_cpu_has_crc32)
148 functable.crc32=crc32_acle;
149# endif
150#elif BYTE_ORDER == BIG_ENDIAN
151 functable.crc32=crc32_big;
152#else
153# error No endian defined
154#endif
155 } else {
156 functable.crc32=crc32_generic;
157 }
158
159 return functable.crc32(crc, buf, len);
160}
161