1 | /* functable.c -- Choose relevant optimized functions at runtime |
2 | * Copyright (C) 2017 Hans Kristian Rosbach |
3 | * For conditions of distribution and use, see copyright notice in zlib.h |
4 | */ |
5 | |
6 | #include "zbuild.h" |
7 | #include "zendian.h" |
8 | #include "deflate.h" |
9 | #include "deflate_p.h" |
10 | |
11 | #include "functable.h" |
/* insert_string: optimized variants.
 * At most one prototype is visible, selected by the build-time macro. */
#if defined(X86_SSE42_CRC_HASH)
extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count);
#elif defined(ARM_ACLE_CRC_HASH)
extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count);
#endif
18 | |
/* fill_window: optimized variants.
 * At most one prototype is visible, selected by the build-time macro. */
#ifdef X86_SSE2
extern void fill_window_sse(deflate_state *s);
#elif defined(ARM_GETAUXVAL)
extern void fill_window_arm(deflate_state *s);
#endif
25 | |
/* slide_hash: optimized variant, only declared for X86_SSE2 builds. */
#if defined(X86_SSE2)
extern void slide_hash_sse2(deflate_state *s);
#endif
30 | |
/* adler32: the C version is always declared; the NEON version only when both
 * the compiler advertises NEON and the build enables ARM_NEON_ADLER32. */
extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
#if defined(ARM_NEON_ADLER32) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
#endif
36 | |
37 | /* CRC32 */ |
38 | ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t); |
39 | |
40 | #ifdef DYNAMIC_CRC_TABLE |
41 | extern volatile int crc_table_empty; |
42 | extern void make_crc_table(void); |
43 | #endif |
44 | |
45 | #ifdef __ARM_FEATURE_CRC32 |
46 | extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t); |
47 | #endif |
48 | |
49 | #if BYTE_ORDER == LITTLE_ENDIAN |
50 | extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t); |
51 | #elif BYTE_ORDER == BIG_ENDIAN |
52 | extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t); |
53 | #endif |
54 | |
55 | |
56 | /* stub definitions */ |
57 | ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count); |
58 | ZLIB_INTERNAL void fill_window_stub(deflate_state *s); |
59 | ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len); |
60 | ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len); |
61 | ZLIB_INTERNAL void slide_hash_stub(deflate_state *s); |
62 | |
63 | /* functable init */ |
64 | ZLIB_INTERNAL __thread struct functable_s functable = { |
65 | fill_window_stub, |
66 | insert_string_stub, |
67 | adler32_stub, |
68 | crc32_stub, |
69 | slide_hash_stub |
70 | }; |
71 | |
72 | |
73 | /* stub functions */ |
74 | ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count) { |
75 | // Initialize default |
76 | functable.insert_string=&insert_string_c; |
77 | |
78 | #ifdef X86_SSE42_CRC_HASH |
79 | if (x86_cpu_has_sse42) |
80 | functable.insert_string=&insert_string_sse; |
81 | #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH) |
82 | if (arm_cpu_has_crc32) |
83 | functable.insert_string=&insert_string_acle; |
84 | #endif |
85 | |
86 | return functable.insert_string(s, str, count); |
87 | } |
88 | |
89 | ZLIB_INTERNAL void fill_window_stub(deflate_state *s) { |
90 | // Initialize default |
91 | functable.fill_window=&fill_window_c; |
92 | |
93 | #if defined(X86_SSE2) |
94 | # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) |
95 | if (x86_cpu_has_sse2) |
96 | # endif |
97 | functable.fill_window=&fill_window_sse; |
98 | #elif defined(ARM_GETAUXVAL) |
99 | functable.fill_window=&fill_window_arm; |
100 | #endif |
101 | |
102 | functable.fill_window(s); |
103 | } |
104 | |
105 | ZLIB_INTERNAL void slide_hash_stub(deflate_state *s) { |
106 | // Initialize default |
107 | functable.slide_hash=&slide_hash_c; |
108 | |
109 | #ifdef X86_SSE2 |
110 | # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) |
111 | if (x86_cpu_has_sse2) |
112 | # endif |
113 | functable.slide_hash=&slide_hash_sse2; |
114 | #endif |
115 | |
116 | functable.slide_hash(s); |
117 | } |
118 | |
119 | ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) { |
120 | // Initialize default |
121 | functable.adler32=&adler32_c; |
122 | |
123 | #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32) |
124 | if (arm_cpu_has_neon) |
125 | functable.adler32=&adler32_neon; |
126 | #endif |
127 | |
128 | return functable.adler32(adler, buf, len); |
129 | } |
130 | |
131 | ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) { |
132 | |
133 | |
134 | Assert(sizeof(uint64_t) >= sizeof(size_t), |
135 | "crc32_z takes size_t but internally we have a uint64_t len" ); |
136 | /* return a function pointer for optimized arches here after a capability test */ |
137 | |
138 | #ifdef DYNAMIC_CRC_TABLE |
139 | if (crc_table_empty) |
140 | make_crc_table(); |
141 | #endif /* DYNAMIC_CRC_TABLE */ |
142 | |
143 | if (sizeof(void *) == sizeof(ptrdiff_t)) { |
144 | #if BYTE_ORDER == LITTLE_ENDIAN |
145 | functable.crc32=crc32_little; |
146 | # if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH) |
147 | if (arm_cpu_has_crc32) |
148 | functable.crc32=crc32_acle; |
149 | # endif |
150 | #elif BYTE_ORDER == BIG_ENDIAN |
151 | functable.crc32=crc32_big; |
152 | #else |
153 | # error No endian defined |
154 | #endif |
155 | } else { |
156 | functable.crc32=crc32_generic; |
157 | } |
158 | |
159 | return functable.crc32(crc, buf, len); |
160 | } |
161 | |