| 1 | /* functable.c -- Choose relevant optimized functions at runtime |
| 2 | * Copyright (C) 2017 Hans Kristian Rosbach |
| 3 | * For conditions of distribution and use, see copyright notice in zlib.h |
| 4 | */ |
| 5 | |
| 6 | #include "zbuild.h" |
| 7 | #include "zendian.h" |
| 8 | #include "deflate.h" |
| 9 | #include "deflate_p.h" |
| 10 | |
| 11 | #include "functable.h" |
/* insert_string: arch-specific variants. At most one of these prototypes is
 * visible, selected by the build-time feature macros below. */
#if defined(X86_SSE42_CRC_HASH)
extern Pos insert_string_sse(deflate_state *const s, const Pos str, unsigned int count);
#elif defined(ARM_ACLE_CRC_HASH)
extern Pos insert_string_acle(deflate_state *const s, const Pos str, unsigned int count);
#endif
| 18 | |
/* fill_window: arch-specific variants. X86_SSE2 takes precedence over
 * ARM_GETAUXVAL; only one prototype is ever visible. */
#ifdef X86_SSE2
extern void fill_window_sse(deflate_state *s);
#elif defined(ARM_GETAUXVAL)
extern void fill_window_arm(deflate_state *s);
#endif
| 25 | |
/* slide_hash: SSE2 variant, declared only when the build defines X86_SSE2.
 * Spelled with an explicit `extern` like the other arch-specific prototypes
 * in this file; linkage is the same either way. */
#if defined(X86_SSE2)
extern void slide_hash_sse2(deflate_state *s);
#endif
| 30 | |
/* adler32: the portable routine is always declared; the NEON routine is
 * declared only when the compiler targets NEON and the build enables
 * ARM_NEON_ADLER32. */
extern uint32_t adler32_c(uint32_t adler, const unsigned char *buf, size_t len);
#if defined(ARM_NEON_ADLER32) && (defined(__ARM_NEON__) || defined(__ARM_NEON))
extern uint32_t adler32_neon(uint32_t adler, const unsigned char *buf, size_t len);
#endif
| 36 | |
| 37 | /* CRC32 */ |
| 38 | ZLIB_INTERNAL uint32_t crc32_generic(uint32_t, const unsigned char *, uint64_t); |
| 39 | |
| 40 | #ifdef DYNAMIC_CRC_TABLE |
| 41 | extern volatile int crc_table_empty; |
| 42 | extern void make_crc_table(void); |
| 43 | #endif |
| 44 | |
| 45 | #ifdef __ARM_FEATURE_CRC32 |
| 46 | extern uint32_t crc32_acle(uint32_t, const unsigned char *, uint64_t); |
| 47 | #endif |
| 48 | |
| 49 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 50 | extern uint32_t crc32_little(uint32_t, const unsigned char *, uint64_t); |
| 51 | #elif BYTE_ORDER == BIG_ENDIAN |
| 52 | extern uint32_t crc32_big(uint32_t, const unsigned char *, uint64_t); |
| 53 | #endif |
| 54 | |
| 55 | |
| 56 | /* stub definitions */ |
| 57 | ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count); |
| 58 | ZLIB_INTERNAL void fill_window_stub(deflate_state *s); |
| 59 | ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len); |
| 60 | ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len); |
| 61 | ZLIB_INTERNAL void slide_hash_stub(deflate_state *s); |
| 62 | |
| 63 | /* functable init */ |
| 64 | ZLIB_INTERNAL __thread struct functable_s functable = { |
| 65 | fill_window_stub, |
| 66 | insert_string_stub, |
| 67 | adler32_stub, |
| 68 | crc32_stub, |
| 69 | slide_hash_stub |
| 70 | }; |
| 71 | |
| 72 | |
| 73 | /* stub functions */ |
| 74 | ZLIB_INTERNAL Pos insert_string_stub(deflate_state *const s, const Pos str, unsigned int count) { |
| 75 | // Initialize default |
| 76 | functable.insert_string=&insert_string_c; |
| 77 | |
| 78 | #ifdef X86_SSE42_CRC_HASH |
| 79 | if (x86_cpu_has_sse42) |
| 80 | functable.insert_string=&insert_string_sse; |
| 81 | #elif defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH) |
| 82 | if (arm_cpu_has_crc32) |
| 83 | functable.insert_string=&insert_string_acle; |
| 84 | #endif |
| 85 | |
| 86 | return functable.insert_string(s, str, count); |
| 87 | } |
| 88 | |
| 89 | ZLIB_INTERNAL void fill_window_stub(deflate_state *s) { |
| 90 | // Initialize default |
| 91 | functable.fill_window=&fill_window_c; |
| 92 | |
| 93 | #if defined(X86_SSE2) |
| 94 | # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) |
| 95 | if (x86_cpu_has_sse2) |
| 96 | # endif |
| 97 | functable.fill_window=&fill_window_sse; |
| 98 | #elif defined(ARM_GETAUXVAL) |
| 99 | functable.fill_window=&fill_window_arm; |
| 100 | #endif |
| 101 | |
| 102 | functable.fill_window(s); |
| 103 | } |
| 104 | |
| 105 | ZLIB_INTERNAL void slide_hash_stub(deflate_state *s) { |
| 106 | // Initialize default |
| 107 | functable.slide_hash=&slide_hash_c; |
| 108 | |
| 109 | #ifdef X86_SSE2 |
| 110 | # if !defined(__x86_64__) && !defined(_M_X64) && !defined(X86_NOCHECK_SSE2) |
| 111 | if (x86_cpu_has_sse2) |
| 112 | # endif |
| 113 | functable.slide_hash=&slide_hash_sse2; |
| 114 | #endif |
| 115 | |
| 116 | functable.slide_hash(s); |
| 117 | } |
| 118 | |
| 119 | ZLIB_INTERNAL uint32_t adler32_stub(uint32_t adler, const unsigned char *buf, size_t len) { |
| 120 | // Initialize default |
| 121 | functable.adler32=&adler32_c; |
| 122 | |
| 123 | #if (defined(__ARM_NEON__) || defined(__ARM_NEON)) && defined(ARM_NEON_ADLER32) |
| 124 | if (arm_cpu_has_neon) |
| 125 | functable.adler32=&adler32_neon; |
| 126 | #endif |
| 127 | |
| 128 | return functable.adler32(adler, buf, len); |
| 129 | } |
| 130 | |
| 131 | ZLIB_INTERNAL uint32_t crc32_stub(uint32_t crc, const unsigned char *buf, uint64_t len) { |
| 132 | |
| 133 | |
| 134 | Assert(sizeof(uint64_t) >= sizeof(size_t), |
| 135 | "crc32_z takes size_t but internally we have a uint64_t len" ); |
| 136 | /* return a function pointer for optimized arches here after a capability test */ |
| 137 | |
| 138 | #ifdef DYNAMIC_CRC_TABLE |
| 139 | if (crc_table_empty) |
| 140 | make_crc_table(); |
| 141 | #endif /* DYNAMIC_CRC_TABLE */ |
| 142 | |
| 143 | if (sizeof(void *) == sizeof(ptrdiff_t)) { |
| 144 | #if BYTE_ORDER == LITTLE_ENDIAN |
| 145 | functable.crc32=crc32_little; |
| 146 | # if defined(__ARM_FEATURE_CRC32) && defined(ARM_ACLE_CRC_HASH) |
| 147 | if (arm_cpu_has_crc32) |
| 148 | functable.crc32=crc32_acle; |
| 149 | # endif |
| 150 | #elif BYTE_ORDER == BIG_ENDIAN |
| 151 | functable.crc32=crc32_big; |
| 152 | #else |
| 153 | # error No endian defined |
| 154 | #endif |
| 155 | } else { |
| 156 | functable.crc32=crc32_generic; |
| 157 | } |
| 158 | |
| 159 | return functable.crc32(crc, buf, len); |
| 160 | } |
| 161 | |