1 | |
2 | #include "zbuild.h" |
3 | #include "deflate.h" |
4 | #include "functable.h" |
5 | |
6 | #ifndef MATCH_TPL_H |
7 | #define MATCH_TPL_H |
8 | |
9 | #ifdef UNALIGNED_OK |
10 | # ifdef UNALIGNED64_OK |
11 | typedef uint64_t bestcmp_t; |
12 | # else |
13 | typedef uint32_t bestcmp_t; |
14 | # endif |
15 | #else |
16 | typedef uint8_t bestcmp_t; |
17 | #endif |
18 | |
19 | #define EARLY_EXIT_TRIGGER_LEVEL 5 |
20 | |
21 | #endif |
22 | |
23 | /* Set match_start to the longest match starting at the given string and |
24 | * return its length. Matches shorter or equal to prev_length are discarded, |
25 | * in which case the result is equal to prev_length and match_start is garbage. |
26 | * |
27 | * IN assertions: cur_match is the head of the hash chain for the current |
28 | * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1 |
29 | * OUT assertion: the match length is not greater than s->lookahead |
30 | */ |
31 | Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) { |
32 | unsigned int strstart = s->strstart; |
33 | const unsigned wmask = s->w_mask; |
34 | unsigned char *window = s->window; |
35 | unsigned char *scan = window + strstart; |
36 | Z_REGISTER unsigned char *mbase_start = window; |
37 | Z_REGISTER unsigned char *mbase_end; |
38 | const Pos *prev = s->prev; |
39 | Pos limit; |
40 | int32_t early_exit; |
41 | uint32_t chain_length, nice_match, best_len, offset; |
42 | uint32_t lookahead = s->lookahead; |
43 | bestcmp_t scan_end; |
44 | #ifndef UNALIGNED_OK |
45 | bestcmp_t scan_end0; |
46 | #else |
47 | bestcmp_t scan_start; |
48 | #endif |
49 | |
50 | #define GOTO_NEXT_CHAIN \ |
51 | if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \ |
52 | continue; \ |
53 | return best_len; |
54 | |
55 | /* The code is optimized for MAX_MATCH-2 multiple of 16. */ |
56 | Assert(MAX_MATCH == 258, "Code too clever" ); |
57 | |
58 | best_len = s->prev_length ? s->prev_length : 1; |
59 | |
60 | /* Calculate read offset which should only extend an extra byte |
61 | * to find the next best match length. |
62 | */ |
63 | offset = best_len-1; |
64 | #ifdef UNALIGNED_OK |
65 | if (best_len >= sizeof(uint32_t)) { |
66 | offset -= 2; |
67 | #ifdef UNALIGNED64_OK |
68 | if (best_len >= sizeof(uint64_t)) |
69 | offset -= 4; |
70 | #endif |
71 | } |
72 | #endif |
73 | |
74 | scan_end = *(bestcmp_t *)(scan+offset); |
75 | #ifndef UNALIGNED_OK |
76 | scan_end0 = *(bestcmp_t *)(scan+offset+1); |
77 | #else |
78 | scan_start = *(bestcmp_t *)(scan); |
79 | #endif |
80 | mbase_end = (mbase_start+offset); |
81 | |
82 | /* Do not waste too much time if we already have a good match */ |
83 | chain_length = s->max_chain_length; |
84 | early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL; |
85 | if (best_len >= s->good_match) |
86 | chain_length >>= 2; |
87 | nice_match = (uint32_t)s->nice_match; |
88 | |
89 | /* Stop when cur_match becomes <= limit. To simplify the code, |
90 | * we prevent matches with the string of window index 0 |
91 | */ |
92 | limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0; |
93 | |
94 | Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead" ); |
95 | for (;;) { |
96 | if (cur_match >= strstart) |
97 | break; |
98 | |
99 | /* Skip to next match if the match length cannot increase or if the match length is |
100 | * less than 2. Note that the checks below for insufficient lookahead only occur |
101 | * occasionally for performance reasons. |
102 | * Therefore uninitialized memory will be accessed and conditional jumps will be made |
103 | * that depend on those values. However the length of the match is limited to the |
104 | * lookahead, so the output of deflate is not affected by the uninitialized values. |
105 | */ |
106 | #ifdef UNALIGNED_OK |
107 | if (best_len < sizeof(uint32_t)) { |
108 | for (;;) { |
109 | if (*(uint16_t *)(mbase_end+cur_match) == (uint16_t)scan_end && |
110 | *(uint16_t *)(mbase_start+cur_match) == (uint16_t)scan_start) |
111 | break; |
112 | GOTO_NEXT_CHAIN; |
113 | } |
114 | # ifdef UNALIGNED64_OK |
115 | } else if (best_len >= sizeof(uint64_t)) { |
116 | for (;;) { |
117 | if (*(uint64_t *)(mbase_end+cur_match) == (uint64_t)scan_end && |
118 | *(uint64_t *)(mbase_start+cur_match) == (uint64_t)scan_start) |
119 | break; |
120 | GOTO_NEXT_CHAIN; |
121 | } |
122 | # endif |
123 | } else { |
124 | for (;;) { |
125 | if (*(uint32_t *)(mbase_end+cur_match) == (uint32_t)scan_end && |
126 | *(uint32_t *)(mbase_start+cur_match) == (uint32_t)scan_start) |
127 | break; |
128 | GOTO_NEXT_CHAIN; |
129 | } |
130 | } |
131 | #else |
132 | for (;;) { |
133 | if (mbase_end[cur_match] == scan_end && mbase_end[cur_match+1] == scan_end0 && |
134 | mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1]) |
135 | break; |
136 | GOTO_NEXT_CHAIN; |
137 | } |
138 | #endif |
139 | uint32_t len = COMPARE256(src0: scan+2, src1: mbase_start+cur_match+2) + 2; |
140 | Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan" ); |
141 | |
142 | if (len > best_len) { |
143 | s->match_start = cur_match; |
144 | /* Do not look for matches beyond the end of the input. */ |
145 | if (len > lookahead) |
146 | return lookahead; |
147 | best_len = len; |
148 | if (best_len >= nice_match) |
149 | return best_len; |
150 | |
151 | offset = best_len-1; |
152 | #ifdef UNALIGNED_OK |
153 | if (best_len >= sizeof(uint32_t)) { |
154 | offset -= 2; |
155 | #ifdef UNALIGNED64_OK |
156 | if (best_len >= sizeof(uint64_t)) |
157 | offset -= 4; |
158 | #endif |
159 | } |
160 | #endif |
161 | scan_end = *(bestcmp_t *)(scan+offset); |
162 | #ifndef UNALIGNED_OK |
163 | scan_end0 = *(bestcmp_t *)(scan+offset+1); |
164 | #endif |
165 | mbase_end = (mbase_start+offset); |
166 | } else if (UNLIKELY(early_exit)) { |
167 | /* The probability of finding a match later if we here is pretty low, so for |
168 | * performance it's best to outright stop here for the lower compression levels |
169 | */ |
170 | break; |
171 | } |
172 | GOTO_NEXT_CHAIN; |
173 | } |
174 | |
175 | return best_len; |
176 | } |
177 | |
178 | #undef LONGEST_MATCH |
179 | #undef COMPARE256 |
180 | #undef COMPARE258 |
181 | |