1 | /* deflate_medium.c -- The deflate_medium deflate strategy |
2 | * |
3 | * Copyright (C) 2013 Intel Corporation. All rights reserved. |
4 | * Authors: |
5 | * Arjan van de Ven <arjan@linux.intel.com> |
6 | * |
7 | * For conditions of distribution and use, see copyright notice in zlib.h |
8 | */ |
9 | #ifndef NO_MEDIUM_STRATEGY |
10 | #include "zbuild.h" |
11 | #include "deflate.h" |
12 | #include "deflate_p.h" |
13 | #include "match_p.h" |
14 | #include "functable.h" |
15 | |
/* One candidate step of the medium strategy: either a back-reference
 * (match_length >= MIN_MATCH) or a short run that emit_match() writes
 * out as individual literals.
 */
struct match {
    unsigned int match_start;   /* window index where the earlier copy begins */
    unsigned int match_length;  /* number of bytes this step covers */
    unsigned int strstart;      /* window index where this step begins */
    unsigned int orgstart;      /* lowest window index insert_match() will hash for this step */
};

/* Distance bound used in this file: hash heads further back than this are
 * not searched, and fizzle_matches() will not shift a match further left
 * than this.
 */
#define MAX_DIST2  ((1 << MAX_WBITS) - MIN_LOOKAHEAD)
24 | |
25 | static int tr_tally_dist(deflate_state *s, int distance, int length) { |
26 | return zng_tr_tally(s, distance, length); |
27 | } |
28 | |
29 | static int tr_tally_lit(deflate_state *s, int c) { |
30 | return zng_tr_tally(s, 0, c); |
31 | } |
32 | |
33 | static int emit_match(deflate_state *s, struct match match) { |
34 | int flush = 0; |
35 | |
36 | /* matches that are not long enough we need to emit as literals */ |
37 | if (match.match_length < MIN_MATCH) { |
38 | while (match.match_length) { |
39 | flush += tr_tally_lit(s, s->window[match.strstart]); |
40 | s->lookahead--; |
41 | match.strstart++; |
42 | match.match_length--; |
43 | } |
44 | return flush; |
45 | } |
46 | |
47 | check_match(s, match.strstart, match.match_start, match.match_length); |
48 | |
49 | flush += tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH); |
50 | |
51 | s->lookahead -= match.match_length; |
52 | return flush; |
53 | } |
54 | |
55 | static void insert_match(deflate_state *s, struct match match) { |
56 | if (UNLIKELY(s->lookahead <= match.match_length + MIN_MATCH)) |
57 | return; |
58 | |
59 | /* matches that are not long enough we need to emit as literals */ |
60 | if (match.match_length < MIN_MATCH) { |
61 | #ifdef NOT_TWEAK_COMPILER |
62 | while (match.match_length) { |
63 | match.strstart++; |
64 | match.match_length--; |
65 | |
66 | if (match.match_length) { |
67 | if (match.strstart >= match.orgstart) { |
68 | functable.insert_string(s, match.strstart, 1); |
69 | } |
70 | } |
71 | } |
72 | #else |
73 | match.strstart++; |
74 | match.match_length--; |
75 | if (match.match_length > 0) { |
76 | if (match.strstart >= match.orgstart) { |
77 | if (match.strstart + match.match_length - 1 >= match.orgstart) { |
78 | functable.insert_string(s, match.strstart, match.match_length); |
79 | } else { |
80 | functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); |
81 | } |
82 | match.strstart += match.match_length; |
83 | match.match_length = 0; |
84 | } |
85 | } |
86 | #endif |
87 | return; |
88 | } |
89 | |
90 | /* Insert new strings in the hash table only if the match length |
91 | * is not too large. This saves time but degrades compression. |
92 | */ |
93 | if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) { |
94 | match.match_length--; /* string at strstart already in table */ |
95 | match.strstart++; |
96 | #ifdef NOT_TWEAK_COMPILER |
97 | do { |
98 | if (LIKELY(match.strstart >= match.orgstart)) { |
99 | functable.insert_string(s, match.strstart, 1); |
100 | } |
101 | match.strstart++; |
102 | /* strstart never exceeds WSIZE-MAX_MATCH, so there are |
103 | * always MIN_MATCH bytes ahead. |
104 | */ |
105 | } while (--match.match_length != 0); |
106 | #else |
107 | if (LIKELY(match.strstart >= match.orgstart)) { |
108 | if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) { |
109 | functable.insert_string(s, match.strstart, match.match_length); |
110 | } else { |
111 | functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); |
112 | } |
113 | } else if (match.orgstart < match.strstart + match.match_length) { |
114 | functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart); |
115 | } |
116 | match.strstart += match.match_length; |
117 | match.match_length = 0; |
118 | #endif |
119 | } else { |
120 | match.strstart += match.match_length; |
121 | match.match_length = 0; |
122 | s->ins_h = s->window[match.strstart]; |
123 | if (match.strstart >= (MIN_MATCH - 2)) |
124 | #ifndef NOT_TWEAK_COMPILER |
125 | functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2); |
126 | #else |
127 | functable.insert_string(s, match.strstart + 2 - MIN_MATCH, 1); |
128 | #if MIN_MATCH != 3 |
129 | #warning Call insert_string() MIN_MATCH-3 more times |
130 | #endif |
131 | #endif |
132 | /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not |
133 | * matter since it will be recomputed at next deflate call. |
134 | */ |
135 | } |
136 | } |
137 | |
138 | static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) { |
139 | IPos limit; |
140 | unsigned char *match, *orig; |
141 | int changed = 0; |
142 | struct match c, n; |
143 | /* step zero: sanity checks */ |
144 | |
145 | if (current->match_length <= 1) |
146 | return; |
147 | |
148 | if (UNLIKELY(current->match_length > 1 + next->match_start)) |
149 | return; |
150 | |
151 | if (UNLIKELY(current->match_length > 1 + next->strstart)) |
152 | return; |
153 | |
154 | match = s->window - current->match_length + 1 + next->match_start; |
155 | orig = s->window - current->match_length + 1 + next->strstart; |
156 | |
157 | /* quick exit check.. if this fails then don't bother with anything else */ |
158 | if (LIKELY(*match != *orig)) |
159 | return; |
160 | |
161 | c = *current; |
162 | n = *next; |
163 | |
164 | /* step one: try to move the "next" match to the left as much as possible */ |
165 | limit = next->strstart > MAX_DIST2 ? next->strstart - MAX_DIST2 : 0; |
166 | |
167 | match = s->window + n.match_start - 1; |
168 | orig = s->window + n.strstart - 1; |
169 | |
170 | while (*match == *orig) { |
171 | if (c.match_length < 1) |
172 | break; |
173 | if (n.strstart <= limit) |
174 | break; |
175 | if (n.match_length >= 256) |
176 | break; |
177 | if (n.match_start <= 1) |
178 | break; |
179 | |
180 | n.strstart--; |
181 | n.match_start--; |
182 | n.match_length++; |
183 | c.match_length--; |
184 | match--; |
185 | orig--; |
186 | changed++; |
187 | } |
188 | |
189 | if (!changed) |
190 | return; |
191 | |
192 | if (c.match_length <= 1 && n.match_length != 2) { |
193 | n.orgstart++; |
194 | *current = c; |
195 | *next = n; |
196 | } else { |
197 | return; |
198 | } |
199 | } |
200 | |
201 | ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { |
202 | struct match current_match, next_match; |
203 | |
204 | memset(¤t_match, 0, sizeof(struct match)); |
205 | memset(&next_match, 0, sizeof(struct match)); |
206 | |
207 | for (;;) { |
208 | IPos hash_head = 0; /* head of the hash chain */ |
209 | int bflush; /* set if current block must be flushed */ |
210 | |
211 | /* Make sure that we always have enough lookahead, except |
212 | * at the end of the input file. We need MAX_MATCH bytes |
213 | * for the next match, plus MIN_MATCH bytes to insert the |
214 | * string following the next current_match. |
215 | */ |
216 | if (s->lookahead < MIN_LOOKAHEAD) { |
217 | functable.fill_window(s); |
218 | if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { |
219 | return need_more; |
220 | } |
221 | if (s->lookahead == 0) |
222 | break; /* flush the current block */ |
223 | next_match.match_length = 0; |
224 | } |
225 | s->prev_length = 2; |
226 | |
227 | /* Insert the string window[strstart .. strstart+2] in the |
228 | * dictionary, and set hash_head to the head of the hash chain: |
229 | */ |
230 | |
231 | /* If we already have a future match from a previous round, just use that */ |
232 | if (next_match.match_length > 0) { |
233 | current_match = next_match; |
234 | next_match.match_length = 0; |
235 | |
236 | } else { |
237 | hash_head = 0; |
238 | if (s->lookahead >= MIN_MATCH) { |
239 | hash_head = functable.insert_string(s, s->strstart, 1); |
240 | } |
241 | |
242 | /* set up the initial match to be a 1 byte literal */ |
243 | current_match.match_start = 0; |
244 | current_match.match_length = 1; |
245 | current_match.strstart = s->strstart; |
246 | current_match.orgstart = current_match.strstart; |
247 | |
248 | /* Find the longest match, discarding those <= prev_length. |
249 | * At this point we have always match_length < MIN_MATCH |
250 | */ |
251 | |
252 | if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) { |
253 | /* To simplify the code, we prevent matches with the string |
254 | * of window index 0 (in particular we have to avoid a match |
255 | * of the string with itself at the start of the input file). |
256 | */ |
257 | current_match.match_length = longest_match(s, hash_head); |
258 | current_match.match_start = s->match_start; |
259 | if (current_match.match_length < MIN_MATCH) |
260 | current_match.match_length = 1; |
261 | if (current_match.match_start >= current_match.strstart) { |
262 | /* this can happen due to some restarts */ |
263 | current_match.match_length = 1; |
264 | } |
265 | } |
266 | } |
267 | |
268 | insert_match(s, current_match); |
269 | |
270 | /* now, look ahead one */ |
271 | if (s->lookahead > MIN_LOOKAHEAD && (current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD)) { |
272 | s->strstart = current_match.strstart + current_match.match_length; |
273 | hash_head = functable.insert_string(s, s->strstart, 1); |
274 | |
275 | /* set up the initial match to be a 1 byte literal */ |
276 | next_match.match_start = 0; |
277 | next_match.match_length = 1; |
278 | next_match.strstart = s->strstart; |
279 | next_match.orgstart = next_match.strstart; |
280 | |
281 | /* Find the longest match, discarding those <= prev_length. |
282 | * At this point we have always match_length < MIN_MATCH |
283 | */ |
284 | if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) { |
285 | /* To simplify the code, we prevent matches with the string |
286 | * of window index 0 (in particular we have to avoid a match |
287 | * of the string with itself at the start of the input file). |
288 | */ |
289 | next_match.match_length = longest_match(s, hash_head); |
290 | next_match.match_start = s->match_start; |
291 | if (next_match.match_start >= next_match.strstart) { |
292 | /* this can happen due to some restarts */ |
293 | next_match.match_length = 1; |
294 | } |
295 | if (next_match.match_length < MIN_MATCH) |
296 | next_match.match_length = 1; |
297 | else |
298 | fizzle_matches(s, ¤t_match, &next_match); |
299 | } |
300 | |
301 | /* short matches with a very long distance are rarely a good idea encoding wise */ |
302 | if (next_match.match_length == 3 && (next_match.strstart - next_match.match_start) > 12000) |
303 | next_match.match_length = 1; |
304 | s->strstart = current_match.strstart; |
305 | |
306 | } else { |
307 | next_match.match_length = 0; |
308 | } |
309 | |
310 | /* now emit the current match */ |
311 | bflush = emit_match(s, current_match); |
312 | |
313 | /* move the "cursor" forward */ |
314 | s->strstart += current_match.match_length; |
315 | |
316 | if (bflush) |
317 | FLUSH_BLOCK(s, 0); |
318 | } |
319 | s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; |
320 | if (flush == Z_FINISH) { |
321 | FLUSH_BLOCK(s, 1); |
322 | return finish_done; |
323 | } |
324 | if (s->sym_next) |
325 | FLUSH_BLOCK(s, 0); |
326 | |
327 | return block_done; |
328 | } |
329 | #endif |
330 | |