1/* deflate_medium.c -- The deflate_medium deflate strategy
2 *
3 * Copyright (C) 2013 Intel Corporation. All rights reserved.
4 * Authors:
5 * Arjan van de Ven <arjan@linux.intel.com>
6 *
7 * For conditions of distribution and use, see copyright notice in zlib.h
8 */
9#ifndef NO_MEDIUM_STRATEGY
10#include "zbuild.h"
11#include "deflate.h"
12#include "deflate_p.h"
13#include "match_p.h"
14#include "functable.h"
15
/* One pending output item of the medium strategy: either a run of literals
 * (match_length below MIN_MATCH) or a back-reference into the window.
 * deflate_medium() keeps two of these, the item being emitted and the one
 * found by looking ahead.
 */
struct match {
    unsigned match_start;   /* window index where the earlier copy begins */
    unsigned match_length;  /* bytes covered; 0 means "no item pending" */
    unsigned strstart;      /* window index of the first byte this item covers */
    unsigned orgstart;      /* positions below this index are skipped by insert_match() */
};
22
/* Constant distance bound: 2^MAX_WBITS minus MIN_LOOKAHEAD. It is built from
 * compile-time constants only, so it does not vary with the deflate_state
 * (and therefore not with the window size) in use.
 */
#define MAX_DIST2 ((1 << MAX_WBITS) - MIN_LOOKAHEAD)
24
25static int tr_tally_dist(deflate_state *s, int distance, int length) {
26 return zng_tr_tally(s, distance, length);
27}
28
29static int tr_tally_lit(deflate_state *s, int c) {
30 return zng_tr_tally(s, 0, c);
31}
32
33static int emit_match(deflate_state *s, struct match match) {
34 int flush = 0;
35
36 /* matches that are not long enough we need to emit as literals */
37 if (match.match_length < MIN_MATCH) {
38 while (match.match_length) {
39 flush += tr_tally_lit(s, s->window[match.strstart]);
40 s->lookahead--;
41 match.strstart++;
42 match.match_length--;
43 }
44 return flush;
45 }
46
47 check_match(s, match.strstart, match.match_start, match.match_length);
48
49 flush += tr_tally_dist(s, match.strstart - match.match_start, match.match_length - MIN_MATCH);
50
51 s->lookahead -= match.match_length;
52 return flush;
53}
54
55static void insert_match(deflate_state *s, struct match match) {
56 if (UNLIKELY(s->lookahead <= match.match_length + MIN_MATCH))
57 return;
58
59 /* matches that are not long enough we need to emit as literals */
60 if (match.match_length < MIN_MATCH) {
61#ifdef NOT_TWEAK_COMPILER
62 while (match.match_length) {
63 match.strstart++;
64 match.match_length--;
65
66 if (match.match_length) {
67 if (match.strstart >= match.orgstart) {
68 functable.insert_string(s, match.strstart, 1);
69 }
70 }
71 }
72#else
73 match.strstart++;
74 match.match_length--;
75 if (match.match_length > 0) {
76 if (match.strstart >= match.orgstart) {
77 if (match.strstart + match.match_length - 1 >= match.orgstart) {
78 functable.insert_string(s, match.strstart, match.match_length);
79 } else {
80 functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
81 }
82 match.strstart += match.match_length;
83 match.match_length = 0;
84 }
85 }
86#endif
87 return;
88 }
89
90 /* Insert new strings in the hash table only if the match length
91 * is not too large. This saves time but degrades compression.
92 */
93 if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) {
94 match.match_length--; /* string at strstart already in table */
95 match.strstart++;
96#ifdef NOT_TWEAK_COMPILER
97 do {
98 if (LIKELY(match.strstart >= match.orgstart)) {
99 functable.insert_string(s, match.strstart, 1);
100 }
101 match.strstart++;
102 /* strstart never exceeds WSIZE-MAX_MATCH, so there are
103 * always MIN_MATCH bytes ahead.
104 */
105 } while (--match.match_length != 0);
106#else
107 if (LIKELY(match.strstart >= match.orgstart)) {
108 if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) {
109 functable.insert_string(s, match.strstart, match.match_length);
110 } else {
111 functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1);
112 }
113 } else if (match.orgstart < match.strstart + match.match_length) {
114 functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart);
115 }
116 match.strstart += match.match_length;
117 match.match_length = 0;
118#endif
119 } else {
120 match.strstart += match.match_length;
121 match.match_length = 0;
122 s->ins_h = s->window[match.strstart];
123 if (match.strstart >= (MIN_MATCH - 2))
124#ifndef NOT_TWEAK_COMPILER
125 functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2);
126#else
127 functable.insert_string(s, match.strstart + 2 - MIN_MATCH, 1);
128#if MIN_MATCH != 3
129#warning Call insert_string() MIN_MATCH-3 more times
130#endif
131#endif
132 /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not
133 * matter since it will be recomputed at next deflate call.
134 */
135 }
136}
137
138static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) {
139 IPos limit;
140 unsigned char *match, *orig;
141 int changed = 0;
142 struct match c, n;
143 /* step zero: sanity checks */
144
145 if (current->match_length <= 1)
146 return;
147
148 if (UNLIKELY(current->match_length > 1 + next->match_start))
149 return;
150
151 if (UNLIKELY(current->match_length > 1 + next->strstart))
152 return;
153
154 match = s->window - current->match_length + 1 + next->match_start;
155 orig = s->window - current->match_length + 1 + next->strstart;
156
157 /* quick exit check.. if this fails then don't bother with anything else */
158 if (LIKELY(*match != *orig))
159 return;
160
161 c = *current;
162 n = *next;
163
164 /* step one: try to move the "next" match to the left as much as possible */
165 limit = next->strstart > MAX_DIST2 ? next->strstart - MAX_DIST2 : 0;
166
167 match = s->window + n.match_start - 1;
168 orig = s->window + n.strstart - 1;
169
170 while (*match == *orig) {
171 if (c.match_length < 1)
172 break;
173 if (n.strstart <= limit)
174 break;
175 if (n.match_length >= 256)
176 break;
177 if (n.match_start <= 1)
178 break;
179
180 n.strstart--;
181 n.match_start--;
182 n.match_length++;
183 c.match_length--;
184 match--;
185 orig--;
186 changed++;
187 }
188
189 if (!changed)
190 return;
191
192 if (c.match_length <= 1 && n.match_length != 2) {
193 n.orgstart++;
194 *current = c;
195 *next = n;
196 } else {
197 return;
198 }
199}
200
201ZLIB_INTERNAL block_state deflate_medium(deflate_state *s, int flush) {
202 struct match current_match, next_match;
203
204 memset(&current_match, 0, sizeof(struct match));
205 memset(&next_match, 0, sizeof(struct match));
206
207 for (;;) {
208 IPos hash_head = 0; /* head of the hash chain */
209 int bflush; /* set if current block must be flushed */
210
211 /* Make sure that we always have enough lookahead, except
212 * at the end of the input file. We need MAX_MATCH bytes
213 * for the next match, plus MIN_MATCH bytes to insert the
214 * string following the next current_match.
215 */
216 if (s->lookahead < MIN_LOOKAHEAD) {
217 functable.fill_window(s);
218 if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) {
219 return need_more;
220 }
221 if (s->lookahead == 0)
222 break; /* flush the current block */
223 next_match.match_length = 0;
224 }
225 s->prev_length = 2;
226
227 /* Insert the string window[strstart .. strstart+2] in the
228 * dictionary, and set hash_head to the head of the hash chain:
229 */
230
231 /* If we already have a future match from a previous round, just use that */
232 if (next_match.match_length > 0) {
233 current_match = next_match;
234 next_match.match_length = 0;
235
236 } else {
237 hash_head = 0;
238 if (s->lookahead >= MIN_MATCH) {
239 hash_head = functable.insert_string(s, s->strstart, 1);
240 }
241
242 /* set up the initial match to be a 1 byte literal */
243 current_match.match_start = 0;
244 current_match.match_length = 1;
245 current_match.strstart = s->strstart;
246 current_match.orgstart = current_match.strstart;
247
248 /* Find the longest match, discarding those <= prev_length.
249 * At this point we have always match_length < MIN_MATCH
250 */
251
252 if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
253 /* To simplify the code, we prevent matches with the string
254 * of window index 0 (in particular we have to avoid a match
255 * of the string with itself at the start of the input file).
256 */
257 current_match.match_length = longest_match(s, hash_head);
258 current_match.match_start = s->match_start;
259 if (current_match.match_length < MIN_MATCH)
260 current_match.match_length = 1;
261 if (current_match.match_start >= current_match.strstart) {
262 /* this can happen due to some restarts */
263 current_match.match_length = 1;
264 }
265 }
266 }
267
268 insert_match(s, current_match);
269
270 /* now, look ahead one */
271 if (s->lookahead > MIN_LOOKAHEAD && (current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD)) {
272 s->strstart = current_match.strstart + current_match.match_length;
273 hash_head = functable.insert_string(s, s->strstart, 1);
274
275 /* set up the initial match to be a 1 byte literal */
276 next_match.match_start = 0;
277 next_match.match_length = 1;
278 next_match.strstart = s->strstart;
279 next_match.orgstart = next_match.strstart;
280
281 /* Find the longest match, discarding those <= prev_length.
282 * At this point we have always match_length < MIN_MATCH
283 */
284 if (hash_head != 0 && s->strstart - hash_head <= MAX_DIST2) {
285 /* To simplify the code, we prevent matches with the string
286 * of window index 0 (in particular we have to avoid a match
287 * of the string with itself at the start of the input file).
288 */
289 next_match.match_length = longest_match(s, hash_head);
290 next_match.match_start = s->match_start;
291 if (next_match.match_start >= next_match.strstart) {
292 /* this can happen due to some restarts */
293 next_match.match_length = 1;
294 }
295 if (next_match.match_length < MIN_MATCH)
296 next_match.match_length = 1;
297 else
298 fizzle_matches(s, &current_match, &next_match);
299 }
300
301 /* short matches with a very long distance are rarely a good idea encoding wise */
302 if (next_match.match_length == 3 && (next_match.strstart - next_match.match_start) > 12000)
303 next_match.match_length = 1;
304 s->strstart = current_match.strstart;
305
306 } else {
307 next_match.match_length = 0;
308 }
309
310 /* now emit the current match */
311 bflush = emit_match(s, current_match);
312
313 /* move the "cursor" forward */
314 s->strstart += current_match.match_length;
315
316 if (bflush)
317 FLUSH_BLOCK(s, 0);
318 }
319 s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1;
320 if (flush == Z_FINISH) {
321 FLUSH_BLOCK(s, 1);
322 return finish_done;
323 }
324 if (s->sym_next)
325 FLUSH_BLOCK(s, 0);
326
327 return block_done;
328}
329#endif
330