/* deflate_medium.c -- The deflate_medium deflate strategy
 *
 * Copyright (C) 2013 Intel Corporation. All rights reserved.
 * Authors:
 *  Arjan van de Ven    <arjan@linux.intel.com>
 *
 * For conditions of distribution and use, see copyright notice in zlib.h
 */
| 9 | #ifndef NO_MEDIUM_STRATEGY | 
|---|
| 10 | #include <stdint.h> | 
|---|
| 11 | #include "zbuild.h" | 
|---|
| 12 | #include "deflate.h" | 
|---|
| 13 | #include "deflate_p.h" | 
|---|
| 14 | #include "functable.h" | 
|---|
| 15 |  | 
|---|
/* One candidate match as tracked by the deflate_medium strategy.
 * Every field is a 16-bit value; the position fields are indices into s->window.
 */
struct match {
    uint16_t match_start;   /* window index where the earlier copy of the data begins */
    uint16_t match_length;  /* bytes covered; lengths below MIN_MATCH are emitted as literals */
    uint16_t strstart;      /* window index of the data this match encodes */
    uint16_t orgstart;      /* set equal to strstart when the match is recorded; adjusted by
                             * fizzle_matches() and read by insert_match() to decide which
                             * positions to hash */
};
| 22 |  | 
|---|
| 23 | static int emit_match(deflate_state *s, struct match match) { | 
|---|
| 24 | int bflush = 0; | 
|---|
| 25 |  | 
|---|
| 26 | /* matches that are not long enough we need to emit as literals */ | 
|---|
| 27 | if (match.match_length < MIN_MATCH) { | 
|---|
| 28 | while (match.match_length) { | 
|---|
| 29 | bflush += zng_tr_tally_lit(s, c: s->window[match.strstart]); | 
|---|
| 30 | s->lookahead--; | 
|---|
| 31 | match.strstart++; | 
|---|
| 32 | match.match_length--; | 
|---|
| 33 | } | 
|---|
| 34 | return bflush; | 
|---|
| 35 | } | 
|---|
| 36 |  | 
|---|
| 37 | check_match(s, match.strstart, match.match_start, match.match_length); | 
|---|
| 38 |  | 
|---|
| 39 | bflush += zng_tr_tally_dist(s, dist: match.strstart - match.match_start, len: match.match_length - MIN_MATCH); | 
|---|
| 40 |  | 
|---|
| 41 | s->lookahead -= match.match_length; | 
|---|
| 42 | return bflush; | 
|---|
| 43 | } | 
|---|
| 44 |  | 
|---|
| 45 | static void insert_match(deflate_state *s, struct match match) { | 
|---|
| 46 | if (UNLIKELY(s->lookahead <= (unsigned int)(match.match_length + MIN_MATCH))) | 
|---|
| 47 | return; | 
|---|
| 48 |  | 
|---|
| 49 | /* matches that are not long enough we need to emit as literals */ | 
|---|
| 50 | if (LIKELY(match.match_length < MIN_MATCH)) { | 
|---|
| 51 | match.strstart++; | 
|---|
| 52 | match.match_length--; | 
|---|
| 53 | if (UNLIKELY(match.match_length > 0)) { | 
|---|
| 54 | if (match.strstart >= match.orgstart) { | 
|---|
| 55 | if (match.strstart + match.match_length - 1 >= match.orgstart) { | 
|---|
| 56 | functable.insert_string(s, match.strstart, match.match_length); | 
|---|
| 57 | } else { | 
|---|
| 58 | functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); | 
|---|
| 59 | } | 
|---|
| 60 | match.strstart += match.match_length; | 
|---|
| 61 | match.match_length = 0; | 
|---|
| 62 | } | 
|---|
| 63 | } | 
|---|
| 64 | return; | 
|---|
| 65 | } | 
|---|
| 66 |  | 
|---|
| 67 | /* Insert new strings in the hash table only if the match length | 
|---|
| 68 | * is not too large. This saves time but degrades compression. | 
|---|
| 69 | */ | 
|---|
| 70 | if (match.match_length <= 16* s->max_insert_length && s->lookahead >= MIN_MATCH) { | 
|---|
| 71 | match.match_length--; /* string at strstart already in table */ | 
|---|
| 72 | match.strstart++; | 
|---|
| 73 |  | 
|---|
| 74 | if (LIKELY(match.strstart >= match.orgstart)) { | 
|---|
| 75 | if (LIKELY(match.strstart + match.match_length - 1 >= match.orgstart)) { | 
|---|
| 76 | functable.insert_string(s, match.strstart, match.match_length); | 
|---|
| 77 | } else { | 
|---|
| 78 | functable.insert_string(s, match.strstart, match.orgstart - match.strstart + 1); | 
|---|
| 79 | } | 
|---|
| 80 | } else if (match.orgstart < match.strstart + match.match_length) { | 
|---|
| 81 | functable.insert_string(s, match.orgstart, match.strstart + match.match_length - match.orgstart); | 
|---|
| 82 | } | 
|---|
| 83 | match.strstart += match.match_length; | 
|---|
| 84 | match.match_length = 0; | 
|---|
| 85 | } else { | 
|---|
| 86 | match.strstart += match.match_length; | 
|---|
| 87 | match.match_length = 0; | 
|---|
| 88 | if (match.strstart >= (MIN_MATCH - 2)) | 
|---|
| 89 | #if MIN_MATCH != 3 | 
|---|
| 90 | functable.insert_string(s, match.strstart + 2 - MIN_MATCH, MIN_MATCH - 2); | 
|---|
| 91 | #else | 
|---|
| 92 | functable.quick_insert_string(s, match.strstart + 2 - MIN_MATCH); | 
|---|
| 93 | #endif | 
|---|
| 94 | /* If lookahead < MIN_MATCH, ins_h is garbage, but it does not | 
|---|
| 95 | * matter since it will be recomputed at next deflate call. | 
|---|
| 96 | */ | 
|---|
| 97 | } | 
|---|
| 98 | } | 
|---|
| 99 |  | 
|---|
| 100 | static void fizzle_matches(deflate_state *s, struct match *current, struct match *next) { | 
|---|
| 101 | Pos limit; | 
|---|
| 102 | unsigned char *match, *orig; | 
|---|
| 103 | int changed = 0; | 
|---|
| 104 | struct match c, n; | 
|---|
| 105 | /* step zero: sanity checks */ | 
|---|
| 106 |  | 
|---|
| 107 | if (current->match_length <= 1) | 
|---|
| 108 | return; | 
|---|
| 109 |  | 
|---|
| 110 | if (UNLIKELY(current->match_length > 1 + next->match_start)) | 
|---|
| 111 | return; | 
|---|
| 112 |  | 
|---|
| 113 | if (UNLIKELY(current->match_length > 1 + next->strstart)) | 
|---|
| 114 | return; | 
|---|
| 115 |  | 
|---|
| 116 | match = s->window - current->match_length + 1 + next->match_start; | 
|---|
| 117 | orig  = s->window - current->match_length + 1 + next->strstart; | 
|---|
| 118 |  | 
|---|
| 119 | /* quick exit check.. if this fails then don't bother with anything else */ | 
|---|
| 120 | if (LIKELY(*match != *orig)) | 
|---|
| 121 | return; | 
|---|
| 122 |  | 
|---|
| 123 | c = *current; | 
|---|
| 124 | n = *next; | 
|---|
| 125 |  | 
|---|
| 126 | /* step one: try to move the "next" match to the left as much as possible */ | 
|---|
| 127 | limit = next->strstart > MAX_DIST(s) ? next->strstart - (Pos)MAX_DIST(s) : 0; | 
|---|
| 128 |  | 
|---|
| 129 | match = s->window + n.match_start - 1; | 
|---|
| 130 | orig = s->window + n.strstart - 1; | 
|---|
| 131 |  | 
|---|
| 132 | while (*match == *orig) { | 
|---|
| 133 | if (UNLIKELY(c.match_length < 1)) | 
|---|
| 134 | break; | 
|---|
| 135 | if (UNLIKELY(n.strstart <= limit)) | 
|---|
| 136 | break; | 
|---|
| 137 | if (UNLIKELY(n.match_length >= 256)) | 
|---|
| 138 | break; | 
|---|
| 139 | if (UNLIKELY(n.match_start <= 1)) | 
|---|
| 140 | break; | 
|---|
| 141 |  | 
|---|
| 142 | n.strstart--; | 
|---|
| 143 | n.match_start--; | 
|---|
| 144 | n.match_length++; | 
|---|
| 145 | c.match_length--; | 
|---|
| 146 | match--; | 
|---|
| 147 | orig--; | 
|---|
| 148 | changed++; | 
|---|
| 149 | } | 
|---|
| 150 |  | 
|---|
| 151 | if (!changed) | 
|---|
| 152 | return; | 
|---|
| 153 |  | 
|---|
| 154 | if (c.match_length <= 1 && n.match_length != 2) { | 
|---|
| 155 | n.orgstart++; | 
|---|
| 156 | *current = c; | 
|---|
| 157 | *next = n; | 
|---|
| 158 | } else { | 
|---|
| 159 | return; | 
|---|
| 160 | } | 
|---|
| 161 | } | 
|---|
| 162 |  | 
|---|
| 163 | Z_INTERNAL block_state deflate_medium(deflate_state *s, int flush) { | 
|---|
| 164 | /* Align the first struct to start on a new cacheline, this allows us to fit both structs in one cacheline */ | 
|---|
| 165 | ALIGNED_(16) struct match current_match; | 
|---|
| 166 | struct match next_match; | 
|---|
| 167 |  | 
|---|
| 168 | memset(s: ¤t_match, c: 0, n: sizeof(struct match)); | 
|---|
| 169 | memset(s: &next_match, c: 0, n: sizeof(struct match)); | 
|---|
| 170 |  | 
|---|
| 171 | for (;;) { | 
|---|
| 172 | Pos hash_head = 0;    /* head of the hash chain */ | 
|---|
| 173 | int bflush = 0;       /* set if current block must be flushed */ | 
|---|
| 174 | int64_t dist; | 
|---|
| 175 |  | 
|---|
| 176 | /* Make sure that we always have enough lookahead, except | 
|---|
| 177 | * at the end of the input file. We need MAX_MATCH bytes | 
|---|
| 178 | * for the next match, plus MIN_MATCH bytes to insert the | 
|---|
| 179 | * string following the next current_match. | 
|---|
| 180 | */ | 
|---|
| 181 | if (s->lookahead < MIN_LOOKAHEAD) { | 
|---|
| 182 | fill_window(s); | 
|---|
| 183 | if (s->lookahead < MIN_LOOKAHEAD && flush == Z_NO_FLUSH) { | 
|---|
| 184 | return need_more; | 
|---|
| 185 | } | 
|---|
| 186 | if (UNLIKELY(s->lookahead == 0)) | 
|---|
| 187 | break; /* flush the current block */ | 
|---|
| 188 | next_match.match_length = 0; | 
|---|
| 189 | } | 
|---|
| 190 |  | 
|---|
| 191 | /* Insert the string window[strstart .. strstart+2] in the | 
|---|
| 192 | * dictionary, and set hash_head to the head of the hash chain: | 
|---|
| 193 | */ | 
|---|
| 194 |  | 
|---|
| 195 | /* If we already have a future match from a previous round, just use that */ | 
|---|
| 196 | if (next_match.match_length > 0) { | 
|---|
| 197 | current_match = next_match; | 
|---|
| 198 | next_match.match_length = 0; | 
|---|
| 199 | } else { | 
|---|
| 200 | hash_head = 0; | 
|---|
| 201 | if (s->lookahead >= MIN_MATCH) { | 
|---|
| 202 | hash_head = functable.quick_insert_string(s, s->strstart); | 
|---|
| 203 | } | 
|---|
| 204 |  | 
|---|
| 205 | current_match.strstart = (uint16_t)s->strstart; | 
|---|
| 206 | current_match.orgstart = current_match.strstart; | 
|---|
| 207 |  | 
|---|
| 208 | /* Find the longest match, discarding those <= prev_length. | 
|---|
| 209 | * At this point we have always match_length < MIN_MATCH | 
|---|
| 210 | */ | 
|---|
| 211 |  | 
|---|
| 212 | dist = (int64_t)s->strstart - hash_head; | 
|---|
| 213 | if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) { | 
|---|
| 214 | /* To simplify the code, we prevent matches with the string | 
|---|
| 215 | * of window index 0 (in particular we have to avoid a match | 
|---|
| 216 | * of the string with itself at the start of the input file). | 
|---|
| 217 | */ | 
|---|
| 218 | current_match.match_length = (uint16_t)functable.longest_match(s, hash_head); | 
|---|
| 219 | current_match.match_start = (uint16_t)s->match_start; | 
|---|
| 220 | if (UNLIKELY(current_match.match_length < MIN_MATCH)) | 
|---|
| 221 | current_match.match_length = 1; | 
|---|
| 222 | if (UNLIKELY(current_match.match_start >= current_match.strstart)) { | 
|---|
| 223 | /* this can happen due to some restarts */ | 
|---|
| 224 | current_match.match_length = 1; | 
|---|
| 225 | } | 
|---|
| 226 | } else { | 
|---|
| 227 | /* Set up the match to be a 1 byte literal */ | 
|---|
| 228 | current_match.match_start = 0; | 
|---|
| 229 | current_match.match_length = 1; | 
|---|
| 230 | } | 
|---|
| 231 | } | 
|---|
| 232 |  | 
|---|
| 233 | insert_match(s, match: current_match); | 
|---|
| 234 |  | 
|---|
| 235 | /* now, look ahead one */ | 
|---|
| 236 | if (LIKELY(s->lookahead > MIN_LOOKAHEAD && (uint32_t)(current_match.strstart + current_match.match_length) < (s->window_size - MIN_LOOKAHEAD))) { | 
|---|
| 237 | s->strstart = current_match.strstart + current_match.match_length; | 
|---|
| 238 | hash_head = functable.quick_insert_string(s, s->strstart); | 
|---|
| 239 |  | 
|---|
| 240 | next_match.strstart = (uint16_t)s->strstart; | 
|---|
| 241 | next_match.orgstart = next_match.strstart; | 
|---|
| 242 |  | 
|---|
| 243 | /* Find the longest match, discarding those <= prev_length. | 
|---|
| 244 | * At this point we have always match_length < MIN_MATCH | 
|---|
| 245 | */ | 
|---|
| 246 |  | 
|---|
| 247 | dist = (int64_t)s->strstart - hash_head; | 
|---|
| 248 | if (dist <= MAX_DIST(s) && dist > 0 && hash_head != 0) { | 
|---|
| 249 | /* To simplify the code, we prevent matches with the string | 
|---|
| 250 | * of window index 0 (in particular we have to avoid a match | 
|---|
| 251 | * of the string with itself at the start of the input file). | 
|---|
| 252 | */ | 
|---|
| 253 | next_match.match_length = (uint16_t)functable.longest_match(s, hash_head); | 
|---|
| 254 | next_match.match_start = (uint16_t)s->match_start; | 
|---|
| 255 | if (UNLIKELY(next_match.match_start >= next_match.strstart)) { | 
|---|
| 256 | /* this can happen due to some restarts */ | 
|---|
| 257 | next_match.match_length = 1; | 
|---|
| 258 | } | 
|---|
| 259 | if (next_match.match_length < MIN_MATCH) | 
|---|
| 260 | next_match.match_length = 1; | 
|---|
| 261 | else | 
|---|
| 262 | fizzle_matches(s, current: ¤t_match, next: &next_match); | 
|---|
| 263 | } else { | 
|---|
| 264 | /* Set up the match to be a 1 byte literal */ | 
|---|
| 265 | next_match.match_start = 0; | 
|---|
| 266 | next_match.match_length = 1; | 
|---|
| 267 | } | 
|---|
| 268 |  | 
|---|
| 269 | s->strstart = current_match.strstart; | 
|---|
| 270 | } else { | 
|---|
| 271 | next_match.match_length = 0; | 
|---|
| 272 | } | 
|---|
| 273 |  | 
|---|
| 274 | /* now emit the current match */ | 
|---|
| 275 | bflush = emit_match(s, match: current_match); | 
|---|
| 276 |  | 
|---|
| 277 | /* move the "cursor" forward */ | 
|---|
| 278 | s->strstart += current_match.match_length; | 
|---|
| 279 |  | 
|---|
| 280 | if (UNLIKELY(bflush)) | 
|---|
| 281 | FLUSH_BLOCK(s, 0); | 
|---|
| 282 | } | 
|---|
| 283 | s->insert = s->strstart < MIN_MATCH-1 ? s->strstart : MIN_MATCH-1; | 
|---|
| 284 | if (flush == Z_FINISH) { | 
|---|
| 285 | FLUSH_BLOCK(s, 1); | 
|---|
| 286 | return finish_done; | 
|---|
| 287 | } | 
|---|
| 288 | if (UNLIKELY(s->sym_next)) | 
|---|
| 289 | FLUSH_BLOCK(s, 0); | 
|---|
| 290 |  | 
|---|
| 291 | return block_done; | 
|---|
| 292 | } | 
|---|
| 293 | #endif | 
|---|
| 294 |  | 
|---|