1
2#include "zbuild.h"
3#include "deflate.h"
4#include "functable.h"
5
6#ifndef MATCH_TPL_H
7#define MATCH_TPL_H
8
9#ifdef UNALIGNED_OK
10# ifdef UNALIGNED64_OK
11typedef uint64_t bestcmp_t;
12# else
13typedef uint32_t bestcmp_t;
14# endif
15#else
16typedef uint8_t bestcmp_t;
17#endif
18
19#define EARLY_EXIT_TRIGGER_LEVEL 5
20
21#endif
22
23/* Set match_start to the longest match starting at the given string and
24 * return its length. Matches shorter or equal to prev_length are discarded,
25 * in which case the result is equal to prev_length and match_start is garbage.
26 *
27 * IN assertions: cur_match is the head of the hash chain for the current
28 * string (strstart) and its distance is <= MAX_DIST, and prev_length >=1
29 * OUT assertion: the match length is not greater than s->lookahead
30 */
31Z_INTERNAL uint32_t LONGEST_MATCH(deflate_state *const s, Pos cur_match) {
32 unsigned int strstart = s->strstart;
33 const unsigned wmask = s->w_mask;
34 unsigned char *window = s->window;
35 unsigned char *scan = window + strstart;
36 Z_REGISTER unsigned char *mbase_start = window;
37 Z_REGISTER unsigned char *mbase_end;
38 const Pos *prev = s->prev;
39 Pos limit;
40 int32_t early_exit;
41 uint32_t chain_length, nice_match, best_len, offset;
42 uint32_t lookahead = s->lookahead;
43 bestcmp_t scan_end;
44#ifndef UNALIGNED_OK
45 bestcmp_t scan_end0;
46#else
47 bestcmp_t scan_start;
48#endif
49
50#define GOTO_NEXT_CHAIN \
51 if (--chain_length && (cur_match = prev[cur_match & wmask]) > limit) \
52 continue; \
53 return best_len;
54
55 /* The code is optimized for MAX_MATCH-2 multiple of 16. */
56 Assert(MAX_MATCH == 258, "Code too clever");
57
58 best_len = s->prev_length ? s->prev_length : 1;
59
60 /* Calculate read offset which should only extend an extra byte
61 * to find the next best match length.
62 */
63 offset = best_len-1;
64#ifdef UNALIGNED_OK
65 if (best_len >= sizeof(uint32_t)) {
66 offset -= 2;
67#ifdef UNALIGNED64_OK
68 if (best_len >= sizeof(uint64_t))
69 offset -= 4;
70#endif
71 }
72#endif
73
74 scan_end = *(bestcmp_t *)(scan+offset);
75#ifndef UNALIGNED_OK
76 scan_end0 = *(bestcmp_t *)(scan+offset+1);
77#else
78 scan_start = *(bestcmp_t *)(scan);
79#endif
80 mbase_end = (mbase_start+offset);
81
82 /* Do not waste too much time if we already have a good match */
83 chain_length = s->max_chain_length;
84 early_exit = s->level < EARLY_EXIT_TRIGGER_LEVEL;
85 if (best_len >= s->good_match)
86 chain_length >>= 2;
87 nice_match = (uint32_t)s->nice_match;
88
89 /* Stop when cur_match becomes <= limit. To simplify the code,
90 * we prevent matches with the string of window index 0
91 */
92 limit = strstart > MAX_DIST(s) ? (Pos)(strstart - MAX_DIST(s)) : 0;
93
94 Assert((unsigned long)strstart <= s->window_size - MIN_LOOKAHEAD, "need lookahead");
95 for (;;) {
96 if (cur_match >= strstart)
97 break;
98
99 /* Skip to next match if the match length cannot increase or if the match length is
100 * less than 2. Note that the checks below for insufficient lookahead only occur
101 * occasionally for performance reasons.
102 * Therefore uninitialized memory will be accessed and conditional jumps will be made
103 * that depend on those values. However the length of the match is limited to the
104 * lookahead, so the output of deflate is not affected by the uninitialized values.
105 */
106#ifdef UNALIGNED_OK
107 if (best_len < sizeof(uint32_t)) {
108 for (;;) {
109 if (*(uint16_t *)(mbase_end+cur_match) == (uint16_t)scan_end &&
110 *(uint16_t *)(mbase_start+cur_match) == (uint16_t)scan_start)
111 break;
112 GOTO_NEXT_CHAIN;
113 }
114# ifdef UNALIGNED64_OK
115 } else if (best_len >= sizeof(uint64_t)) {
116 for (;;) {
117 if (*(uint64_t *)(mbase_end+cur_match) == (uint64_t)scan_end &&
118 *(uint64_t *)(mbase_start+cur_match) == (uint64_t)scan_start)
119 break;
120 GOTO_NEXT_CHAIN;
121 }
122# endif
123 } else {
124 for (;;) {
125 if (*(uint32_t *)(mbase_end+cur_match) == (uint32_t)scan_end &&
126 *(uint32_t *)(mbase_start+cur_match) == (uint32_t)scan_start)
127 break;
128 GOTO_NEXT_CHAIN;
129 }
130 }
131#else
132 for (;;) {
133 if (mbase_end[cur_match] == scan_end && mbase_end[cur_match+1] == scan_end0 &&
134 mbase_start[cur_match] == scan[0] && mbase_start[cur_match+1] == scan[1])
135 break;
136 GOTO_NEXT_CHAIN;
137 }
138#endif
139 uint32_t len = COMPARE256(src0: scan+2, src1: mbase_start+cur_match+2) + 2;
140 Assert(scan+len <= window+(unsigned)(s->window_size-1), "wild scan");
141
142 if (len > best_len) {
143 s->match_start = cur_match;
144 /* Do not look for matches beyond the end of the input. */
145 if (len > lookahead)
146 return lookahead;
147 best_len = len;
148 if (best_len >= nice_match)
149 return best_len;
150
151 offset = best_len-1;
152#ifdef UNALIGNED_OK
153 if (best_len >= sizeof(uint32_t)) {
154 offset -= 2;
155#ifdef UNALIGNED64_OK
156 if (best_len >= sizeof(uint64_t))
157 offset -= 4;
158#endif
159 }
160#endif
161 scan_end = *(bestcmp_t *)(scan+offset);
162#ifndef UNALIGNED_OK
163 scan_end0 = *(bestcmp_t *)(scan+offset+1);
164#endif
165 mbase_end = (mbase_start+offset);
166 } else if (UNLIKELY(early_exit)) {
167 /* The probability of finding a match later if we here is pretty low, so for
168 * performance it's best to outright stop here for the lower compression levels
169 */
170 break;
171 }
172 GOTO_NEXT_CHAIN;
173 }
174
175 return best_len;
176}
177
178#undef LONGEST_MATCH
179#undef COMPARE256
180#undef COMPARE258
181