1 | #include "mupdf/fitz.h" |
2 | |
3 | #include <assert.h> |
4 | |
5 | /* TODO: error checking */ |
6 | |
7 | #define LZW_CLEAR(lzw) (1 << ((lzw)->min_bits - 1)) |
8 | #define LZW_EOD(lzw) (LZW_CLEAR(lzw) + 1) |
9 | #define LZW_FIRST(lzw) (LZW_CLEAR(lzw) + 2) |
10 | |
11 | enum |
12 | { |
13 | MAX_BITS = 12, |
14 | NUM_CODES = (1 << MAX_BITS), |
15 | MAX_LENGTH = 4097 |
16 | }; |
17 | |
18 | typedef struct lzw_code_s lzw_code; |
19 | |
20 | struct lzw_code_s |
21 | { |
22 | int prev; /* prev code (in string) */ |
23 | unsigned short length; /* string len, including this token */ |
24 | unsigned char value; /* data value */ |
25 | unsigned char first_char; /* first token of string */ |
26 | }; |
27 | |
28 | typedef struct fz_lzwd_s fz_lzwd; |
29 | |
30 | struct fz_lzwd_s |
31 | { |
32 | fz_stream *chain; |
33 | int eod; |
34 | |
35 | int early_change; |
36 | |
37 | int reverse_bits; |
38 | int old_tiff; |
39 | int min_bits; /* minimum num bits/code */ |
40 | int code_bits; /* num bits/code */ |
41 | int code; /* current code */ |
42 | int old_code; /* previously recognized code */ |
43 | int next_code; /* next free entry */ |
44 | |
45 | lzw_code table[NUM_CODES]; |
46 | |
47 | unsigned char bp[MAX_LENGTH]; |
48 | unsigned char *rp, *wp; |
49 | |
50 | unsigned char buffer[4096]; |
51 | }; |
52 | |
53 | static int |
54 | next_lzwd(fz_context *ctx, fz_stream *stm, size_t len) |
55 | { |
56 | fz_lzwd *lzw = stm->state; |
57 | lzw_code *table = lzw->table; |
58 | unsigned char *buf = lzw->buffer; |
59 | unsigned char *p = buf; |
60 | unsigned char *ep; |
61 | unsigned char *s; |
62 | int codelen; |
63 | |
64 | int code_bits = lzw->code_bits; |
65 | int code = lzw->code; |
66 | int old_code = lzw->old_code; |
67 | int next_code = lzw->next_code; |
68 | |
69 | if (len > sizeof(lzw->buffer)) |
70 | len = sizeof(lzw->buffer); |
71 | ep = buf + len; |
72 | |
73 | while (lzw->rp < lzw->wp && p < ep) |
74 | *p++ = *lzw->rp++; |
75 | |
76 | while (p < ep) |
77 | { |
78 | if (lzw->eod) |
79 | return EOF; |
80 | |
81 | if (lzw->reverse_bits) |
82 | code = fz_read_rbits(ctx, lzw->chain, code_bits); |
83 | else |
84 | code = fz_read_bits(ctx, lzw->chain, code_bits); |
85 | |
86 | if (fz_is_eof_bits(ctx, lzw->chain)) |
87 | { |
88 | lzw->eod = 1; |
89 | break; |
90 | } |
91 | |
92 | if (code == LZW_EOD(lzw)) |
93 | { |
94 | lzw->eod = 1; |
95 | break; |
96 | } |
97 | |
98 | /* Old Tiffs are allowed to NOT send the clear code, and to |
99 | * overrun at the end. */ |
100 | if (!lzw->old_tiff && next_code > NUM_CODES && code != LZW_CLEAR(lzw)) |
101 | { |
102 | fz_warn(ctx, "missing clear code in lzw decode" ); |
103 | code = LZW_CLEAR(lzw); |
104 | } |
105 | |
106 | if (code == LZW_CLEAR(lzw)) |
107 | { |
108 | code_bits = lzw->min_bits; |
109 | next_code = LZW_FIRST(lzw); |
110 | old_code = -1; |
111 | continue; |
112 | } |
113 | |
114 | /* if stream starts without a clear code, old_code is undefined... */ |
115 | if (old_code == -1) |
116 | { |
117 | old_code = code; |
118 | } |
119 | else if (!lzw->old_tiff && next_code == NUM_CODES) |
120 | { |
121 | /* TODO: Ghostscript checks for a following clear code before tolerating */ |
122 | fz_warn(ctx, "tolerating a single out of range code in lzw decode" ); |
123 | next_code++; |
124 | } |
125 | else if (code > next_code || (!lzw->old_tiff && next_code >= NUM_CODES)) |
126 | { |
127 | fz_warn(ctx, "out of range code encountered in lzw decode" ); |
128 | } |
129 | else if (next_code < NUM_CODES) |
130 | { |
131 | /* add new entry to the code table */ |
132 | table[next_code].prev = old_code; |
133 | table[next_code].first_char = table[old_code].first_char; |
134 | table[next_code].length = table[old_code].length + 1; |
135 | if (code < next_code) |
136 | table[next_code].value = table[code].first_char; |
137 | else if (code == next_code) |
138 | table[next_code].value = table[next_code].first_char; |
139 | else |
140 | fz_warn(ctx, "out of range code encountered in lzw decode" ); |
141 | |
142 | next_code ++; |
143 | |
144 | if (next_code > (1 << code_bits) - lzw->early_change - 1) |
145 | { |
146 | code_bits ++; |
147 | if (code_bits > MAX_BITS) |
148 | code_bits = MAX_BITS; |
149 | } |
150 | |
151 | old_code = code; |
152 | } |
153 | |
154 | /* code maps to a string, copy to output (in reverse...) */ |
155 | if (code >= LZW_CLEAR(lzw)) |
156 | { |
157 | codelen = table[code].length; |
158 | lzw->rp = lzw->bp; |
159 | lzw->wp = lzw->bp + codelen; |
160 | |
161 | assert(codelen < MAX_LENGTH); |
162 | |
163 | s = lzw->wp; |
164 | do { |
165 | *(--s) = table[code].value; |
166 | code = table[code].prev; |
167 | } while (code >= 0 && s > lzw->bp); |
168 | } |
169 | |
170 | /* ... or just a single character */ |
171 | else |
172 | { |
173 | lzw->bp[0] = code; |
174 | lzw->rp = lzw->bp; |
175 | lzw->wp = lzw->bp + 1; |
176 | } |
177 | |
178 | /* copy to output */ |
179 | while (lzw->rp < lzw->wp && p < ep) |
180 | *p++ = *lzw->rp++; |
181 | } |
182 | |
183 | lzw->code_bits = code_bits; |
184 | lzw->code = code; |
185 | lzw->old_code = old_code; |
186 | lzw->next_code = next_code; |
187 | |
188 | stm->rp = buf; |
189 | stm->wp = p; |
190 | if (buf == p) |
191 | return EOF; |
192 | stm->pos += p - buf; |
193 | |
194 | return *stm->rp++; |
195 | } |
196 | |
197 | static void |
198 | close_lzwd(fz_context *ctx, void *state_) |
199 | { |
200 | fz_lzwd *lzw = (fz_lzwd *)state_; |
201 | fz_sync_bits(ctx, lzw->chain); |
202 | fz_drop_stream(ctx, lzw->chain); |
203 | fz_free(ctx, lzw); |
204 | } |
205 | |
206 | /* Default: early_change = 1 */ |
207 | fz_stream * |
208 | fz_open_lzwd(fz_context *ctx, fz_stream *chain, int early_change, int min_bits, int reverse_bits, int old_tiff) |
209 | { |
210 | fz_lzwd *lzw; |
211 | int i; |
212 | |
213 | if (min_bits > MAX_BITS) |
214 | { |
215 | fz_warn(ctx, "out of range initial lzw code size" ); |
216 | min_bits = MAX_BITS; |
217 | } |
218 | |
219 | lzw = fz_malloc_struct(ctx, fz_lzwd); |
220 | lzw->eod = 0; |
221 | lzw->early_change = early_change; |
222 | lzw->reverse_bits = reverse_bits; |
223 | lzw->old_tiff = old_tiff; |
224 | lzw->min_bits = min_bits; |
225 | lzw->code_bits = lzw->min_bits; |
226 | lzw->code = -1; |
227 | lzw->next_code = LZW_FIRST(lzw); |
228 | lzw->old_code = -1; |
229 | lzw->rp = lzw->bp; |
230 | lzw->wp = lzw->bp; |
231 | |
232 | for (i = 0; i < LZW_CLEAR(lzw); i++) |
233 | { |
234 | lzw->table[i].value = i; |
235 | lzw->table[i].first_char = i; |
236 | lzw->table[i].length = 1; |
237 | lzw->table[i].prev = -1; |
238 | } |
239 | |
240 | for (i = LZW_CLEAR(lzw); i < NUM_CODES; i++) |
241 | { |
242 | lzw->table[i].value = 0; |
243 | lzw->table[i].first_char = 0; |
244 | lzw->table[i].length = 0; |
245 | lzw->table[i].prev = -1; |
246 | } |
247 | |
248 | lzw->chain = fz_keep_stream(ctx, chain); |
249 | |
250 | return fz_new_stream(ctx, lzw, next_lzwd, close_lzwd); |
251 | } |
252 | |