1#include "mupdf/fitz.h"
2
3#include <assert.h>
4
5/* TODO: error checking */
6
7#define LZW_CLEAR(lzw) (1 << ((lzw)->min_bits - 1))
8#define LZW_EOD(lzw) (LZW_CLEAR(lzw) + 1)
9#define LZW_FIRST(lzw) (LZW_CLEAR(lzw) + 2)
10
11enum
12{
13 MAX_BITS = 12,
14 NUM_CODES = (1 << MAX_BITS),
15 MAX_LENGTH = 4097
16};
17
18typedef struct lzw_code_s lzw_code;
19
20struct lzw_code_s
21{
22 int prev; /* prev code (in string) */
23 unsigned short length; /* string len, including this token */
24 unsigned char value; /* data value */
25 unsigned char first_char; /* first token of string */
26};
27
28typedef struct fz_lzwd_s fz_lzwd;
29
30struct fz_lzwd_s
31{
32 fz_stream *chain;
33 int eod;
34
35 int early_change;
36
37 int reverse_bits;
38 int old_tiff;
39 int min_bits; /* minimum num bits/code */
40 int code_bits; /* num bits/code */
41 int code; /* current code */
42 int old_code; /* previously recognized code */
43 int next_code; /* next free entry */
44
45 lzw_code table[NUM_CODES];
46
47 unsigned char bp[MAX_LENGTH];
48 unsigned char *rp, *wp;
49
50 unsigned char buffer[4096];
51};
52
53static int
54next_lzwd(fz_context *ctx, fz_stream *stm, size_t len)
55{
56 fz_lzwd *lzw = stm->state;
57 lzw_code *table = lzw->table;
58 unsigned char *buf = lzw->buffer;
59 unsigned char *p = buf;
60 unsigned char *ep;
61 unsigned char *s;
62 int codelen;
63
64 int code_bits = lzw->code_bits;
65 int code = lzw->code;
66 int old_code = lzw->old_code;
67 int next_code = lzw->next_code;
68
69 if (len > sizeof(lzw->buffer))
70 len = sizeof(lzw->buffer);
71 ep = buf + len;
72
73 while (lzw->rp < lzw->wp && p < ep)
74 *p++ = *lzw->rp++;
75
76 while (p < ep)
77 {
78 if (lzw->eod)
79 return EOF;
80
81 if (lzw->reverse_bits)
82 code = fz_read_rbits(ctx, lzw->chain, code_bits);
83 else
84 code = fz_read_bits(ctx, lzw->chain, code_bits);
85
86 if (fz_is_eof_bits(ctx, lzw->chain))
87 {
88 lzw->eod = 1;
89 break;
90 }
91
92 if (code == LZW_EOD(lzw))
93 {
94 lzw->eod = 1;
95 break;
96 }
97
98 /* Old Tiffs are allowed to NOT send the clear code, and to
99 * overrun at the end. */
100 if (!lzw->old_tiff && next_code > NUM_CODES && code != LZW_CLEAR(lzw))
101 {
102 fz_warn(ctx, "missing clear code in lzw decode");
103 code = LZW_CLEAR(lzw);
104 }
105
106 if (code == LZW_CLEAR(lzw))
107 {
108 code_bits = lzw->min_bits;
109 next_code = LZW_FIRST(lzw);
110 old_code = -1;
111 continue;
112 }
113
114 /* if stream starts without a clear code, old_code is undefined... */
115 if (old_code == -1)
116 {
117 old_code = code;
118 }
119 else if (!lzw->old_tiff && next_code == NUM_CODES)
120 {
121 /* TODO: Ghostscript checks for a following clear code before tolerating */
122 fz_warn(ctx, "tolerating a single out of range code in lzw decode");
123 next_code++;
124 }
125 else if (code > next_code || (!lzw->old_tiff && next_code >= NUM_CODES))
126 {
127 fz_warn(ctx, "out of range code encountered in lzw decode");
128 }
129 else if (next_code < NUM_CODES)
130 {
131 /* add new entry to the code table */
132 table[next_code].prev = old_code;
133 table[next_code].first_char = table[old_code].first_char;
134 table[next_code].length = table[old_code].length + 1;
135 if (code < next_code)
136 table[next_code].value = table[code].first_char;
137 else if (code == next_code)
138 table[next_code].value = table[next_code].first_char;
139 else
140 fz_warn(ctx, "out of range code encountered in lzw decode");
141
142 next_code ++;
143
144 if (next_code > (1 << code_bits) - lzw->early_change - 1)
145 {
146 code_bits ++;
147 if (code_bits > MAX_BITS)
148 code_bits = MAX_BITS;
149 }
150
151 old_code = code;
152 }
153
154 /* code maps to a string, copy to output (in reverse...) */
155 if (code >= LZW_CLEAR(lzw))
156 {
157 codelen = table[code].length;
158 lzw->rp = lzw->bp;
159 lzw->wp = lzw->bp + codelen;
160
161 assert(codelen < MAX_LENGTH);
162
163 s = lzw->wp;
164 do {
165 *(--s) = table[code].value;
166 code = table[code].prev;
167 } while (code >= 0 && s > lzw->bp);
168 }
169
170 /* ... or just a single character */
171 else
172 {
173 lzw->bp[0] = code;
174 lzw->rp = lzw->bp;
175 lzw->wp = lzw->bp + 1;
176 }
177
178 /* copy to output */
179 while (lzw->rp < lzw->wp && p < ep)
180 *p++ = *lzw->rp++;
181 }
182
183 lzw->code_bits = code_bits;
184 lzw->code = code;
185 lzw->old_code = old_code;
186 lzw->next_code = next_code;
187
188 stm->rp = buf;
189 stm->wp = p;
190 if (buf == p)
191 return EOF;
192 stm->pos += p - buf;
193
194 return *stm->rp++;
195}
196
197static void
198close_lzwd(fz_context *ctx, void *state_)
199{
200 fz_lzwd *lzw = (fz_lzwd *)state_;
201 fz_sync_bits(ctx, lzw->chain);
202 fz_drop_stream(ctx, lzw->chain);
203 fz_free(ctx, lzw);
204}
205
206/* Default: early_change = 1 */
207fz_stream *
208fz_open_lzwd(fz_context *ctx, fz_stream *chain, int early_change, int min_bits, int reverse_bits, int old_tiff)
209{
210 fz_lzwd *lzw;
211 int i;
212
213 if (min_bits > MAX_BITS)
214 {
215 fz_warn(ctx, "out of range initial lzw code size");
216 min_bits = MAX_BITS;
217 }
218
219 lzw = fz_malloc_struct(ctx, fz_lzwd);
220 lzw->eod = 0;
221 lzw->early_change = early_change;
222 lzw->reverse_bits = reverse_bits;
223 lzw->old_tiff = old_tiff;
224 lzw->min_bits = min_bits;
225 lzw->code_bits = lzw->min_bits;
226 lzw->code = -1;
227 lzw->next_code = LZW_FIRST(lzw);
228 lzw->old_code = -1;
229 lzw->rp = lzw->bp;
230 lzw->wp = lzw->bp;
231
232 for (i = 0; i < LZW_CLEAR(lzw); i++)
233 {
234 lzw->table[i].value = i;
235 lzw->table[i].first_char = i;
236 lzw->table[i].length = 1;
237 lzw->table[i].prev = -1;
238 }
239
240 for (i = LZW_CLEAR(lzw); i < NUM_CODES; i++)
241 {
242 lzw->table[i].value = 0;
243 lzw->table[i].first_char = 0;
244 lzw->table[i].length = 0;
245 lzw->table[i].prev = -1;
246 }
247
248 lzw->chain = fz_keep_stream(ctx, chain);
249
250 return fz_new_stream(ctx, lzw, next_lzwd, close_lzwd);
251}
252