1 | /* |
2 | * jchuff.c |
3 | * |
4 | * This file was part of the Independent JPEG Group's software: |
5 | * Copyright (C) 1991-1997, Thomas G. Lane. |
6 | * libjpeg-turbo Modifications: |
7 | * Copyright (C) 2009-2011, 2014-2016, D. R. Commander. |
8 | * Copyright (C) 2015, Matthieu Darbois. |
9 | * For conditions of distribution and use, see the accompanying README.ijg |
10 | * file. |
11 | * |
12 | * This file contains Huffman entropy encoding routines. |
13 | * |
14 | * Much of the complexity here has to do with supporting output suspension. |
15 | * If the data destination module demands suspension, we want to be able to |
16 | * back up to the start of the current MCU. To do this, we copy state |
17 | * variables into local working storage, and update them back to the |
18 | * permanent JPEG objects only upon successful completion of an MCU. |
19 | */ |
20 | |
21 | #define JPEG_INTERNALS |
22 | #include "jinclude.h" |
23 | #include "jpeglib.h" |
24 | #include "jsimd.h" |
25 | #include "jconfigint.h" |
26 | #include <limits.h> |
27 | |
28 | /* |
29 | * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
30 | * used for bit counting rather than the lookup table. This will reduce the |
31 | * memory footprint by 64k, which is important for some mobile applications |
32 | * that create many isolated instances of libjpeg-turbo (web browsers, for |
33 | * instance.) This may improve performance on some mobile platforms as well. |
34 | * This feature is enabled by default only on ARM processors, because some x86 |
35 | * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
36 | * shown to have a significant performance impact even on the x86 chips that |
37 | * have a fast implementation of it. When building for ARMv6, you can |
38 | * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
39 | * flags (this defines __thumb__). |
40 | */ |
41 | |
42 | /* NOTE: Both GCC and Clang define __GNUC__ */ |
43 | #if defined __GNUC__ && (defined __arm__ || defined __aarch64__) |
44 | #if !defined __thumb__ || defined __thumb2__ |
45 | #define USE_CLZ_INTRINSIC |
46 | #endif |
47 | #endif |
48 | |
49 | #ifdef USE_CLZ_INTRINSIC |
50 | #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
51 | #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
52 | #else |
53 | #include "jpeg_nbits_table.h" |
54 | #define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
55 | #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
56 | #endif |
57 | |
58 | #ifndef min |
59 | #define min(a,b) ((a)<(b)?(a):(b)) |
60 | #endif |
61 | |
62 | |
63 | /* Expanded entropy encoder object for Huffman encoding. |
64 | * |
65 | * The savable_state subrecord contains fields that change within an MCU, |
66 | * but must not be updated permanently until we complete the MCU. |
67 | */ |
68 | |
69 | typedef struct { |
70 | size_t put_buffer; /* current bit-accumulation buffer */ |
71 | int put_bits; /* # of bits now in it */ |
72 | int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ |
73 | } savable_state; |
74 | |
75 | /* This macro is to work around compilers with missing or broken |
76 | * structure assignment. You'll need to fix this code if you have |
77 | * such a compiler and you change MAX_COMPS_IN_SCAN. |
78 | */ |
79 | |
80 | #ifndef NO_STRUCT_ASSIGN |
81 | #define ASSIGN_STATE(dest,src) ((dest) = (src)) |
82 | #else |
83 | #if MAX_COMPS_IN_SCAN == 4 |
84 | #define ASSIGN_STATE(dest,src) \ |
85 | ((dest).put_buffer = (src).put_buffer, \ |
86 | (dest).put_bits = (src).put_bits, \ |
87 | (dest).last_dc_val[0] = (src).last_dc_val[0], \ |
88 | (dest).last_dc_val[1] = (src).last_dc_val[1], \ |
89 | (dest).last_dc_val[2] = (src).last_dc_val[2], \ |
90 | (dest).last_dc_val[3] = (src).last_dc_val[3]) |
91 | #endif |
92 | #endif |
93 | |
94 | |
95 | typedef struct { |
96 | struct jpeg_entropy_encoder pub; /* public fields */ |
97 | |
98 | savable_state saved; /* Bit buffer & DC state at start of MCU */ |
99 | |
100 | /* These fields are NOT loaded into local working state. */ |
101 | unsigned int restarts_to_go; /* MCUs left in this restart interval */ |
102 | int next_restart_num; /* next restart number to write (0-7) */ |
103 | |
104 | /* Pointers to derived tables (these workspaces have image lifespan) */ |
105 | c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS]; |
106 | c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS]; |
107 | |
108 | #ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ |
109 | long *dc_count_ptrs[NUM_HUFF_TBLS]; |
110 | long *ac_count_ptrs[NUM_HUFF_TBLS]; |
111 | #endif |
112 | |
113 | int simd; |
114 | } huff_entropy_encoder; |
115 | |
116 | typedef huff_entropy_encoder *huff_entropy_ptr; |
117 | |
118 | /* Working state while writing an MCU. |
119 | * This struct contains all the fields that are needed by subroutines. |
120 | */ |
121 | |
122 | typedef struct { |
123 | JOCTET *next_output_byte; /* => next byte to write in buffer */ |
124 | size_t free_in_buffer; /* # of byte spaces remaining in buffer */ |
125 | savable_state cur; /* Current bit buffer & DC state */ |
126 | j_compress_ptr cinfo; /* dump_buffer needs access to this */ |
127 | } working_state; |
128 | |
129 | |
130 | /* Forward declarations */ |
131 | METHODDEF(boolean) encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data); |
132 | METHODDEF(void) finish_pass_huff (j_compress_ptr cinfo); |
133 | #ifdef ENTROPY_OPT_SUPPORTED |
134 | METHODDEF(boolean) encode_mcu_gather (j_compress_ptr cinfo, |
135 | JBLOCKROW *MCU_data); |
136 | METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo); |
137 | #endif |
138 | |
139 | |
140 | /* |
141 | * Initialize for a Huffman-compressed scan. |
142 | * If gather_statistics is TRUE, we do not output anything during the scan, |
143 | * just count the Huffman symbols used and generate Huffman code tables. |
144 | */ |
145 | |
146 | METHODDEF(void) |
147 | start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) |
148 | { |
149 | huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
150 | int ci, dctbl, actbl; |
151 | jpeg_component_info *compptr; |
152 | |
153 | if (gather_statistics) { |
154 | #ifdef ENTROPY_OPT_SUPPORTED |
155 | entropy->pub.encode_mcu = encode_mcu_gather; |
156 | entropy->pub.finish_pass = finish_pass_gather; |
157 | #else |
158 | ERREXIT(cinfo, JERR_NOT_COMPILED); |
159 | #endif |
160 | } else { |
161 | entropy->pub.encode_mcu = encode_mcu_huff; |
162 | entropy->pub.finish_pass = finish_pass_huff; |
163 | } |
164 | |
165 | entropy->simd = jsimd_can_huff_encode_one_block(); |
166 | |
167 | for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
168 | compptr = cinfo->cur_comp_info[ci]; |
169 | dctbl = compptr->dc_tbl_no; |
170 | actbl = compptr->ac_tbl_no; |
171 | if (gather_statistics) { |
172 | #ifdef ENTROPY_OPT_SUPPORTED |
173 | /* Check for invalid table indexes */ |
174 | /* (make_c_derived_tbl does this in the other path) */ |
175 | if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) |
176 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); |
177 | if (actbl < 0 || actbl >= NUM_HUFF_TBLS) |
178 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); |
179 | /* Allocate and zero the statistics tables */ |
180 | /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ |
181 | if (entropy->dc_count_ptrs[dctbl] == NULL) |
182 | entropy->dc_count_ptrs[dctbl] = (long *) |
183 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
184 | 257 * sizeof(long)); |
185 | MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long)); |
186 | if (entropy->ac_count_ptrs[actbl] == NULL) |
187 | entropy->ac_count_ptrs[actbl] = (long *) |
188 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
189 | 257 * sizeof(long)); |
190 | MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long)); |
191 | #endif |
192 | } else { |
193 | /* Compute derived values for Huffman tables */ |
194 | /* We may do this more than once for a table, but it's not expensive */ |
195 | jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, |
196 | & entropy->dc_derived_tbls[dctbl]); |
197 | jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, |
198 | & entropy->ac_derived_tbls[actbl]); |
199 | } |
200 | /* Initialize DC predictions to 0 */ |
201 | entropy->saved.last_dc_val[ci] = 0; |
202 | } |
203 | |
204 | /* Initialize bit buffer to empty */ |
205 | entropy->saved.put_buffer = 0; |
206 | entropy->saved.put_bits = 0; |
207 | |
208 | /* Initialize restart stuff */ |
209 | entropy->restarts_to_go = cinfo->restart_interval; |
210 | entropy->next_restart_num = 0; |
211 | } |
212 | |
213 | |
214 | /* |
215 | * Compute the derived values for a Huffman table. |
216 | * This routine also performs some validation checks on the table. |
217 | * |
218 | * Note this is also used by jcphuff.c. |
219 | */ |
220 | |
221 | GLOBAL(void) |
222 | jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, |
223 | c_derived_tbl **pdtbl) |
224 | { |
225 | JHUFF_TBL *htbl; |
226 | c_derived_tbl *dtbl; |
227 | int p, i, l, lastp, si, maxsymbol; |
228 | char huffsize[257]; |
229 | unsigned int huffcode[257]; |
230 | unsigned int code; |
231 | |
232 | /* Note that huffsize[] and huffcode[] are filled in code-length order, |
233 | * paralleling the order of the symbols themselves in htbl->huffval[]. |
234 | */ |
235 | |
236 | /* Find the input Huffman table */ |
237 | if (tblno < 0 || tblno >= NUM_HUFF_TBLS) |
238 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
239 | htbl = |
240 | isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; |
241 | if (htbl == NULL) |
242 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
243 | |
244 | /* Allocate a workspace if we haven't already done so. */ |
245 | if (*pdtbl == NULL) |
246 | *pdtbl = (c_derived_tbl *) |
247 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
248 | sizeof(c_derived_tbl)); |
249 | dtbl = *pdtbl; |
250 | |
251 | /* Figure C.1: make table of Huffman code length for each symbol */ |
252 | |
253 | p = 0; |
254 | for (l = 1; l <= 16; l++) { |
255 | i = (int) htbl->bits[l]; |
256 | if (i < 0 || p + i > 256) /* protect against table overrun */ |
257 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
258 | while (i--) |
259 | huffsize[p++] = (char) l; |
260 | } |
261 | huffsize[p] = 0; |
262 | lastp = p; |
263 | |
264 | /* Figure C.2: generate the codes themselves */ |
265 | /* We also validate that the counts represent a legal Huffman code tree. */ |
266 | |
267 | code = 0; |
268 | si = huffsize[0]; |
269 | p = 0; |
270 | while (huffsize[p]) { |
271 | while (((int) huffsize[p]) == si) { |
272 | huffcode[p++] = code; |
273 | code++; |
274 | } |
275 | /* code is now 1 more than the last code used for codelength si; but |
276 | * it must still fit in si bits, since no code is allowed to be all ones. |
277 | */ |
278 | if (((JLONG) code) >= (((JLONG) 1) << si)) |
279 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
280 | code <<= 1; |
281 | si++; |
282 | } |
283 | |
284 | /* Figure C.3: generate encoding tables */ |
285 | /* These are code and size indexed by symbol value */ |
286 | |
287 | /* Set all codeless symbols to have code length 0; |
288 | * this lets us detect duplicate VAL entries here, and later |
289 | * allows emit_bits to detect any attempt to emit such symbols. |
290 | */ |
291 | MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi)); |
292 | |
293 | /* This is also a convenient place to check for out-of-range |
294 | * and duplicated VAL entries. We allow 0..255 for AC symbols |
295 | * but only 0..15 for DC. (We could constrain them further |
296 | * based on data depth and mode, but this seems enough.) |
297 | */ |
298 | maxsymbol = isDC ? 15 : 255; |
299 | |
300 | for (p = 0; p < lastp; p++) { |
301 | i = htbl->huffval[p]; |
302 | if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) |
303 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
304 | dtbl->ehufco[i] = huffcode[p]; |
305 | dtbl->ehufsi[i] = huffsize[p]; |
306 | } |
307 | } |
308 | |
309 | |
310 | /* Outputting bytes to the file */ |
311 | |
312 | /* Emit a byte, taking 'action' if must suspend. */ |
313 | #define emit_byte(state,val,action) \ |
314 | { *(state)->next_output_byte++ = (JOCTET) (val); \ |
315 | if (--(state)->free_in_buffer == 0) \ |
316 | if (! dump_buffer(state)) \ |
317 | { action; } } |
318 | |
319 | |
320 | LOCAL(boolean) |
321 | dump_buffer (working_state *state) |
322 | /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ |
323 | { |
324 | struct jpeg_destination_mgr *dest = state->cinfo->dest; |
325 | |
326 | if (! (*dest->empty_output_buffer) (state->cinfo)) |
327 | return FALSE; |
328 | /* After a successful buffer dump, must reset buffer pointers */ |
329 | state->next_output_byte = dest->next_output_byte; |
330 | state->free_in_buffer = dest->free_in_buffer; |
331 | return TRUE; |
332 | } |
333 | |
334 | |
335 | /* Outputting bits to the file */ |
336 | |
337 | /* These macros perform the same task as the emit_bits() function in the |
338 | * original libjpeg code. In addition to reducing overhead by explicitly |
339 | * inlining the code, additional performance is achieved by taking into |
340 | * account the size of the bit buffer and waiting until it is almost full |
341 | * before emptying it. This mostly benefits 64-bit platforms, since 6 |
342 | * bytes can be stored in a 64-bit bit buffer before it has to be emptied. |
343 | */ |
344 | |
345 | #define EMIT_BYTE() { \ |
346 | JOCTET |
---|