1 | /* |
2 | * jchuff.c |
3 | * |
4 | * This file was part of the Independent JPEG Group's software: |
5 | * Copyright (C) 1991-1997, Thomas G. Lane. |
6 | * libjpeg-turbo Modifications: |
7 | * Copyright (C) 2009-2011, 2014-2016, 2018, D. R. Commander. |
8 | * Copyright (C) 2015, Matthieu Darbois. |
9 | * For conditions of distribution and use, see the accompanying README.ijg |
10 | * file. |
11 | * |
12 | * This file contains Huffman entropy encoding routines. |
13 | * |
14 | * Much of the complexity here has to do with supporting output suspension. |
15 | * If the data destination module demands suspension, we want to be able to |
16 | * back up to the start of the current MCU. To do this, we copy state |
17 | * variables into local working storage, and update them back to the |
18 | * permanent JPEG objects only upon successful completion of an MCU. |
19 | * |
20 | * NOTE: All referenced figures are from |
21 | * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994. |
22 | */ |
23 | |
24 | #define JPEG_INTERNALS |
25 | #include "jinclude.h" |
26 | #include "jpeglib.h" |
27 | #include "jsimd.h" |
28 | #include "jconfigint.h" |
29 | #include <limits.h> |
30 | |
31 | /* |
32 | * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
33 | * used for bit counting rather than the lookup table. This will reduce the |
34 | * memory footprint by 64k, which is important for some mobile applications |
35 | * that create many isolated instances of libjpeg-turbo (web browsers, for |
36 | * instance.) This may improve performance on some mobile platforms as well. |
37 | * This feature is enabled by default only on ARM processors, because some x86 |
38 | * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
39 | * shown to have a significant performance impact even on the x86 chips that |
40 | * have a fast implementation of it. When building for ARMv6, you can |
41 | * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
42 | * flags (this defines __thumb__). |
43 | */ |
44 | |
45 | /* NOTE: Both GCC and Clang define __GNUC__ */ |
46 | #if defined __GNUC__ && (defined __arm__ || defined __aarch64__) |
47 | #if !defined __thumb__ || defined __thumb2__ |
48 | #define USE_CLZ_INTRINSIC |
49 | #endif |
50 | #endif |
51 | |
52 | #ifdef USE_CLZ_INTRINSIC |
53 | #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
54 | #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
55 | #else |
56 | #include "jpeg_nbits_table.h" |
57 | #define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
58 | #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
59 | #endif |
60 | |
61 | |
62 | /* Expanded entropy encoder object for Huffman encoding. |
63 | * |
64 | * The savable_state subrecord contains fields that change within an MCU, |
65 | * but must not be updated permanently until we complete the MCU. |
66 | */ |
67 | |
68 | typedef struct { |
69 | size_t put_buffer; /* current bit-accumulation buffer */ |
70 | int put_bits; /* # of bits now in it */ |
71 | int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ |
72 | } savable_state; |
73 | |
74 | /* This macro is to work around compilers with missing or broken |
75 | * structure assignment. You'll need to fix this code if you have |
76 | * such a compiler and you change MAX_COMPS_IN_SCAN. |
77 | */ |
78 | |
79 | #ifndef NO_STRUCT_ASSIGN |
80 | #define ASSIGN_STATE(dest, src) ((dest) = (src)) |
81 | #else |
82 | #if MAX_COMPS_IN_SCAN == 4 |
83 | #define ASSIGN_STATE(dest, src) \ |
84 | ((dest).put_buffer = (src).put_buffer, \ |
85 | (dest).put_bits = (src).put_bits, \ |
86 | (dest).last_dc_val[0] = (src).last_dc_val[0], \ |
87 | (dest).last_dc_val[1] = (src).last_dc_val[1], \ |
88 | (dest).last_dc_val[2] = (src).last_dc_val[2], \ |
89 | (dest).last_dc_val[3] = (src).last_dc_val[3]) |
90 | #endif |
91 | #endif |
92 | |
93 | |
94 | typedef struct { |
95 | struct jpeg_entropy_encoder pub; /* public fields */ |
96 | |
97 | savable_state saved; /* Bit buffer & DC state at start of MCU */ |
98 | |
99 | /* These fields are NOT loaded into local working state. */ |
100 | unsigned int restarts_to_go; /* MCUs left in this restart interval */ |
101 | int next_restart_num; /* next restart number to write (0-7) */ |
102 | |
103 | /* Pointers to derived tables (these workspaces have image lifespan) */ |
104 | c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS]; |
105 | c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS]; |
106 | |
107 | #ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ |
108 | long *dc_count_ptrs[NUM_HUFF_TBLS]; |
109 | long *ac_count_ptrs[NUM_HUFF_TBLS]; |
110 | #endif |
111 | |
112 | int simd; |
113 | } huff_entropy_encoder; |
114 | |
115 | typedef huff_entropy_encoder *huff_entropy_ptr; |
116 | |
117 | /* Working state while writing an MCU. |
118 | * This struct contains all the fields that are needed by subroutines. |
119 | */ |
120 | |
121 | typedef struct { |
122 | JOCTET *next_output_byte; /* => next byte to write in buffer */ |
123 | size_t free_in_buffer; /* # of byte spaces remaining in buffer */ |
124 | savable_state cur; /* Current bit buffer & DC state */ |
125 | j_compress_ptr cinfo; /* dump_buffer needs access to this */ |
126 | } working_state; |
127 | |
128 | |
129 | /* Forward declarations */ |
130 | METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data); |
131 | METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo); |
132 | #ifdef ENTROPY_OPT_SUPPORTED |
133 | METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo, |
134 | JBLOCKROW *MCU_data); |
135 | METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo); |
136 | #endif |
137 | |
138 | |
139 | /* |
140 | * Initialize for a Huffman-compressed scan. |
141 | * If gather_statistics is TRUE, we do not output anything during the scan, |
142 | * just count the Huffman symbols used and generate Huffman code tables. |
143 | */ |
144 | |
145 | METHODDEF(void) |
146 | start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics) |
147 | { |
148 | huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy; |
149 | int ci, dctbl, actbl; |
150 | jpeg_component_info *compptr; |
151 | |
152 | if (gather_statistics) { |
153 | #ifdef ENTROPY_OPT_SUPPORTED |
154 | entropy->pub.encode_mcu = encode_mcu_gather; |
155 | entropy->pub.finish_pass = finish_pass_gather; |
156 | #else |
157 | ERREXIT(cinfo, JERR_NOT_COMPILED); |
158 | #endif |
159 | } else { |
160 | entropy->pub.encode_mcu = encode_mcu_huff; |
161 | entropy->pub.finish_pass = finish_pass_huff; |
162 | } |
163 | |
164 | entropy->simd = jsimd_can_huff_encode_one_block(); |
165 | |
166 | for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
167 | compptr = cinfo->cur_comp_info[ci]; |
168 | dctbl = compptr->dc_tbl_no; |
169 | actbl = compptr->ac_tbl_no; |
170 | if (gather_statistics) { |
171 | #ifdef ENTROPY_OPT_SUPPORTED |
172 | /* Check for invalid table indexes */ |
173 | /* (make_c_derived_tbl does this in the other path) */ |
174 | if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) |
175 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); |
176 | if (actbl < 0 || actbl >= NUM_HUFF_TBLS) |
177 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); |
178 | /* Allocate and zero the statistics tables */ |
179 | /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ |
180 | if (entropy->dc_count_ptrs[dctbl] == NULL) |
181 | entropy->dc_count_ptrs[dctbl] = (long *) |
182 | (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
183 | 257 * sizeof(long)); |
184 | MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long)); |
185 | if (entropy->ac_count_ptrs[actbl] == NULL) |
186 | entropy->ac_count_ptrs[actbl] = (long *) |
187 | (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
188 | 257 * sizeof(long)); |
189 | MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long)); |
190 | #endif |
191 | } else { |
192 | /* Compute derived values for Huffman tables */ |
193 | /* We may do this more than once for a table, but it's not expensive */ |
194 | jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, |
195 | &entropy->dc_derived_tbls[dctbl]); |
196 | jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, |
197 | &entropy->ac_derived_tbls[actbl]); |
198 | } |
199 | /* Initialize DC predictions to 0 */ |
200 | entropy->saved.last_dc_val[ci] = 0; |
201 | } |
202 | |
203 | /* Initialize bit buffer to empty */ |
204 | entropy->saved.put_buffer = 0; |
205 | entropy->saved.put_bits = 0; |
206 | |
207 | /* Initialize restart stuff */ |
208 | entropy->restarts_to_go = cinfo->restart_interval; |
209 | entropy->next_restart_num = 0; |
210 | } |
211 | |
212 | |
213 | /* |
214 | * Compute the derived values for a Huffman table. |
215 | * This routine also performs some validation checks on the table. |
216 | * |
217 | * Note this is also used by jcphuff.c. |
218 | */ |
219 | |
220 | GLOBAL(void) |
221 | jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno, |
222 | c_derived_tbl **pdtbl) |
223 | { |
224 | JHUFF_TBL *htbl; |
225 | c_derived_tbl *dtbl; |
226 | int p, i, l, lastp, si, maxsymbol; |
227 | char huffsize[257]; |
228 | unsigned int huffcode[257]; |
229 | unsigned int code; |
230 | |
231 | /* Note that huffsize[] and huffcode[] are filled in code-length order, |
232 | * paralleling the order of the symbols themselves in htbl->huffval[]. |
233 | */ |
234 | |
235 | /* Find the input Huffman table */ |
236 | if (tblno < 0 || tblno >= NUM_HUFF_TBLS) |
237 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
238 | htbl = |
239 | isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; |
240 | if (htbl == NULL) |
241 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
242 | |
243 | /* Allocate a workspace if we haven't already done so. */ |
244 | if (*pdtbl == NULL) |
245 | *pdtbl = (c_derived_tbl *) |
246 | (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
247 | sizeof(c_derived_tbl)); |
248 | dtbl = *pdtbl; |
249 | |
250 | /* Figure C.1: make table of Huffman code length for each symbol */ |
251 | |
252 | p = 0; |
253 | for (l = 1; l <= 16; l++) { |
254 | i = (int)htbl->bits[l]; |
255 | if (i < 0 || p + i > 256) /* protect against table overrun */ |
256 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
257 | while (i--) |
258 | huffsize[p++] = (char)l; |
259 | } |
260 | huffsize[p] = 0; |
261 | lastp = p; |
262 | |
263 | /* Figure C.2: generate the codes themselves */ |
264 | /* We also validate that the counts represent a legal Huffman code tree. */ |
265 | |
266 | code = 0; |
267 | si = huffsize[0]; |
268 | p = 0; |
269 | while (huffsize[p]) { |
270 | while (((int)huffsize[p]) == si) { |
271 | huffcode[p++] = code; |
272 | code++; |
273 | } |
274 | /* code is now 1 more than the last code used for codelength si; but |
275 | * it must still fit in si bits, since no code is allowed to be all ones. |
276 | */ |
277 | if (((JLONG)code) >= (((JLONG)1) << si)) |
278 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
279 | code <<= 1; |
280 | si++; |
281 | } |
282 | |
283 | /* Figure C.3: generate encoding tables */ |
284 | /* These are code and size indexed by symbol value */ |
285 | |
286 | /* Set all codeless symbols to have code length 0; |
287 | * this lets us detect duplicate VAL entries here, and later |
288 | * allows emit_bits to detect any attempt to emit such symbols. |
289 | */ |
290 | MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi)); |
291 | |
292 | /* This is also a convenient place to check for out-of-range |
293 | * and duplicated VAL entries. We allow 0..255 for AC symbols |
294 | * but only 0..15 for DC. (We could constrain them further |
295 | * based on data depth and mode, but this seems enough.) |
296 | */ |
297 | maxsymbol = isDC ? 15 : 255; |
298 | |
299 | for (p = 0; p < lastp; p++) { |
300 | i = htbl->huffval[p]; |
301 | if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) |
302 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
303 | dtbl->ehufco[i] = huffcode[p]; |
304 | dtbl->ehufsi[i] = huffsize[p]; |
305 | } |
306 | } |
307 | |
308 | |
309 | /* Outputting bytes to the file */ |
310 | |
311 | /* Emit a byte, taking 'action' if must suspend. */ |
312 | #define emit_byte(state, val, action) { \ |
313 | *(state)->next_output_byte++ = (JOCTET)(val); \ |
314 | if (--(state)->free_in_buffer == 0) \ |
315 | if (!dump_buffer(state)) \ |
316 | { action; } \ |
317 | } |
318 | |
319 | |
320 | LOCAL(boolean) |
321 | dump_buffer(working_state *state) |
322 | /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ |
323 | { |
324 | struct jpeg_destination_mgr *dest = state->cinfo->dest; |
325 | |
326 | if (!(*dest->empty_output_buffer) (state->cinfo)) |
327 | return FALSE; |
328 | /* After a successful buffer dump, must reset buffer pointers */ |
329 | state->next_output_byte = dest->next_output_byte; |
330 | state->free_in_buffer = dest->free_in_buffer; |
331 | return TRUE; |
332 | } |
333 | |
334 | |
335 | /* Outputting bits to the file */ |
336 | |
337 | /* These macros perform the same task as the emit_bits() function in the |
338 | * original libjpeg code. In addition to reducing overhead by explicitly |
339 | * inlining the code, additional performance is achieved by taking into |
340 | * account the size of the bit buffer and waiting until it is almost full |
341 | * before emptying it. This mostly benefits 64-bit platforms, since 6 |
342 | * bytes can be stored in a 64-bit bit buffer before it has to be emptied. |
343 | */ |
344 | |
345 | #define EMIT_BYTE() { \ |
346 | JOCTET |
---|