1 | /* |
2 | * jcphuff.c |
3 | * |
4 | * This file was part of the Independent JPEG Group's software: |
5 | * Copyright (C) 1995-1997, Thomas G. Lane. |
6 | * libjpeg-turbo Modifications: |
7 | * Copyright (C) 2011, 2015, 2018, D. R. Commander. |
8 | * Copyright (C) 2016, 2018, Matthieu Darbois. |
9 | * For conditions of distribution and use, see the accompanying README.ijg |
10 | * file. |
11 | * |
12 | * This file contains Huffman entropy encoding routines for progressive JPEG. |
13 | * |
14 | * We do not support output suspension in this module, since the library |
15 | * currently does not allow multiple-scan files to be written with output |
16 | * suspension. |
17 | */ |
18 | |
19 | #define JPEG_INTERNALS |
20 | #include "jinclude.h" |
21 | #include "jpeglib.h" |
22 | #include "jsimd.h" |
23 | #include "jconfigint.h" |
24 | #include <limits.h> |
25 | |
26 | #ifdef HAVE_INTRIN_H |
27 | #include <intrin.h> |
28 | #ifdef _MSC_VER |
29 | #ifdef HAVE_BITSCANFORWARD64 |
30 | #pragma intrinsic(_BitScanForward64) |
31 | #endif |
32 | #ifdef HAVE_BITSCANFORWARD |
33 | #pragma intrinsic(_BitScanForward) |
34 | #endif |
35 | #endif |
36 | #endif |
37 | |
38 | #ifdef C_PROGRESSIVE_SUPPORTED |
39 | |
40 | /* |
41 | * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
42 | * used for bit counting rather than the lookup table. This will reduce the |
43 | * memory footprint by 64k, which is important for some mobile applications |
44 | * that create many isolated instances of libjpeg-turbo (web browsers, for |
45 | * instance.) This may improve performance on some mobile platforms as well. |
46 | * This feature is enabled by default only on ARM processors, because some x86 |
47 | * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
48 | * shown to have a significant performance impact even on the x86 chips that |
49 | * have a fast implementation of it. When building for ARMv6, you can |
50 | * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
51 | * flags (this defines __thumb__). |
52 | */ |
53 | |
54 | /* NOTE: Both GCC and Clang define __GNUC__ */ |
55 | #if defined __GNUC__ && (defined __arm__ || defined __aarch64__) |
56 | #if !defined __thumb__ || defined __thumb2__ |
57 | #define USE_CLZ_INTRINSIC |
58 | #endif |
59 | #endif |
60 | |
61 | #ifdef USE_CLZ_INTRINSIC |
62 | #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
63 | #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
64 | #else |
65 | #include "jpeg_nbits_table.h" |
66 | #define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
67 | #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
68 | #endif |
69 | |
70 | |
71 | /* Expanded entropy encoder object for progressive Huffman encoding. */ |
72 | |
73 | typedef struct { |
74 | struct jpeg_entropy_encoder pub; /* public fields */ |
75 | |
76 | /* Pointer to routine to prepare data for encode_mcu_AC_first() */ |
77 | void (*AC_first_prepare) (const JCOEF *block, |
78 | const int *jpeg_natural_order_start, int Sl, |
79 | int Al, JCOEF *values, size_t *zerobits); |
80 | /* Pointer to routine to prepare data for encode_mcu_AC_refine() */ |
81 | int (*AC_refine_prepare) (const JCOEF *block, |
82 | const int *jpeg_natural_order_start, int Sl, |
83 | int Al, JCOEF *absvalues, size_t *bits); |
84 | |
85 | /* Mode flag: TRUE for optimization, FALSE for actual data output */ |
86 | boolean gather_statistics; |
87 | |
88 | /* Bit-level coding status. |
89 | * next_output_byte/free_in_buffer are local copies of cinfo->dest fields. |
90 | */ |
91 | JOCTET *next_output_byte; /* => next byte to write in buffer */ |
92 | size_t free_in_buffer; /* # of byte spaces remaining in buffer */ |
93 | size_t put_buffer; /* current bit-accumulation buffer */ |
94 | int put_bits; /* # of bits now in it */ |
95 | j_compress_ptr cinfo; /* link to cinfo (needed for dump_buffer) */ |
96 | |
97 | /* Coding status for DC components */ |
98 | int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ |
99 | |
100 | /* Coding status for AC components */ |
101 | int ac_tbl_no; /* the table number of the single component */ |
102 | unsigned int EOBRUN; /* run length of EOBs */ |
103 | unsigned int BE; /* # of buffered correction bits before MCU */ |
104 | char *bit_buffer; /* buffer for correction bits (1 per char) */ |
105 | /* packing correction bits tightly would save some space but cost time... */ |
106 | |
107 | unsigned int restarts_to_go; /* MCUs left in this restart interval */ |
108 | int next_restart_num; /* next restart number to write (0-7) */ |
109 | |
110 | /* Pointers to derived tables (these workspaces have image lifespan). |
111 | * Since any one scan codes only DC or only AC, we only need one set |
112 | * of tables, not one for DC and one for AC. |
113 | */ |
114 | c_derived_tbl *derived_tbls[NUM_HUFF_TBLS]; |
115 | |
116 | /* Statistics tables for optimization; again, one set is enough */ |
117 | long *count_ptrs[NUM_HUFF_TBLS]; |
118 | } phuff_entropy_encoder; |
119 | |
120 | typedef phuff_entropy_encoder *phuff_entropy_ptr; |
121 | |
/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
 * buffer can hold.  Larger sizes may slightly improve compression, but
 * 1000 is already well into the realm of overkill.
 * The minimum safe size is 64 bits.
 */

#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */

/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * In the unsigned-shift variant, the sign-extension mask is built from an
 * unsigned all-ones value, because left-shifting the negative int ~0 has
 * undefined behavior in C.  The mask is converted back to int so that both
 * arms of the conditional keep type int.  Any function using this variant
 * must declare ISHIFT_TEMPS among its locals.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS    int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (ishift_temp >> (shft)) | (int)((~0U) << (16 - (shft))) : \
   (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
#endif

/* Round v up to the next multiple of p; the mask arithmetic is only correct
 * when p is a power of two.  Both arguments are fully parenthesized so that
 * expression arguments are evaluated as a unit.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
147 | |
148 | /* Forward declarations */ |
149 | METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo, |
150 | JBLOCKROW *MCU_data); |
151 | METHODDEF(void) encode_mcu_AC_first_prepare |
152 | (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, |
153 | JCOEF *values, size_t *zerobits); |
154 | METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo, |
155 | JBLOCKROW *MCU_data); |
156 | METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo, |
157 | JBLOCKROW *MCU_data); |
158 | METHODDEF(int) encode_mcu_AC_refine_prepare |
159 | (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al, |
160 | JCOEF *absvalues, size_t *bits); |
161 | METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo, |
162 | JBLOCKROW *MCU_data); |
163 | METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo); |
164 | METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo); |
165 | |
166 | |
167 | /* Count bit loop zeroes */ |
168 | INLINE |
169 | METHODDEF(int) |
170 | count_zeroes(size_t *x) |
171 | { |
172 | int result; |
173 | #if defined(HAVE_BUILTIN_CTZL) |
174 | result = __builtin_ctzl(*x); |
175 | *x >>= result; |
176 | #elif defined(HAVE_BITSCANFORWARD64) |
177 | _BitScanForward64(&result, *x); |
178 | *x >>= result; |
179 | #elif defined(HAVE_BITSCANFORWARD) |
180 | _BitScanForward(&result, *x); |
181 | *x >>= result; |
182 | #else |
183 | result = 0; |
184 | while ((*x & 1) == 0) { |
185 | ++result; |
186 | *x >>= 1; |
187 | } |
188 | #endif |
189 | return result; |
190 | } |
191 | |
192 | |
193 | /* |
194 | * Initialize for a Huffman-compressed scan using progressive JPEG. |
195 | */ |
196 | |
197 | METHODDEF(void) |
198 | start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics) |
199 | { |
200 | phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
201 | boolean is_DC_band; |
202 | int ci, tbl; |
203 | jpeg_component_info *compptr; |
204 | |
205 | entropy->cinfo = cinfo; |
206 | entropy->gather_statistics = gather_statistics; |
207 | |
208 | is_DC_band = (cinfo->Ss == 0); |
209 | |
210 | /* We assume jcmaster.c already validated the scan parameters. */ |
211 | |
212 | /* Select execution routines */ |
213 | if (cinfo->Ah == 0) { |
214 | if (is_DC_band) |
215 | entropy->pub.encode_mcu = encode_mcu_DC_first; |
216 | else |
217 | entropy->pub.encode_mcu = encode_mcu_AC_first; |
218 | if (jsimd_can_encode_mcu_AC_first_prepare()) |
219 | entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare; |
220 | else |
221 | entropy->AC_first_prepare = encode_mcu_AC_first_prepare; |
222 | } else { |
223 | if (is_DC_band) |
224 | entropy->pub.encode_mcu = encode_mcu_DC_refine; |
225 | else { |
226 | entropy->pub.encode_mcu = encode_mcu_AC_refine; |
227 | if (jsimd_can_encode_mcu_AC_refine_prepare()) |
228 | entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare; |
229 | else |
230 | entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare; |
231 | /* AC refinement needs a correction bit buffer */ |
232 | if (entropy->bit_buffer == NULL) |
233 | entropy->bit_buffer = (char *) |
234 | (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
235 | MAX_CORR_BITS * sizeof(char)); |
236 | } |
237 | } |
238 | if (gather_statistics) |
239 | entropy->pub.finish_pass = finish_pass_gather_phuff; |
240 | else |
241 | entropy->pub.finish_pass = finish_pass_phuff; |
242 | |
243 | /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1 |
244 | * for AC coefficients. |
245 | */ |
246 | for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
247 | compptr = cinfo->cur_comp_info[ci]; |
248 | /* Initialize DC predictions to 0 */ |
249 | entropy->last_dc_val[ci] = 0; |
250 | /* Get table index */ |
251 | if (is_DC_band) { |
252 | if (cinfo->Ah != 0) /* DC refinement needs no table */ |
253 | continue; |
254 | tbl = compptr->dc_tbl_no; |
255 | } else { |
256 | entropy->ac_tbl_no = tbl = compptr->ac_tbl_no; |
257 | } |
258 | if (gather_statistics) { |
259 | /* Check for invalid table index */ |
260 | /* (make_c_derived_tbl does this in the other path) */ |
261 | if (tbl < 0 || tbl >= NUM_HUFF_TBLS) |
262 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl); |
263 | /* Allocate and zero the statistics tables */ |
264 | /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ |
265 | if (entropy->count_ptrs[tbl] == NULL) |
266 | entropy->count_ptrs[tbl] = (long *) |
267 | (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
268 | 257 * sizeof(long)); |
269 | MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long)); |
270 | } else { |
271 | /* Compute derived values for Huffman table */ |
272 | /* We may do this more than once for a table, but it's not expensive */ |
273 | jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl, |
274 | &entropy->derived_tbls[tbl]); |
275 | } |
276 | } |
277 | |
278 | /* Initialize AC stuff */ |
279 | entropy->EOBRUN = 0; |
280 | entropy->BE = 0; |
281 | |
282 | /* Initialize bit buffer to empty */ |
283 | entropy->put_buffer = 0; |
284 | entropy->put_bits = 0; |
285 | |
286 | /* Initialize restart stuff */ |
287 | entropy->restarts_to_go = cinfo->restart_interval; |
288 | entropy->next_restart_num = 0; |
289 | } |
290 | |
291 | |
/* Outputting bytes to the file.
 * NB: these must be called only when actually outputting,
 * that is, entropy->gather_statistics == FALSE.
 */

/* Emit a byte: store val at the current output position and call
 * dump_buffer() once the buffer has no free space left.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by a semicolon forms exactly one statement and can be used as the body of
 * an if/else without braces.  Note that `entropy` is evaluated more than
 * once, so it must not have side effects.
 */
#define emit_byte(entropy, val)  do { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  if (--(entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
} while (0)
303 | |
304 | |
305 | LOCAL(void) |
306 | dump_buffer(phuff_entropy_ptr entropy) |
307 | /* Empty the output buffer; we do not support suspension in this module. */ |
308 | { |
309 | struct jpeg_destination_mgr *dest = entropy->cinfo->dest; |
310 | |
311 | if (!(*dest->empty_output_buffer) (entropy->cinfo)) |
312 | ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND); |
313 | /* After a successful buffer dump, must reset buffer pointers */ |
314 | entropy->next_output_byte = dest->next_output_byte; |
315 | entropy->free_in_buffer = dest->free_in_buffer; |
316 | } |
317 | |
318 | |
319 | /* Outputting bits to the file */ |
320 | |
321 | /* Only the right 24 bits of put_buffer are used; the valid bits are |
322 | * left-justified in this part. At most 16 bits can be passed to emit_bits |
323 | * in one call, and we never retain more than 7 bits in put_buffer |
324 | * between calls, so 24 bits are sufficient. |
325 | */ |
326 | |
327 | LOCAL(void) |
328 | emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size) |
329 | /* Emit some bits, unless we are in gather mode */ |
330 | { |
331 | /* This routine is heavily used, so it's worth coding tightly. */ |
332 | register size_t put_buffer = (size_t)code; |
333 | register int put_bits = entropy->put_bits; |
334 | |
335 | /* if size is 0, caller used an invalid Huffman table entry */ |
336 | if (size == 0) |
337 | ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); |
338 | |
339 | if (entropy->gather_statistics) |
340 | return; /* do nothing if we're only getting stats */ |
341 | |
342 | put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */ |
343 | |
344 | put_bits += size; /* new number of bits in buffer */ |
345 | |
346 | put_buffer <<= 24 - put_bits; /* align incoming bits */ |
347 | |
348 | put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */ |
349 | |
350 | while (put_bits >= 8) { |
351 | int c = (int)((put_buffer >> 16) & 0xFF); |
352 | |
353 | emit_byte(entropy, c); |
354 | if (c == 0xFF) { /* need to stuff a zero byte? */ |
355 | emit_byte(entropy, 0); |
356 | } |
357 | put_buffer <<= 8; |
358 | put_bits -= 8; |
359 | } |
360 | |
361 | entropy->put_buffer = put_buffer; /* update variables */ |
362 | entropy->put_bits = put_bits; |
363 | } |
364 | |
365 | |
366 | LOCAL(void) |
367 | flush_bits(phuff_entropy_ptr entropy) |
368 | { |
369 | emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */ |
370 | entropy->put_buffer = 0; /* and reset bit-buffer to empty */ |
371 | entropy->put_bits = 0; |
372 | } |
373 | |
374 | |
375 | /* |
376 | * Emit (or just count) a Huffman symbol. |
377 | */ |
378 | |
379 | LOCAL(void) |
380 | emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol) |
381 | { |
382 | if (entropy->gather_statistics) |
383 | entropy->count_ptrs[tbl_no][symbol]++; |
384 | else { |
385 | c_derived_tbl *tbl = entropy->derived_tbls[tbl_no]; |
386 | emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]); |
387 | } |
388 | } |
389 | |
390 | |
391 | /* |
392 | * Emit bits from a correction bit buffer. |
393 | */ |
394 | |
395 | LOCAL(void) |
396 | emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart, |
397 | unsigned int nbits) |
398 | { |
399 | if (entropy->gather_statistics) |
400 | return; /* no real work */ |
401 | |
402 | while (nbits > 0) { |
403 | emit_bits(entropy, (unsigned int)(*bufstart), 1); |
404 | bufstart++; |
405 | nbits--; |
406 | } |
407 | } |
408 | |
409 | |
410 | /* |
411 | * Emit any pending EOBRUN symbol. |
412 | */ |
413 | |
414 | LOCAL(void) |
415 | emit_eobrun(phuff_entropy_ptr entropy) |
416 | { |
417 | register int temp, nbits; |
418 | |
419 | if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */ |
420 | temp = entropy->EOBRUN; |
421 | nbits = JPEG_NBITS_NONZERO(temp) - 1; |
422 | /* safety check: shouldn't happen given limited correction-bit buffer */ |
423 | if (nbits > 14) |
424 | ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE); |
425 | |
426 | emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4); |
427 | if (nbits) |
428 | emit_bits(entropy, entropy->EOBRUN, nbits); |
429 | |
430 | entropy->EOBRUN = 0; |
431 | |
432 | /* Emit any buffered correction bits */ |
433 | emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE); |
434 | entropy->BE = 0; |
435 | } |
436 | } |
437 | |
438 | |
439 | /* |
440 | * Emit a restart marker & resynchronize predictions. |
441 | */ |
442 | |
443 | LOCAL(void) |
444 | emit_restart(phuff_entropy_ptr entropy, int restart_num) |
445 | { |
446 | int ci; |
447 | |
448 | emit_eobrun(entropy); |
449 | |
450 | if (!entropy->gather_statistics) { |
451 | flush_bits(entropy); |
452 | emit_byte(entropy, 0xFF); |
453 | emit_byte(entropy, JPEG_RST0 + restart_num); |
454 | } |
455 | |
456 | if (entropy->cinfo->Ss == 0) { |
457 | /* Re-initialize DC predictions to 0 */ |
458 | for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++) |
459 | entropy->last_dc_val[ci] = 0; |
460 | } else { |
461 | /* Re-initialize all AC-related fields to 0 */ |
462 | entropy->EOBRUN = 0; |
463 | entropy->BE = 0; |
464 | } |
465 | } |
466 | |
467 | |
468 | /* |
469 | * MCU encoding for DC initial scan (either spectral selection, |
470 | * or first pass of successive approximation). |
471 | */ |
472 | |
473 | METHODDEF(boolean) |
474 | encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
475 | { |
476 | phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
477 | register int temp, temp2, temp3; |
478 | register int nbits; |
479 | int blkn, ci; |
480 | int Al = cinfo->Al; |
481 | JBLOCKROW block; |
482 | jpeg_component_info *compptr; |
483 | ISHIFT_TEMPS |
484 | |
485 | entropy->next_output_byte = cinfo->dest->next_output_byte; |
486 | entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
487 | |
488 | /* Emit restart marker if needed */ |
489 | if (cinfo->restart_interval) |
490 | if (entropy->restarts_to_go == 0) |
491 | emit_restart(entropy, entropy->next_restart_num); |
492 | |
493 | /* Encode the MCU data blocks */ |
494 | for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
495 | block = MCU_data[blkn]; |
496 | ci = cinfo->MCU_membership[blkn]; |
497 | compptr = cinfo->cur_comp_info[ci]; |
498 | |
499 | /* Compute the DC value after the required point transform by Al. |
500 | * This is simply an arithmetic right shift. |
501 | */ |
502 | temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al); |
503 | |
504 | /* DC differences are figured on the point-transformed values. */ |
505 | temp = temp2 - entropy->last_dc_val[ci]; |
506 | entropy->last_dc_val[ci] = temp2; |
507 | |
508 | /* Encode the DC coefficient difference per section G.1.2.1 */ |
509 | |
510 | /* This is a well-known technique for obtaining the absolute value without |
511 | * a branch. It is derived from an assembly language technique presented |
512 | * in "How to Optimize for the Pentium Processors", Copyright (c) 1996, |
513 | * 1997 by Agner Fog. |
514 | */ |
515 | temp3 = temp >> (CHAR_BIT * sizeof(int) - 1); |
516 | temp ^= temp3; |
517 | temp -= temp3; /* temp is abs value of input */ |
518 | /* For a negative input, want temp2 = bitwise complement of abs(input) */ |
519 | temp2 = temp ^ temp3; |
520 | |
521 | /* Find the number of bits needed for the magnitude of the coefficient */ |
522 | nbits = JPEG_NBITS(temp); |
523 | /* Check for out-of-range coefficient values. |
524 | * Since we're encoding a difference, the range limit is twice as much. |
525 | */ |
526 | if (nbits > MAX_COEF_BITS + 1) |
527 | ERREXIT(cinfo, JERR_BAD_DCT_COEF); |
528 | |
529 | /* Count/emit the Huffman-coded symbol for the number of bits */ |
530 | emit_symbol(entropy, compptr->dc_tbl_no, nbits); |
531 | |
532 | /* Emit that number of bits of the value, if positive, */ |
533 | /* or the complement of its magnitude, if negative. */ |
534 | if (nbits) /* emit_bits rejects calls with size 0 */ |
535 | emit_bits(entropy, (unsigned int)temp2, nbits); |
536 | } |
537 | |
538 | cinfo->dest->next_output_byte = entropy->next_output_byte; |
539 | cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
540 | |
541 | /* Update restart-interval state too */ |
542 | if (cinfo->restart_interval) { |
543 | if (entropy->restarts_to_go == 0) { |
544 | entropy->restarts_to_go = cinfo->restart_interval; |
545 | entropy->next_restart_num++; |
546 | entropy->next_restart_num &= 7; |
547 | } |
548 | entropy->restarts_to_go--; |
549 | } |
550 | |
551 | return TRUE; |
552 | } |
553 | |
554 | |
555 | /* |
556 | * Data preparation for encode_mcu_AC_first(). |
557 | */ |
558 | |
559 | #define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \ |
560 | for (k = 0; k < Sl; k++) { \ |
561 | temp = block[jpeg_natural_order_start[k]]; \ |
562 | if (temp == 0) \ |
563 | continue; \ |
564 | /* We must apply the point transform by Al. For AC coefficients this \ |
565 | * is an integer division with rounding towards 0. To do this portably \ |
566 | * in C, we shift after obtaining the absolute value; so the code is \ |
567 | * interwoven with finding the abs value (temp) and output bits (temp2). \ |
568 | */ \ |
569 | temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ |
570 | temp ^= temp2; \ |
571 | temp -= temp2; /* temp is abs value of input */ \ |
572 | temp >>= Al; /* apply the point transform */ \ |
573 | /* Watch out for case that nonzero coef is zero after point transform */ \ |
574 | if (temp == 0) \ |
575 | continue; \ |
576 | /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \ |
577 | temp2 ^= temp; \ |
578 | values[k] = temp; \ |
579 | values[k + DCTSIZE2] = temp2; \ |
580 | zerobits |= ((size_t)1U) << k; \ |
581 | } \ |
582 | } |
583 | |
584 | METHODDEF(void) |
585 | encode_mcu_AC_first_prepare(const JCOEF *block, |
586 | const int *jpeg_natural_order_start, int Sl, |
587 | int Al, JCOEF *values, size_t *bits) |
588 | { |
589 | register int k, temp, temp2; |
590 | size_t zerobits = 0U; |
591 | int Sl0 = Sl; |
592 | |
593 | #if SIZEOF_SIZE_T == 4 |
594 | if (Sl0 > 32) |
595 | Sl0 = 32; |
596 | #endif |
597 | |
598 | COMPUTE_ABSVALUES_AC_FIRST(Sl0); |
599 | |
600 | bits[0] = zerobits; |
601 | #if SIZEOF_SIZE_T == 4 |
602 | zerobits = 0U; |
603 | |
604 | if (Sl > 32) { |
605 | Sl -= 32; |
606 | jpeg_natural_order_start += 32; |
607 | values += 32; |
608 | |
609 | COMPUTE_ABSVALUES_AC_FIRST(Sl); |
610 | } |
611 | bits[1] = zerobits; |
612 | #endif |
613 | } |
614 | |
/*
 * MCU encoding for AC initial scan (either spectral selection,
 * or first pass of successive approximation).
 */

/* Emit every coefficient flagged in zerobits.  `label` is pasted just after
 * the zero run has been skipped, so that a caller can supply a goto target
 * there (or a dummy statement).
 *
 * Uses the caller's zerobits, r, cvalue, temp, temp2, nbits, entropy and
 * cinfo by name.
 */
#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits) { \
    /* Step over the zero coefficients in front of the next nonzero one */ \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    temp = cvalue[0];           /* magnitude */ \
    temp2 = cvalue[DCTSIZE2];   /* bits to emit */ \
    \
    /* A run longer than 15 needs special run-length-16 codes (0xF0) */ \
    for (; r > 15; r -= 16) \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
    \
    /* Bits needed for the magnitude; at least one bit is set */ \
    nbits = JPEG_NBITS_NONZERO(temp); \
    /* Reject out-of-range coefficient values */ \
    if (nbits > MAX_COEF_BITS) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* Count/emit the Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* Then the value itself if positive, or the complement of its \
     * magnitude if negative \
     */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    /* Move past the coefficient just coded */ \
    cvalue++; \
    zerobits >>= 1; \
  } \
}
651 | |
652 | METHODDEF(boolean) |
653 | encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
654 | { |
655 | phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
656 | register int temp, temp2; |
657 | register int nbits, r; |
658 | int Sl = cinfo->Se - cinfo->Ss + 1; |
659 | int Al = cinfo->Al; |
660 | JCOEF values_unaligned[2 * DCTSIZE2 + 15]; |
661 | JCOEF *values; |
662 | const JCOEF *cvalue; |
663 | size_t zerobits; |
664 | size_t bits[8 / SIZEOF_SIZE_T]; |
665 | |
666 | entropy->next_output_byte = cinfo->dest->next_output_byte; |
667 | entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
668 | |
669 | /* Emit restart marker if needed */ |
670 | if (cinfo->restart_interval) |
671 | if (entropy->restarts_to_go == 0) |
672 | emit_restart(entropy, entropy->next_restart_num); |
673 | |
674 | #ifdef WITH_SIMD |
675 | cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16); |
676 | #else |
677 | /* Not using SIMD, so alignment is not needed */ |
678 | cvalue = values = values_unaligned; |
679 | #endif |
680 | |
681 | /* Prepare data */ |
682 | entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss, |
683 | Sl, Al, values, bits); |
684 | |
685 | zerobits = bits[0]; |
686 | #if SIZEOF_SIZE_T == 4 |
687 | zerobits |= bits[1]; |
688 | #endif |
689 | |
690 | /* Emit any pending EOBRUN */ |
691 | if (zerobits && (entropy->EOBRUN > 0)) |
692 | emit_eobrun(entropy); |
693 | |
694 | #if SIZEOF_SIZE_T == 4 |
695 | zerobits = bits[0]; |
696 | #endif |
697 | |
698 | /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */ |
699 | |
700 | ENCODE_COEFS_AC_FIRST((void)0;); |
701 | |
702 | #if SIZEOF_SIZE_T == 4 |
703 | zerobits = bits[1]; |
704 | if (zerobits) { |
705 | int diff = ((values + DCTSIZE2 / 2) - cvalue); |
706 | r = count_zeroes(&zerobits); |
707 | r += diff; |
708 | cvalue += r; |
709 | goto first_iter_ac_first; |
710 | } |
711 | |
712 | ENCODE_COEFS_AC_FIRST(first_iter_ac_first:); |
713 | #endif |
714 | |
715 | if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */ |
716 | entropy->EOBRUN++; /* count an EOB */ |
717 | if (entropy->EOBRUN == 0x7FFF) |
718 | emit_eobrun(entropy); /* force it out to avoid overflow */ |
719 | } |
720 | |
721 | cinfo->dest->next_output_byte = entropy->next_output_byte; |
722 | cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
723 | |
724 | /* Update restart-interval state too */ |
725 | if (cinfo->restart_interval) { |
726 | if (entropy->restarts_to_go == 0) { |
727 | entropy->restarts_to_go = cinfo->restart_interval; |
728 | entropy->next_restart_num++; |
729 | entropy->next_restart_num &= 7; |
730 | } |
731 | entropy->restarts_to_go--; |
732 | } |
733 | |
734 | return TRUE; |
735 | } |
736 | |
737 | |
738 | /* |
739 | * MCU encoding for DC successive approximation refinement scan. |
740 | * Note: we assume such scans can be multi-component, although the spec |
741 | * is not very clear on the point. |
742 | */ |
743 | |
744 | METHODDEF(boolean) |
745 | encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data) |
746 | { |
747 | phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
748 | register int temp; |
749 | int blkn; |
750 | int Al = cinfo->Al; |
751 | JBLOCKROW block; |
752 | |
753 | entropy->next_output_byte = cinfo->dest->next_output_byte; |
754 | entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
755 | |
756 | /* Emit restart marker if needed */ |
757 | if (cinfo->restart_interval) |
758 | if (entropy->restarts_to_go == 0) |
759 | emit_restart(entropy, entropy->next_restart_num); |
760 | |
761 | /* Encode the MCU data blocks */ |
762 | for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) { |
763 | block = MCU_data[blkn]; |
764 | |
765 | /* We simply emit the Al'th bit of the DC coefficient value. */ |
766 | temp = (*block)[0]; |
767 | emit_bits(entropy, (unsigned int)(temp >> Al), 1); |
768 | } |
769 | |
770 | cinfo->dest->next_output_byte = entropy->next_output_byte; |
771 | cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
772 | |
773 | /* Update restart-interval state too */ |
774 | if (cinfo->restart_interval) { |
775 | if (entropy->restarts_to_go == 0) { |
776 | entropy->restarts_to_go = cinfo->restart_interval; |
777 | entropy->next_restart_num++; |
778 | entropy->next_restart_num &= 7; |
779 | } |
780 | entropy->restarts_to_go--; |
781 | } |
782 | |
783 | return TRUE; |
784 | } |
785 | |
786 | |
787 | /* |
788 | * Data preparation for encode_mcu_AC_refine(). |
789 | */ |
790 | |
791 | #define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \ |
792 | /* It is convenient to make a pre-pass to determine the transformed \ |
793 | * coefficients' absolute values and the EOB position. \ |
794 | */ \ |
795 | for (k = 0; k < Sl; k++) { \ |
796 | temp = block[jpeg_natural_order_start[k]]; \ |
797 | /* We must apply the point transform by Al. For AC coefficients this \ |
798 | * is an integer division with rounding towards 0. To do this portably \ |
799 | * in C, we shift after obtaining the absolute value. \ |
800 | */ \ |
801 | temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \ |
802 | temp ^= temp2; \ |
803 | temp -= temp2; /* temp is abs value of input */ \ |
804 | temp >>= Al; /* apply the point transform */ \ |
805 | if (temp != 0) { \ |
806 | zerobits |= ((size_t)1U) << k; \ |
807 | signbits |= ((size_t)(temp2 + 1)) << k; \ |
808 | } \ |
809 | absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \ |
810 | if (temp == 1) \ |
811 | EOB = k + koffset; /* EOB = index of last newly-nonzero coef */ \ |
812 | } \ |
813 | } |
814 | |
815 | METHODDEF(int) |
816 | encode_mcu_AC_refine_prepare(const JCOEF *block, |
817 | const int *jpeg_natural_order_start, int Sl, |
818 | int Al, JCOEF *absvalues, size_t *bits) |
819 | { |
820 | register int k, temp, temp2; |
821 | int EOB = 0; |
822 | size_t zerobits = 0U, signbits = 0U; |
823 | int Sl0 = Sl; |
824 | |
825 | #if SIZEOF_SIZE_T == 4 |
826 | if (Sl0 > 32) |
827 | Sl0 = 32; |
828 | #endif |
829 | |
830 | COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0); |
831 | |
832 | bits[0] = zerobits; |
833 | #if SIZEOF_SIZE_T == 8 |
834 | bits[1] = signbits; |
835 | #else |
836 | bits[2] = signbits; |
837 | |
838 | zerobits = 0U; |
839 | signbits = 0U; |
840 | |
841 | if (Sl > 32) { |
842 | Sl -= 32; |
843 | jpeg_natural_order_start += 32; |
844 | absvalues += 32; |
845 | |
846 | COMPUTE_ABSVALUES_AC_REFINE(Sl, 32); |
847 | } |
848 | |
849 | bits[1] = zerobits; |
850 | bits[3] = signbits; |
851 | #endif |
852 | |
853 | return EOB; |
854 | } |
855 | |
856 | |
857 | /* |
858 | * MCU encoding for AC successive approximation refinement scan. |
859 | */ |
860 | |
/* Main coding loop of an AC refinement scan: process every coefficient
 * flagged in zerobits.  `label` is pasted just after the zero run has been
 * skipped, so that a caller can supply a goto target there (or a dummy
 * statement).
 *
 * Uses the caller's zerobits, signbits, r, cabsvalue, EOBPTR, temp, BR,
 * BR_buffer and entropy by name.
 */
#define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits) { \
    int idx = count_zeroes(&zerobits); \
    \
    /* Step over the zero coefficients in front of the next nonzero one */ \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
label \
    /* Emit any required ZRLs, but not if they can be folded into EOB */ \
    while (r > 15 && (cabsvalue <= EOBPTR)) { \
      /* First any pending EOBRUN together with the BE correction bits */ \
      emit_eobrun(entropy); \
      /* Then the ZRL itself */ \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
      /* Then the buffered correction bits that belong to this ZRL */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer;  /* BE bits are gone now */ \
      BR = 0; \
    } \
    \
    temp = *cabsvalue++; \
    \
    if (temp > 1) { \
      /* The coef was already nonzero, so it only needs a correction bit: \
       * the next bit of its absolute value. \
       * NOTE: a straight translation of the spec's figure G.7 would suggest \
       * that we also need to test r > 15.  But if r > 15, we can only get \
       * here if k > EOB, which implies that this coefficient is not 1. \
       */ \
      BR_buffer[BR++] = (char)(temp & 1); \
    } else { \
      /* Newly nonzero coef.  Flush any pending EOBRUN and BE bits first */ \
      emit_eobrun(entropy); \
      \
      /* Count/emit the Huffman symbol for run length / number of bits */ \
      emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
      \
      /* Output bit for the coef: 0 if it is negative, 1 otherwise */ \
      temp = signbits & 1; \
      emit_bits(entropy, (unsigned int)temp, 1); \
      \
      /* Then the buffered correction bits that belong to this code */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer;  /* BE bits are gone now */ \
      BR = 0; \
      r = 0;  /* the zero run starts over */ \
    } \
    \
    /* Move past the coefficient just handled */ \
    signbits >>= 1; \
    zerobits >>= 1; \
  } \
}
915 | |
/*
 * Encode one MCU for an AC successive approximation refinement scan.
 *
 * cinfo     compression object; cinfo->entropy is used as a
 *           phuff_entropy_ptr and cinfo->dest supplies the output buffer.
 * MCU_data  block rows of the MCU; only MCU_data[0][0] is passed on to the
 *           preparation routine.
 *
 * The output buffer position is copied from cinfo->dest into the entropy
 * state on entry and copied back before returning.  Always returns TRUE.
 */
METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  register int temp, r;
  char *BR_buffer;
  unsigned int BR;
  int Sl = cinfo->Se - cinfo->Ss + 1;   /* number of coefs in band Ss..Se */
  int Al = cinfo->Al;                   /* handed to AC_refine_prepare */
  /* NOTE(review): the 15 spare elements presumably leave room for the
   * alignment adjustment applied below when WITH_SIMD is defined -- confirm
   * against the definition of PAD().
   */
  JCOEF absvalues_unaligned[DCTSIZE2 + 15];
  JCOEF *absvalues;
  const JCOEF *cabsvalue, *EOBPTR;
  size_t zerobits, signbits;
  /* Mask words filled in by AC_refine_prepare.  As read below: with an 8-byte
   * size_t, bits[0] goes to zerobits and bits[1] to signbits; with a 4-byte
   * size_t, bits[0..1] go to zerobits and bits[2..3] to signbits.
   */
  size_t bits[16 / SIZEOF_SIZE_T];

  /* Work on a private copy of the destination buffer state */
  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval)
    if (entropy->restarts_to_go == 0)
      emit_restart(entropy, entropy->next_restart_num);

#ifdef WITH_SIMD
  /* NOTE(review): PAD() is defined elsewhere; presumably it rounds the
   * address up to a multiple of 16 -- confirm.
   */
  cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16);
#else
  /* Not using SIMD, so alignment is not needed */
  cabsvalue = absvalues = absvalues_unaligned;
#endif

  /* Prepare data.  The return value of AC_refine_prepare is used as an
   * offset into absvalues to form EOBPTR, the bound tested by the ZRL loop
   * in ENCODE_COEFS_AC_REFINE.
   */
  EOBPTR = absvalues +
    entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
                               Sl, Al, absvalues, bits);

  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */

  r = 0;                        /* r = run length of zeros */
  BR = 0;                       /* BR = count of buffered bits added now */
  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */

  zerobits = bits[0];
#if SIZEOF_SIZE_T == 8
  signbits = bits[1];
#else
  signbits = bits[2];
#endif
  /* First (or, with an 8-byte size_t, only) pass; no entry label is needed,
   * so a no-op statement is pasted in its place.
   */
  ENCODE_COEFS_AC_REFINE((void)0;);

#if SIZEOF_SIZE_T == 4
  /* Second pass over the remaining mask words */
  zerobits = bits[1];
  signbits = bits[3];

  if (zerobits) {
    /* The first pass may have stopped short of absvalues + DCTSIZE2 / 2.
     * Do the first iteration's bookkeeping by hand, adding that remaining
     * distance to the skip count, then jump into the loop body past the
     * macro's own bookkeeping.
     */
    int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
    int idx = count_zeroes(&zerobits);
    signbits >>= idx;
    idx += diff;
    r += idx;
    cabsvalue += idx;
    goto first_iter_ac_refine;
  }

  /* If zerobits is zero here, the loop inside the macro does not execute */
  ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
#endif

  /* Fold in the distance from the current position to the end of the band.
   * Only the test r > 0 below uses the result.
   */
  r |= (int)((absvalues + Sl) - cabsvalue);

  if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
    entropy->EOBRUN++;          /* count an EOB */
    entropy->BE += BR;          /* concat my correction bits to older ones */
    /* We force out the EOB if we risk either:
     * 1. overflow of the EOB counter;
     * 2. overflow of the correction bit buffer during the next MCU.
     */
    if (entropy->EOBRUN == 0x7FFF ||
        entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
      emit_eobrun(entropy);
  }

  /* Publish the updated buffer state back to the destination manager */
  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num++;
      entropy->next_restart_num &= 7;   /* marker number wraps modulo 8 */
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
1011 | |
1012 | |
1013 | /* |
1014 | * Finish up at the end of a Huffman-compressed progressive scan. |
1015 | */ |
1016 | |
1017 | METHODDEF(void) |
1018 | finish_pass_phuff(j_compress_ptr cinfo) |
1019 | { |
1020 | phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
1021 | |
1022 | entropy->next_output_byte = cinfo->dest->next_output_byte; |
1023 | entropy->free_in_buffer = cinfo->dest->free_in_buffer; |
1024 | |
1025 | /* Flush out any buffered data */ |
1026 | emit_eobrun(entropy); |
1027 | flush_bits(entropy); |
1028 | |
1029 | cinfo->dest->next_output_byte = entropy->next_output_byte; |
1030 | cinfo->dest->free_in_buffer = entropy->free_in_buffer; |
1031 | } |
1032 | |
1033 | |
1034 | /* |
1035 | * Finish up a statistics-gathering pass and create the new Huffman tables. |
1036 | */ |
1037 | |
1038 | METHODDEF(void) |
1039 | finish_pass_gather_phuff(j_compress_ptr cinfo) |
1040 | { |
1041 | phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy; |
1042 | boolean is_DC_band; |
1043 | int ci, tbl; |
1044 | jpeg_component_info *compptr; |
1045 | JHUFF_TBL **htblptr; |
1046 | boolean did[NUM_HUFF_TBLS]; |
1047 | |
1048 | /* Flush out buffered data (all we care about is counting the EOB symbol) */ |
1049 | emit_eobrun(entropy); |
1050 | |
1051 | is_DC_band = (cinfo->Ss == 0); |
1052 | |
1053 | /* It's important not to apply jpeg_gen_optimal_table more than once |
1054 | * per table, because it clobbers the input frequency counts! |
1055 | */ |
1056 | MEMZERO(did, sizeof(did)); |
1057 | |
1058 | for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
1059 | compptr = cinfo->cur_comp_info[ci]; |
1060 | if (is_DC_band) { |
1061 | if (cinfo->Ah != 0) /* DC refinement needs no table */ |
1062 | continue; |
1063 | tbl = compptr->dc_tbl_no; |
1064 | } else { |
1065 | tbl = compptr->ac_tbl_no; |
1066 | } |
1067 | if (!did[tbl]) { |
1068 | if (is_DC_band) |
1069 | htblptr = &cinfo->dc_huff_tbl_ptrs[tbl]; |
1070 | else |
1071 | htblptr = &cinfo->ac_huff_tbl_ptrs[tbl]; |
1072 | if (*htblptr == NULL) |
1073 | *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo); |
1074 | jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]); |
1075 | did[tbl] = TRUE; |
1076 | } |
1077 | } |
1078 | } |
1079 | |
1080 | |
1081 | /* |
1082 | * Module initialization routine for progressive Huffman entropy encoding. |
1083 | */ |
1084 | |
1085 | GLOBAL(void) |
1086 | jinit_phuff_encoder(j_compress_ptr cinfo) |
1087 | { |
1088 | phuff_entropy_ptr entropy; |
1089 | int i; |
1090 | |
1091 | entropy = (phuff_entropy_ptr) |
1092 | (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE, |
1093 | sizeof(phuff_entropy_encoder)); |
1094 | cinfo->entropy = (struct jpeg_entropy_encoder *)entropy; |
1095 | entropy->pub.start_pass = start_pass_phuff; |
1096 | |
1097 | /* Mark tables unallocated */ |
1098 | for (i = 0; i < NUM_HUFF_TBLS; i++) { |
1099 | entropy->derived_tbls[i] = NULL; |
1100 | entropy->count_ptrs[i] = NULL; |
1101 | } |
1102 | entropy->bit_buffer = NULL; /* needed only in AC refinement scan */ |
1103 | } |
1104 | |
1105 | #endif /* C_PROGRESSIVE_SUPPORTED */ |
1106 | |