1/*
2 * jcphuff.c
3 *
4 * This file was part of the Independent JPEG Group's software:
5 * Copyright (C) 1995-1997, Thomas G. Lane.
6 * libjpeg-turbo Modifications:
7 * Copyright (C) 2011, 2015, 2018, D. R. Commander.
8 * Copyright (C) 2016, 2018, Matthieu Darbois.
9 * For conditions of distribution and use, see the accompanying README.ijg
10 * file.
11 *
12 * This file contains Huffman entropy encoding routines for progressive JPEG.
13 *
14 * We do not support output suspension in this module, since the library
15 * currently does not allow multiple-scan files to be written with output
16 * suspension.
17 */
18
19#define JPEG_INTERNALS
20#include "jinclude.h"
21#include "jpeglib.h"
22#include "jsimd.h"
23#include "jconfigint.h"
24#include <limits.h>
25
26#ifdef HAVE_INTRIN_H
27#include <intrin.h>
28#ifdef _MSC_VER
29#ifdef HAVE_BITSCANFORWARD64
30#pragma intrinsic(_BitScanForward64)
31#endif
32#ifdef HAVE_BITSCANFORWARD
33#pragma intrinsic(_BitScanForward)
34#endif
35#endif
36#endif
37
38#ifdef C_PROGRESSIVE_SUPPORTED
39
40/*
41 * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
42 * used for bit counting rather than the lookup table. This will reduce the
43 * memory footprint by 64k, which is important for some mobile applications
44 * that create many isolated instances of libjpeg-turbo (web browsers, for
45 * instance.) This may improve performance on some mobile platforms as well.
46 * This feature is enabled by default only on ARM processors, because some x86
47 * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
48 * shown to have a significant performance impact even on the x86 chips that
49 * have a fast implementation of it. When building for ARMv6, you can
50 * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
51 * flags (this defines __thumb__).
52 */
53
54/* NOTE: Both GCC and Clang define __GNUC__ */
55#if defined __GNUC__ && (defined __arm__ || defined __aarch64__)
56#if !defined __thumb__ || defined __thumb2__
57#define USE_CLZ_INTRINSIC
58#endif
59#endif
60
61#ifdef USE_CLZ_INTRINSIC
62#define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x))
63#define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0)
64#else
65#include "jpeg_nbits_table.h"
66#define JPEG_NBITS(x) (jpeg_nbits_table[x])
67#define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x)
68#endif
69
70
/* Expanded entropy encoder object for progressive Huffman encoding.
 *
 * The object is allocated by jinit_phuff_encoder() and reconfigured for each
 * scan by start_pass_phuff(), which selects the encode_mcu/finish_pass
 * methods and the data-preparation routines below.
 */

typedef struct {
  struct jpeg_entropy_encoder pub; /* public fields */

  /* Pointer to routine to prepare data for encode_mcu_AC_first().
   * start_pass_phuff() points this at either the SIMD implementation or
   * encode_mcu_AC_first_prepare().
   */
  void (*AC_first_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *values, size_t *zerobits);
  /* Pointer to routine to prepare data for encode_mcu_AC_refine().
   * start_pass_phuff() points this at either the SIMD implementation or
   * encode_mcu_AC_refine_prepare().  The return value is used as the EOB
   * index by encode_mcu_AC_refine().
   */
  int (*AC_refine_prepare) (const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *absvalues, size_t *bits);

  /* Mode flag: TRUE for optimization, FALSE for actual data output */
  boolean gather_statistics;

  /* Bit-level coding status.
   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
   * Each encode_mcu routine loads them on entry and stores them back on exit.
   */
  JOCTET *next_output_byte;     /* => next byte to write in buffer */
  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
  size_t put_buffer;            /* current bit-accumulation buffer */
  int put_bits;                 /* # of bits now in it */
  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */

  /* Coding status for DC components */
  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */

  /* Coding status for AC components */
  int ac_tbl_no;                /* the table number of the single component */
  unsigned int EOBRUN;          /* run length of EOBs */
  unsigned int BE;              /* # of buffered correction bits before MCU */
  char *bit_buffer;             /* buffer for correction bits (1 per char) */
  /* packing correction bits tightly would save some space but cost time... */
  /* bit_buffer stays NULL until the first AC refinement scan, at which point
   * start_pass_phuff() allocates MAX_CORR_BITS entries for it.
   */

  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
  int next_restart_num;         /* next restart number to write (0-7) */

  /* Pointers to derived tables (these workspaces have image lifespan).
   * Since any one scan codes only DC or only AC, we only need one set
   * of tables, not one for DC and one for AC.
   */
  c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];

  /* Statistics tables for optimization; again, one set is enough.
   * Each table holds 257 counters (see start_pass_phuff()).
   */
  long *count_ptrs[NUM_HUFF_TBLS];
} phuff_entropy_encoder;

/* Convenience pointer type used throughout this file. */
typedef phuff_entropy_encoder *phuff_entropy_ptr;
121
/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
 * buffer can hold.  Larger sizes may slightly improve compression, but
 * 1000 is already well into the realm of overkill.
 * The minimum safe size is 64 bits.
 *
 * start_pass_phuff() allocates the buffer with this many one-bit-per-char
 * entries, and encode_mcu_AC_refine() forces out the pending EOBRUN once the
 * number of buffered bits exceeds MAX_CORR_BITS - DCTSIZE2 + 1, so that the
 * next MCU cannot overflow the buffer.
 */

#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
129
/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
 * We assume that int right shift is unsigned if JLONG right shift is,
 * which should be safe.
 *
 * ISHIFT_TEMPS declares the temporary that the unsigned-shift variant of
 * IRIGHT_SHIFT needs; it must appear among the declarations of any function
 * that uses IRIGHT_SHIFT (it expands to nothing in the other variant).
 *
 * NOTE(review): the sign-fill mask is built with (16 - shft) rather than the
 * width of int.  That reproduces an arithmetic shift only when x fits in 16
 * bits; the single use in this file passes a JCOEF value cast to int, which
 * presumably satisfies this -- confirm before reusing the macro elsewhere.
 */

#ifdef RIGHT_SHIFT_IS_UNSIGNED
#define ISHIFT_TEMPS    int ishift_temp;
#define IRIGHT_SHIFT(x, shft) \
  ((ishift_temp = (x)) < 0 ? \
   (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
   (ishift_temp >> (shft)))
#else
#define ISHIFT_TEMPS
#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
#endif
145
/* PAD(v, p) rounds v up to the next multiple of p.  p must be a power of 2.
 * Both arguments are fully parenthesized so that expression arguments
 * (e.g. PAD(a << 2, 16)) are not re-associated by operator precedence.
 */
#define PAD(v, p)  (((v) + (p) - 1) & (~((p) - 1)))
147
/* Forward declarations.
 *
 * These routines are referenced by start_pass_phuff() before they are
 * defined: the four encode_mcu_* methods, the two C data-preparation
 * routines (used when the SIMD versions are unavailable), and the two
 * finish_pass methods.
 *
 * Note that the last parameter of encode_mcu_AC_first_prepare() is named
 * "zerobits" here but "bits" in its definition; it is the same output array.
 */
METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
                                       JBLOCKROW *MCU_data);
METHODDEF(void) encode_mcu_AC_first_prepare
  (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
   JCOEF *values, size_t *zerobits);
METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
                                       JBLOCKROW *MCU_data);
METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
                                        JBLOCKROW *MCU_data);
METHODDEF(int) encode_mcu_AC_refine_prepare
  (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
   JCOEF *absvalues, size_t *bits);
METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
                                        JBLOCKROW *MCU_data);
METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
165
166
167/* Count bit loop zeroes */
168INLINE
169METHODDEF(int)
170count_zeroes(size_t *x)
171{
172 int result;
173#if defined(HAVE_BUILTIN_CTZL)
174 result = __builtin_ctzl(*x);
175 *x >>= result;
176#elif defined(HAVE_BITSCANFORWARD64)
177 _BitScanForward64(&result, *x);
178 *x >>= result;
179#elif defined(HAVE_BITSCANFORWARD)
180 _BitScanForward(&result, *x);
181 *x >>= result;
182#else
183 result = 0;
184 while ((*x & 1) == 0) {
185 ++result;
186 *x >>= 1;
187 }
188#endif
189 return result;
190}
191
192
193/*
194 * Initialize for a Huffman-compressed scan using progressive JPEG.
195 */
196
197METHODDEF(void)
198start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
199{
200 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
201 boolean is_DC_band;
202 int ci, tbl;
203 jpeg_component_info *compptr;
204
205 entropy->cinfo = cinfo;
206 entropy->gather_statistics = gather_statistics;
207
208 is_DC_band = (cinfo->Ss == 0);
209
210 /* We assume jcmaster.c already validated the scan parameters. */
211
212 /* Select execution routines */
213 if (cinfo->Ah == 0) {
214 if (is_DC_band)
215 entropy->pub.encode_mcu = encode_mcu_DC_first;
216 else
217 entropy->pub.encode_mcu = encode_mcu_AC_first;
218 if (jsimd_can_encode_mcu_AC_first_prepare())
219 entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
220 else
221 entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
222 } else {
223 if (is_DC_band)
224 entropy->pub.encode_mcu = encode_mcu_DC_refine;
225 else {
226 entropy->pub.encode_mcu = encode_mcu_AC_refine;
227 if (jsimd_can_encode_mcu_AC_refine_prepare())
228 entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
229 else
230 entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
231 /* AC refinement needs a correction bit buffer */
232 if (entropy->bit_buffer == NULL)
233 entropy->bit_buffer = (char *)
234 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
235 MAX_CORR_BITS * sizeof(char));
236 }
237 }
238 if (gather_statistics)
239 entropy->pub.finish_pass = finish_pass_gather_phuff;
240 else
241 entropy->pub.finish_pass = finish_pass_phuff;
242
243 /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
244 * for AC coefficients.
245 */
246 for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
247 compptr = cinfo->cur_comp_info[ci];
248 /* Initialize DC predictions to 0 */
249 entropy->last_dc_val[ci] = 0;
250 /* Get table index */
251 if (is_DC_band) {
252 if (cinfo->Ah != 0) /* DC refinement needs no table */
253 continue;
254 tbl = compptr->dc_tbl_no;
255 } else {
256 entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
257 }
258 if (gather_statistics) {
259 /* Check for invalid table index */
260 /* (make_c_derived_tbl does this in the other path) */
261 if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
262 ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
263 /* Allocate and zero the statistics tables */
264 /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
265 if (entropy->count_ptrs[tbl] == NULL)
266 entropy->count_ptrs[tbl] = (long *)
267 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
268 257 * sizeof(long));
269 MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
270 } else {
271 /* Compute derived values for Huffman table */
272 /* We may do this more than once for a table, but it's not expensive */
273 jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
274 &entropy->derived_tbls[tbl]);
275 }
276 }
277
278 /* Initialize AC stuff */
279 entropy->EOBRUN = 0;
280 entropy->BE = 0;
281
282 /* Initialize bit buffer to empty */
283 entropy->put_buffer = 0;
284 entropy->put_bits = 0;
285
286 /* Initialize restart stuff */
287 entropy->restarts_to_go = cinfo->restart_interval;
288 entropy->next_restart_num = 0;
289}
290
291
292/* Outputting bytes to the file.
293 * NB: these must be called only when actually outputting,
294 * that is, entropy->gather_statistics == FALSE.
295 */
296
/* Emit a byte.
 *
 * Stores val at the current output position and calls dump_buffer() when
 * that fills the buffer.  The body is wrapped in do { } while (0) so that
 * "emit_byte(e, v);" is a single statement and can be used safely in an
 * unbraced if/else.  Note that the entropy argument is evaluated more than
 * once, so it must not have side effects.
 */
#define emit_byte(entropy, val)  do { \
  *(entropy)->next_output_byte++ = (JOCTET)(val); \
  if (--(entropy)->free_in_buffer == 0) \
    dump_buffer(entropy); \
} while (0)
303
304
305LOCAL(void)
306dump_buffer(phuff_entropy_ptr entropy)
307/* Empty the output buffer; we do not support suspension in this module. */
308{
309 struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
310
311 if (!(*dest->empty_output_buffer) (entropy->cinfo))
312 ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
313 /* After a successful buffer dump, must reset buffer pointers */
314 entropy->next_output_byte = dest->next_output_byte;
315 entropy->free_in_buffer = dest->free_in_buffer;
316}
317
318
319/* Outputting bits to the file */
320
321/* Only the right 24 bits of put_buffer are used; the valid bits are
322 * left-justified in this part. At most 16 bits can be passed to emit_bits
323 * in one call, and we never retain more than 7 bits in put_buffer
324 * between calls, so 24 bits are sufficient.
325 */
326
327LOCAL(void)
328emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
329/* Emit some bits, unless we are in gather mode */
330{
331 /* This routine is heavily used, so it's worth coding tightly. */
332 register size_t put_buffer = (size_t)code;
333 register int put_bits = entropy->put_bits;
334
335 /* if size is 0, caller used an invalid Huffman table entry */
336 if (size == 0)
337 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
338
339 if (entropy->gather_statistics)
340 return; /* do nothing if we're only getting stats */
341
342 put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
343
344 put_bits += size; /* new number of bits in buffer */
345
346 put_buffer <<= 24 - put_bits; /* align incoming bits */
347
348 put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
349
350 while (put_bits >= 8) {
351 int c = (int)((put_buffer >> 16) & 0xFF);
352
353 emit_byte(entropy, c);
354 if (c == 0xFF) { /* need to stuff a zero byte? */
355 emit_byte(entropy, 0);
356 }
357 put_buffer <<= 8;
358 put_bits -= 8;
359 }
360
361 entropy->put_buffer = put_buffer; /* update variables */
362 entropy->put_bits = put_bits;
363}
364
365
366LOCAL(void)
367flush_bits(phuff_entropy_ptr entropy)
368{
369 emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
370 entropy->put_buffer = 0; /* and reset bit-buffer to empty */
371 entropy->put_bits = 0;
372}
373
374
375/*
376 * Emit (or just count) a Huffman symbol.
377 */
378
379LOCAL(void)
380emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
381{
382 if (entropy->gather_statistics)
383 entropy->count_ptrs[tbl_no][symbol]++;
384 else {
385 c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
386 emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
387 }
388}
389
390
391/*
392 * Emit bits from a correction bit buffer.
393 */
394
395LOCAL(void)
396emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
397 unsigned int nbits)
398{
399 if (entropy->gather_statistics)
400 return; /* no real work */
401
402 while (nbits > 0) {
403 emit_bits(entropy, (unsigned int)(*bufstart), 1);
404 bufstart++;
405 nbits--;
406 }
407}
408
409
410/*
411 * Emit any pending EOBRUN symbol.
412 */
413
414LOCAL(void)
415emit_eobrun(phuff_entropy_ptr entropy)
416{
417 register int temp, nbits;
418
419 if (entropy->EOBRUN > 0) { /* if there is any pending EOBRUN */
420 temp = entropy->EOBRUN;
421 nbits = JPEG_NBITS_NONZERO(temp) - 1;
422 /* safety check: shouldn't happen given limited correction-bit buffer */
423 if (nbits > 14)
424 ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
425
426 emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
427 if (nbits)
428 emit_bits(entropy, entropy->EOBRUN, nbits);
429
430 entropy->EOBRUN = 0;
431
432 /* Emit any buffered correction bits */
433 emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
434 entropy->BE = 0;
435 }
436}
437
438
439/*
440 * Emit a restart marker & resynchronize predictions.
441 */
442
443LOCAL(void)
444emit_restart(phuff_entropy_ptr entropy, int restart_num)
445{
446 int ci;
447
448 emit_eobrun(entropy);
449
450 if (!entropy->gather_statistics) {
451 flush_bits(entropy);
452 emit_byte(entropy, 0xFF);
453 emit_byte(entropy, JPEG_RST0 + restart_num);
454 }
455
456 if (entropy->cinfo->Ss == 0) {
457 /* Re-initialize DC predictions to 0 */
458 for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
459 entropy->last_dc_val[ci] = 0;
460 } else {
461 /* Re-initialize all AC-related fields to 0 */
462 entropy->EOBRUN = 0;
463 entropy->BE = 0;
464 }
465}
466
467
468/*
469 * MCU encoding for DC initial scan (either spectral selection,
470 * or first pass of successive approximation).
471 */
472
473METHODDEF(boolean)
474encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
475{
476 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
477 register int temp, temp2, temp3;
478 register int nbits;
479 int blkn, ci;
480 int Al = cinfo->Al;
481 JBLOCKROW block;
482 jpeg_component_info *compptr;
483 ISHIFT_TEMPS
484
485 entropy->next_output_byte = cinfo->dest->next_output_byte;
486 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
487
488 /* Emit restart marker if needed */
489 if (cinfo->restart_interval)
490 if (entropy->restarts_to_go == 0)
491 emit_restart(entropy, entropy->next_restart_num);
492
493 /* Encode the MCU data blocks */
494 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
495 block = MCU_data[blkn];
496 ci = cinfo->MCU_membership[blkn];
497 compptr = cinfo->cur_comp_info[ci];
498
499 /* Compute the DC value after the required point transform by Al.
500 * This is simply an arithmetic right shift.
501 */
502 temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
503
504 /* DC differences are figured on the point-transformed values. */
505 temp = temp2 - entropy->last_dc_val[ci];
506 entropy->last_dc_val[ci] = temp2;
507
508 /* Encode the DC coefficient difference per section G.1.2.1 */
509
510 /* This is a well-known technique for obtaining the absolute value without
511 * a branch. It is derived from an assembly language technique presented
512 * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
513 * 1997 by Agner Fog.
514 */
515 temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
516 temp ^= temp3;
517 temp -= temp3; /* temp is abs value of input */
518 /* For a negative input, want temp2 = bitwise complement of abs(input) */
519 temp2 = temp ^ temp3;
520
521 /* Find the number of bits needed for the magnitude of the coefficient */
522 nbits = JPEG_NBITS(temp);
523 /* Check for out-of-range coefficient values.
524 * Since we're encoding a difference, the range limit is twice as much.
525 */
526 if (nbits > MAX_COEF_BITS + 1)
527 ERREXIT(cinfo, JERR_BAD_DCT_COEF);
528
529 /* Count/emit the Huffman-coded symbol for the number of bits */
530 emit_symbol(entropy, compptr->dc_tbl_no, nbits);
531
532 /* Emit that number of bits of the value, if positive, */
533 /* or the complement of its magnitude, if negative. */
534 if (nbits) /* emit_bits rejects calls with size 0 */
535 emit_bits(entropy, (unsigned int)temp2, nbits);
536 }
537
538 cinfo->dest->next_output_byte = entropy->next_output_byte;
539 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
540
541 /* Update restart-interval state too */
542 if (cinfo->restart_interval) {
543 if (entropy->restarts_to_go == 0) {
544 entropy->restarts_to_go = cinfo->restart_interval;
545 entropy->next_restart_num++;
546 entropy->next_restart_num &= 7;
547 }
548 entropy->restarts_to_go--;
549 }
550
551 return TRUE;
552}
553
554
/*
 * Data preparation for encode_mcu_AC_first().
 */

/* COMPUTE_ABSVALUES_AC_FIRST processes the first Sl coefficients reachable
 * through jpeg_natural_order_start.  It is not self-contained: it uses the
 * caller's locals k, temp, temp2, block, jpeg_natural_order_start, Al, values
 * and zerobits.  For each coefficient k that is still nonzero after the point
 * transform it stores the magnitude in values[k], the bits to be emitted in
 * values[k + DCTSIZE2], and sets bit k of zerobits.  Entries of values[] for
 * other coefficients are left unwritten.
 */
#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    if (temp == 0) \
      continue; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value; so the code is \
     * interwoven with finding the abs value (temp) and output bits (temp2). \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    /* Watch out for case that nonzero coef is zero after point transform */ \
    if (temp == 0) \
      continue; \
    /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
    temp2 ^= temp; \
    values[k] = temp; \
    values[k + DCTSIZE2] = temp2; \
    zerobits |= ((size_t)1U) << k; \
  } \
}

/* C implementation of the AC_first_prepare method.
 *
 * block                     coefficient block to read
 * jpeg_natural_order_start  index table for the first coefficient of the band
 * Sl                        number of coefficients in the band
 * Al                        point-transform shift
 * values                    output: magnitudes at [k], emit bits at
 *                           [k + DCTSIZE2] (written for nonzero coefs only)
 * bits                      output: bitmap of nonzero coefficients.  One
 *                           word when size_t is 8 bytes; when it is 4 bytes,
 *                           bits[0] covers coefficients 0-31 and bits[1]
 *                           covers coefficients 32 and up.
 */
METHODDEF(void)
encode_mcu_AC_first_prepare(const JCOEF *block,
                            const int *jpeg_natural_order_start, int Sl,
                            int Al, JCOEF *values, size_t *bits)
{
  register int k, temp, temp2;
  size_t zerobits = 0U;
  int Sl0 = Sl;

#if SIZEOF_SIZE_T == 4
  /* A 32-bit bitmap word can only describe the first 32 coefficients */
  if (Sl0 > 32)
    Sl0 = 32;
#endif

  COMPUTE_ABSVALUES_AC_FIRST(Sl0);

  bits[0] = zerobits;
#if SIZEOF_SIZE_T == 4
  zerobits = 0U;

  if (Sl > 32) {
    /* Second pass for the remaining coefficients; rebasing the pointers by
     * 32 lets the macro keep using bit positions starting at 0.
     */
    Sl -= 32;
    jpeg_natural_order_start += 32;
    values += 32;

    COMPUTE_ABSVALUES_AC_FIRST(Sl);
  }
  bits[1] = zerobits;
#endif
}
614
/*
 * MCU encoding for AC initial scan (either spectral selection,
 * or first pass of successive approximation).
 */

/* ENCODE_COEFS_AC_FIRST consumes the nonzero-coefficient bitmap in zerobits.
 * It relies on the caller's locals zerobits, r, cvalue, temp, temp2, nbits,
 * entropy and cinfo.  Each iteration skips the run of zero coefficients
 * (count_zeroes() returns the run length and shifts it out of zerobits),
 * advances cvalue by that run, and encodes the coefficient found there.
 * "label" is pasted between the skip and the encode steps so that a caller
 * can jump into the loop with r and cvalue already set up.
 */
#define ENCODE_COEFS_AC_FIRST(label) { \
  while (zerobits) { \
    r = count_zeroes(&zerobits); \
    cvalue += r; \
label \
    temp  = cvalue[0]; \
    temp2 = cvalue[DCTSIZE2]; \
    \
    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
    while (r > 15) { \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
    } \
    \
    /* Find the number of bits needed for the magnitude of the coefficient */ \
    nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
    /* Check for out-of-range coefficient values */ \
    if (nbits > MAX_COEF_BITS) \
      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
    \
    /* Emit that number of bits of the value, if positive, */ \
    /* or the complement of its magnitude, if negative. */ \
    emit_bits(entropy, (unsigned int)temp2, nbits); \
    \
    cvalue++; \
    zerobits >>= 1; \
  } \
}

/* Encode one MCU (a single block, MCU_data[0]) of an AC initial scan.
 * Always returns TRUE.
 */
METHODDEF(boolean)
encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  register int temp, temp2;
  register int nbits, r;
  int Sl = cinfo->Se - cinfo->Ss + 1;   /* number of coefficients in band */
  int Al = cinfo->Al;
  /* Workspace for the prepare routine; 15 spare entries leave room for the
   * 16-byte alignment adjustment below.
   */
  JCOEF values_unaligned[2 * DCTSIZE2 + 15];
  JCOEF *values;
  const JCOEF *cvalue;          /* cursor into values[] */
  size_t zerobits;
  size_t bits[8 / SIZEOF_SIZE_T];       /* 1 word (64-bit) or 2 (32-bit) */

  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval)
    if (entropy->restarts_to_go == 0)
      emit_restart(entropy, entropy->next_restart_num);

#ifdef WITH_SIMD
  cvalue = values = (JCOEF *)PAD((size_t)values_unaligned, 16);
#else
  /* Not using SIMD, so alignment is not needed */
  cvalue = values = values_unaligned;
#endif

  /* Prepare data */
  entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
                            Sl, Al, values, bits);

  /* Combine the bitmap words just to learn whether any coefficient in the
   * band is nonzero.
   */
  zerobits = bits[0];
#if SIZEOF_SIZE_T == 4
  zerobits |= bits[1];
#endif

  /* Emit any pending EOBRUN */
  if (zerobits && (entropy->EOBRUN > 0))
    emit_eobrun(entropy);

#if SIZEOF_SIZE_T == 4
  /* Back to the first word only for the first encoding loop */
  zerobits = bits[0];
#endif

  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */

  ENCODE_COEFS_AC_FIRST((void)0;);

#if SIZEOF_SIZE_T == 4
  zerobits = bits[1];
  if (zerobits) {
    /* The zero run continues across the word boundary: diff is the distance
     * from the cursor to entry DCTSIZE2 / 2, where the second word's
     * coefficients begin.  Add it to the run found in the second word, then
     * enter the loop body directly at the label.
     */
    int diff = ((values + DCTSIZE2 / 2) - cvalue);
    r = count_zeroes(&zerobits);
    r += diff;
    cvalue += r;
    goto first_iter_ac_first;
  }

  ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
#endif

  /* cvalue now points just past the last nonzero coefficient (or still at
   * the start if there was none).
   */
  if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
    entropy->EOBRUN++;          /* count an EOB */
    if (entropy->EOBRUN == 0x7FFF)
      emit_eobrun(entropy);     /* force it out to avoid overflow */
  }

  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num++;
      entropy->next_restart_num &= 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
736
737
738/*
739 * MCU encoding for DC successive approximation refinement scan.
740 * Note: we assume such scans can be multi-component, although the spec
741 * is not very clear on the point.
742 */
743
744METHODDEF(boolean)
745encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
746{
747 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
748 register int temp;
749 int blkn;
750 int Al = cinfo->Al;
751 JBLOCKROW block;
752
753 entropy->next_output_byte = cinfo->dest->next_output_byte;
754 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
755
756 /* Emit restart marker if needed */
757 if (cinfo->restart_interval)
758 if (entropy->restarts_to_go == 0)
759 emit_restart(entropy, entropy->next_restart_num);
760
761 /* Encode the MCU data blocks */
762 for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
763 block = MCU_data[blkn];
764
765 /* We simply emit the Al'th bit of the DC coefficient value. */
766 temp = (*block)[0];
767 emit_bits(entropy, (unsigned int)(temp >> Al), 1);
768 }
769
770 cinfo->dest->next_output_byte = entropy->next_output_byte;
771 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
772
773 /* Update restart-interval state too */
774 if (cinfo->restart_interval) {
775 if (entropy->restarts_to_go == 0) {
776 entropy->restarts_to_go = cinfo->restart_interval;
777 entropy->next_restart_num++;
778 entropy->next_restart_num &= 7;
779 }
780 entropy->restarts_to_go--;
781 }
782
783 return TRUE;
784}
785
786
/*
 * Data preparation for encode_mcu_AC_refine().
 */

/* COMPUTE_ABSVALUES_AC_REFINE processes the first Sl coefficients reachable
 * through jpeg_natural_order_start.  It is not self-contained: it uses the
 * caller's locals k, temp, temp2, block, jpeg_natural_order_start, Al,
 * absvalues, zerobits, signbits and EOB.  Unlike the AC_FIRST variant it
 * writes absvalues[k] for every k.  For a coefficient that is nonzero after
 * the point transform, bit k of zerobits is set and bit k of signbits is set
 * to 1 if the coefficient is non-negative (temp2 == 0) or left 0 if it is
 * negative (temp2 == -1).  koffset is added to k when recording EOB so that
 * a second invocation on rebased pointers still yields a band-relative index.
 */
#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
  /* It is convenient to make a pre-pass to determine the transformed \
   * coefficients' absolute values and the EOB position. \
   */ \
  for (k = 0; k < Sl; k++) { \
    temp = block[jpeg_natural_order_start[k]]; \
    /* We must apply the point transform by Al.  For AC coefficients this \
     * is an integer division with rounding towards 0.  To do this portably \
     * in C, we shift after obtaining the absolute value. \
     */ \
    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
    temp ^= temp2; \
    temp -= temp2;              /* temp is abs value of input */ \
    temp >>= Al;                /* apply the point transform */ \
    if (temp != 0) { \
      zerobits |= ((size_t)1U) << k; \
      signbits |= ((size_t)(temp2 + 1)) << k; \
    } \
    absvalues[k] = (JCOEF)temp; /* save abs value for main pass */ \
    if (temp == 1) \
      EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \
  } \
}

/* C implementation of the AC_refine_prepare method.
 *
 * block                     coefficient block to read
 * jpeg_natural_order_start  index table for the first coefficient of the band
 * Sl                        number of coefficients in the band
 * Al                        point-transform shift
 * absvalues                 output: point-transformed magnitude of each of
 *                           the Sl coefficients
 * bits                      output bitmaps.  When size_t is 8 bytes:
 *                           bits[0] = nonzero map, bits[1] = sign map.
 *                           Otherwise: bits[0]/bits[1] = nonzero map for
 *                           coefficients 0-31 / 32 and up, bits[2]/bits[3] =
 *                           sign map for the same two ranges.
 *
 * Returns the band-relative index of the last coefficient whose transformed
 * magnitude is exactly 1, or 0 if there is none.
 */
METHODDEF(int)
encode_mcu_AC_refine_prepare(const JCOEF *block,
                             const int *jpeg_natural_order_start, int Sl,
                             int Al, JCOEF *absvalues, size_t *bits)
{
  register int k, temp, temp2;
  int EOB = 0;
  size_t zerobits = 0U, signbits = 0U;
  int Sl0 = Sl;

#if SIZEOF_SIZE_T == 4
  /* A 32-bit bitmap word can only describe the first 32 coefficients */
  if (Sl0 > 32)
    Sl0 = 32;
#endif

  COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);

  bits[0] = zerobits;
#if SIZEOF_SIZE_T == 8
  bits[1] = signbits;
#else
  bits[2] = signbits;

  zerobits = 0U;
  signbits = 0U;

  if (Sl > 32) {
    /* Second pass for the remaining coefficients; the pointers are rebased
     * by 32 and the same offset is passed for the EOB computation.
     */
    Sl -= 32;
    jpeg_natural_order_start += 32;
    absvalues += 32;

    COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
  }

  bits[1] = zerobits;
  bits[3] = signbits;
#endif

  return EOB;
}
855
856
/*
 * MCU encoding for AC successive approximation refinement scan.
 */

/* ENCODE_COEFS_AC_REFINE walks the nonzero-coefficient bitmap in zerobits,
 * keeping signbits shifted in step with it.  It relies on the caller's locals
 * zerobits, signbits, r, cabsvalue, EOBPTR, temp, BR, BR_buffer and entropy.
 * Each iteration skips a run of zero coefficients (adding it to r), then
 * handles the nonzero coefficient at the cursor: a magnitude > 1 only
 * contributes a buffered correction bit, while a magnitude of 1 is a newly
 * nonzero coefficient that is coded with a run/size symbol, its sign bit and
 * the correction bits buffered so far.  "label" is pasted after the skip step
 * so that a caller can jump into the loop with r, cabsvalue and signbits
 * already adjusted.
 */
#define ENCODE_COEFS_AC_REFINE(label) { \
  while (zerobits) { \
    int idx = count_zeroes(&zerobits); \
    r += idx; \
    cabsvalue += idx; \
    signbits >>= idx; \
label \
    /* Emit any required ZRLs, but not if they can be folded into EOB */ \
    while (r > 15 && (cabsvalue <= EOBPTR)) { \
      /* emit any pending EOBRUN and the BE correction bits */ \
      emit_eobrun(entropy); \
      /* Emit ZRL */ \
      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
      r -= 16; \
      /* Emit buffered correction bits that must be associated with ZRL */ \
      emit_buffered_bits(entropy, BR_buffer, BR); \
      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
      BR = 0; \
    } \
    \
    temp = *cabsvalue++; \
    \
    /* If the coef was previously nonzero, it only needs a correction bit. \
     * NOTE: a straight translation of the spec's figure G.7 would suggest \
     * that we also need to test r > 15.  But if r > 15, we can only get here \
     * if k > EOB, which implies that this coefficient is not 1. \
     */ \
    if (temp > 1) { \
      /* The correction bit is the next bit of the absolute value. */ \
      BR_buffer[BR++] = (char)(temp & 1); \
      signbits >>= 1; \
      zerobits >>= 1; \
      continue; \
    } \
    \
    /* Emit any pending EOBRUN and the BE correction bits */ \
    emit_eobrun(entropy); \
    \
    /* Count/emit Huffman symbol for run length / number of bits */ \
    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
    \
    /* Emit output bit for newly-nonzero coef */ \
    temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
    emit_bits(entropy, (unsigned int)temp, 1); \
    \
    /* Emit buffered correction bits that must be associated with this code */ \
    emit_buffered_bits(entropy, BR_buffer, BR); \
    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
    BR = 0; \
    r = 0;          /* reset zero run length */ \
    signbits >>= 1; \
    zerobits >>= 1; \
  } \
}

/* Encode one MCU (a single block, MCU_data[0]) of an AC refinement scan.
 * Always returns TRUE.
 */
METHODDEF(boolean)
encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
{
  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
  register int temp, r;
  char *BR_buffer;
  unsigned int BR;
  int Sl = cinfo->Se - cinfo->Ss + 1;   /* number of coefficients in band */
  int Al = cinfo->Al;
  /* Workspace for the prepare routine; 15 spare entries leave room for the
   * 16-byte alignment adjustment below.
   */
  JCOEF absvalues_unaligned[DCTSIZE2 + 15];
  JCOEF *absvalues;
  const JCOEF *cabsvalue, *EOBPTR;      /* cursor / last newly-nonzero coef */
  size_t zerobits, signbits;
  size_t bits[16 / SIZEOF_SIZE_T];      /* 2 words (64-bit) or 4 (32-bit) */

  entropy->next_output_byte = cinfo->dest->next_output_byte;
  entropy->free_in_buffer = cinfo->dest->free_in_buffer;

  /* Emit restart marker if needed */
  if (cinfo->restart_interval)
    if (entropy->restarts_to_go == 0)
      emit_restart(entropy, entropy->next_restart_num);

#ifdef WITH_SIMD
  cabsvalue = absvalues = (JCOEF *)PAD((size_t)absvalues_unaligned, 16);
#else
  /* Not using SIMD, so alignment is not needed */
  cabsvalue = absvalues = absvalues_unaligned;
#endif

  /* Prepare data.  The routine's return value is the EOB index, which is
   * turned into a pointer into absvalues[] for comparison with the cursor.
   */
  EOBPTR = absvalues +
    entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
                               Sl, Al, absvalues, bits);

  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */

  r = 0;                        /* r = run length of zeros */
  BR = 0;                       /* BR = count of buffered bits added now */
  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */

  zerobits = bits[0];
#if SIZEOF_SIZE_T == 8
  signbits = bits[1];
#else
  signbits = bits[2];
#endif
  ENCODE_COEFS_AC_REFINE((void)0;);

#if SIZEOF_SIZE_T == 4
  zerobits = bits[1];
  signbits = bits[3];

  if (zerobits) {
    /* The zero run continues across the word boundary: diff is the distance
     * from the cursor to entry DCTSIZE2 / 2, where the second word's
     * coefficients begin.  signbits is shifted by the in-word run only,
     * while r and the cursor also advance by diff.  Then enter the loop body
     * directly at the label.
     */
    int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
    int idx = count_zeroes(&zerobits);
    signbits >>= idx;
    idx += diff;
    r += idx;
    cabsvalue += idx;
    goto first_iter_ac_refine;
  }

  ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
#endif

  /* Make r nonzero if any coefficients of the band lie beyond the cursor,
   * i.e. if the band ends in zeroes.
   */
  r |= (int)((absvalues + Sl) - cabsvalue);

  if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
    entropy->EOBRUN++;          /* count an EOB */
    entropy->BE += BR;          /* concat my correction bits to older ones */
    /* We force out the EOB if we risk either:
     * 1. overflow of the EOB counter;
     * 2. overflow of the correction bit buffer during the next MCU.
     */
    if (entropy->EOBRUN == 0x7FFF ||
        entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
      emit_eobrun(entropy);
  }

  cinfo->dest->next_output_byte = entropy->next_output_byte;
  cinfo->dest->free_in_buffer = entropy->free_in_buffer;

  /* Update restart-interval state too */
  if (cinfo->restart_interval) {
    if (entropy->restarts_to_go == 0) {
      entropy->restarts_to_go = cinfo->restart_interval;
      entropy->next_restart_num++;
      entropy->next_restart_num &= 7;
    }
    entropy->restarts_to_go--;
  }

  return TRUE;
}
1011
1012
1013/*
1014 * Finish up at the end of a Huffman-compressed progressive scan.
1015 */
1016
1017METHODDEF(void)
1018finish_pass_phuff(j_compress_ptr cinfo)
1019{
1020 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1021
1022 entropy->next_output_byte = cinfo->dest->next_output_byte;
1023 entropy->free_in_buffer = cinfo->dest->free_in_buffer;
1024
1025 /* Flush out any buffered data */
1026 emit_eobrun(entropy);
1027 flush_bits(entropy);
1028
1029 cinfo->dest->next_output_byte = entropy->next_output_byte;
1030 cinfo->dest->free_in_buffer = entropy->free_in_buffer;
1031}
1032
1033
1034/*
1035 * Finish up a statistics-gathering pass and create the new Huffman tables.
1036 */
1037
1038METHODDEF(void)
1039finish_pass_gather_phuff(j_compress_ptr cinfo)
1040{
1041 phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
1042 boolean is_DC_band;
1043 int ci, tbl;
1044 jpeg_component_info *compptr;
1045 JHUFF_TBL **htblptr;
1046 boolean did[NUM_HUFF_TBLS];
1047
1048 /* Flush out buffered data (all we care about is counting the EOB symbol) */
1049 emit_eobrun(entropy);
1050
1051 is_DC_band = (cinfo->Ss == 0);
1052
1053 /* It's important not to apply jpeg_gen_optimal_table more than once
1054 * per table, because it clobbers the input frequency counts!
1055 */
1056 MEMZERO(did, sizeof(did));
1057
1058 for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
1059 compptr = cinfo->cur_comp_info[ci];
1060 if (is_DC_band) {
1061 if (cinfo->Ah != 0) /* DC refinement needs no table */
1062 continue;
1063 tbl = compptr->dc_tbl_no;
1064 } else {
1065 tbl = compptr->ac_tbl_no;
1066 }
1067 if (!did[tbl]) {
1068 if (is_DC_band)
1069 htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
1070 else
1071 htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
1072 if (*htblptr == NULL)
1073 *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
1074 jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
1075 did[tbl] = TRUE;
1076 }
1077 }
1078}
1079
1080
1081/*
1082 * Module initialization routine for progressive Huffman entropy encoding.
1083 */
1084
1085GLOBAL(void)
1086jinit_phuff_encoder(j_compress_ptr cinfo)
1087{
1088 phuff_entropy_ptr entropy;
1089 int i;
1090
1091 entropy = (phuff_entropy_ptr)
1092 (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
1093 sizeof(phuff_entropy_encoder));
1094 cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
1095 entropy->pub.start_pass = start_pass_phuff;
1096
1097 /* Mark tables unallocated */
1098 for (i = 0; i < NUM_HUFF_TBLS; i++) {
1099 entropy->derived_tbls[i] = NULL;
1100 entropy->count_ptrs[i] = NULL;
1101 }
1102 entropy->bit_buffer = NULL; /* needed only in AC refinement scan */
1103}
1104
1105#endif /* C_PROGRESSIVE_SUPPORTED */
1106