| 1 | /* |
| 2 | * jchuff.c |
| 3 | * |
| 4 | * This file was part of the Independent JPEG Group's software: |
| 5 | * Copyright (C) 1991-1997, Thomas G. Lane. |
| 6 | * libjpeg-turbo Modifications: |
| 7 | * Copyright (C) 2009-2011, 2014-2016, D. R. Commander. |
| 8 | * Copyright (C) 2015, Matthieu Darbois. |
| 9 | * For conditions of distribution and use, see the accompanying README.ijg |
| 10 | * file. |
| 11 | * |
| 12 | * This file contains Huffman entropy encoding routines. |
| 13 | * |
| 14 | * Much of the complexity here has to do with supporting output suspension. |
| 15 | * If the data destination module demands suspension, we want to be able to |
| 16 | * back up to the start of the current MCU. To do this, we copy state |
| 17 | * variables into local working storage, and update them back to the |
| 18 | * permanent JPEG objects only upon successful completion of an MCU. |
| 19 | */ |
| 20 | |
| 21 | #define JPEG_INTERNALS |
| 22 | #include "jinclude.h" |
| 23 | #include "jpeglib.h" |
| 24 | #include "jsimd.h" |
| 25 | #include "jconfigint.h" |
| 26 | #include <limits.h> |
| 27 | |
| 28 | /* |
| 29 | * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be |
| 30 | * used for bit counting rather than the lookup table. This will reduce the |
| 31 | * memory footprint by 64k, which is important for some mobile applications |
| 32 | * that create many isolated instances of libjpeg-turbo (web browsers, for |
| 33 | * instance.) This may improve performance on some mobile platforms as well. |
| 34 | * This feature is enabled by default only on ARM processors, because some x86 |
| 35 | * chips have a slow implementation of bsr, and the use of clz/bsr cannot be |
| 36 | * shown to have a significant performance impact even on the x86 chips that |
| 37 | * have a fast implementation of it. When building for ARMv6, you can |
| 38 | * explicitly disable the use of clz/bsr by adding -mthumb to the compiler |
| 39 | * flags (this defines __thumb__). |
| 40 | */ |
| 41 | |
| 42 | /* NOTE: Both GCC and Clang define __GNUC__ */ |
| 43 | #if defined __GNUC__ && (defined __arm__ || defined __aarch64__) |
| 44 | #if !defined __thumb__ || defined __thumb2__ |
| 45 | #define USE_CLZ_INTRINSIC |
| 46 | #endif |
| 47 | #endif |
| 48 | |
| 49 | #ifdef USE_CLZ_INTRINSIC |
| 50 | #define JPEG_NBITS_NONZERO(x) (32 - __builtin_clz(x)) |
| 51 | #define JPEG_NBITS(x) (x ? JPEG_NBITS_NONZERO(x) : 0) |
| 52 | #else |
| 53 | #include "jpeg_nbits_table.h" |
| 54 | #define JPEG_NBITS(x) (jpeg_nbits_table[x]) |
| 55 | #define JPEG_NBITS_NONZERO(x) JPEG_NBITS(x) |
| 56 | #endif |
| 57 | |
| 58 | #ifndef min |
| 59 | #define min(a,b) ((a)<(b)?(a):(b)) |
| 60 | #endif |
| 61 | |
| 62 | |
| 63 | /* Expanded entropy encoder object for Huffman encoding. |
| 64 | * |
| 65 | * The savable_state subrecord contains fields that change within an MCU, |
| 66 | * but must not be updated permanently until we complete the MCU. |
| 67 | */ |
| 68 | |
| 69 | typedef struct { |
| 70 | size_t put_buffer; /* current bit-accumulation buffer */ |
| 71 | int put_bits; /* # of bits now in it */ |
| 72 | int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */ |
| 73 | } savable_state; |
| 74 | |
| 75 | /* This macro is to work around compilers with missing or broken |
| 76 | * structure assignment. You'll need to fix this code if you have |
| 77 | * such a compiler and you change MAX_COMPS_IN_SCAN. |
| 78 | */ |
| 79 | |
| 80 | #ifndef NO_STRUCT_ASSIGN |
| 81 | #define ASSIGN_STATE(dest,src) ((dest) = (src)) |
| 82 | #else |
| 83 | #if MAX_COMPS_IN_SCAN == 4 |
| 84 | #define ASSIGN_STATE(dest,src) \ |
| 85 | ((dest).put_buffer = (src).put_buffer, \ |
| 86 | (dest).put_bits = (src).put_bits, \ |
| 87 | (dest).last_dc_val[0] = (src).last_dc_val[0], \ |
| 88 | (dest).last_dc_val[1] = (src).last_dc_val[1], \ |
| 89 | (dest).last_dc_val[2] = (src).last_dc_val[2], \ |
| 90 | (dest).last_dc_val[3] = (src).last_dc_val[3]) |
| 91 | #endif |
| 92 | #endif |
| 93 | |
| 94 | |
| 95 | typedef struct { |
| 96 | struct jpeg_entropy_encoder pub; /* public fields */ |
| 97 | |
| 98 | savable_state saved; /* Bit buffer & DC state at start of MCU */ |
| 99 | |
| 100 | /* These fields are NOT loaded into local working state. */ |
| 101 | unsigned int restarts_to_go; /* MCUs left in this restart interval */ |
| 102 | int next_restart_num; /* next restart number to write (0-7) */ |
| 103 | |
| 104 | /* Pointers to derived tables (these workspaces have image lifespan) */ |
| 105 | c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS]; |
| 106 | c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS]; |
| 107 | |
| 108 | #ifdef ENTROPY_OPT_SUPPORTED /* Statistics tables for optimization */ |
| 109 | long *dc_count_ptrs[NUM_HUFF_TBLS]; |
| 110 | long *ac_count_ptrs[NUM_HUFF_TBLS]; |
| 111 | #endif |
| 112 | |
| 113 | int simd; |
| 114 | } huff_entropy_encoder; |
| 115 | |
| 116 | typedef huff_entropy_encoder *huff_entropy_ptr; |
| 117 | |
| 118 | /* Working state while writing an MCU. |
| 119 | * This struct contains all the fields that are needed by subroutines. |
| 120 | */ |
| 121 | |
| 122 | typedef struct { |
| 123 | JOCTET *next_output_byte; /* => next byte to write in buffer */ |
| 124 | size_t free_in_buffer; /* # of byte spaces remaining in buffer */ |
| 125 | savable_state cur; /* Current bit buffer & DC state */ |
| 126 | j_compress_ptr cinfo; /* dump_buffer needs access to this */ |
| 127 | } working_state; |
| 128 | |
| 129 | |
| 130 | /* Forward declarations */ |
| 131 | METHODDEF(boolean) encode_mcu_huff (j_compress_ptr cinfo, JBLOCKROW *MCU_data); |
| 132 | METHODDEF(void) finish_pass_huff (j_compress_ptr cinfo); |
| 133 | #ifdef ENTROPY_OPT_SUPPORTED |
| 134 | METHODDEF(boolean) encode_mcu_gather (j_compress_ptr cinfo, |
| 135 | JBLOCKROW *MCU_data); |
| 136 | METHODDEF(void) finish_pass_gather (j_compress_ptr cinfo); |
| 137 | #endif |
| 138 | |
| 139 | |
| 140 | /* |
| 141 | * Initialize for a Huffman-compressed scan. |
| 142 | * If gather_statistics is TRUE, we do not output anything during the scan, |
| 143 | * just count the Huffman symbols used and generate Huffman code tables. |
| 144 | */ |
| 145 | |
| 146 | METHODDEF(void) |
| 147 | start_pass_huff (j_compress_ptr cinfo, boolean gather_statistics) |
| 148 | { |
| 149 | huff_entropy_ptr entropy = (huff_entropy_ptr) cinfo->entropy; |
| 150 | int ci, dctbl, actbl; |
| 151 | jpeg_component_info *compptr; |
| 152 | |
| 153 | if (gather_statistics) { |
| 154 | #ifdef ENTROPY_OPT_SUPPORTED |
| 155 | entropy->pub.encode_mcu = encode_mcu_gather; |
| 156 | entropy->pub.finish_pass = finish_pass_gather; |
| 157 | #else |
| 158 | ERREXIT(cinfo, JERR_NOT_COMPILED); |
| 159 | #endif |
| 160 | } else { |
| 161 | entropy->pub.encode_mcu = encode_mcu_huff; |
| 162 | entropy->pub.finish_pass = finish_pass_huff; |
| 163 | } |
| 164 | |
| 165 | entropy->simd = jsimd_can_huff_encode_one_block(); |
| 166 | |
| 167 | for (ci = 0; ci < cinfo->comps_in_scan; ci++) { |
| 168 | compptr = cinfo->cur_comp_info[ci]; |
| 169 | dctbl = compptr->dc_tbl_no; |
| 170 | actbl = compptr->ac_tbl_no; |
| 171 | if (gather_statistics) { |
| 172 | #ifdef ENTROPY_OPT_SUPPORTED |
| 173 | /* Check for invalid table indexes */ |
| 174 | /* (make_c_derived_tbl does this in the other path) */ |
| 175 | if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS) |
| 176 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl); |
| 177 | if (actbl < 0 || actbl >= NUM_HUFF_TBLS) |
| 178 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl); |
| 179 | /* Allocate and zero the statistics tables */ |
| 180 | /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */ |
| 181 | if (entropy->dc_count_ptrs[dctbl] == NULL) |
| 182 | entropy->dc_count_ptrs[dctbl] = (long *) |
| 183 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
| 184 | 257 * sizeof(long)); |
| 185 | MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long)); |
| 186 | if (entropy->ac_count_ptrs[actbl] == NULL) |
| 187 | entropy->ac_count_ptrs[actbl] = (long *) |
| 188 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
| 189 | 257 * sizeof(long)); |
| 190 | MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long)); |
| 191 | #endif |
| 192 | } else { |
| 193 | /* Compute derived values for Huffman tables */ |
| 194 | /* We may do this more than once for a table, but it's not expensive */ |
| 195 | jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl, |
| 196 | & entropy->dc_derived_tbls[dctbl]); |
| 197 | jpeg_make_c_derived_tbl(cinfo, FALSE, actbl, |
| 198 | & entropy->ac_derived_tbls[actbl]); |
| 199 | } |
| 200 | /* Initialize DC predictions to 0 */ |
| 201 | entropy->saved.last_dc_val[ci] = 0; |
| 202 | } |
| 203 | |
| 204 | /* Initialize bit buffer to empty */ |
| 205 | entropy->saved.put_buffer = 0; |
| 206 | entropy->saved.put_bits = 0; |
| 207 | |
| 208 | /* Initialize restart stuff */ |
| 209 | entropy->restarts_to_go = cinfo->restart_interval; |
| 210 | entropy->next_restart_num = 0; |
| 211 | } |
| 212 | |
| 213 | |
| 214 | /* |
| 215 | * Compute the derived values for a Huffman table. |
| 216 | * This routine also performs some validation checks on the table. |
| 217 | * |
| 218 | * Note this is also used by jcphuff.c. |
| 219 | */ |
| 220 | |
| 221 | GLOBAL(void) |
| 222 | jpeg_make_c_derived_tbl (j_compress_ptr cinfo, boolean isDC, int tblno, |
| 223 | c_derived_tbl **pdtbl) |
| 224 | { |
| 225 | JHUFF_TBL *htbl; |
| 226 | c_derived_tbl *dtbl; |
| 227 | int p, i, l, lastp, si, maxsymbol; |
| 228 | char huffsize[257]; |
| 229 | unsigned int huffcode[257]; |
| 230 | unsigned int code; |
| 231 | |
| 232 | /* Note that huffsize[] and huffcode[] are filled in code-length order, |
| 233 | * paralleling the order of the symbols themselves in htbl->huffval[]. |
| 234 | */ |
| 235 | |
| 236 | /* Find the input Huffman table */ |
| 237 | if (tblno < 0 || tblno >= NUM_HUFF_TBLS) |
| 238 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
| 239 | htbl = |
| 240 | isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno]; |
| 241 | if (htbl == NULL) |
| 242 | ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno); |
| 243 | |
| 244 | /* Allocate a workspace if we haven't already done so. */ |
| 245 | if (*pdtbl == NULL) |
| 246 | *pdtbl = (c_derived_tbl *) |
| 247 | (*cinfo->mem->alloc_small) ((j_common_ptr) cinfo, JPOOL_IMAGE, |
| 248 | sizeof(c_derived_tbl)); |
| 249 | dtbl = *pdtbl; |
| 250 | |
| 251 | /* Figure C.1: make table of Huffman code length for each symbol */ |
| 252 | |
| 253 | p = 0; |
| 254 | for (l = 1; l <= 16; l++) { |
| 255 | i = (int) htbl->bits[l]; |
| 256 | if (i < 0 || p + i > 256) /* protect against table overrun */ |
| 257 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
| 258 | while (i--) |
| 259 | huffsize[p++] = (char) l; |
| 260 | } |
| 261 | huffsize[p] = 0; |
| 262 | lastp = p; |
| 263 | |
| 264 | /* Figure C.2: generate the codes themselves */ |
| 265 | /* We also validate that the counts represent a legal Huffman code tree. */ |
| 266 | |
| 267 | code = 0; |
| 268 | si = huffsize[0]; |
| 269 | p = 0; |
| 270 | while (huffsize[p]) { |
| 271 | while (((int) huffsize[p]) == si) { |
| 272 | huffcode[p++] = code; |
| 273 | code++; |
| 274 | } |
| 275 | /* code is now 1 more than the last code used for codelength si; but |
| 276 | * it must still fit in si bits, since no code is allowed to be all ones. |
| 277 | */ |
| 278 | if (((JLONG) code) >= (((JLONG) 1) << si)) |
| 279 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
| 280 | code <<= 1; |
| 281 | si++; |
| 282 | } |
| 283 | |
| 284 | /* Figure C.3: generate encoding tables */ |
| 285 | /* These are code and size indexed by symbol value */ |
| 286 | |
| 287 | /* Set all codeless symbols to have code length 0; |
| 288 | * this lets us detect duplicate VAL entries here, and later |
| 289 | * allows emit_bits to detect any attempt to emit such symbols. |
| 290 | */ |
| 291 | MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi)); |
| 292 | |
| 293 | /* This is also a convenient place to check for out-of-range |
| 294 | * and duplicated VAL entries. We allow 0..255 for AC symbols |
| 295 | * but only 0..15 for DC. (We could constrain them further |
| 296 | * based on data depth and mode, but this seems enough.) |
| 297 | */ |
| 298 | maxsymbol = isDC ? 15 : 255; |
| 299 | |
| 300 | for (p = 0; p < lastp; p++) { |
| 301 | i = htbl->huffval[p]; |
| 302 | if (i < 0 || i > maxsymbol || dtbl->ehufsi[i]) |
| 303 | ERREXIT(cinfo, JERR_BAD_HUFF_TABLE); |
| 304 | dtbl->ehufco[i] = huffcode[p]; |
| 305 | dtbl->ehufsi[i] = huffsize[p]; |
| 306 | } |
| 307 | } |
| 308 | |
| 309 | |
| 310 | /* Outputting bytes to the file */ |
| 311 | |
| 312 | /* Emit a byte, taking 'action' if must suspend. */ |
| 313 | #define emit_byte(state,val,action) \ |
| 314 | { *(state)->next_output_byte++ = (JOCTET) (val); \ |
| 315 | if (--(state)->free_in_buffer == 0) \ |
| 316 | if (! dump_buffer(state)) \ |
| 317 | { action; } } |
| 318 | |
| 319 | |
| 320 | LOCAL(boolean) |
| 321 | dump_buffer (working_state *state) |
| 322 | /* Empty the output buffer; return TRUE if successful, FALSE if must suspend */ |
| 323 | { |
| 324 | struct jpeg_destination_mgr *dest = state->cinfo->dest; |
| 325 | |
| 326 | if (! (*dest->empty_output_buffer) (state->cinfo)) |
| 327 | return FALSE; |
| 328 | /* After a successful buffer dump, must reset buffer pointers */ |
| 329 | state->next_output_byte = dest->next_output_byte; |
| 330 | state->free_in_buffer = dest->free_in_buffer; |
| 331 | return TRUE; |
| 332 | } |
| 333 | |
| 334 | |
| 335 | /* Outputting bits to the file */ |
| 336 | |
| 337 | /* These macros perform the same task as the emit_bits() function in the |
| 338 | * original libjpeg code. In addition to reducing overhead by explicitly |
| 339 | * inlining the code, additional performance is achieved by taking into |
| 340 | * account the size of the bit buffer and waiting until it is almost full |
| 341 | * before emptying it. This mostly benefits 64-bit platforms, since 6 |
| 342 | * bytes can be stored in a 64-bit bit buffer before it has to be emptied. |
| 343 | */ |
| 344 | |
| 345 | #define EMIT_BYTE() { \ |
| 346 | JOCTET |
|---|