| 1 | /* Copyright 2015 Google Inc. All Rights Reserved. |
| 2 | |
| 3 | Distributed under MIT license. |
| 4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 5 | */ |
| 6 | |
| 7 | /* Brotli state for partial streaming decoding. */ |
| 8 | |
| 9 | #ifndef BROTLI_DEC_STATE_H_ |
| 10 | #define BROTLI_DEC_STATE_H_ |
| 11 | |
| 12 | #include <brotli/decode.h> |
| 13 | #include <brotli/shared_dictionary.h> |
| 14 | #include <brotli/types.h> |
| 15 | |
| 16 | #include "../common/constants.h" |
| 17 | #include "../common/dictionary.h" |
| 18 | #include "../common/platform.h" |
| 19 | #include "../common/transform.h" |
| 20 | #include "bit_reader.h" |
| 21 | #include "huffman.h" |
| 22 | |
| 23 | #if defined(__cplusplus) || defined(c_plusplus) |
| 24 | extern "C" { |
| 25 | #endif |
| 26 | |
| 27 | /* Graphviz diagram that describes state transitions: |
| 28 | |
| 29 | digraph States { |
| 30 | graph [compound=true] |
| 31 | concentrate=true |
| 32 | node [shape="box"] |
| 33 | |
| 34 | UNINITED -> {LARGE_WINDOW_BITS -> INITIALIZE} |
| 35 | subgraph cluster_metablock_workflow { |
| 36 | style="rounded" |
| 37 | label=< <B>METABLOCK CYCLE</B> > |
| 38 | METABLOCK_BEGIN -> METABLOCK_HEADER |
| 39 | METABLOCK_HEADER:sw -> METADATA |
| 40 | METABLOCK_HEADER:s -> UNCOMPRESSED |
| 41 | METABLOCK_HEADER:se -> METABLOCK_DONE:ne |
| 42 | METADATA:s -> METABLOCK_DONE:w |
| 43 | UNCOMPRESSED:s -> METABLOCK_DONE:n |
| 44 | METABLOCK_DONE:e -> METABLOCK_BEGIN:e [constraint="false"] |
| 45 | } |
| 46 | INITIALIZE -> METABLOCK_BEGIN |
| 47 | METABLOCK_DONE -> DONE |
| 48 | |
| 49 | subgraph cluster_compressed_metablock { |
| 50 | style="rounded" |
| 51 | label=< <B>COMPRESSED METABLOCK</B> > |
| 52 | |
| 53 | subgraph cluster_command { |
| 54 | style="rounded" |
| 55 | label=< <B>HOT LOOP</B> > |
| 56 | |
| 57 | _METABLOCK_DONE_PORT_ [shape=point style=invis] |
| 58 | |
| 59 | { |
| 60 | // Set different shape for nodes returning from "compressed metablock". |
| 61 | node [shape=invhouse]; CMD_INNER CMD_POST_DECODE_LITERALS; |
| 62 | CMD_POST_WRAP_COPY; CMD_INNER_WRITE; CMD_POST_WRITE_1; |
| 63 | } |
| 64 | |
| 65 | CMD_BEGIN -> CMD_INNER -> CMD_POST_DECODE_LITERALS -> CMD_POST_WRAP_COPY |
| 66 | |
| 67 | // IO ("write") nodes are not in the hot loop! |
| 68 | CMD_INNER_WRITE [style=dashed] |
| 69 | CMD_INNER -> CMD_INNER_WRITE |
| 70 | CMD_POST_WRITE_1 [style=dashed] |
| 71 | CMD_POST_DECODE_LITERALS -> CMD_POST_WRITE_1 |
| 72 | CMD_POST_WRITE_2 [style=dashed] |
| 73 | CMD_POST_WRAP_COPY -> CMD_POST_WRITE_2 |
| 74 | |
| 75 | CMD_POST_WRITE_1 -> CMD_BEGIN:s [constraint="false"] |
| 76 | CMD_INNER_WRITE -> {CMD_INNER CMD_POST_DECODE_LITERALS} |
| 77 | [constraint="false"] |
| 78 | CMD_BEGIN:ne -> CMD_POST_DECODE_LITERALS [constraint="false"] |
| 79 | CMD_POST_WRAP_COPY -> CMD_BEGIN [constraint="false"] |
| 80 | CMD_POST_DECODE_LITERALS -> CMD_BEGIN:ne [constraint="false"] |
| 81 | CMD_POST_WRITE_2 -> CMD_POST_WRAP_COPY [constraint="false"] |
| 82 | {rank=same; CMD_BEGIN; CMD_INNER; CMD_POST_DECODE_LITERALS; |
| 83 | CMD_POST_WRAP_COPY} |
| 84 | {rank=same; CMD_INNER_WRITE; CMD_POST_WRITE_1; CMD_POST_WRITE_2} |
| 85 | |
| 86 | {CMD_INNER CMD_POST_DECODE_LITERALS CMD_POST_WRAP_COPY} -> |
| 87 | _METABLOCK_DONE_PORT_ [style=invis] |
| 88 | {CMD_INNER_WRITE CMD_POST_WRITE_1} -> _METABLOCK_DONE_PORT_ |
| 89 | [constraint="false" style=invis] |
| 90 | } |
| 91 | |
| 92 | BEFORE_COMPRESSED_METABLOCK_HEADER:s -> HUFFMAN_CODE_0:n |
| 93 | HUFFMAN_CODE_0 -> HUFFMAN_CODE_1 -> HUFFMAN_CODE_2 -> HUFFMAN_CODE_3 |
| 94 | HUFFMAN_CODE_0 -> METABLOCK_HEADER_2 -> CONTEXT_MODES -> CONTEXT_MAP_1 |
| 95 | CONTEXT_MAP_1 -> CONTEXT_MAP_2 -> TREE_GROUP |
| 96 | TREE_GROUP -> BEFORE_COMPRESSED_METABLOCK_BODY:e |
| 97 | BEFORE_COMPRESSED_METABLOCK_BODY:s -> CMD_BEGIN:n |
| 98 | |
| 99 | HUFFMAN_CODE_3:e -> HUFFMAN_CODE_0:ne [constraint="false"] |
| 100 | {rank=same; HUFFMAN_CODE_0; HUFFMAN_CODE_1; HUFFMAN_CODE_2; HUFFMAN_CODE_3} |
| 101 | {rank=same; METABLOCK_HEADER_2; CONTEXT_MODES; CONTEXT_MAP_1; CONTEXT_MAP_2; |
| 102 | TREE_GROUP} |
| 103 | } |
| 104 | METABLOCK_HEADER:e -> BEFORE_COMPRESSED_METABLOCK_HEADER:n |
| 105 | |
| 106 | _METABLOCK_DONE_PORT_ -> METABLOCK_DONE:se |
| 107 | [constraint="false" ltail=cluster_command] |
| 108 | |
| 109 | UNINITED [shape=Mdiamond]; |
| 110 | DONE [shape=Msquare]; |
| 111 | } |
| 112 | |
| 113 | |
| 114 | */ |
| 115 | |
| 116 | typedef enum { |
| 117 | BROTLI_STATE_UNINITED, |
| 118 | BROTLI_STATE_LARGE_WINDOW_BITS, |
| 119 | BROTLI_STATE_INITIALIZE, |
| 120 | BROTLI_STATE_METABLOCK_BEGIN, |
| 121 | , |
| 122 | , |
| 123 | BROTLI_STATE_CONTEXT_MODES, |
| 124 | BROTLI_STATE_COMMAND_BEGIN, |
| 125 | BROTLI_STATE_COMMAND_INNER, |
| 126 | BROTLI_STATE_COMMAND_POST_DECODE_LITERALS, |
| 127 | BROTLI_STATE_COMMAND_POST_WRAP_COPY, |
| 128 | BROTLI_STATE_UNCOMPRESSED, |
| 129 | BROTLI_STATE_METADATA, |
| 130 | BROTLI_STATE_COMMAND_INNER_WRITE, |
| 131 | BROTLI_STATE_METABLOCK_DONE, |
| 132 | BROTLI_STATE_COMMAND_POST_WRITE_1, |
| 133 | BROTLI_STATE_COMMAND_POST_WRITE_2, |
| 134 | , |
| 135 | BROTLI_STATE_HUFFMAN_CODE_0, |
| 136 | BROTLI_STATE_HUFFMAN_CODE_1, |
| 137 | BROTLI_STATE_HUFFMAN_CODE_2, |
| 138 | BROTLI_STATE_HUFFMAN_CODE_3, |
| 139 | BROTLI_STATE_CONTEXT_MAP_1, |
| 140 | BROTLI_STATE_CONTEXT_MAP_2, |
| 141 | BROTLI_STATE_TREE_GROUP, |
| 142 | BROTLI_STATE_BEFORE_COMPRESSED_METABLOCK_BODY, |
| 143 | BROTLI_STATE_DONE |
| 144 | } BrotliRunningState; |
| 145 | |
| 146 | typedef enum { |
| 147 | , |
| 148 | , |
| 149 | , |
| 150 | , |
| 151 | , |
| 152 | , |
| 153 | , |
| 154 | |
| 155 | } ; |
| 156 | |
| 157 | typedef enum { |
| 158 | BROTLI_STATE_UNCOMPRESSED_NONE, |
| 159 | BROTLI_STATE_UNCOMPRESSED_WRITE |
| 160 | } BrotliRunningUncompressedState; |
| 161 | |
| 162 | typedef enum { |
| 163 | BROTLI_STATE_TREE_GROUP_NONE, |
| 164 | BROTLI_STATE_TREE_GROUP_LOOP |
| 165 | } BrotliRunningTreeGroupState; |
| 166 | |
| 167 | typedef enum { |
| 168 | BROTLI_STATE_CONTEXT_MAP_NONE, |
| 169 | BROTLI_STATE_CONTEXT_MAP_READ_PREFIX, |
| 170 | BROTLI_STATE_CONTEXT_MAP_HUFFMAN, |
| 171 | BROTLI_STATE_CONTEXT_MAP_DECODE, |
| 172 | BROTLI_STATE_CONTEXT_MAP_TRANSFORM |
| 173 | } BrotliRunningContextMapState; |
| 174 | |
| 175 | typedef enum { |
| 176 | BROTLI_STATE_HUFFMAN_NONE, |
| 177 | BROTLI_STATE_HUFFMAN_SIMPLE_SIZE, |
| 178 | BROTLI_STATE_HUFFMAN_SIMPLE_READ, |
| 179 | BROTLI_STATE_HUFFMAN_SIMPLE_BUILD, |
| 180 | BROTLI_STATE_HUFFMAN_COMPLEX, |
| 181 | BROTLI_STATE_HUFFMAN_LENGTH_SYMBOLS |
| 182 | } BrotliRunningHuffmanState; |
| 183 | |
| 184 | typedef enum { |
| 185 | BROTLI_STATE_DECODE_UINT8_NONE, |
| 186 | BROTLI_STATE_DECODE_UINT8_SHORT, |
| 187 | BROTLI_STATE_DECODE_UINT8_LONG |
| 188 | } BrotliRunningDecodeUint8State; |
| 189 | |
| 190 | typedef enum { |
| 191 | BROTLI_STATE_READ_BLOCK_LENGTH_NONE, |
| 192 | BROTLI_STATE_READ_BLOCK_LENGTH_SUFFIX |
| 193 | } BrotliRunningReadBlockLengthState; |
| 194 | |
| 195 | /* BrotliDecoderState addon, used for Compound Dictionary functionality. */ |
| 196 | typedef struct BrotliDecoderCompoundDictionary { |
| 197 | int num_chunks; |
| 198 | int total_size; |
| 199 | int br_index; |
| 200 | int br_offset; |
| 201 | int br_length; |
| 202 | int br_copied; |
| 203 | const uint8_t* chunks[16]; |
| 204 | int chunk_offsets[16]; |
| 205 | int block_bits; |
| 206 | uint8_t block_map[256]; |
| 207 | } BrotliDecoderCompoundDictionary; |
| 208 | |
| 209 | typedef struct { |
| 210 | BrotliRunningTreeGroupState ; |
| 211 | BrotliRunningContextMapState ; |
| 212 | BrotliRunningHuffmanState ; |
| 213 | |
| 214 | uint32_t ; |
| 215 | |
| 216 | uint32_t ; |
| 217 | uint32_t ; |
| 218 | |
| 219 | /* For ReadHuffmanCode. */ |
| 220 | uint32_t ; |
| 221 | uint32_t ; |
| 222 | uint32_t ; |
| 223 | |
| 224 | /* Huffman table for "histograms". */ |
| 225 | HuffmanCode [32]; |
| 226 | /* List of heads of symbol chains. */ |
| 227 | uint16_t* ; |
| 228 | /* Storage from symbol_lists. */ |
| 229 | uint16_t [BROTLI_HUFFMAN_MAX_CODE_LENGTH + 1 + |
| 230 | BROTLI_NUM_COMMAND_SYMBOLS]; |
| 231 | /* Tails of symbol chains. */ |
| 232 | int [32]; |
| 233 | uint8_t [BROTLI_CODE_LENGTH_CODES]; |
| 234 | /* Population counts for the code lengths. */ |
| 235 | uint16_t [16]; |
| 236 | |
| 237 | /* For HuffmanTreeGroupDecode. */ |
| 238 | int ; |
| 239 | HuffmanCode* ; |
| 240 | |
| 241 | /* For DecodeContextMap. */ |
| 242 | uint32_t ; |
| 243 | uint32_t ; |
| 244 | uint32_t ; |
| 245 | HuffmanCode [BROTLI_HUFFMAN_MAX_SIZE_272]; |
| 246 | } ; |
| 247 | |
| 248 | typedef struct BrotliMetablockBodyArena { |
| 249 | uint8_t dist_extra_bits[544]; |
| 250 | uint32_t dist_offset[544]; |
| 251 | } BrotliMetablockBodyArena; |
| 252 | |
| 253 | struct BrotliDecoderStateStruct { |
| 254 | BrotliRunningState state; |
| 255 | |
| 256 | /* This counter is reused for several disjoint loops. */ |
| 257 | int loop_counter; |
| 258 | |
| 259 | BrotliBitReader br; |
| 260 | |
| 261 | brotli_alloc_func alloc_func; |
| 262 | brotli_free_func free_func; |
| 263 | void* memory_manager_opaque; |
| 264 | |
| 265 | /* Temporary storage for remaining input. Brotli stream format is designed in |
| 266 | a way, that 64 bits are enough to make progress in decoding. */ |
| 267 | union { |
| 268 | uint64_t u64; |
| 269 | uint8_t u8[8]; |
| 270 | } buffer; |
| 271 | uint32_t buffer_length; |
| 272 | |
| 273 | int pos; |
| 274 | int max_backward_distance; |
| 275 | int max_distance; |
| 276 | int ringbuffer_size; |
| 277 | int ringbuffer_mask; |
| 278 | int dist_rb_idx; |
| 279 | int dist_rb[4]; |
| 280 | int error_code; |
| 281 | uint8_t* ringbuffer; |
| 282 | uint8_t* ringbuffer_end; |
| 283 | HuffmanCode* htree_command; |
| 284 | const uint8_t* context_lookup; |
| 285 | uint8_t* context_map_slice; |
| 286 | uint8_t* dist_context_map_slice; |
| 287 | |
| 288 | /* This ring buffer holds a few past copy distances that will be used by |
| 289 | some special distance codes. */ |
| 290 | HuffmanTreeGroup literal_hgroup; |
| 291 | HuffmanTreeGroup insert_copy_hgroup; |
| 292 | HuffmanTreeGroup distance_hgroup; |
| 293 | HuffmanCode* block_type_trees; |
| 294 | HuffmanCode* block_len_trees; |
| 295 | /* This is true if the literal context map histogram type always matches the |
| 296 | block type. It is then not needed to keep the context (faster decoding). */ |
| 297 | int trivial_literal_context; |
| 298 | /* Distance context is actual after command is decoded and before distance is |
| 299 | computed. After distance computation it is used as a temporary variable. */ |
| 300 | int distance_context; |
| 301 | int meta_block_remaining_len; |
| 302 | uint32_t block_length_index; |
| 303 | uint32_t block_length[3]; |
| 304 | uint32_t num_block_types[3]; |
| 305 | uint32_t block_type_rb[6]; |
| 306 | uint32_t distance_postfix_bits; |
| 307 | uint32_t num_direct_distance_codes; |
| 308 | uint32_t num_dist_htrees; |
| 309 | uint8_t* dist_context_map; |
| 310 | HuffmanCode* literal_htree; |
| 311 | uint8_t dist_htree_index; |
| 312 | |
| 313 | int copy_length; |
| 314 | int distance_code; |
| 315 | |
| 316 | /* For partial write operations. */ |
| 317 | size_t rb_roundtrips; /* how many times we went around the ring-buffer */ |
| 318 | size_t partial_pos_out; /* how much output to the user in total */ |
| 319 | |
| 320 | /* For InverseMoveToFrontTransform. */ |
| 321 | uint32_t mtf_upper_bound; |
| 322 | uint32_t mtf[64 + 1]; |
| 323 | |
| 324 | /* Less used attributes are at the end of this struct. */ |
| 325 | |
| 326 | brotli_decoder_metadata_start_func metadata_start_func; |
| 327 | brotli_decoder_metadata_chunk_func metadata_chunk_func; |
| 328 | void* metadata_callback_opaque; |
| 329 | |
| 330 | /* For reporting. */ |
| 331 | uint64_t used_input; /* how many bytes of input are consumed */ |
| 332 | |
| 333 | /* States inside function calls. */ |
| 334 | BrotliRunningMetablockHeaderState ; |
| 335 | BrotliRunningUncompressedState substate_uncompressed; |
| 336 | BrotliRunningDecodeUint8State substate_decode_uint8; |
| 337 | BrotliRunningReadBlockLengthState substate_read_block_length; |
| 338 | |
| 339 | unsigned int is_last_metablock : 1; |
| 340 | unsigned int is_uncompressed : 1; |
| 341 | unsigned int is_metadata : 1; |
| 342 | unsigned int should_wrap_ringbuffer : 1; |
| 343 | unsigned int canny_ringbuffer_allocation : 1; |
| 344 | unsigned int large_window : 1; |
| 345 | unsigned int size_nibbles : 8; |
| 346 | uint32_t window_bits; |
| 347 | |
| 348 | int new_ringbuffer_size; |
| 349 | |
| 350 | uint32_t num_literal_htrees; |
| 351 | uint8_t* context_map; |
| 352 | uint8_t* context_modes; |
| 353 | |
| 354 | BrotliSharedDictionary* dictionary; |
| 355 | BrotliDecoderCompoundDictionary* compound_dictionary; |
| 356 | |
| 357 | uint32_t trivial_literal_contexts[8]; /* 256 bits */ |
| 358 | |
| 359 | union { |
| 360 | BrotliMetablockHeaderArena ; |
| 361 | BrotliMetablockBodyArena body; |
| 362 | } arena; |
| 363 | }; |
| 364 | |
| 365 | typedef struct BrotliDecoderStateStruct BrotliDecoderStateInternal; |
| 366 | #define BrotliDecoderState BrotliDecoderStateInternal |
| 367 | |
| 368 | BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderStateInit(BrotliDecoderState* s, |
| 369 | brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque); |
| 370 | BROTLI_INTERNAL void BrotliDecoderStateCleanup(BrotliDecoderState* s); |
| 371 | BROTLI_INTERNAL void BrotliDecoderStateMetablockBegin(BrotliDecoderState* s); |
| 372 | BROTLI_INTERNAL void BrotliDecoderStateCleanupAfterMetablock( |
| 373 | BrotliDecoderState* s); |
| 374 | BROTLI_INTERNAL BROTLI_BOOL BrotliDecoderHuffmanTreeGroupInit( |
| 375 | BrotliDecoderState* s, HuffmanTreeGroup* group, uint32_t alphabet_size_max, |
| 376 | uint32_t alphabet_size_limit, uint32_t ntrees); |
| 377 | |
| 378 | #define BROTLI_DECODER_ALLOC(S, L) S->alloc_func(S->memory_manager_opaque, L) |
| 379 | |
| 380 | #define BROTLI_DECODER_FREE(S, X) { \ |
| 381 | S->free_func(S->memory_manager_opaque, X); \ |
| 382 | X = NULL; \ |
| 383 | } |
| 384 | |
| 385 | #if defined(__cplusplus) || defined(c_plusplus) |
| 386 | } /* extern "C" */ |
| 387 | #endif |
| 388 | |
| 389 | #endif /* BROTLI_DEC_STATE_H_ */ |
| 390 | |