1/* Copyright 2013 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7/* Implementation of Brotli compressor. */
8
9#include <brotli/encode.h>
10
11#include <stdlib.h> /* free, malloc */
12#include <string.h> /* memcpy, memset */
13
14#include "../common/constants.h"
15#include "../common/context.h"
16#include "../common/platform.h"
17#include "../common/version.h"
18#include "./backward_references.h"
19#include "./backward_references_hq.h"
20#include "./bit_cost.h"
21#include "./brotli_bit_stream.h"
22#include "./compress_fragment.h"
23#include "./compress_fragment_two_pass.h"
24#include "./encoder_dict.h"
25#include "./entropy_encode.h"
26#include "./fast_log.h"
27#include "./hash.h"
28#include "./histogram.h"
29#include "./memory.h"
30#include "./metablock.h"
31#include "./prefix.h"
32#include "./quality.h"
33#include "./ringbuffer.h"
34#include "./utf8_util.h"
35#include "./write_bits.h"
36
37#if defined(__cplusplus) || defined(c_plusplus)
38extern "C" {
39#endif
40
41#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src));
42
43typedef enum BrotliEncoderStreamState {
44 /* Default state. */
45 BROTLI_STREAM_PROCESSING = 0,
46 /* Intermediate state; after next block is emitted, byte-padding should be
47 performed before getting back to default state. */
48 BROTLI_STREAM_FLUSH_REQUESTED = 1,
49 /* Last metablock was produced; no more input is acceptable. */
50 BROTLI_STREAM_FINISHED = 2,
51 /* Flushing compressed block and writing meta-data block header. */
52 BROTLI_STREAM_METADATA_HEAD = 3,
53 /* Writing metadata block body. */
54 BROTLI_STREAM_METADATA_BODY = 4
55} BrotliEncoderStreamState;
56
57typedef struct BrotliEncoderStateStruct {
58 BrotliEncoderParams params;
59
60 MemoryManager memory_manager_;
61
62 HasherHandle hasher_;
63 uint64_t input_pos_;
64 RingBuffer ringbuffer_;
65 size_t cmd_alloc_size_;
66 Command* commands_;
67 size_t num_commands_;
68 size_t num_literals_;
69 size_t last_insert_len_;
70 uint64_t last_flush_pos_;
71 uint64_t last_processed_pos_;
72 int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES];
73 int saved_dist_cache_[4];
74 uint16_t last_bytes_;
75 uint8_t last_bytes_bits_;
76 uint8_t prev_byte_;
77 uint8_t prev_byte2_;
78 size_t storage_size_;
79 uint8_t* storage_;
80 /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */
81 int small_table_[1 << 10]; /* 4KiB */
82 int* large_table_; /* Allocated only when needed */
83 size_t large_table_size_;
84 /* Command and distance prefix codes (each 64 symbols, stored back-to-back)
85 used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command
86 prefix code is over a smaller alphabet with the following 64 symbols:
87 0 - 15: insert length code 0, copy length code 0 - 15, same distance
88 16 - 39: insert length code 0, copy length code 0 - 23
89 40 - 63: insert length code 0 - 23, copy length code 0
90 Note that symbols 16 and 40 represent the same code in the full alphabet,
91 but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */
92 uint8_t cmd_depths_[128];
93 uint16_t cmd_bits_[128];
94 /* The compressed form of the command and distance prefix codes for the next
95 block in FAST_ONE_PASS_COMPRESSION_QUALITY. */
96 uint8_t cmd_code_[512];
97 size_t cmd_code_numbits_;
98 /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */
99 uint32_t* command_buf_;
100 uint8_t* literal_buf_;
101
102 uint8_t* next_out_;
103 size_t available_out_;
104 size_t total_out_;
105 /* Temporary buffer for padding flush bits or metadata block header / body. */
106 union {
107 uint64_t u64[2];
108 uint8_t u8[16];
109 } tiny_buf_;
110 uint32_t remaining_metadata_bytes_;
111 BrotliEncoderStreamState stream_state_;
112
113 BROTLI_BOOL is_last_block_emitted_;
114 BROTLI_BOOL is_initialized_;
115} BrotliEncoderStateStruct;
116
117static size_t InputBlockSize(BrotliEncoderState* s) {
118 return (size_t)1 << s->params.lgblock;
119}
120
121static uint64_t UnprocessedInputSize(BrotliEncoderState* s) {
122 return s->input_pos_ - s->last_processed_pos_;
123}
124
125static size_t RemainingInputBlockSize(BrotliEncoderState* s) {
126 const uint64_t delta = UnprocessedInputSize(s);
127 size_t block_size = InputBlockSize(s);
128 if (delta >= block_size) return 0;
129 return block_size - (size_t)delta;
130}
131
132BROTLI_BOOL BrotliEncoderSetParameter(
133 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) {
134 /* Changing parameters on the fly is not implemented yet. */
135 if (state->is_initialized_) return BROTLI_FALSE;
136 /* TODO: Validate/clamp parameters here. */
137 switch (p) {
138 case BROTLI_PARAM_MODE:
139 state->params.mode = (BrotliEncoderMode)value;
140 return BROTLI_TRUE;
141
142 case BROTLI_PARAM_QUALITY:
143 state->params.quality = (int)value;
144 return BROTLI_TRUE;
145
146 case BROTLI_PARAM_LGWIN:
147 state->params.lgwin = (int)value;
148 return BROTLI_TRUE;
149
150 case BROTLI_PARAM_LGBLOCK:
151 state->params.lgblock = (int)value;
152 return BROTLI_TRUE;
153
154 case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING:
155 if ((value != 0) && (value != 1)) return BROTLI_FALSE;
156 state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value);
157 return BROTLI_TRUE;
158
159 case BROTLI_PARAM_SIZE_HINT:
160 state->params.size_hint = value;
161 return BROTLI_TRUE;
162
163 case BROTLI_PARAM_LARGE_WINDOW:
164 state->params.large_window = TO_BROTLI_BOOL(!!value);
165 return BROTLI_TRUE;
166
167 case BROTLI_PARAM_NPOSTFIX:
168 state->params.dist.distance_postfix_bits = value;
169 return BROTLI_TRUE;
170
171 case BROTLI_PARAM_NDIRECT:
172 state->params.dist.num_direct_distance_codes = value;
173 return BROTLI_TRUE;
174
175 default: return BROTLI_FALSE;
176 }
177}
178
179/* Wraps 64-bit input position to 32-bit ring-buffer position preserving
180 "not-a-first-lap" feature. */
181static uint32_t WrapPosition(uint64_t position) {
182 uint32_t result = (uint32_t)position;
183 uint64_t gb = position >> 30;
184 if (gb > 2) {
185 /* Wrap every 2GiB; The first 3GB are continuous. */
186 result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30;
187 }
188 return result;
189}
190
191static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) {
192 MemoryManager* m = &s->memory_manager_;
193 if (s->storage_size_ < size) {
194 BROTLI_FREE(m, s->storage_);
195 s->storage_ = BROTLI_ALLOC(m, uint8_t, size);
196 if (BROTLI_IS_OOM(m)) return NULL;
197 s->storage_size_ = size;
198 }
199 return s->storage_;
200}
201
202static size_t HashTableSize(size_t max_table_size, size_t input_size) {
203 size_t htsize = 256;
204 while (htsize < max_table_size && htsize < input_size) {
205 htsize <<= 1;
206 }
207 return htsize;
208}
209
210static int* GetHashTable(BrotliEncoderState* s, int quality,
211 size_t input_size, size_t* table_size) {
212 /* Use smaller hash table when input.size() is smaller, since we
213 fill the table, incurring O(hash table size) overhead for
214 compression, and if the input is short, we won't need that
215 many hash table entries anyway. */
216 MemoryManager* m = &s->memory_manager_;
217 const size_t max_table_size = MaxHashTableSize(quality);
218 size_t htsize = HashTableSize(max_table_size, input_size);
219 int* table;
220 BROTLI_DCHECK(max_table_size >= 256);
221 if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
222 /* Only odd shifts are supported by fast-one-pass. */
223 if ((htsize & 0xAAAAA) == 0) {
224 htsize <<= 1;
225 }
226 }
227
228 if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) {
229 table = s->small_table_;
230 } else {
231 if (htsize > s->large_table_size_) {
232 s->large_table_size_ = htsize;
233 BROTLI_FREE(m, s->large_table_);
234 s->large_table_ = BROTLI_ALLOC(m, int, htsize);
235 if (BROTLI_IS_OOM(m)) return 0;
236 }
237 table = s->large_table_;
238 }
239
240 *table_size = htsize;
241 memset(table, 0, htsize * sizeof(*table));
242 return table;
243}
244
245static void EncodeWindowBits(int lgwin, BROTLI_BOOL large_window,
246 uint16_t* last_bytes, uint8_t* last_bytes_bits) {
247 if (large_window) {
248 *last_bytes = (uint16_t)(((lgwin & 0x3F) << 8) | 0x11);
249 *last_bytes_bits = 14;
250 } else {
251 if (lgwin == 16) {
252 *last_bytes = 0;
253 *last_bytes_bits = 1;
254 } else if (lgwin == 17) {
255 *last_bytes = 1;
256 *last_bytes_bits = 7;
257 } else if (lgwin > 17) {
258 *last_bytes = (uint16_t)(((lgwin - 17) << 1) | 0x01);
259 *last_bytes_bits = 4;
260 } else {
261 *last_bytes = (uint16_t)(((lgwin - 8) << 4) | 0x01);
262 *last_bytes_bits = 7;
263 }
264 }
265}
266
267/* Initializes the command and distance prefix codes for the first block. */
268static void InitCommandPrefixCodes(uint8_t cmd_depths[128],
269 uint16_t cmd_bits[128],
270 uint8_t cmd_code[512],
271 size_t* cmd_code_numbits) {
272 static const uint8_t kDefaultCommandDepths[128] = {
273 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8,
274 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7,
275 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6,
276 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
277 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4,
279 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10,
280 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
281 };
282 static const uint16_t kDefaultCommandBits[128] = {
283 0, 0, 8, 9, 3, 35, 7, 71,
284 39, 103, 23, 47, 175, 111, 239, 31,
285 0, 0, 0, 4, 12, 2, 10, 6,
286 13, 29, 11, 43, 27, 59, 87, 55,
287 15, 79, 319, 831, 191, 703, 447, 959,
288 0, 14, 1, 25, 5, 21, 19, 51,
289 119, 159, 95, 223, 479, 991, 63, 575,
290 127, 639, 383, 895, 255, 767, 511, 1023,
291 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
292 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12,
293 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255,
294 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095,
295 };
296 static const uint8_t kDefaultCommandCode[] = {
297 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6,
298 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c,
299 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa,
300 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94,
301 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00,
302 };
303 static const size_t kDefaultCommandCodeNumBits = 448;
304 COPY_ARRAY(cmd_depths, kDefaultCommandDepths);
305 COPY_ARRAY(cmd_bits, kDefaultCommandBits);
306
307 /* Initialize the pre-compressed form of the command and distance prefix
308 codes. */
309 COPY_ARRAY(cmd_code, kDefaultCommandCode);
310 *cmd_code_numbits = kDefaultCommandCodeNumBits;
311}
312
313/* Decide about the context map based on the ability of the prediction
314 ability of the previous byte UTF8-prefix on the next byte. The
315 prediction ability is calculated as Shannon entropy. Here we need
316 Shannon entropy instead of 'BitsEntropy' since the prefix will be
317 encoded with the remaining 6 bits of the following byte, and
318 BitsEntropy will assume that symbol to be stored alone using Huffman
319 coding. */
320static void ChooseContextMap(int quality,
321 uint32_t* bigram_histo,
322 size_t* num_literal_contexts,
323 const uint32_t** literal_context_map) {
324 static const uint32_t kStaticContextMapContinuation[64] = {
325 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
326 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
327 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
329 };
330 static const uint32_t kStaticContextMapSimpleUTF8[64] = {
331 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
333 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
334 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
335 };
336
337 uint32_t monogram_histo[3] = { 0 };
338 uint32_t two_prefix_histo[6] = { 0 };
339 size_t total;
340 size_t i;
341 size_t dummy;
342 double entropy[4];
343 for (i = 0; i < 9; ++i) {
344 monogram_histo[i % 3] += bigram_histo[i];
345 two_prefix_histo[i % 6] += bigram_histo[i];
346 }
347 entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy);
348 entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) +
349 ShannonEntropy(two_prefix_histo + 3, 3, &dummy));
350 entropy[3] = 0;
351 for (i = 0; i < 3; ++i) {
352 entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy);
353 }
354
355 total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2];
356 BROTLI_DCHECK(total != 0);
357 entropy[0] = 1.0 / (double)total;
358 entropy[1] *= entropy[0];
359 entropy[2] *= entropy[0];
360 entropy[3] *= entropy[0];
361
362 if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) {
363 /* 3 context models is a bit slower, don't use it at lower qualities. */
364 entropy[3] = entropy[1] * 10;
365 }
366 /* If expected savings by symbol are less than 0.2 bits, skip the
367 context modeling -- in exchange for faster decoding speed. */
368 if (entropy[1] - entropy[2] < 0.2 &&
369 entropy[1] - entropy[3] < 0.2) {
370 *num_literal_contexts = 1;
371 } else if (entropy[2] - entropy[3] < 0.02) {
372 *num_literal_contexts = 2;
373 *literal_context_map = kStaticContextMapSimpleUTF8;
374 } else {
375 *num_literal_contexts = 3;
376 *literal_context_map = kStaticContextMapContinuation;
377 }
378}
379
380/* Decide if we want to use a more complex static context map containing 13
381 context values, based on the entropy reduction of histograms over the
382 first 5 bits of literals. */
383static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input,
384 size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
385 size_t* num_literal_contexts, const uint32_t** literal_context_map) {
386 static const uint32_t kStaticContextMapComplexUTF8[64] = {
387 11, 11, 12, 12, /* 0 special */
388 0, 0, 0, 0, /* 4 lf */
389 1, 1, 9, 9, /* 8 space */
390 2, 2, 2, 2, /* !, first after space/lf and after something else. */
391 1, 1, 1, 1, /* " */
392 8, 3, 3, 3, /* % */
393 1, 1, 1, 1, /* ({[ */
394 2, 2, 2, 2, /* }]) */
395 8, 4, 4, 4, /* :; */
396 8, 7, 4, 4, /* . */
397 8, 0, 0, 0, /* > */
398 3, 3, 3, 3, /* [0..9] */
399 5, 5, 10, 5, /* [A-Z] */
400 5, 5, 10, 5,
401 6, 6, 6, 6, /* [a-z] */
402 6, 6, 6, 6,
403 };
404 BROTLI_UNUSED(quality);
405 /* Try the more complex static context map only for long data. */
406 if (size_hint < (1 << 20)) {
407 return BROTLI_FALSE;
408 } else {
409 const size_t end_pos = start_pos + length;
410 /* To make entropy calculations faster and to fit on the stack, we collect
411 histograms over the 5 most significant bits of literals. One histogram
412 without context and 13 additional histograms for each context value. */
413 uint32_t combined_histo[32] = { 0 };
414 uint32_t context_histo[13][32] = { { 0 } };
415 uint32_t total = 0;
416 double entropy[3];
417 size_t dummy;
418 size_t i;
419 ContextLut utf8_lut = BROTLI_CONTEXT_LUT(CONTEXT_UTF8);
420 for (; start_pos + 64 <= end_pos; start_pos += 4096) {
421 const size_t stride_end_pos = start_pos + 64;
422 uint8_t prev2 = input[start_pos & mask];
423 uint8_t prev1 = input[(start_pos + 1) & mask];
424 size_t pos;
425 /* To make the analysis of the data faster we only examine 64 byte long
426 strides at every 4kB intervals. */
427 for (pos = start_pos + 2; pos < stride_end_pos; ++pos) {
428 const uint8_t literal = input[pos & mask];
429 const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[
430 BROTLI_CONTEXT(prev1, prev2, utf8_lut)];
431 ++total;
432 ++combined_histo[literal >> 3];
433 ++context_histo[context][literal >> 3];
434 prev2 = prev1;
435 prev1 = literal;
436 }
437 }
438 entropy[1] = ShannonEntropy(combined_histo, 32, &dummy);
439 entropy[2] = 0;
440 for (i = 0; i < 13; ++i) {
441 entropy[2] += ShannonEntropy(&context_histo[i][0], 32, &dummy);
442 }
443 entropy[0] = 1.0 / (double)total;
444 entropy[1] *= entropy[0];
445 entropy[2] *= entropy[0];
446 /* The triggering heuristics below were tuned by compressing the individual
447 files of the silesia corpus. If we skip this kind of context modeling
448 for not very well compressible input (i.e. entropy using context modeling
449 is 60% of maximal entropy) or if expected savings by symbol are less
450 than 0.2 bits, then in every case when it triggers, the final compression
451 ratio is improved. Note however that this heuristics might be too strict
452 for some cases and could be tuned further. */
453 if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) {
454 return BROTLI_FALSE;
455 } else {
456 *num_literal_contexts = 13;
457 *literal_context_map = kStaticContextMapComplexUTF8;
458 return BROTLI_TRUE;
459 }
460 }
461}
462
463static void DecideOverLiteralContextModeling(const uint8_t* input,
464 size_t start_pos, size_t length, size_t mask, int quality, size_t size_hint,
465 size_t* num_literal_contexts, const uint32_t** literal_context_map) {
466 if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) {
467 return;
468 } else if (ShouldUseComplexStaticContextMap(
469 input, start_pos, length, mask, quality, size_hint,
470 num_literal_contexts, literal_context_map)) {
471 /* Context map was already set, nothing else to do. */
472 } else {
473 /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of
474 UTF8 data faster we only examine 64 byte long strides at every 4kB
475 intervals. */
476 const size_t end_pos = start_pos + length;
477 uint32_t bigram_prefix_histo[9] = { 0 };
478 for (; start_pos + 64 <= end_pos; start_pos += 4096) {
479 static const int lut[4] = { 0, 0, 1, 2 };
480 const size_t stride_end_pos = start_pos + 64;
481 int prev = lut[input[start_pos & mask] >> 6] * 3;
482 size_t pos;
483 for (pos = start_pos + 1; pos < stride_end_pos; ++pos) {
484 const uint8_t literal = input[pos & mask];
485 ++bigram_prefix_histo[prev + lut[literal >> 6]];
486 prev = lut[literal >> 6] * 3;
487 }
488 }
489 ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts,
490 literal_context_map);
491 }
492}
493
494static BROTLI_BOOL ShouldCompress(
495 const uint8_t* data, const size_t mask, const uint64_t last_flush_pos,
496 const size_t bytes, const size_t num_literals, const size_t num_commands) {
497 /* TODO: find more precise minimal block overhead. */
498 if (bytes <= 2) return BROTLI_FALSE;
499 if (num_commands < (bytes >> 8) + 2) {
500 if (num_literals > 0.99 * (double)bytes) {
501 uint32_t literal_histo[256] = { 0 };
502 static const uint32_t kSampleRate = 13;
503 static const double kMinEntropy = 7.92;
504 const double bit_cost_threshold =
505 (double)bytes * kMinEntropy / kSampleRate;
506 size_t t = (bytes + kSampleRate - 1) / kSampleRate;
507 uint32_t pos = (uint32_t)last_flush_pos;
508 size_t i;
509 for (i = 0; i < t; i++) {
510 ++literal_histo[data[pos & mask]];
511 pos += kSampleRate;
512 }
513 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) {
514 return BROTLI_FALSE;
515 }
516 }
517 }
518 return BROTLI_TRUE;
519}
520
521/* Chooses the literal context mode for a metablock */
522static ContextType ChooseContextMode(const BrotliEncoderParams* params,
523 const uint8_t* data, const size_t pos, const size_t mask,
524 const size_t length) {
525 /* We only do the computation for the option of something else than
526 CONTEXT_UTF8 for the highest qualities */
527 if (params->quality >= MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING &&
528 !BrotliIsMostlyUTF8(data, pos, mask, length, kMinUTF8Ratio)) {
529 return CONTEXT_SIGNED;
530 }
531 return CONTEXT_UTF8;
532}
533
534static void WriteMetaBlockInternal(MemoryManager* m,
535 const uint8_t* data,
536 const size_t mask,
537 const uint64_t last_flush_pos,
538 const size_t bytes,
539 const BROTLI_BOOL is_last,
540 ContextType literal_context_mode,
541 const BrotliEncoderParams* params,
542 const uint8_t prev_byte,
543 const uint8_t prev_byte2,
544 const size_t num_literals,
545 const size_t num_commands,
546 Command* commands,
547 const int* saved_dist_cache,
548 int* dist_cache,
549 size_t* storage_ix,
550 uint8_t* storage) {
551 const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos);
552 uint16_t last_bytes;
553 uint8_t last_bytes_bits;
554 ContextLut literal_context_lut = BROTLI_CONTEXT_LUT(literal_context_mode);
555 BrotliEncoderParams block_params = *params;
556
557 if (bytes == 0) {
558 /* Write the ISLAST and ISEMPTY bits. */
559 BrotliWriteBits(2, 3, storage_ix, storage);
560 *storage_ix = (*storage_ix + 7u) & ~7u;
561 return;
562 }
563
564 if (!ShouldCompress(data, mask, last_flush_pos, bytes,
565 num_literals, num_commands)) {
566 /* Restore the distance cache, as its last update by
567 CreateBackwardReferences is now unused. */
568 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
569 BrotliStoreUncompressedMetaBlock(is_last, data,
570 wrapped_last_flush_pos, mask, bytes,
571 storage_ix, storage);
572 return;
573 }
574
575 BROTLI_DCHECK(*storage_ix <= 14);
576 last_bytes = (uint16_t)((storage[1] << 8) | storage[0]);
577 last_bytes_bits = (uint8_t)(*storage_ix);
578 if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) {
579 BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos,
580 bytes, mask, is_last, params,
581 commands, num_commands,
582 storage_ix, storage);
583 if (BROTLI_IS_OOM(m)) return;
584 } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) {
585 BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos,
586 bytes, mask, is_last, params,
587 commands, num_commands,
588 storage_ix, storage);
589 if (BROTLI_IS_OOM(m)) return;
590 } else {
591 MetaBlockSplit mb;
592 InitMetaBlockSplit(&mb);
593 if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) {
594 size_t num_literal_contexts = 1;
595 const uint32_t* literal_context_map = NULL;
596 if (!params->disable_literal_context_modeling) {
597 DecideOverLiteralContextModeling(
598 data, wrapped_last_flush_pos, bytes, mask, params->quality,
599 params->size_hint, &num_literal_contexts,
600 &literal_context_map);
601 }
602 BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask,
603 prev_byte, prev_byte2, literal_context_lut, num_literal_contexts,
604 literal_context_map, commands, num_commands, &mb);
605 if (BROTLI_IS_OOM(m)) return;
606 } else {
607 BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, &block_params,
608 prev_byte, prev_byte2,
609 commands, num_commands,
610 literal_context_mode,
611 &mb);
612 if (BROTLI_IS_OOM(m)) return;
613 }
614 if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) {
615 /* The number of distance symbols effectively used for distance
616 histograms. It might be less than distance alphabet size
617 for "Large Window Brotli" (32-bit). */
618 uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;
619 if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
620 num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
621 }
622 BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
623 }
624 BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask,
625 prev_byte, prev_byte2,
626 is_last,
627 &block_params,
628 literal_context_mode,
629 commands, num_commands,
630 &mb,
631 storage_ix, storage);
632 if (BROTLI_IS_OOM(m)) return;
633 DestroyMetaBlockSplit(m, &mb);
634 }
635 if (bytes + 4 < (*storage_ix >> 3)) {
636 /* Restore the distance cache and last byte. */
637 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
638 storage[0] = (uint8_t)last_bytes;
639 storage[1] = (uint8_t)(last_bytes >> 8);
640 *storage_ix = last_bytes_bits;
641 BrotliStoreUncompressedMetaBlock(is_last, data,
642 wrapped_last_flush_pos, mask,
643 bytes, storage_ix, storage);
644 }
645}
646
647static void ChooseDistanceParams(BrotliEncoderParams* params) {
648 uint32_t distance_postfix_bits = 0;
649 uint32_t num_direct_distance_codes = 0;
650
651 if (params->quality >= MIN_QUALITY_FOR_NONZERO_DISTANCE_PARAMS) {
652 uint32_t ndirect_msb;
653 if (params->mode == BROTLI_MODE_FONT) {
654 distance_postfix_bits = 1;
655 num_direct_distance_codes = 12;
656 } else {
657 distance_postfix_bits = params->dist.distance_postfix_bits;
658 num_direct_distance_codes = params->dist.num_direct_distance_codes;
659 }
660 ndirect_msb = (num_direct_distance_codes >> distance_postfix_bits) & 0x0F;
661 if (distance_postfix_bits > BROTLI_MAX_NPOSTFIX ||
662 num_direct_distance_codes > BROTLI_MAX_NDIRECT ||
663 (ndirect_msb << distance_postfix_bits) != num_direct_distance_codes) {
664 distance_postfix_bits = 0;
665 num_direct_distance_codes = 0;
666 }
667 }
668
669 BrotliInitDistanceParams(
670 params, distance_postfix_bits, num_direct_distance_codes);
671}
672
673static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) {
674 if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE;
675 if (s->is_initialized_) return BROTLI_TRUE;
676
677 s->last_bytes_bits_ = 0;
678 s->last_bytes_ = 0;
679 s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
680
681 SanitizeParams(&s->params);
682 s->params.lgblock = ComputeLgBlock(&s->params);
683 ChooseDistanceParams(&s->params);
684
685 RingBufferSetup(&s->params, &s->ringbuffer_);
686
687 /* Initialize last byte with stream header. */
688 {
689 int lgwin = s->params.lgwin;
690 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
691 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
692 lgwin = BROTLI_MAX(int, lgwin, 18);
693 }
694 EncodeWindowBits(lgwin, s->params.large_window,
695 &s->last_bytes_, &s->last_bytes_bits_);
696 }
697
698 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
699 InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_,
700 s->cmd_code_, &s->cmd_code_numbits_);
701 }
702
703 s->is_initialized_ = BROTLI_TRUE;
704 return BROTLI_TRUE;
705}
706
707static void BrotliEncoderInitParams(BrotliEncoderParams* params) {
708 params->mode = BROTLI_DEFAULT_MODE;
709 params->large_window = BROTLI_FALSE;
710 params->quality = BROTLI_DEFAULT_QUALITY;
711 params->lgwin = BROTLI_DEFAULT_WINDOW;
712 params->lgblock = 0;
713 params->size_hint = 0;
714 params->disable_literal_context_modeling = BROTLI_FALSE;
715 BrotliInitEncoderDictionary(&params->dictionary);
716 params->dist.distance_postfix_bits = 0;
717 params->dist.num_direct_distance_codes = 0;
718 params->dist.alphabet_size =
719 BROTLI_DISTANCE_ALPHABET_SIZE(0, 0, BROTLI_MAX_DISTANCE_BITS);
720 params->dist.max_distance = BROTLI_MAX_DISTANCE;
721}
722
723static void BrotliEncoderInitState(BrotliEncoderState* s) {
724 BrotliEncoderInitParams(&s->params);
725 s->input_pos_ = 0;
726 s->num_commands_ = 0;
727 s->num_literals_ = 0;
728 s->last_insert_len_ = 0;
729 s->last_flush_pos_ = 0;
730 s->last_processed_pos_ = 0;
731 s->prev_byte_ = 0;
732 s->prev_byte2_ = 0;
733 s->storage_size_ = 0;
734 s->storage_ = 0;
735 s->hasher_ = NULL;
736 s->large_table_ = NULL;
737 s->large_table_size_ = 0;
738 s->cmd_code_numbits_ = 0;
739 s->command_buf_ = NULL;
740 s->literal_buf_ = NULL;
741 s->next_out_ = NULL;
742 s->available_out_ = 0;
743 s->total_out_ = 0;
744 s->stream_state_ = BROTLI_STREAM_PROCESSING;
745 s->is_last_block_emitted_ = BROTLI_FALSE;
746 s->is_initialized_ = BROTLI_FALSE;
747
748 RingBufferInit(&s->ringbuffer_);
749
750 s->commands_ = 0;
751 s->cmd_alloc_size_ = 0;
752
753 /* Initialize distance cache. */
754 s->dist_cache_[0] = 4;
755 s->dist_cache_[1] = 11;
756 s->dist_cache_[2] = 15;
757 s->dist_cache_[3] = 16;
758 /* Save the state of the distance cache in case we need to restore it for
759 emitting an uncompressed block. */
760 memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
761}
762
763BrotliEncoderState* BrotliEncoderCreateInstance(
764 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
765 BrotliEncoderState* state = 0;
766 if (!alloc_func && !free_func) {
767 state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState));
768 } else if (alloc_func && free_func) {
769 state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState));
770 }
771 if (state == 0) {
772 /* BROTLI_DUMP(); */
773 return 0;
774 }
775 BrotliInitMemoryManager(
776 &state->memory_manager_, alloc_func, free_func, opaque);
777 BrotliEncoderInitState(state);
778 return state;
779}
780
781static void BrotliEncoderCleanupState(BrotliEncoderState* s) {
782 MemoryManager* m = &s->memory_manager_;
783 if (BROTLI_IS_OOM(m)) {
784 BrotliWipeOutMemoryManager(m);
785 return;
786 }
787 BROTLI_FREE(m, s->storage_);
788 BROTLI_FREE(m, s->commands_);
789 RingBufferFree(m, &s->ringbuffer_);
790 DestroyHasher(m, &s->hasher_);
791 BROTLI_FREE(m, s->large_table_);
792 BROTLI_FREE(m, s->command_buf_);
793 BROTLI_FREE(m, s->literal_buf_);
794}
795
796/* Deinitializes and frees BrotliEncoderState instance. */
797void BrotliEncoderDestroyInstance(BrotliEncoderState* state) {
798 if (!state) {
799 return;
800 } else {
801 MemoryManager* m = &state->memory_manager_;
802 brotli_free_func free_func = m->free_func;
803 void* opaque = m->opaque;
804 BrotliEncoderCleanupState(state);
805 free_func(opaque, state);
806 }
807}
808
809/*
810 Copies the given input data to the internal ring buffer of the compressor.
811 No processing of the data occurs at this time and this function can be
812 called multiple times before calling WriteBrotliData() to process the
813 accumulated input. At most input_block_size() bytes of input data can be
814 copied to the ring buffer, otherwise the next WriteBrotliData() will fail.
815 */
816static void CopyInputToRingBuffer(BrotliEncoderState* s,
817 const size_t input_size,
818 const uint8_t* input_buffer) {
819 RingBuffer* ringbuffer_ = &s->ringbuffer_;
820 MemoryManager* m = &s->memory_manager_;
821 RingBufferWrite(m, input_buffer, input_size, ringbuffer_);
822 if (BROTLI_IS_OOM(m)) return;
823 s->input_pos_ += input_size;
824
825 /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the
826 hashing not depend on uninitialized data. This makes compression
827 deterministic and it prevents uninitialized memory warnings in Valgrind.
828 Even without erasing, the output would be valid (but nondeterministic).
829
830 Background information: The compressor stores short (at most 8 bytes)
831 substrings of the input already read in a hash table, and detects
832 repetitions by looking up such substrings in the hash table. If it
833 can find a substring, it checks whether the substring is really there
834 in the ring buffer (or it's just a hash collision). Should the hash
835 table become corrupt, this check makes sure that the output is
836 still valid, albeit the compression ratio would be bad.
837
838 The compressor populates the hash table from the ring buffer as it's
839 reading new bytes from the input. However, at the last few indexes of
840 the ring buffer, there are not enough bytes to build full-length
841 substrings from. Since the hash table always contains full-length
842 substrings, we erase with dummy zeros here to make sure that those
843 substrings will contain zeros at the end instead of uninitialized
844 data.
845
846 Please note that erasing is not necessary (because the
847 memory region is already initialized since he ring buffer
848 has a `tail' that holds a copy of the beginning,) so we
849 skip erasing if we have already gone around at least once in
850 the ring buffer.
851
852 Only clear during the first round of ring-buffer writes. On
853 subsequent rounds data in the ring-buffer would be affected. */
854 if (ringbuffer_->pos_ <= ringbuffer_->mask_) {
855 /* This is the first time when the ring buffer is being written.
856 We clear 7 bytes just after the bytes that have been copied from
857 the input buffer.
858
859 The ring-buffer has a "tail" that holds a copy of the beginning,
860 but only once the ring buffer has been fully written once, i.e.,
861 pos <= mask. For the first time, we need to write values
862 in this tail (where index may be larger than mask), so that
863 we have exactly defined behavior and don't read uninitialized
864 memory. Due to performance reasons, hashing reads data using a
865 LOAD64, which can go 7 bytes beyond the bytes written in the
866 ring-buffer. */
867 memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7);
868 }
869}
870
871/* Marks all input as processed.
872 Returns true if position wrapping occurs. */
873static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) {
874 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
875 uint32_t wrapped_input_pos = WrapPosition(s->input_pos_);
876 s->last_processed_pos_ = s->input_pos_;
877 return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos);
878}
879
880static void ExtendLastCommand(BrotliEncoderState* s, uint32_t* bytes,
881 uint32_t* wrapped_last_processed_pos) {
882 Command* last_command = &s->commands_[s->num_commands_ - 1];
883 const uint8_t* data = s->ringbuffer_.buffer_;
884 const uint32_t mask = s->ringbuffer_.mask_;
885 uint64_t max_backward_distance =
886 (((uint64_t)1) << s->params.lgwin) - BROTLI_WINDOW_GAP;
887 uint64_t last_copy_len = last_command->copy_len_ & 0x1FFFFFF;
888 uint64_t last_processed_pos = s->last_processed_pos_ - last_copy_len;
889 uint64_t max_distance = last_processed_pos < max_backward_distance ?
890 last_processed_pos : max_backward_distance;
891 uint64_t cmd_dist = (uint64_t)s->dist_cache_[0];
892 uint32_t distance_code = CommandRestoreDistanceCode(last_command,
893 &s->params.dist);
894 if (distance_code < BROTLI_NUM_DISTANCE_SHORT_CODES ||
895 distance_code - (BROTLI_NUM_DISTANCE_SHORT_CODES - 1) == cmd_dist) {
896 if (cmd_dist <= max_distance) {
897 while (*bytes != 0 && data[*wrapped_last_processed_pos & mask] ==
898 data[(*wrapped_last_processed_pos - cmd_dist) & mask]) {
899 last_command->copy_len_++;
900 (*bytes)--;
901 (*wrapped_last_processed_pos)++;
902 }
903 }
904 /* The copy length is at most the metablock size, and thus expressible. */
905 GetLengthCode(last_command->insert_len_,
906 (size_t)((int)(last_command->copy_len_ & 0x1FFFFFF) +
907 (int)(last_command->copy_len_ >> 25)),
908 TO_BROTLI_BOOL((last_command->dist_prefix_ & 0x3FF) == 0),
909 &last_command->cmd_prefix_);
910 }
911}
912
913/*
914 Processes the accumulated input data and sets |*out_size| to the length of
915 the new output meta-block, or to zero if no new output meta-block has been
916 created (in this case the processed input data is buffered internally).
917 If |*out_size| is positive, |*output| points to the start of the output
918 data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is
919 always created. However, until |is_last| is BROTLI_TRUE encoder may retain up
920 to 7 bits of the last byte of output. To force encoder to dump the remaining
921 bits use WriteMetadata() to append an empty meta-data block.
922 Returns BROTLI_FALSE if the size of the input data is larger than
923 input_block_size().
924 */
925static BROTLI_BOOL EncodeData(
926 BrotliEncoderState* s, const BROTLI_BOOL is_last,
927 const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) {
928 const uint64_t delta = UnprocessedInputSize(s);
929 uint32_t bytes = (uint32_t)delta;
930 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_);
931 uint8_t* data;
932 uint32_t mask;
933 MemoryManager* m = &s->memory_manager_;
934 ContextType literal_context_mode;
935
936 data = s->ringbuffer_.buffer_;
937 mask = s->ringbuffer_.mask_;
938
939 /* Adding more blocks after "last" block is forbidden. */
940 if (s->is_last_block_emitted_) return BROTLI_FALSE;
941 if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE;
942
943 if (delta > InputBlockSize(s)) {
944 return BROTLI_FALSE;
945 }
946 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY &&
947 !s->command_buf_) {
948 s->command_buf_ =
949 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
950 s->literal_buf_ =
951 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
952 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
953 }
954
955 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
956 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
957 uint8_t* storage;
958 size_t storage_ix = s->last_bytes_bits_;
959 size_t table_size;
960 int* table;
961
962 if (delta == 0 && !is_last) {
963 /* We have no new input data and we don't have to finish the stream, so
964 nothing to do. */
965 *out_size = 0;
966 return BROTLI_TRUE;
967 }
968 storage = GetBrotliStorage(s, 2 * bytes + 503);
969 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
970 storage[0] = (uint8_t)s->last_bytes_;
971 storage[1] = (uint8_t)(s->last_bytes_ >> 8);
972 table = GetHashTable(s, s->params.quality, bytes, &table_size);
973 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
974 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
975 BrotliCompressFragmentFast(
976 m, &data[wrapped_last_processed_pos & mask],
977 bytes, is_last,
978 table, table_size,
979 s->cmd_depths_, s->cmd_bits_,
980 &s->cmd_code_numbits_, s->cmd_code_,
981 &storage_ix, storage);
982 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
983 } else {
984 BrotliCompressFragmentTwoPass(
985 m, &data[wrapped_last_processed_pos & mask],
986 bytes, is_last,
987 s->command_buf_, s->literal_buf_,
988 table, table_size,
989 &storage_ix, storage);
990 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
991 }
992 s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
993 s->last_bytes_bits_ = storage_ix & 7u;
994 UpdateLastProcessedPos(s);
995 *output = &storage[0];
996 *out_size = storage_ix >> 3;
997 return BROTLI_TRUE;
998 }
999
1000 {
1001 /* Theoretical max number of commands is 1 per 2 bytes. */
1002 size_t newsize = s->num_commands_ + bytes / 2 + 1;
1003 if (newsize > s->cmd_alloc_size_) {
1004 Command* new_commands;
1005 /* Reserve a bit more memory to allow merging with a next block
1006 without reallocation: that would impact speed. */
1007 newsize += (bytes / 4) + 16;
1008 s->cmd_alloc_size_ = newsize;
1009 new_commands = BROTLI_ALLOC(m, Command, newsize);
1010 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1011 if (s->commands_) {
1012 memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_);
1013 BROTLI_FREE(m, s->commands_);
1014 }
1015 s->commands_ = new_commands;
1016 }
1017 }
1018
1019 InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params,
1020 wrapped_last_processed_pos, bytes, is_last);
1021
1022 literal_context_mode = ChooseContextMode(
1023 &s->params, data, WrapPosition(s->last_flush_pos_),
1024 mask, (size_t)(s->input_pos_ - s->last_flush_pos_));
1025
1026 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1027
1028 if (s->num_commands_ && s->last_insert_len_ == 0) {
1029 ExtendLastCommand(s, &bytes, &wrapped_last_processed_pos);
1030 }
1031
1032 if (s->params.quality == ZOPFLIFICATION_QUALITY) {
1033 BROTLI_DCHECK(s->params.hasher.type == 10);
1034 BrotliCreateZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1035 data, mask, &s->params, s->hasher_, s->dist_cache_,
1036 &s->last_insert_len_, &s->commands_[s->num_commands_],
1037 &s->num_commands_, &s->num_literals_);
1038 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1039 } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) {
1040 BROTLI_DCHECK(s->params.hasher.type == 10);
1041 BrotliCreateHqZopfliBackwardReferences(m, bytes, wrapped_last_processed_pos,
1042 data, mask, &s->params, s->hasher_, s->dist_cache_,
1043 &s->last_insert_len_, &s->commands_[s->num_commands_],
1044 &s->num_commands_, &s->num_literals_);
1045 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1046 } else {
1047 BrotliCreateBackwardReferences(bytes, wrapped_last_processed_pos,
1048 data, mask, &s->params, s->hasher_, s->dist_cache_,
1049 &s->last_insert_len_, &s->commands_[s->num_commands_],
1050 &s->num_commands_, &s->num_literals_);
1051 }
1052
1053 {
1054 const size_t max_length = MaxMetablockSize(&s->params);
1055 const size_t max_literals = max_length / 8;
1056 const size_t max_commands = max_length / 8;
1057 const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_);
1058 /* If maximal possible additional block doesn't fit metablock, flush now. */
1059 /* TODO: Postpone decision until next block arrives? */
1060 const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL(
1061 processed_bytes + InputBlockSize(s) <= max_length);
1062 /* If block splitting is not used, then flush as soon as there is some
1063 amount of commands / literals produced. */
1064 const BROTLI_BOOL should_flush = TO_BROTLI_BOOL(
1065 s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT &&
1066 s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS);
1067 if (!is_last && !force_flush && !should_flush &&
1068 next_input_fits_metablock &&
1069 s->num_literals_ < max_literals &&
1070 s->num_commands_ < max_commands) {
1071 /* Merge with next input block. Everything will happen later. */
1072 if (UpdateLastProcessedPos(s)) {
1073 HasherReset(s->hasher_);
1074 }
1075 *out_size = 0;
1076 return BROTLI_TRUE;
1077 }
1078 }
1079
1080 /* Create the last insert-only command. */
1081 if (s->last_insert_len_ > 0) {
1082 InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_);
1083 s->num_literals_ += s->last_insert_len_;
1084 s->last_insert_len_ = 0;
1085 }
1086
1087 if (!is_last && s->input_pos_ == s->last_flush_pos_) {
1088 /* We have no new input data and we don't have to finish the stream, so
1089 nothing to do. */
1090 *out_size = 0;
1091 return BROTLI_TRUE;
1092 }
1093 BROTLI_DCHECK(s->input_pos_ >= s->last_flush_pos_);
1094 BROTLI_DCHECK(s->input_pos_ > s->last_flush_pos_ || is_last);
1095 BROTLI_DCHECK(s->input_pos_ - s->last_flush_pos_ <= 1u << 24);
1096 {
1097 const uint32_t metablock_size =
1098 (uint32_t)(s->input_pos_ - s->last_flush_pos_);
1099 uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 503);
1100 size_t storage_ix = s->last_bytes_bits_;
1101 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1102 storage[0] = (uint8_t)s->last_bytes_;
1103 storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1104 WriteMetaBlockInternal(
1105 m, data, mask, s->last_flush_pos_, metablock_size, is_last,
1106 literal_context_mode, &s->params, s->prev_byte_, s->prev_byte2_,
1107 s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_,
1108 s->dist_cache_, &storage_ix, storage);
1109 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1110 s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1111 s->last_bytes_bits_ = storage_ix & 7u;
1112 s->last_flush_pos_ = s->input_pos_;
1113 if (UpdateLastProcessedPos(s)) {
1114 HasherReset(s->hasher_);
1115 }
1116 if (s->last_flush_pos_ > 0) {
1117 s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask];
1118 }
1119 if (s->last_flush_pos_ > 1) {
1120 s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask];
1121 }
1122 s->num_commands_ = 0;
1123 s->num_literals_ = 0;
1124 /* Save the state of the distance cache in case we need to restore it for
1125 emitting an uncompressed block. */
1126 memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_));
1127 *output = &storage[0];
1128 *out_size = storage_ix >> 3;
1129 return BROTLI_TRUE;
1130 }
1131}
1132
1133/* Dumps remaining output bits and metadata header to |header|.
1134 Returns number of produced bytes.
1135 REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long.
1136 REQUIRED: |block_size| <= (1 << 24). */
1137static size_t WriteMetadataHeader(
1138 BrotliEncoderState* s, const size_t block_size, uint8_t* header) {
1139 size_t storage_ix;
1140 storage_ix = s->last_bytes_bits_;
1141 header[0] = (uint8_t)s->last_bytes_;
1142 header[1] = (uint8_t)(s->last_bytes_ >> 8);
1143 s->last_bytes_ = 0;
1144 s->last_bytes_bits_ = 0;
1145
1146 BrotliWriteBits(1, 0, &storage_ix, header);
1147 BrotliWriteBits(2, 3, &storage_ix, header);
1148 BrotliWriteBits(1, 0, &storage_ix, header);
1149 if (block_size == 0) {
1150 BrotliWriteBits(2, 0, &storage_ix, header);
1151 } else {
1152 uint32_t nbits = (block_size == 1) ? 0 :
1153 (Log2FloorNonZero((uint32_t)block_size - 1) + 1);
1154 uint32_t nbytes = (nbits + 7) / 8;
1155 BrotliWriteBits(2, nbytes, &storage_ix, header);
1156 BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header);
1157 }
1158 return (storage_ix + 7u) >> 3;
1159}
1160
1161static BROTLI_BOOL BrotliCompressBufferQuality10(
1162 int lgwin, size_t input_size, const uint8_t* input_buffer,
1163 size_t* encoded_size, uint8_t* encoded_buffer) {
1164 MemoryManager memory_manager;
1165 MemoryManager* m = &memory_manager;
1166
1167 const size_t mask = BROTLI_SIZE_MAX >> 1;
1168 int dist_cache[4] = { 4, 11, 15, 16 };
1169 int saved_dist_cache[4] = { 4, 11, 15, 16 };
1170 BROTLI_BOOL ok = BROTLI_TRUE;
1171 const size_t max_out_size = *encoded_size;
1172 size_t total_out_size = 0;
1173 uint16_t last_bytes;
1174 uint8_t last_bytes_bits;
1175 HasherHandle hasher = NULL;
1176
1177 const size_t hasher_eff_size = BROTLI_MIN(size_t,
1178 input_size, BROTLI_MAX_BACKWARD_LIMIT(lgwin) + BROTLI_WINDOW_GAP);
1179
1180 BrotliEncoderParams params;
1181
1182 const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1);
1183 size_t max_block_size;
1184 const size_t max_metablock_size = (size_t)1 << lgmetablock;
1185 const size_t max_literals_per_metablock = max_metablock_size / 8;
1186 const size_t max_commands_per_metablock = max_metablock_size / 8;
1187 size_t metablock_start = 0;
1188 uint8_t prev_byte = 0;
1189 uint8_t prev_byte2 = 0;
1190
1191 BrotliEncoderInitParams(&params);
1192 params.quality = 10;
1193 params.lgwin = lgwin;
1194 if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1195 params.large_window = BROTLI_TRUE;
1196 }
1197 SanitizeParams(&params);
1198 params.lgblock = ComputeLgBlock(&params);
1199 ChooseDistanceParams(&params);
1200 max_block_size = (size_t)1 << params.lgblock;
1201
1202 BrotliInitMemoryManager(m, 0, 0, 0);
1203
1204 BROTLI_DCHECK(input_size <= mask + 1);
1205 EncodeWindowBits(lgwin, params.large_window, &last_bytes, &last_bytes_bits);
1206 InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, &params,
1207 0, hasher_eff_size, BROTLI_TRUE);
1208 if (BROTLI_IS_OOM(m)) goto oom;
1209
1210 while (ok && metablock_start < input_size) {
1211 const size_t metablock_end =
1212 BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size);
1213 const size_t expected_num_commands =
1214 (metablock_end - metablock_start) / 12 + 16;
1215 Command* commands = 0;
1216 size_t num_commands = 0;
1217 size_t last_insert_len = 0;
1218 size_t num_literals = 0;
1219 size_t metablock_size = 0;
1220 size_t cmd_alloc_size = 0;
1221 BROTLI_BOOL is_last;
1222 uint8_t* storage;
1223 size_t storage_ix;
1224
1225 ContextType literal_context_mode = ChooseContextMode(&params,
1226 input_buffer, metablock_start, mask, metablock_end - metablock_start);
1227
1228 size_t block_start;
1229 for (block_start = metablock_start; block_start < metablock_end; ) {
1230 size_t block_size =
1231 BROTLI_MIN(size_t, metablock_end - block_start, max_block_size);
1232 ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1);
1233 size_t path_size;
1234 size_t new_cmd_alloc_size;
1235 if (BROTLI_IS_OOM(m)) goto oom;
1236 BrotliInitZopfliNodes(nodes, block_size + 1);
1237 StitchToPreviousBlockH10(hasher, block_size, block_start,
1238 input_buffer, mask);
1239 path_size = BrotliZopfliComputeShortestPath(m, block_size, block_start,
1240 input_buffer, mask, &params, dist_cache, hasher,
1241 nodes);
1242 if (BROTLI_IS_OOM(m)) goto oom;
1243 /* We allocate a command buffer in the first iteration of this loop that
1244 will be likely big enough for the whole metablock, so that for most
1245 inputs we will not have to reallocate in later iterations. We do the
1246 allocation here and not before the loop, because if the input is small,
1247 this will be allocated after the Zopfli cost model is freed, so this
1248 will not increase peak memory usage.
1249 TODO: If the first allocation is too small, increase command
1250 buffer size exponentially. */
1251 new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands,
1252 num_commands + path_size + 1);
1253 if (cmd_alloc_size != new_cmd_alloc_size) {
1254 Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size);
1255 if (BROTLI_IS_OOM(m)) goto oom;
1256 cmd_alloc_size = new_cmd_alloc_size;
1257 if (commands) {
1258 memcpy(new_commands, commands, sizeof(Command) * num_commands);
1259 BROTLI_FREE(m, commands);
1260 }
1261 commands = new_commands;
1262 }
1263 BrotliZopfliCreateCommands(block_size, block_start, &nodes[0], dist_cache,
1264 &last_insert_len, &params, &commands[num_commands], &num_literals);
1265 num_commands += path_size;
1266 block_start += block_size;
1267 metablock_size += block_size;
1268 BROTLI_FREE(m, nodes);
1269 if (num_literals > max_literals_per_metablock ||
1270 num_commands > max_commands_per_metablock) {
1271 break;
1272 }
1273 }
1274
1275 if (last_insert_len > 0) {
1276 InitInsertCommand(&commands[num_commands++], last_insert_len);
1277 num_literals += last_insert_len;
1278 }
1279
1280 is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size);
1281 storage = NULL;
1282 storage_ix = last_bytes_bits;
1283
1284 if (metablock_size == 0) {
1285 /* Write the ISLAST and ISEMPTY bits. */
1286 storage = BROTLI_ALLOC(m, uint8_t, 16);
1287 if (BROTLI_IS_OOM(m)) goto oom;
1288 storage[0] = (uint8_t)last_bytes;
1289 storage[1] = (uint8_t)(last_bytes >> 8);
1290 BrotliWriteBits(2, 3, &storage_ix, storage);
1291 storage_ix = (storage_ix + 7u) & ~7u;
1292 } else if (!ShouldCompress(input_buffer, mask, metablock_start,
1293 metablock_size, num_literals, num_commands)) {
1294 /* Restore the distance cache, as its last update by
1295 CreateBackwardReferences is now unused. */
1296 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1297 storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16);
1298 if (BROTLI_IS_OOM(m)) goto oom;
1299 storage[0] = (uint8_t)last_bytes;
1300 storage[1] = (uint8_t)(last_bytes >> 8);
1301 BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1302 metablock_start, mask, metablock_size,
1303 &storage_ix, storage);
1304 } else {
1305 MetaBlockSplit mb;
1306 BrotliEncoderParams block_params = params;
1307 InitMetaBlockSplit(&mb);
1308 BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask,
1309 &block_params,
1310 prev_byte, prev_byte2,
1311 commands, num_commands,
1312 literal_context_mode,
1313 &mb);
1314 if (BROTLI_IS_OOM(m)) goto oom;
1315 {
1316 /* The number of distance symbols effectively used for distance
1317 histograms. It might be less than distance alphabet size
1318 for "Large Window Brotli" (32-bit). */
1319 uint32_t num_effective_dist_codes = block_params.dist.alphabet_size;
1320 if (num_effective_dist_codes > BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS) {
1321 num_effective_dist_codes = BROTLI_NUM_HISTOGRAM_DISTANCE_SYMBOLS;
1322 }
1323 BrotliOptimizeHistograms(num_effective_dist_codes, &mb);
1324 }
1325 storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 503);
1326 if (BROTLI_IS_OOM(m)) goto oom;
1327 storage[0] = (uint8_t)last_bytes;
1328 storage[1] = (uint8_t)(last_bytes >> 8);
1329 BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size,
1330 mask, prev_byte, prev_byte2,
1331 is_last,
1332 &block_params,
1333 literal_context_mode,
1334 commands, num_commands,
1335 &mb,
1336 &storage_ix, storage);
1337 if (BROTLI_IS_OOM(m)) goto oom;
1338 if (metablock_size + 4 < (storage_ix >> 3)) {
1339 /* Restore the distance cache and last byte. */
1340 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0]));
1341 storage[0] = (uint8_t)last_bytes;
1342 storage[1] = (uint8_t)(last_bytes >> 8);
1343 storage_ix = last_bytes_bits;
1344 BrotliStoreUncompressedMetaBlock(is_last, input_buffer,
1345 metablock_start, mask,
1346 metablock_size, &storage_ix, storage);
1347 }
1348 DestroyMetaBlockSplit(m, &mb);
1349 }
1350 last_bytes = (uint16_t)(storage[storage_ix >> 3]);
1351 last_bytes_bits = storage_ix & 7u;
1352 metablock_start += metablock_size;
1353 if (metablock_start < input_size) {
1354 prev_byte = input_buffer[metablock_start - 1];
1355 prev_byte2 = input_buffer[metablock_start - 2];
1356 }
1357 /* Save the state of the distance cache in case we need to restore it for
1358 emitting an uncompressed block. */
1359 memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0]));
1360
1361 {
1362 const size_t out_size = storage_ix >> 3;
1363 total_out_size += out_size;
1364 if (total_out_size <= max_out_size) {
1365 memcpy(encoded_buffer, storage, out_size);
1366 encoded_buffer += out_size;
1367 } else {
1368 ok = BROTLI_FALSE;
1369 }
1370 }
1371 BROTLI_FREE(m, storage);
1372 BROTLI_FREE(m, commands);
1373 }
1374
1375 *encoded_size = total_out_size;
1376 DestroyHasher(m, &hasher);
1377 return ok;
1378
1379oom:
1380 BrotliWipeOutMemoryManager(m);
1381 return BROTLI_FALSE;
1382}
1383
1384size_t BrotliEncoderMaxCompressedSize(size_t input_size) {
1385 /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */
1386 size_t num_large_blocks = input_size >> 14;
1387 size_t overhead = 2 + (4 * num_large_blocks) + 3 + 1;
1388 size_t result = input_size + overhead;
1389 if (input_size == 0) return 2;
1390 return (result < input_size) ? 0 : result;
1391}
1392
1393/* Wraps data to uncompressed brotli stream with minimal window size.
1394 |output| should point at region with at least BrotliEncoderMaxCompressedSize
1395 addressable bytes.
1396 Returns the length of stream. */
1397static size_t MakeUncompressedStream(
1398 const uint8_t* input, size_t input_size, uint8_t* output) {
1399 size_t size = input_size;
1400 size_t result = 0;
1401 size_t offset = 0;
1402 if (input_size == 0) {
1403 output[0] = 6;
1404 return 1;
1405 }
1406 output[result++] = 0x21; /* window bits = 10, is_last = false */
1407 output[result++] = 0x03; /* empty metadata, padding */
1408 while (size > 0) {
1409 uint32_t nibbles = 0;
1410 uint32_t chunk_size;
1411 uint32_t bits;
1412 chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size;
1413 if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1;
1414 bits =
1415 (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles));
1416 output[result++] = (uint8_t)bits;
1417 output[result++] = (uint8_t)(bits >> 8);
1418 output[result++] = (uint8_t)(bits >> 16);
1419 if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24);
1420 memcpy(&output[result], &input[offset], chunk_size);
1421 result += chunk_size;
1422 offset += chunk_size;
1423 size -= chunk_size;
1424 }
1425 output[result++] = 3;
1426 return result;
1427}
1428
1429BROTLI_BOOL BrotliEncoderCompress(
1430 int quality, int lgwin, BrotliEncoderMode mode, size_t input_size,
1431 const uint8_t* input_buffer, size_t* encoded_size,
1432 uint8_t* encoded_buffer) {
1433 BrotliEncoderState* s;
1434 size_t out_size = *encoded_size;
1435 const uint8_t* input_start = input_buffer;
1436 uint8_t* output_start = encoded_buffer;
1437 size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size);
1438 if (out_size == 0) {
1439 /* Output buffer needs at least one byte. */
1440 return BROTLI_FALSE;
1441 }
1442 if (input_size == 0) {
1443 /* Handle the special case of empty input. */
1444 *encoded_size = 1;
1445 *encoded_buffer = 6;
1446 return BROTLI_TRUE;
1447 }
1448 if (quality == 10) {
1449 /* TODO: Implement this direct path for all quality levels. */
1450 const int lg_win = BROTLI_MIN(int, BROTLI_LARGE_MAX_WINDOW_BITS,
1451 BROTLI_MAX(int, 16, lgwin));
1452 int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer,
1453 encoded_size, encoded_buffer);
1454 if (!ok || (max_out_size && *encoded_size > max_out_size)) {
1455 goto fallback;
1456 }
1457 return BROTLI_TRUE;
1458 }
1459
1460 s = BrotliEncoderCreateInstance(0, 0, 0);
1461 if (!s) {
1462 return BROTLI_FALSE;
1463 } else {
1464 size_t available_in = input_size;
1465 const uint8_t* next_in = input_buffer;
1466 size_t available_out = *encoded_size;
1467 uint8_t* next_out = encoded_buffer;
1468 size_t total_out = 0;
1469 BROTLI_BOOL result = BROTLI_FALSE;
1470 BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality);
1471 BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin);
1472 BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode);
1473 BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size);
1474 if (lgwin > BROTLI_MAX_WINDOW_BITS) {
1475 BrotliEncoderSetParameter(s, BROTLI_PARAM_LARGE_WINDOW, BROTLI_TRUE);
1476 }
1477 result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH,
1478 &available_in, &next_in, &available_out, &next_out, &total_out);
1479 if (!BrotliEncoderIsFinished(s)) result = 0;
1480 *encoded_size = total_out;
1481 BrotliEncoderDestroyInstance(s);
1482 if (!result || (max_out_size && *encoded_size > max_out_size)) {
1483 goto fallback;
1484 }
1485 return BROTLI_TRUE;
1486 }
1487fallback:
1488 *encoded_size = 0;
1489 if (!max_out_size) return BROTLI_FALSE;
1490 if (out_size >= max_out_size) {
1491 *encoded_size =
1492 MakeUncompressedStream(input_start, input_size, output_start);
1493 return BROTLI_TRUE;
1494 }
1495 return BROTLI_FALSE;
1496}
1497
1498static void InjectBytePaddingBlock(BrotliEncoderState* s) {
1499 uint32_t seal = s->last_bytes_;
1500 size_t seal_bits = s->last_bytes_bits_;
1501 uint8_t* destination;
1502 s->last_bytes_ = 0;
1503 s->last_bytes_bits_ = 0;
1504 /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */
1505 seal |= 0x6u << seal_bits;
1506 seal_bits += 6;
1507 /* If we have already created storage, then append to it.
1508 Storage is valid until next block is being compressed. */
1509 if (s->next_out_) {
1510 destination = s->next_out_ + s->available_out_;
1511 } else {
1512 destination = s->tiny_buf_.u8;
1513 s->next_out_ = destination;
1514 }
1515 destination[0] = (uint8_t)seal;
1516 if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8);
1517 if (seal_bits > 16) destination[2] = (uint8_t)(seal >> 16);
1518 s->available_out_ += (seal_bits + 7) >> 3;
1519}
1520
1521/* Injects padding bits or pushes compressed data to output.
1522 Returns false if nothing is done. */
1523static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s,
1524 size_t* available_out, uint8_t** next_out, size_t* total_out) {
1525 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1526 s->last_bytes_bits_ != 0) {
1527 InjectBytePaddingBlock(s);
1528 return BROTLI_TRUE;
1529 }
1530
1531 if (s->available_out_ != 0 && *available_out != 0) {
1532 size_t copy_output_size =
1533 BROTLI_MIN(size_t, s->available_out_, *available_out);
1534 memcpy(*next_out, s->next_out_, copy_output_size);
1535 *next_out += copy_output_size;
1536 *available_out -= copy_output_size;
1537 s->next_out_ += copy_output_size;
1538 s->available_out_ -= copy_output_size;
1539 s->total_out_ += copy_output_size;
1540 if (total_out) *total_out = s->total_out_;
1541 return BROTLI_TRUE;
1542 }
1543
1544 return BROTLI_FALSE;
1545}
1546
1547static void CheckFlushComplete(BrotliEncoderState* s) {
1548 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED &&
1549 s->available_out_ == 0) {
1550 s->stream_state_ = BROTLI_STREAM_PROCESSING;
1551 s->next_out_ = 0;
1552 }
1553}
1554
1555static BROTLI_BOOL BrotliEncoderCompressStreamFast(
1556 BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
1557 const uint8_t** next_in, size_t* available_out, uint8_t** next_out,
1558 size_t* total_out) {
1559 const size_t block_size_limit = (size_t)1 << s->params.lgwin;
1560 const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize,
1561 BROTLI_MIN(size_t, *available_in, block_size_limit));
1562 uint32_t* tmp_command_buf = NULL;
1563 uint32_t* command_buf = NULL;
1564 uint8_t* tmp_literal_buf = NULL;
1565 uint8_t* literal_buf = NULL;
1566 MemoryManager* m = &s->memory_manager_;
1567 if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY &&
1568 s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) {
1569 return BROTLI_FALSE;
1570 }
1571 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
1572 if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) {
1573 s->command_buf_ =
1574 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize);
1575 s->literal_buf_ =
1576 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize);
1577 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1578 }
1579 if (s->command_buf_) {
1580 command_buf = s->command_buf_;
1581 literal_buf = s->literal_buf_;
1582 } else {
1583 tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size);
1584 tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size);
1585 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1586 command_buf = tmp_command_buf;
1587 literal_buf = tmp_literal_buf;
1588 }
1589 }
1590
1591 while (BROTLI_TRUE) {
1592 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1593 continue;
1594 }
1595
1596 /* Compress block only when internal output buffer is empty, stream is not
1597 finished, there is no pending flush request, and there is either
1598 additional input or pending operation. */
1599 if (s->available_out_ == 0 &&
1600 s->stream_state_ == BROTLI_STREAM_PROCESSING &&
1601 (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) {
1602 size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in);
1603 BROTLI_BOOL is_last =
1604 (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH);
1605 BROTLI_BOOL force_flush =
1606 (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH);
1607 size_t max_out_size = 2 * block_size + 503;
1608 BROTLI_BOOL inplace = BROTLI_TRUE;
1609 uint8_t* storage = NULL;
1610 size_t storage_ix = s->last_bytes_bits_;
1611 size_t table_size;
1612 int* table;
1613
1614 if (force_flush && block_size == 0) {
1615 s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1616 continue;
1617 }
1618 if (max_out_size <= *available_out) {
1619 storage = *next_out;
1620 } else {
1621 inplace = BROTLI_FALSE;
1622 storage = GetBrotliStorage(s, max_out_size);
1623 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1624 }
1625 storage[0] = (uint8_t)s->last_bytes_;
1626 storage[1] = (uint8_t)(s->last_bytes_ >> 8);
1627 table = GetHashTable(s, s->params.quality, block_size, &table_size);
1628 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1629
1630 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) {
1631 BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table,
1632 table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_,
1633 s->cmd_code_, &storage_ix, storage);
1634 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1635 } else {
1636 BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last,
1637 command_buf, literal_buf, table, table_size,
1638 &storage_ix, storage);
1639 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE;
1640 }
1641 *next_in += block_size;
1642 *available_in -= block_size;
1643 if (inplace) {
1644 size_t out_bytes = storage_ix >> 3;
1645 BROTLI_DCHECK(out_bytes <= *available_out);
1646 BROTLI_DCHECK((storage_ix & 7) == 0 || out_bytes < *available_out);
1647 *next_out += out_bytes;
1648 *available_out -= out_bytes;
1649 s->total_out_ += out_bytes;
1650 if (total_out) *total_out = s->total_out_;
1651 } else {
1652 size_t out_bytes = storage_ix >> 3;
1653 s->next_out_ = storage;
1654 s->available_out_ = out_bytes;
1655 }
1656 s->last_bytes_ = (uint16_t)(storage[storage_ix >> 3]);
1657 s->last_bytes_bits_ = storage_ix & 7u;
1658
1659 if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1660 if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
1661 continue;
1662 }
1663 break;
1664 }
1665 BROTLI_FREE(m, tmp_command_buf);
1666 BROTLI_FREE(m, tmp_literal_buf);
1667 CheckFlushComplete(s);
1668 return BROTLI_TRUE;
1669}
1670
1671static BROTLI_BOOL ProcessMetadata(
1672 BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in,
1673 size_t* available_out, uint8_t** next_out, size_t* total_out) {
1674 if (*available_in > (1u << 24)) return BROTLI_FALSE;
1675 /* Switch to metadata block workflow, if required. */
1676 if (s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1677 s->remaining_metadata_bytes_ = (uint32_t)*available_in;
1678 s->stream_state_ = BROTLI_STREAM_METADATA_HEAD;
1679 }
1680 if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD &&
1681 s->stream_state_ != BROTLI_STREAM_METADATA_BODY) {
1682 return BROTLI_FALSE;
1683 }
1684
1685 while (BROTLI_TRUE) {
1686 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1687 continue;
1688 }
1689 if (s->available_out_ != 0) break;
1690
1691 if (s->input_pos_ != s->last_flush_pos_) {
1692 BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE,
1693 &s->available_out_, &s->next_out_);
1694 if (!result) return BROTLI_FALSE;
1695 continue;
1696 }
1697
1698 if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) {
1699 s->next_out_ = s->tiny_buf_.u8;
1700 s->available_out_ =
1701 WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_);
1702 s->stream_state_ = BROTLI_STREAM_METADATA_BODY;
1703 continue;
1704 } else {
1705 /* Exit workflow only when there is no more input and no more output.
1706 Otherwise client may continue producing empty metadata blocks. */
1707 if (s->remaining_metadata_bytes_ == 0) {
1708 s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX;
1709 s->stream_state_ = BROTLI_STREAM_PROCESSING;
1710 break;
1711 }
1712 if (*available_out) {
1713 /* Directly copy input to output. */
1714 uint32_t copy = (uint32_t)BROTLI_MIN(
1715 size_t, s->remaining_metadata_bytes_, *available_out);
1716 memcpy(*next_out, *next_in, copy);
1717 *next_in += copy;
1718 *available_in -= copy;
1719 s->remaining_metadata_bytes_ -= copy;
1720 *next_out += copy;
1721 *available_out -= copy;
1722 } else {
1723 /* This guarantees progress in "TakeOutput" workflow. */
1724 uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16);
1725 s->next_out_ = s->tiny_buf_.u8;
1726 memcpy(s->next_out_, *next_in, copy);
1727 *next_in += copy;
1728 *available_in -= copy;
1729 s->remaining_metadata_bytes_ -= copy;
1730 s->available_out_ = copy;
1731 }
1732 continue;
1733 }
1734 }
1735
1736 return BROTLI_TRUE;
1737}
1738
1739static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) {
1740 if (s->params.size_hint == 0) {
1741 uint64_t delta = UnprocessedInputSize(s);
1742 uint64_t tail = available_in;
1743 uint32_t limit = 1u << 30;
1744 uint32_t total;
1745 if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) {
1746 total = limit;
1747 } else {
1748 total = (uint32_t)(delta + tail);
1749 }
1750 s->params.size_hint = total;
1751 }
1752}
1753
1754BROTLI_BOOL BrotliEncoderCompressStream(
1755 BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in,
1756 const uint8_t** next_in, size_t* available_out,uint8_t** next_out,
1757 size_t* total_out) {
1758 if (!EnsureInitialized(s)) return BROTLI_FALSE;
1759
1760 /* Unfinished metadata block; check requirements. */
1761 if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) {
1762 if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE;
1763 if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE;
1764 }
1765
1766 if (op == BROTLI_OPERATION_EMIT_METADATA) {
1767 UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */
1768 return ProcessMetadata(
1769 s, available_in, next_in, available_out, next_out, total_out);
1770 }
1771
1772 if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD ||
1773 s->stream_state_ == BROTLI_STREAM_METADATA_BODY) {
1774 return BROTLI_FALSE;
1775 }
1776
1777 if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) {
1778 return BROTLI_FALSE;
1779 }
1780 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY ||
1781 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) {
1782 return BrotliEncoderCompressStreamFast(s, op, available_in, next_in,
1783 available_out, next_out, total_out);
1784 }
1785 while (BROTLI_TRUE) {
1786 size_t remaining_block_size = RemainingInputBlockSize(s);
1787
1788 if (remaining_block_size != 0 && *available_in != 0) {
1789 size_t copy_input_size =
1790 BROTLI_MIN(size_t, remaining_block_size, *available_in);
1791 CopyInputToRingBuffer(s, copy_input_size, *next_in);
1792 *next_in += copy_input_size;
1793 *available_in -= copy_input_size;
1794 continue;
1795 }
1796
1797 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) {
1798 continue;
1799 }
1800
1801 /* Compress data only when internal output buffer is empty, stream is not
1802 finished and there is no pending flush request. */
1803 if (s->available_out_ == 0 &&
1804 s->stream_state_ == BROTLI_STREAM_PROCESSING) {
1805 if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) {
1806 BROTLI_BOOL is_last = TO_BROTLI_BOOL(
1807 (*available_in == 0) && op == BROTLI_OPERATION_FINISH);
1808 BROTLI_BOOL force_flush = TO_BROTLI_BOOL(
1809 (*available_in == 0) && op == BROTLI_OPERATION_FLUSH);
1810 BROTLI_BOOL result;
1811 UpdateSizeHint(s, *available_in);
1812 result = EncodeData(s, is_last, force_flush,
1813 &s->available_out_, &s->next_out_);
1814 if (!result) return BROTLI_FALSE;
1815 if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED;
1816 if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED;
1817 continue;
1818 }
1819 }
1820 break;
1821 }
1822 CheckFlushComplete(s);
1823 return BROTLI_TRUE;
1824}
1825
1826BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) {
1827 return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED &&
1828 !BrotliEncoderHasMoreOutput(s));
1829}
1830
1831BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) {
1832 return TO_BROTLI_BOOL(s->available_out_ != 0);
1833}
1834
1835const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) {
1836 size_t consumed_size = s->available_out_;
1837 uint8_t* result = s->next_out_;
1838 if (*size) {
1839 consumed_size = BROTLI_MIN(size_t, *size, s->available_out_);
1840 }
1841 if (consumed_size) {
1842 s->next_out_ += consumed_size;
1843 s->available_out_ -= consumed_size;
1844 s->total_out_ += consumed_size;
1845 CheckFlushComplete(s);
1846 *size = consumed_size;
1847 } else {
1848 *size = 0;
1849 result = 0;
1850 }
1851 return result;
1852}
1853
1854uint32_t BrotliEncoderVersion(void) {
1855 return BROTLI_VERSION;
1856}
1857
1858#if defined(__cplusplus) || defined(c_plusplus)
1859} /* extern "C" */
1860#endif
1861