| 1 | /* NOLINT(build/header_guard) */ |
| 2 | /* Copyright 2015 Google Inc. All Rights Reserved. |
| 3 | |
| 4 | Distributed under MIT license. |
| 5 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 6 | */ |
| 7 | |
| 8 | /* template parameters: FN */ |
| 9 | |
| 10 | #define HistogramType FN(Histogram) |
| 11 | |
| 12 | /* Greedy block splitter for one block category (literal, command or distance). |
| 13 | */ |
| 14 | typedef struct FN(BlockSplitter) { |
| 15 | /* Alphabet size of particular block category. */ |
| 16 | size_t alphabet_size_; |
| 17 | /* We collect at least this many symbols for each block. */ |
| 18 | size_t min_block_size_; |
| 19 | /* We merge histograms A and B if |
| 20 | entropy(A+B) < entropy(A) + entropy(B) + split_threshold_, |
| 21 | where A is the current histogram and B is the histogram of the last or the |
| 22 | second last block type. */ |
| 23 | double split_threshold_; |
| 24 | |
| 25 | size_t num_blocks_; |
| 26 | BlockSplit* split_; /* not owned */ |
| 27 | HistogramType* histograms_; /* not owned */ |
| 28 | size_t* histograms_size_; /* not owned */ |
| 29 | |
| 30 | /* The number of symbols that we want to collect before deciding on whether |
| 31 | or not to merge the block with a previous one or emit a new block. */ |
| 32 | size_t target_block_size_; |
| 33 | /* The number of symbols in the current histogram. */ |
| 34 | size_t block_size_; |
| 35 | /* Offset of the current histogram. */ |
| 36 | size_t curr_histogram_ix_; |
| 37 | /* Offset of the histograms of the previous two block types. */ |
| 38 | size_t last_histogram_ix_[2]; |
| 39 | /* Entropy of the previous two block types. */ |
| 40 | double last_entropy_[2]; |
| 41 | /* The number of times we merged the current block with the last one. */ |
| 42 | size_t merge_last_count_; |
| 43 | } FN(BlockSplitter); |
| 44 | |
| 45 | static void FN(InitBlockSplitter)( |
| 46 | MemoryManager* m, FN(BlockSplitter)* self, size_t alphabet_size, |
| 47 | size_t min_block_size, double split_threshold, size_t num_symbols, |
| 48 | BlockSplit* split, HistogramType** histograms, size_t* histograms_size) { |
| 49 | size_t max_num_blocks = num_symbols / min_block_size + 1; |
| 50 | /* We have to allocate one more histogram than the maximum number of block |
| 51 | types for the current histogram when the meta-block is too big. */ |
| 52 | size_t max_num_types = |
| 53 | BROTLI_MIN(size_t, max_num_blocks, BROTLI_MAX_NUMBER_OF_BLOCK_TYPES + 1); |
| 54 | self->alphabet_size_ = alphabet_size; |
| 55 | self->min_block_size_ = min_block_size; |
| 56 | self->split_threshold_ = split_threshold; |
| 57 | self->num_blocks_ = 0; |
| 58 | self->split_ = split; |
| 59 | self->histograms_size_ = histograms_size; |
| 60 | self->target_block_size_ = min_block_size; |
| 61 | self->block_size_ = 0; |
| 62 | self->curr_histogram_ix_ = 0; |
| 63 | self->merge_last_count_ = 0; |
| 64 | BROTLI_ENSURE_CAPACITY(m, uint8_t, |
| 65 | split->types, split->types_alloc_size, max_num_blocks); |
| 66 | BROTLI_ENSURE_CAPACITY(m, uint32_t, |
| 67 | split->lengths, split->lengths_alloc_size, max_num_blocks); |
| 68 | if (BROTLI_IS_OOM(m)) return; |
| 69 | self->split_->num_blocks = max_num_blocks; |
| 70 | BROTLI_DCHECK(*histograms == 0); |
| 71 | *histograms_size = max_num_types; |
| 72 | *histograms = BROTLI_ALLOC(m, HistogramType, *histograms_size); |
| 73 | self->histograms_ = *histograms; |
| 74 | if (BROTLI_IS_OOM(m)) return; |
| 75 | /* Clear only current histogram. */ |
| 76 | FN(HistogramClear)(&self->histograms_[0]); |
| 77 | self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; |
| 78 | } |
| 79 | |
| 80 | /* Does either of three things: |
| 81 | (1) emits the current block with a new block type; |
| 82 | (2) emits the current block with the type of the second last block; |
| 83 | (3) merges the current block with the last block. */ |
| 84 | static void FN(BlockSplitterFinishBlock)( |
| 85 | FN(BlockSplitter)* self, BROTLI_BOOL is_final) { |
| 86 | BlockSplit* split = self->split_; |
| 87 | double* last_entropy = self->last_entropy_; |
| 88 | HistogramType* histograms = self->histograms_; |
| 89 | self->block_size_ = |
| 90 | BROTLI_MAX(size_t, self->block_size_, self->min_block_size_); |
| 91 | if (self->num_blocks_ == 0) { |
| 92 | /* Create first block. */ |
| 93 | split->lengths[0] = (uint32_t)self->block_size_; |
| 94 | split->types[0] = 0; |
| 95 | last_entropy[0] = |
| 96 | BitsEntropy(histograms[0].data_, self->alphabet_size_); |
| 97 | last_entropy[1] = last_entropy[0]; |
| 98 | ++self->num_blocks_; |
| 99 | ++split->num_types; |
| 100 | ++self->curr_histogram_ix_; |
| 101 | if (self->curr_histogram_ix_ < *self->histograms_size_) |
| 102 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); |
| 103 | self->block_size_ = 0; |
| 104 | } else if (self->block_size_ > 0) { |
| 105 | double entropy = BitsEntropy(histograms[self->curr_histogram_ix_].data_, |
| 106 | self->alphabet_size_); |
| 107 | HistogramType combined_histo[2]; |
| 108 | double combined_entropy[2]; |
| 109 | double diff[2]; |
| 110 | size_t j; |
| 111 | for (j = 0; j < 2; ++j) { |
| 112 | size_t last_histogram_ix = self->last_histogram_ix_[j]; |
| 113 | combined_histo[j] = histograms[self->curr_histogram_ix_]; |
| 114 | FN(HistogramAddHistogram)(&combined_histo[j], |
| 115 | &histograms[last_histogram_ix]); |
| 116 | combined_entropy[j] = BitsEntropy( |
| 117 | &combined_histo[j].data_[0], self->alphabet_size_); |
| 118 | diff[j] = combined_entropy[j] - entropy - last_entropy[j]; |
| 119 | } |
| 120 | |
| 121 | if (split->num_types < BROTLI_MAX_NUMBER_OF_BLOCK_TYPES && |
| 122 | diff[0] > self->split_threshold_ && |
| 123 | diff[1] > self->split_threshold_) { |
| 124 | /* Create new block. */ |
| 125 | split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; |
| 126 | split->types[self->num_blocks_] = (uint8_t)split->num_types; |
| 127 | self->last_histogram_ix_[1] = self->last_histogram_ix_[0]; |
| 128 | self->last_histogram_ix_[0] = (uint8_t)split->num_types; |
| 129 | last_entropy[1] = last_entropy[0]; |
| 130 | last_entropy[0] = entropy; |
| 131 | ++self->num_blocks_; |
| 132 | ++split->num_types; |
| 133 | ++self->curr_histogram_ix_; |
| 134 | if (self->curr_histogram_ix_ < *self->histograms_size_) |
| 135 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); |
| 136 | self->block_size_ = 0; |
| 137 | self->merge_last_count_ = 0; |
| 138 | self->target_block_size_ = self->min_block_size_; |
| 139 | } else if (diff[1] < diff[0] - 20.0) { |
| 140 | /* Combine this block with second last block. */ |
| 141 | split->lengths[self->num_blocks_] = (uint32_t)self->block_size_; |
| 142 | split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2]; |
| 143 | BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1); |
| 144 | histograms[self->last_histogram_ix_[0]] = combined_histo[1]; |
| 145 | last_entropy[1] = last_entropy[0]; |
| 146 | last_entropy[0] = combined_entropy[1]; |
| 147 | ++self->num_blocks_; |
| 148 | self->block_size_ = 0; |
| 149 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); |
| 150 | self->merge_last_count_ = 0; |
| 151 | self->target_block_size_ = self->min_block_size_; |
| 152 | } else { |
| 153 | /* Combine this block with last block. */ |
| 154 | split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_; |
| 155 | histograms[self->last_histogram_ix_[0]] = combined_histo[0]; |
| 156 | last_entropy[0] = combined_entropy[0]; |
| 157 | if (split->num_types == 1) { |
| 158 | last_entropy[1] = last_entropy[0]; |
| 159 | } |
| 160 | self->block_size_ = 0; |
| 161 | FN(HistogramClear)(&histograms[self->curr_histogram_ix_]); |
| 162 | if (++self->merge_last_count_ > 1) { |
| 163 | self->target_block_size_ += self->min_block_size_; |
| 164 | } |
| 165 | } |
| 166 | } |
| 167 | if (is_final) { |
| 168 | *self->histograms_size_ = split->num_types; |
| 169 | split->num_blocks = self->num_blocks_; |
| 170 | } |
| 171 | } |
| 172 | |
| 173 | /* Adds the next symbol to the current histogram. When the current histogram |
| 174 | reaches the target size, decides on merging the block. */ |
| 175 | static void FN(BlockSplitterAddSymbol)(FN(BlockSplitter)* self, size_t symbol) { |
| 176 | FN(HistogramAdd)(&self->histograms_[self->curr_histogram_ix_], symbol); |
| 177 | ++self->block_size_; |
| 178 | if (self->block_size_ == self->target_block_size_) { |
| 179 | FN(BlockSplitterFinishBlock)(self, /* is_final = */ BROTLI_FALSE); |
| 180 | } |
| 181 | } |
| 182 | |
| 183 | #undef HistogramType |
| 184 | |