| 1 | /* Copyright 2013 Google Inc. All Rights Reserved. |
| 2 | |
| 3 | Distributed under MIT license. |
| 4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| 5 | */ |
| 6 | |
| 7 | /* Block split point selection utilities. */ |
| 8 | |
| 9 | #include "./block_splitter.h" |
| 10 | |
| 11 | #include <string.h> /* memcpy, memset */ |
| 12 | |
| 13 | #include "../common/platform.h" |
| 14 | #include "./bit_cost.h" |
| 15 | #include "./cluster.h" |
| 16 | #include "./command.h" |
| 17 | #include "./fast_log.h" |
| 18 | #include "./histogram.h" |
| 19 | #include "./memory.h" |
| 20 | #include "./quality.h" |
| 21 | |
| 22 | #if defined(__cplusplus) || defined(c_plusplus) |
| 23 | extern "C" { |
| 24 | #endif |
| 25 | |
/* Tuning parameters handed by BrotliSplitBlock() to the per-category
   SplitByteVector*() routines.
   Note that the distance split is invoked with kMaxCommandHistograms and
   kCommandStrideLength: there are no distance-specific counterparts. */

/* Upper bounds on the number of histograms passed to the splitter. */
static const size_t kMaxLiteralHistograms = 100;
static const size_t kMaxCommandHistograms = 50;
/* Per-category block switch costs passed to the splitter. */
static const double kLiteralBlockSwitchCost = 28.1;
static const double kCommandBlockSwitchCost = 13.5;
static const double kDistanceBlockSwitchCost = 14.6;
/* Stride lengths passed to the splitter. */
static const size_t kLiteralStrideLength = 70;
static const size_t kCommandStrideLength = 40;
/* "Symbols per histogram" values passed to the splitter. */
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kSymbolsPerDistanceHistogram = 544;
/* NOTE(review): the three constants below are not referenced in the code
   visible here; presumably they are consumed by the code instantiated from
   block_splitter_inc.h -- confirm there. */
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
| 39 | |
| 40 | static size_t CountLiterals(const Command* cmds, const size_t num_commands) { |
| 41 | /* Count how many we have. */ |
| 42 | size_t total_length = 0; |
| 43 | size_t i; |
| 44 | for (i = 0; i < num_commands; ++i) { |
| 45 | total_length += cmds[i].insert_len_; |
| 46 | } |
| 47 | return total_length; |
| 48 | } |
| 49 | |
| 50 | static void CopyLiteralsToByteArray(const Command* cmds, |
| 51 | const size_t num_commands, |
| 52 | const uint8_t* data, |
| 53 | const size_t offset, |
| 54 | const size_t mask, |
| 55 | uint8_t* literals) { |
| 56 | size_t pos = 0; |
| 57 | size_t from_pos = offset & mask; |
| 58 | size_t i; |
| 59 | for (i = 0; i < num_commands; ++i) { |
| 60 | size_t insert_len = cmds[i].insert_len_; |
| 61 | if (from_pos + insert_len > mask) { |
| 62 | size_t head_size = mask + 1 - from_pos; |
| 63 | memcpy(literals + pos, data + from_pos, head_size); |
| 64 | from_pos = 0; |
| 65 | pos += head_size; |
| 66 | insert_len -= head_size; |
| 67 | } |
| 68 | if (insert_len > 0) { |
| 69 | memcpy(literals + pos, data + from_pos, insert_len); |
| 70 | pos += insert_len; |
| 71 | } |
| 72 | from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask; |
| 73 | } |
| 74 | } |
| 75 | |
| 76 | static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) { |
| 77 | /* Initial seed should be 7. In this case, loop length is (1 << 29). */ |
| 78 | *seed *= 16807U; |
| 79 | return *seed; |
| 80 | } |
| 81 | |
| 82 | static BROTLI_INLINE double BitCost(size_t count) { |
| 83 | return count == 0 ? -2.0 : FastLog2(count); |
| 84 | } |
| 85 | |
/* Batch sizes made visible to the code in block_splitter_inc.h, which is
   included after these definitions. */
#define HISTOGRAMS_PER_BATCH 64
#define CLUSTERS_PER_BATCH 16

/* block_splitter_inc.h is included three times below, each time with a
   different FN() name suffix (Literal, Command, Distance) and a DataType
   element type. BrotliSplitBlock() calls SplitByteVectorLiteral(),
   SplitByteVectorCommand() and SplitByteVectorDistance(), which are
   presumably generated as FN(SplitByteVector) by that header. */

/* Literal instantiation: elements are uint8_t. */
#define FN(X) X ## Literal
#define DataType uint8_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN

/* Command instantiation: elements are uint16_t.
   DataType is NOT undefined after this inclusion, so it stays in effect for
   the Distance instantiation that follows. */
#define FN(X) X ## Command
#define DataType uint16_t
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef FN

/* Distance instantiation: reuses DataType uint16_t from above. */
#define FN(X) X ## Distance
/* NOLINTNEXTLINE(build/include) */
#include "./block_splitter_inc.h"
#undef DataType
#undef FN
| 107 | |
| 108 | void BrotliInitBlockSplit(BlockSplit* self) { |
| 109 | self->num_types = 0; |
| 110 | self->num_blocks = 0; |
| 111 | self->types = 0; |
| 112 | self->lengths = 0; |
| 113 | self->types_alloc_size = 0; |
| 114 | self->lengths_alloc_size = 0; |
| 115 | } |
| 116 | |
/* Releases the |types| and |lengths| arrays of |self| through the memory
   manager |m|. The counters and allocation sizes of |self| are not reset by
   any statement here.
   NOTE(review): whether the two pointers are cleared afterwards depends on
   the definition of BROTLI_FREE in memory.h -- confirm there. */
void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
  BROTLI_FREE(m, self->types);
  BROTLI_FREE(m, self->lengths);
}
| 121 | |
| 122 | void BrotliSplitBlock(MemoryManager* m, |
| 123 | const Command* cmds, |
| 124 | const size_t num_commands, |
| 125 | const uint8_t* data, |
| 126 | const size_t pos, |
| 127 | const size_t mask, |
| 128 | const BrotliEncoderParams* params, |
| 129 | BlockSplit* literal_split, |
| 130 | BlockSplit* insert_and_copy_split, |
| 131 | BlockSplit* dist_split) { |
| 132 | { |
| 133 | size_t literals_count = CountLiterals(cmds, num_commands); |
| 134 | uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count); |
| 135 | if (BROTLI_IS_OOM(m)) return; |
| 136 | /* Create a continuous array of literals. */ |
| 137 | CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals); |
| 138 | /* Create the block split on the array of literals. |
| 139 | Literal histograms have alphabet size 256. */ |
| 140 | SplitByteVectorLiteral( |
| 141 | m, literals, literals_count, |
| 142 | kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, |
| 143 | kLiteralStrideLength, kLiteralBlockSwitchCost, params, |
| 144 | literal_split); |
| 145 | if (BROTLI_IS_OOM(m)) return; |
| 146 | BROTLI_FREE(m, literals); |
| 147 | } |
| 148 | |
| 149 | { |
| 150 | /* Compute prefix codes for commands. */ |
| 151 | uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands); |
| 152 | size_t i; |
| 153 | if (BROTLI_IS_OOM(m)) return; |
| 154 | for (i = 0; i < num_commands; ++i) { |
| 155 | insert_and_copy_codes[i] = cmds[i].cmd_prefix_; |
| 156 | } |
| 157 | /* Create the block split on the array of command prefixes. */ |
| 158 | SplitByteVectorCommand( |
| 159 | m, insert_and_copy_codes, num_commands, |
| 160 | kSymbolsPerCommandHistogram, kMaxCommandHistograms, |
| 161 | kCommandStrideLength, kCommandBlockSwitchCost, params, |
| 162 | insert_and_copy_split); |
| 163 | if (BROTLI_IS_OOM(m)) return; |
| 164 | /* TODO: reuse for distances? */ |
| 165 | BROTLI_FREE(m, insert_and_copy_codes); |
| 166 | } |
| 167 | |
| 168 | { |
| 169 | /* Create a continuous array of distance prefixes. */ |
| 170 | uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands); |
| 171 | size_t j = 0; |
| 172 | size_t i; |
| 173 | if (BROTLI_IS_OOM(m)) return; |
| 174 | for (i = 0; i < num_commands; ++i) { |
| 175 | const Command* cmd = &cmds[i]; |
| 176 | if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { |
| 177 | distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; |
| 178 | } |
| 179 | } |
| 180 | /* Create the block split on the array of distance prefixes. */ |
| 181 | SplitByteVectorDistance( |
| 182 | m, distance_prefixes, j, |
| 183 | kSymbolsPerDistanceHistogram, kMaxCommandHistograms, |
| 184 | kCommandStrideLength, kDistanceBlockSwitchCost, params, |
| 185 | dist_split); |
| 186 | if (BROTLI_IS_OOM(m)) return; |
| 187 | BROTLI_FREE(m, distance_prefixes); |
| 188 | } |
| 189 | } |
| 190 | |
| 191 | |
| 192 | #if defined(__cplusplus) || defined(c_plusplus) |
| 193 | } /* extern "C" */ |
| 194 | #endif |
| 195 | |