1 | /* Copyright 2013 Google Inc. All Rights Reserved. |
2 | |
3 | Distributed under MIT license. |
4 | See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
5 | */ |
6 | |
7 | /* Block split point selection utilities. */ |
8 | |
9 | #include "./block_splitter.h" |
10 | |
11 | #include <string.h> /* memcpy, memset */ |
12 | |
13 | #include "../common/platform.h" |
14 | #include "./bit_cost.h" |
15 | #include "./cluster.h" |
16 | #include "./command.h" |
17 | #include "./fast_log.h" |
18 | #include "./histogram.h" |
19 | #include "./memory.h" |
20 | #include "./quality.h" |
21 | |
22 | #if defined(__cplusplus) || defined(c_plusplus) |
23 | extern "C" { |
24 | #endif |
25 | |
26 | static const size_t kMaxLiteralHistograms = 100; |
27 | static const size_t kMaxCommandHistograms = 50; |
28 | static const double kLiteralBlockSwitchCost = 28.1; |
29 | static const double kCommandBlockSwitchCost = 13.5; |
30 | static const double kDistanceBlockSwitchCost = 14.6; |
31 | static const size_t kLiteralStrideLength = 70; |
32 | static const size_t kCommandStrideLength = 40; |
33 | static const size_t kSymbolsPerLiteralHistogram = 544; |
34 | static const size_t kSymbolsPerCommandHistogram = 530; |
35 | static const size_t kSymbolsPerDistanceHistogram = 544; |
36 | static const size_t kMinLengthForBlockSplitting = 128; |
37 | static const size_t kIterMulForRefining = 2; |
38 | static const size_t kMinItersForRefining = 100; |
39 | |
40 | static size_t CountLiterals(const Command* cmds, const size_t num_commands) { |
41 | /* Count how many we have. */ |
42 | size_t total_length = 0; |
43 | size_t i; |
44 | for (i = 0; i < num_commands; ++i) { |
45 | total_length += cmds[i].insert_len_; |
46 | } |
47 | return total_length; |
48 | } |
49 | |
50 | static void CopyLiteralsToByteArray(const Command* cmds, |
51 | const size_t num_commands, |
52 | const uint8_t* data, |
53 | const size_t offset, |
54 | const size_t mask, |
55 | uint8_t* literals) { |
56 | size_t pos = 0; |
57 | size_t from_pos = offset & mask; |
58 | size_t i; |
59 | for (i = 0; i < num_commands; ++i) { |
60 | size_t insert_len = cmds[i].insert_len_; |
61 | if (from_pos + insert_len > mask) { |
62 | size_t head_size = mask + 1 - from_pos; |
63 | memcpy(literals + pos, data + from_pos, head_size); |
64 | from_pos = 0; |
65 | pos += head_size; |
66 | insert_len -= head_size; |
67 | } |
68 | if (insert_len > 0) { |
69 | memcpy(literals + pos, data + from_pos, insert_len); |
70 | pos += insert_len; |
71 | } |
72 | from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask; |
73 | } |
74 | } |
75 | |
76 | static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) { |
77 | /* Initial seed should be 7. In this case, loop length is (1 << 29). */ |
78 | *seed *= 16807U; |
79 | return *seed; |
80 | } |
81 | |
82 | static BROTLI_INLINE double BitCost(size_t count) { |
83 | return count == 0 ? -2.0 : FastLog2(count); |
84 | } |
85 | |
86 | #define HISTOGRAMS_PER_BATCH 64 |
87 | #define CLUSTERS_PER_BATCH 16 |
88 | |
89 | #define FN(X) X ## Literal |
90 | #define DataType uint8_t |
91 | /* NOLINTNEXTLINE(build/include) */ |
92 | #include "./block_splitter_inc.h" |
93 | #undef DataType |
94 | #undef FN |
95 | |
96 | #define FN(X) X ## Command |
97 | #define DataType uint16_t |
98 | /* NOLINTNEXTLINE(build/include) */ |
99 | #include "./block_splitter_inc.h" |
100 | #undef FN |
101 | |
102 | #define FN(X) X ## Distance |
103 | /* NOLINTNEXTLINE(build/include) */ |
104 | #include "./block_splitter_inc.h" |
105 | #undef DataType |
106 | #undef FN |
107 | |
108 | void BrotliInitBlockSplit(BlockSplit* self) { |
109 | self->num_types = 0; |
110 | self->num_blocks = 0; |
111 | self->types = 0; |
112 | self->lengths = 0; |
113 | self->types_alloc_size = 0; |
114 | self->lengths_alloc_size = 0; |
115 | } |
116 | |
117 | void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) { |
118 | BROTLI_FREE(m, self->types); |
119 | BROTLI_FREE(m, self->lengths); |
120 | } |
121 | |
122 | void BrotliSplitBlock(MemoryManager* m, |
123 | const Command* cmds, |
124 | const size_t num_commands, |
125 | const uint8_t* data, |
126 | const size_t pos, |
127 | const size_t mask, |
128 | const BrotliEncoderParams* params, |
129 | BlockSplit* literal_split, |
130 | BlockSplit* insert_and_copy_split, |
131 | BlockSplit* dist_split) { |
132 | { |
133 | size_t literals_count = CountLiterals(cmds, num_commands); |
134 | uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count); |
135 | if (BROTLI_IS_OOM(m)) return; |
136 | /* Create a continuous array of literals. */ |
137 | CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals); |
138 | /* Create the block split on the array of literals. |
139 | Literal histograms have alphabet size 256. */ |
140 | SplitByteVectorLiteral( |
141 | m, literals, literals_count, |
142 | kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, |
143 | kLiteralStrideLength, kLiteralBlockSwitchCost, params, |
144 | literal_split); |
145 | if (BROTLI_IS_OOM(m)) return; |
146 | BROTLI_FREE(m, literals); |
147 | } |
148 | |
149 | { |
150 | /* Compute prefix codes for commands. */ |
151 | uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands); |
152 | size_t i; |
153 | if (BROTLI_IS_OOM(m)) return; |
154 | for (i = 0; i < num_commands; ++i) { |
155 | insert_and_copy_codes[i] = cmds[i].cmd_prefix_; |
156 | } |
157 | /* Create the block split on the array of command prefixes. */ |
158 | SplitByteVectorCommand( |
159 | m, insert_and_copy_codes, num_commands, |
160 | kSymbolsPerCommandHistogram, kMaxCommandHistograms, |
161 | kCommandStrideLength, kCommandBlockSwitchCost, params, |
162 | insert_and_copy_split); |
163 | if (BROTLI_IS_OOM(m)) return; |
164 | /* TODO: reuse for distances? */ |
165 | BROTLI_FREE(m, insert_and_copy_codes); |
166 | } |
167 | |
168 | { |
169 | /* Create a continuous array of distance prefixes. */ |
170 | uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands); |
171 | size_t j = 0; |
172 | size_t i; |
173 | if (BROTLI_IS_OOM(m)) return; |
174 | for (i = 0; i < num_commands; ++i) { |
175 | const Command* cmd = &cmds[i]; |
176 | if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { |
177 | distance_prefixes[j++] = cmd->dist_prefix_ & 0x3FF; |
178 | } |
179 | } |
180 | /* Create the block split on the array of distance prefixes. */ |
181 | SplitByteVectorDistance( |
182 | m, distance_prefixes, j, |
183 | kSymbolsPerDistanceHistogram, kMaxCommandHistograms, |
184 | kCommandStrideLength, kDistanceBlockSwitchCost, params, |
185 | dist_split); |
186 | if (BROTLI_IS_OOM(m)) return; |
187 | BROTLI_FREE(m, distance_prefixes); |
188 | } |
189 | } |
190 | |
191 | |
192 | #if defined(__cplusplus) || defined(c_plusplus) |
193 | } /* extern "C" */ |
194 | #endif |
195 | |