1 | /**************************************************************************** |
2 | ** |
3 | ** Copyright (C) 2016 The Qt Company Ltd. |
4 | ** Contact: https://www.qt.io/licensing/ |
5 | ** |
6 | ** This file is part of the QtNetwork module of the Qt Toolkit. |
7 | ** |
8 | ** $QT_BEGIN_LICENSE:LGPL$ |
9 | ** Commercial License Usage |
10 | ** Licensees holding valid commercial Qt licenses may use this file in |
11 | ** accordance with the commercial license agreement provided with the |
12 | ** Software or, alternatively, in accordance with the terms contained in |
13 | ** a written agreement between you and The Qt Company. For licensing terms |
14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
15 | ** information use the contact form at https://www.qt.io/contact-us. |
16 | ** |
17 | ** GNU Lesser General Public License Usage |
18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
19 | ** General Public License version 3 as published by the Free Software |
20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
21 | ** packaging of this file. Please review the following information to |
22 | ** ensure the GNU Lesser General Public License version 3 requirements |
23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
24 | ** |
25 | ** GNU General Public License Usage |
26 | ** Alternatively, this file may be used under the terms of the GNU |
27 | ** General Public License version 2.0 or (at your option) the GNU General |
28 | ** Public license version 3 or any later version approved by the KDE Free |
29 | ** Qt Foundation. The licenses are as published by the Free Software |
30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
31 | ** included in the packaging of this file. Please review the following |
32 | ** information to ensure the GNU General Public License requirements will |
33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
35 | ** |
36 | ** $QT_END_LICENSE$ |
37 | ** |
38 | ****************************************************************************/ |
39 | |
40 | #include "bitstreams_p.h" |
41 | #include "huffman_p.h" |
42 | |
43 | #include <QtCore/qbytearray.h> |
44 | |
45 | #include <algorithm> |
46 | #include <limits> |
47 | |
48 | QT_BEGIN_NAMESPACE |
49 | |
50 | namespace HPack |
51 | { |
52 | |
53 | /* |
54 | The static Huffman code used here was extracted from: |
55 | https://http2.github.io/http2-spec/compression.html#huffman.code |
56 | |
57 | This code was generated from statistics obtained on a large |
58 | sample of HTTP headers. It is a canonical Huffman code |
59 | with some tweaking to ensure that no symbol has a unique |
60 | code length. All codes were left-aligned - for implementation |
61 | convenience. |
62 | |
63 | Using binary trees to implement decoding would be prohibitively |
64 | expensive (both memory and time-wise). Instead we use a table-based |
65 | approach and any given code itself works as an index into such table(s). |
66 | We have 256 possible byte values and code lengths in |
67 | a range [5, 26]. This would require a huge table (and most of entries |
68 | would be 'wasted', since we only have to encode 256 elements). |
69 | Instead we use multi-level tables. The first level table |
70 | is using 9-bit length index; some entries in this table are 'terminal', |
71 | some reference the next level table(s). |
72 | |
73 | For example, bytes with values 48 and 49 (ASCII codes for '0' and '1') |
74 | both have code length 5, Huffman codes are: 00000 and 00001. They |
75 | both are placed in the 'root' table, |
76 | the 'root' table has index length == 9: |
77 | [00000 | 4 remaining bits] |
78 | ... |
79 | [00001 | 4 remaining bits] |
80 | |
81 | All entires with indices between these two will 'point' to value 48 |
82 | with bitLength == 5 so that bit stream (for example) 000001010 will be |
83 | decoded as: 48 + "put 1010 back into bitstream". |
84 | |
85 | A good description can be found here: |
86 | http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007 |
87 | or just google "Efficient Huffman Decoding". |
88 | Also see comments below about 'filling holes'. |
89 | */ |
90 | |
91 | namespace |
92 | { |
93 | |
94 | const CodeEntry staticHuffmanCodeTable[] |
95 | { |
96 | { 0, 0xffc00000ul, 13}, // 11111111|11000 |
97 | { 1, 0xffffb000ul, 23}, // 11111111|11111111|1011000 |
98 | { 2, 0xfffffe20ul, 28}, // 11111111|11111111|11111110|0010 |
99 | { 3, 0xfffffe30ul, 28}, // 11111111|11111111|11111110|0011 |
100 | { 4, 0xfffffe40ul, 28}, // 11111111|11111111|11111110|0100 |
101 | { 5, 0xfffffe50ul, 28}, // 11111111|11111111|11111110|0101 |
102 | { 6, 0xfffffe60ul, 28}, // 11111111|11111111|11111110|0110 |
103 | { 7, 0xfffffe70ul, 28}, // 11111111|11111111|11111110|0111 |
104 | { 8, 0xfffffe80ul, 28}, // 11111111|11111111|11111110|1000 |
105 | { 9, 0xffffea00ul, 24}, // 11111111|11111111|11101010 |
106 | { 10, 0xfffffff0ul, 30}, // 11111111|11111111|11111111|111100 |
107 | { 11, 0xfffffe90ul, 28}, // 11111111|11111111|11111110|1001 |
108 | { 12, 0xfffffea0ul, 28}, // 11111111|11111111|11111110|1010 |
109 | { 13, 0xfffffff4ul, 30}, // 11111111|11111111|11111111|111101 |
110 | { 14, 0xfffffeb0ul, 28}, // 11111111|11111111|11111110|1011 |
111 | { 15, 0xfffffec0ul, 28}, // 11111111|11111111|11111110|1100 |
112 | { 16, 0xfffffed0ul, 28}, // 11111111|11111111|11111110|1101 |
113 | { 17, 0xfffffee0ul, 28}, // 11111111|11111111|11111110|1110 |
114 | { 18, 0xfffffef0ul, 28}, // 11111111|11111111|11111110|1111 |
115 | { 19, 0xffffff00ul, 28}, // 11111111|11111111|11111111|0000 |
116 | { 20, 0xffffff10ul, 28}, // 11111111|11111111|11111111|0001 |
117 | { 21, 0xffffff20ul, 28}, // 11111111|11111111|11111111|0010 |
118 | { 22, 0xfffffff8ul, 30}, // 11111111|11111111|11111111|111110 |
119 | { 23, 0xffffff30ul, 28}, // 11111111|11111111|11111111|0011 |
120 | { 24, 0xffffff40ul, 28}, // 11111111|11111111|11111111|0100 |
121 | { 25, 0xffffff50ul, 28}, // 11111111|11111111|11111111|0101 |
122 | { 26, 0xffffff60ul, 28}, // 11111111|11111111|11111111|0110 |
123 | { 27, 0xffffff70ul, 28}, // 11111111|11111111|11111111|0111 |
124 | { 28, 0xffffff80ul, 28}, // 11111111|11111111|11111111|1000 |
125 | { 29, 0xffffff90ul, 28}, // 11111111|11111111|11111111|1001 |
126 | { 30, 0xffffffa0ul, 28}, // 11111111|11111111|11111111|1010 |
127 | { 31, 0xffffffb0ul, 28}, // 11111111|11111111|11111111|1011 |
128 | { 32, 0x50000000ul, 6}, // ' ' 010100 |
129 | { 33, 0xfe000000ul, 10}, // '!' 11111110|00 |
130 | { 34, 0xfe400000ul, 10}, // '"' 11111110|01 |
131 | { 35, 0xffa00000ul, 12}, // '#' 11111111|1010 |
132 | { 36, 0xffc80000ul, 13}, // '$' 11111111|11001 |
133 | { 37, 0x54000000ul, 6}, // '%' 010101 |
134 | { 38, 0xf8000000ul, 8}, // '&' 11111000 |
135 | { 39, 0xff400000ul, 11}, // ''' 11111111|010 |
136 | { 40, 0xfe800000ul, 10}, // '(' 11111110|10 |
137 | { 41, 0xfec00000ul, 10}, // ')' 11111110|11 |
138 | { 42, 0xf9000000ul, 8}, // '*' 11111001 |
139 | { 43, 0xff600000ul, 11}, // '+' 11111111|011 |
140 | { 44, 0xfa000000ul, 8}, // ',' 11111010 |
141 | { 45, 0x58000000ul, 6}, // '-' 010110 |
142 | { 46, 0x5c000000ul, 6}, // '.' 010111 |
143 | { 47, 0x60000000ul, 6}, // '/' 011000 |
144 | { 48, 0x00000000ul, 5}, // '0' 00000 |
145 | { 49, 0x08000000ul, 5}, // '1' 00001 |
146 | { 50, 0x10000000ul, 5}, // '2' 00010 |
147 | { 51, 0x64000000ul, 6}, // '3' 011001 |
148 | { 52, 0x68000000ul, 6}, // '4' 011010 |
149 | { 53, 0x6c000000ul, 6}, // '5' 011011 |
150 | { 54, 0x70000000ul, 6}, // '6' 011100 |
151 | { 55, 0x74000000ul, 6}, // '7' 011101 |
152 | { 56, 0x78000000ul, 6}, // '8' 011110 |
153 | { 57, 0x7c000000ul, 6}, // '9' 011111 |
154 | { 58, 0xb8000000ul, 7}, // ':' 1011100 |
155 | { 59, 0xfb000000ul, 8}, // ';' 11111011 |
156 | { 60, 0xfff80000ul, 15}, // '<' 11111111|1111100 |
157 | { 61, 0x80000000ul, 6}, // '=' 100000 |
158 | { 62, 0xffb00000ul, 12}, // '>' 11111111|1011 |
159 | { 63, 0xff000000ul, 10}, // '?' 11111111|00 |
160 | { 64, 0xffd00000ul, 13}, // '@' 11111111|11010 |
161 | { 65, 0x84000000ul, 6}, // 'A' 100001 |
162 | { 66, 0xba000000ul, 7}, // 'B' 1011101 |
163 | { 67, 0xbc000000ul, 7}, // 'C' 1011110 |
164 | { 68, 0xbe000000ul, 7}, // 'D' 1011111 |
165 | { 69, 0xc0000000ul, 7}, // 'E' 1100000 |
166 | { 70, 0xc2000000ul, 7}, // 'F' 1100001 |
167 | { 71, 0xc4000000ul, 7}, // 'G' 1100010 |
168 | { 72, 0xc6000000ul, 7}, // 'H' 1100011 |
169 | { 73, 0xc8000000ul, 7}, // 'I' 1100100 |
170 | { 74, 0xca000000ul, 7}, // 'J' 1100101 |
171 | { 75, 0xcc000000ul, 7}, // 'K' 1100110 |
172 | { 76, 0xce000000ul, 7}, // 'L' 1100111 |
173 | { 77, 0xd0000000ul, 7}, // 'M' 1101000 |
174 | { 78, 0xd2000000ul, 7}, // 'N' 1101001 |
175 | { 79, 0xd4000000ul, 7}, // 'O' 1101010 |
176 | { 80, 0xd6000000ul, 7}, // 'P' 1101011 |
177 | { 81, 0xd8000000ul, 7}, // 'Q' 1101100 |
178 | { 82, 0xda000000ul, 7}, // 'R' 1101101 |
179 | { 83, 0xdc000000ul, 7}, // 'S' 1101110 |
180 | { 84, 0xde000000ul, 7}, // 'T' 1101111 |
181 | { 85, 0xe0000000ul, 7}, // 'U' 1110000 |
182 | { 86, 0xe2000000ul, 7}, // 'V' 1110001 |
183 | { 87, 0xe4000000ul, 7}, // 'W' 1110010 |
184 | { 88, 0xfc000000ul, 8}, // 'X' 11111100 |
185 | { 89, 0xe6000000ul, 7}, // 'Y' 1110011 |
186 | { 90, 0xfd000000ul, 8}, // 'Z' 11111101 |
187 | { 91, 0xffd80000ul, 13}, // '[' 11111111|11011 |
188 | { 92, 0xfffe0000ul, 19}, // '\' 11111111|11111110|000 |
189 | { 93, 0xffe00000ul, 13}, // ']' 11111111|11100 |
190 | { 94, 0xfff00000ul, 14}, // '^' 11111111|111100 |
191 | { 95, 0x88000000ul, 6}, // '_' 100010 |
192 | { 96, 0xfffa0000ul, 15}, // '`' 11111111|1111101 |
193 | { 97, 0x18000000ul, 5}, // 'a' 00011 |
194 | { 98, 0x8c000000ul, 6}, // 'b' 100011 |
195 | { 99, 0x20000000ul, 5}, // 'c' 00100 |
196 | {100, 0x90000000ul, 6}, // 'd' 100100 |
197 | {101, 0x28000000ul, 5}, // 'e' 00101 |
198 | {102, 0x94000000ul, 6}, // 'f' 100101 |
199 | {103, 0x98000000ul, 6}, // 'g' 100110 |
200 | {104, 0x9c000000ul, 6}, // 'h' 100111 |
201 | {105, 0x30000000ul, 5}, // 'i' 00110 |
202 | {106, 0xe8000000ul, 7}, // 'j' 1110100 |
203 | {107, 0xea000000ul, 7}, // 'k' 1110101 |
204 | {108, 0xa0000000ul, 6}, // 'l' 101000 |
205 | {109, 0xa4000000ul, 6}, // 'm' 101001 |
206 | {110, 0xa8000000ul, 6}, // 'n' 101010 |
207 | {111, 0x38000000ul, 5}, // 'o' 00111 |
208 | {112, 0xac000000ul, 6}, // 'p' 101011 |
209 | {113, 0xec000000ul, 7}, // 'q' 1110110 |
210 | {114, 0xb0000000ul, 6}, // 'r' 101100 |
211 | {115, 0x40000000ul, 5}, // 's' 01000 |
212 | {116, 0x48000000ul, 5}, // 't' 01001 |
213 | {117, 0xb4000000ul, 6}, // 'u' 101101 |
214 | {118, 0xee000000ul, 7}, // 'v' 1110111 |
215 | {119, 0xf0000000ul, 7}, // 'w' 1111000 |
216 | {120, 0xf2000000ul, 7}, // 'x' 1111001 |
217 | {121, 0xf4000000ul, 7}, // 'y' 1111010 |
218 | {122, 0xf6000000ul, 7}, // 'z' 1111011 |
219 | {123, 0xfffc0000ul, 15}, // '{' 11111111|1111110 |
220 | {124, 0xff800000ul, 11}, // '|' 11111111|100 |
221 | {125, 0xfff40000ul, 14}, // '}' 11111111|111101 |
222 | {126, 0xffe80000ul, 13}, // '~' 11111111|11101 |
223 | {127, 0xffffffc0ul, 28}, // 11111111|11111111|11111111|1100 |
224 | {128, 0xfffe6000ul, 20}, // 11111111|11111110|0110 |
225 | {129, 0xffff4800ul, 22}, // 11111111|11111111|010010 |
226 | {130, 0xfffe7000ul, 20}, // 11111111|11111110|0111 |
227 | {131, 0xfffe8000ul, 20}, // 11111111|11111110|1000 |
228 | {132, 0xffff4c00ul, 22}, // 11111111|11111111|010011 |
229 | {133, 0xffff5000ul, 22}, // 11111111|11111111|010100 |
230 | {134, 0xffff5400ul, 22}, // 11111111|11111111|010101 |
231 | {135, 0xffffb200ul, 23}, // 11111111|11111111|1011001 |
232 | {136, 0xffff5800ul, 22}, // 11111111|11111111|010110 |
233 | {137, 0xffffb400ul, 23}, // 11111111|11111111|1011010 |
234 | {138, 0xffffb600ul, 23}, // 11111111|11111111|1011011 |
235 | {139, 0xffffb800ul, 23}, // 11111111|11111111|1011100 |
236 | {140, 0xffffba00ul, 23}, // 11111111|11111111|1011101 |
237 | {141, 0xffffbc00ul, 23}, // 11111111|11111111|1011110 |
238 | {142, 0xffffeb00ul, 24}, // 11111111|11111111|11101011 |
239 | {143, 0xffffbe00ul, 23}, // 11111111|11111111|1011111 |
240 | {144, 0xffffec00ul, 24}, // 11111111|11111111|11101100 |
241 | {145, 0xffffed00ul, 24}, // 11111111|11111111|11101101 |
242 | {146, 0xffff5c00ul, 22}, // 11111111|11111111|010111 |
243 | {147, 0xffffc000ul, 23}, // 11111111|11111111|1100000 |
244 | {148, 0xffffee00ul, 24}, // 11111111|11111111|11101110 |
245 | {149, 0xffffc200ul, 23}, // 11111111|11111111|1100001 |
246 | {150, 0xffffc400ul, 23}, // 11111111|11111111|1100010 |
247 | {151, 0xffffc600ul, 23}, // 11111111|11111111|1100011 |
248 | {152, 0xffffc800ul, 23}, // 11111111|11111111|1100100 |
249 | {153, 0xfffee000ul, 21}, // 11111111|11111110|11100 |
250 | {154, 0xffff6000ul, 22}, // 11111111|11111111|011000 |
251 | {155, 0xffffca00ul, 23}, // 11111111|11111111|1100101 |
252 | {156, 0xffff6400ul, 22}, // 11111111|11111111|011001 |
253 | {157, 0xffffcc00ul, 23}, // 11111111|11111111|1100110 |
254 | {158, 0xffffce00ul, 23}, // 11111111|11111111|1100111 |
255 | {159, 0xffffef00ul, 24}, // 11111111|11111111|11101111 |
256 | {160, 0xffff6800ul, 22}, // 11111111|11111111|011010 |
257 | {161, 0xfffee800ul, 21}, // 11111111|11111110|11101 |
258 | {162, 0xfffe9000ul, 20}, // 11111111|11111110|1001 |
259 | {163, 0xffff6c00ul, 22}, // 11111111|11111111|011011 |
260 | {164, 0xffff7000ul, 22}, // 11111111|11111111|011100 |
261 | {165, 0xffffd000ul, 23}, // 11111111|11111111|1101000 |
262 | {166, 0xffffd200ul, 23}, // 11111111|11111111|1101001 |
263 | {167, 0xfffef000ul, 21}, // 11111111|11111110|11110 |
264 | {168, 0xffffd400ul, 23}, // 11111111|11111111|1101010 |
265 | {169, 0xffff7400ul, 22}, // 11111111|11111111|011101 |
266 | {170, 0xffff7800ul, 22}, // 11111111|11111111|011110 |
267 | {171, 0xfffff000ul, 24}, // 11111111|11111111|11110000 |
268 | {172, 0xfffef800ul, 21}, // 11111111|11111110|11111 |
269 | {173, 0xffff7c00ul, 22}, // 11111111|11111111|011111 |
270 | {174, 0xffffd600ul, 23}, // 11111111|11111111|1101011 |
271 | {175, 0xffffd800ul, 23}, // 11111111|11111111|1101100 |
272 | {176, 0xffff0000ul, 21}, // 11111111|11111111|00000 |
273 | {177, 0xffff0800ul, 21}, // 11111111|11111111|00001 |
274 | {178, 0xffff8000ul, 22}, // 11111111|11111111|100000 |
275 | {179, 0xffff1000ul, 21}, // 11111111|11111111|00010 |
276 | {180, 0xffffda00ul, 23}, // 11111111|11111111|1101101 |
277 | {181, 0xffff8400ul, 22}, // 11111111|11111111|100001 |
278 | {182, 0xffffdc00ul, 23}, // 11111111|11111111|1101110 |
279 | {183, 0xffffde00ul, 23}, // 11111111|11111111|1101111 |
280 | {184, 0xfffea000ul, 20}, // 11111111|11111110|1010 |
281 | {185, 0xffff8800ul, 22}, // 11111111|11111111|100010 |
282 | {186, 0xffff8c00ul, 22}, // 11111111|11111111|100011 |
283 | {187, 0xffff9000ul, 22}, // 11111111|11111111|100100 |
284 | {188, 0xffffe000ul, 23}, // 11111111|11111111|1110000 |
285 | {189, 0xffff9400ul, 22}, // 11111111|11111111|100101 |
286 | {190, 0xffff9800ul, 22}, // 11111111|11111111|100110 |
287 | {191, 0xffffe200ul, 23}, // 11111111|11111111|1110001 |
288 | {192, 0xfffff800ul, 26}, // 11111111|11111111|11111000|00 |
289 | {193, 0xfffff840ul, 26}, // 11111111|11111111|11111000|01 |
290 | {194, 0xfffeb000ul, 20}, // 11111111|11111110|1011 |
291 | {195, 0xfffe2000ul, 19}, // 11111111|11111110|001 |
292 | {196, 0xffff9c00ul, 22}, // 11111111|11111111|100111 |
293 | {197, 0xffffe400ul, 23}, // 11111111|11111111|1110010 |
294 | {198, 0xffffa000ul, 22}, // 11111111|11111111|101000 |
295 | {199, 0xfffff600ul, 25}, // 11111111|11111111|11110110|0 |
296 | {200, 0xfffff880ul, 26}, // 11111111|11111111|11111000|10 |
297 | {201, 0xfffff8c0ul, 26}, // 11111111|11111111|11111000|11 |
298 | {202, 0xfffff900ul, 26}, // 11111111|11111111|11111001|00 |
299 | {203, 0xfffffbc0ul, 27}, // 11111111|11111111|11111011|110 |
300 | {204, 0xfffffbe0ul, 27}, // 11111111|11111111|11111011|111 |
301 | {205, 0xfffff940ul, 26}, // 11111111|11111111|11111001|01 |
302 | {206, 0xfffff100ul, 24}, // 11111111|11111111|11110001 |
303 | {207, 0xfffff680ul, 25}, // 11111111|11111111|11110110|1 |
304 | {208, 0xfffe4000ul, 19}, // 11111111|11111110|010 |
305 | {209, 0xffff1800ul, 21}, // 11111111|11111111|00011 |
306 | {210, 0xfffff980ul, 26}, // 11111111|11111111|11111001|10 |
307 | {211, 0xfffffc00ul, 27}, // 11111111|11111111|11111100|000 |
308 | {212, 0xfffffc20ul, 27}, // 11111111|11111111|11111100|001 |
309 | {213, 0xfffff9c0ul, 26}, // 11111111|11111111|11111001|11 |
310 | {214, 0xfffffc40ul, 27}, // 11111111|11111111|11111100|010 |
311 | {215, 0xfffff200ul, 24}, // 11111111|11111111|11110010 |
312 | {216, 0xffff2000ul, 21}, // 11111111|11111111|00100 |
313 | {217, 0xffff2800ul, 21}, // 11111111|11111111|00101 |
314 | {218, 0xfffffa00ul, 26}, // 11111111|11111111|11111010|00 |
315 | {219, 0xfffffa40ul, 26}, // 11111111|11111111|11111010|01 |
316 | {220, 0xffffffd0ul, 28}, // 11111111|11111111|11111111|1101 |
317 | {221, 0xfffffc60ul, 27}, // 11111111|11111111|11111100|011 |
318 | {222, 0xfffffc80ul, 27}, // 11111111|11111111|11111100|100 |
319 | {223, 0xfffffca0ul, 27}, // 11111111|11111111|11111100|101 |
320 | {224, 0xfffec000ul, 20}, // 11111111|11111110|1100 |
321 | {225, 0xfffff300ul, 24}, // 11111111|11111111|11110011 |
322 | {226, 0xfffed000ul, 20}, // 11111111|11111110|1101 |
323 | {227, 0xffff3000ul, 21}, // 11111111|11111111|00110 |
324 | {228, 0xffffa400ul, 22}, // 11111111|11111111|101001 |
325 | {229, 0xffff3800ul, 21}, // 11111111|11111111|00111 |
326 | {230, 0xffff4000ul, 21}, // 11111111|11111111|01000 |
327 | {231, 0xffffe600ul, 23}, // 11111111|11111111|1110011 |
328 | {232, 0xffffa800ul, 22}, // 11111111|11111111|101010 |
329 | {233, 0xffffac00ul, 22}, // 11111111|11111111|101011 |
330 | {234, 0xfffff700ul, 25}, // 11111111|11111111|11110111|0 |
331 | {235, 0xfffff780ul, 25}, // 11111111|11111111|11110111|1 |
332 | {236, 0xfffff400ul, 24}, // 11111111|11111111|11110100 |
333 | {237, 0xfffff500ul, 24}, // 11111111|11111111|11110101 |
334 | {238, 0xfffffa80ul, 26}, // 11111111|11111111|11111010|10 |
335 | {239, 0xffffe800ul, 23}, // 11111111|11111111|1110100 |
336 | {240, 0xfffffac0ul, 26}, // 11111111|11111111|11111010|11 |
337 | {241, 0xfffffcc0ul, 27}, // 11111111|11111111|11111100|110 |
338 | {242, 0xfffffb00ul, 26}, // 11111111|11111111|11111011|00 |
339 | {243, 0xfffffb40ul, 26}, // 11111111|11111111|11111011|01 |
340 | {244, 0xfffffce0ul, 27}, // 11111111|11111111|11111100|111 |
341 | {245, 0xfffffd00ul, 27}, // 11111111|11111111|11111101|000 |
342 | {246, 0xfffffd20ul, 27}, // 11111111|11111111|11111101|001 |
343 | {247, 0xfffffd40ul, 27}, // 11111111|11111111|11111101|010 |
344 | {248, 0xfffffd60ul, 27}, // 11111111|11111111|11111101|011 |
345 | {249, 0xffffffe0ul, 28}, // 11111111|11111111|11111111|1110 |
346 | {250, 0xfffffd80ul, 27}, // 11111111|11111111|11111101|100 |
347 | {251, 0xfffffda0ul, 27}, // 11111111|11111111|11111101|101 |
348 | {252, 0xfffffdc0ul, 27}, // 11111111|11111111|11111101|110 |
349 | {253, 0xfffffde0ul, 27}, // 11111111|11111111|11111101|111 |
350 | {254, 0xfffffe00ul, 27}, // 11111111|11111111|11111110|000 |
351 | {255, 0xfffffb80ul, 26}, // 11111111|11111111|11111011|10 |
352 | {256, 0xfffffffcul, 30} // EOS 11111111|11111111|11111111|111111 |
353 | }; |
354 | |
355 | void write_huffman_code(BitOStream &outputStream, const CodeEntry &code) |
356 | { |
357 | // Append octet by octet. |
358 | auto bitLength = code.bitLength; |
359 | const auto hc = code.huffmanCode >> (32 - bitLength); |
360 | |
361 | if (bitLength > 24) { |
362 | outputStream.writeBits(uchar(hc >> 24), bitLength - 24); |
363 | bitLength = 24; |
364 | } |
365 | |
366 | if (bitLength > 16) { |
367 | outputStream.writeBits(uchar(hc >> 16), bitLength - 16); |
368 | bitLength = 16; |
369 | } |
370 | |
371 | if (bitLength > 8) { |
372 | outputStream.writeBits(uchar(hc >> 8), bitLength - 8); |
373 | bitLength = 8; |
374 | } |
375 | |
376 | outputStream.writeBits(uchar(hc), bitLength); |
377 | } |
378 | |
379 | } |
380 | |
381 | // That's from HPACK's specs - we deal with octets. |
382 | static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected" ); |
383 | |
384 | quint64 huffman_encoded_bit_length(const QByteArray &inputData) |
385 | { |
386 | quint64 bitLength = 0; |
387 | for (int i = 0, e = inputData.size(); i < e; ++i) |
388 | bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength; |
389 | |
390 | return bitLength; |
391 | } |
392 | |
393 | void huffman_encode_string(const QByteArray &inputData, BitOStream &outputStream) |
394 | { |
395 | for (int i = 0, e = inputData.size(); i < e; ++i) { |
396 | const auto value = uchar(inputData[i]); |
397 | write_huffman_code(outputStream, staticHuffmanCodeTable[value]); |
398 | } |
399 | |
400 | // Pad bits ... |
401 | if (outputStream.bitLength() % 8) |
402 | outputStream.writeBits(0xff, 8 - outputStream.bitLength() % 8); |
403 | } |
404 | |
405 | bool padding_is_valid(quint32 chunk, quint32 nBits) |
406 | { |
407 | Q_ASSERT(nBits); |
408 | |
409 | // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be |
410 | // treated as a decoding error." |
411 | if (nBits > 7) |
412 | return false; |
413 | // HPACK, 5.2: |
414 | // "A padding not corresponding to the most significant bits |
415 | // of the code for the EOS symbol MUST be treated as a decoding error." |
416 | return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1); |
417 | } |
418 | |
419 | HuffmanDecoder::HuffmanDecoder() |
420 | : minCodeLength() |
421 | { |
422 | const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0]; |
423 | |
424 | std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes); |
425 | // Now we sort: by bit length first (in the descending order) and by the symbol |
426 | // value (descending). Descending order: to make sure we do not create prefix tables with |
427 | // short 'indexLength' first and having longer codes that do not fit into such tables later. |
428 | std::sort(symbols.begin(), symbols.end(), [](const CodeEntry &code1, const CodeEntry &code2) { |
429 | if (code1.bitLength == code2.bitLength) |
430 | return code1.byteValue > code2.byteValue; |
431 | return code1.bitLength > code2.bitLength; |
432 | }); |
433 | |
434 | minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5. |
435 | |
436 | // TODO: add a verification - Huffman codes |
437 | // within a given bit length range also |
438 | // should be in descending order. |
439 | |
440 | // Initialize 'prefixTables' and 'tableData'. |
441 | addTable(0, quint32(BitConstants::rootPrefix)); // 'root' table. |
442 | |
443 | for (const auto &s : symbols) { |
444 | quint32 tableIndex = 0; |
445 | while (true) { |
446 | Q_ASSERT(tableIndex < prefixTables.size()); |
447 | // Note, by value - since prefixTables will be updated in between. |
448 | const auto table = prefixTables[tableIndex]; |
449 | // We skip prefixed bits (if any) and use indexed bits only: |
450 | const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength); |
451 | // Again, by value. |
452 | PrefixTableEntry entry = tableEntry(table, entryIndex); |
453 | // How many bits were coded by previous tables and this table: |
454 | const auto codedLength = table.prefixLength + table.indexLength; |
455 | if (codedLength < s.bitLength) { |
456 | // We have to add a new prefix table ... (if it's not done yet). |
457 | if (!entry.bitLength) { |
458 | entry.nextTable = addTable(codedLength, std::min<quint32>(quint32(BitConstants::childPrefix), |
459 | s.bitLength - codedLength)); |
460 | entry.bitLength = s.bitLength; |
461 | entry.byteValue = s.byteValue; |
462 | setTableEntry(table, entryIndex, entry); |
463 | } |
464 | tableIndex = entry.nextTable; |
465 | } else { |
466 | // We found the slot for our code (terminal entry): |
467 | entry.byteValue = s.byteValue; |
468 | entry.bitLength = s.bitLength; |
469 | // Refer to our own table as 'nextTable': |
470 | entry.nextTable = tableIndex; |
471 | setTableEntry(table, entryIndex, entry); |
472 | break; |
473 | } |
474 | } |
475 | } |
476 | |
477 | // Now, we have a table(s) and have to fill 'holes' to |
478 | // 'fix' terminal entries. |
479 | for (const auto &table : prefixTables) { |
480 | const quint32 codedLength = table.prefixLength + table.indexLength; |
481 | for (quint32 j = 0; j < table.size();) { |
482 | const PrefixTableEntry &entry = tableEntry(table, j); |
483 | if (entry.bitLength && entry.bitLength < codedLength) { |
484 | const quint32 range = 1 << (codedLength - entry.bitLength); |
485 | for (quint32 k = 1; k < range; ++k) |
486 | setTableEntry(table, j + k, entry); |
487 | j += range; |
488 | } else { |
489 | ++j; |
490 | } |
491 | } |
492 | } |
493 | } |
494 | |
495 | bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer) |
496 | { |
497 | while (true) { |
498 | quint32 chunk = 0; |
499 | const quint32 readBits = inputStream.peekBits(inputStream.streamOffset(), 32, &chunk); |
500 | if (!readBits) |
501 | return !inputStream.hasMoreBits(); |
502 | |
503 | if (readBits < minCodeLength) { |
504 | inputStream.skipBits(readBits); |
505 | return padding_is_valid(chunk, readBits); |
506 | } |
507 | |
508 | quint32 tableIndex = 0; |
509 | const PrefixTable *table = &prefixTables[tableIndex]; |
510 | quint32 entryIndex = chunk >> (32 - table->indexLength); |
511 | PrefixTableEntry entry = tableEntry(*table, entryIndex); |
512 | |
513 | while (true) { |
514 | if (entry.nextTable == tableIndex) |
515 | break; |
516 | |
517 | tableIndex = entry.nextTable; |
518 | table = &prefixTables[tableIndex]; |
519 | entryIndex = chunk << table->prefixLength >> (32 - table->indexLength); |
520 | entry = tableEntry(*table, entryIndex); |
521 | } |
522 | |
523 | if (entry.bitLength > readBits) { |
524 | inputStream.skipBits(readBits); |
525 | return padding_is_valid(chunk, readBits); |
526 | } |
527 | |
528 | if (!entry.bitLength || entry.byteValue == 256) { |
529 | //EOS (256) == compression error (HPACK). |
530 | inputStream.skipBits(readBits); |
531 | return false; |
532 | } |
533 | |
534 | outputBuffer.append(entry.byteValue); |
535 | inputStream.skipBits(entry.bitLength); |
536 | } |
537 | |
538 | return false; |
539 | } |
540 | |
541 | quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index) |
542 | { |
543 | PrefixTable newTable{prefix, index}; |
544 | newTable.offset = quint32(tableData.size()); |
545 | prefixTables.push_back(newTable); |
546 | // Add entries for this table: |
547 | tableData.resize(tableData.size() + newTable.size()); |
548 | |
549 | return quint32(prefixTables.size() - 1); |
550 | } |
551 | |
552 | PrefixTableEntry HuffmanDecoder::tableEntry(const PrefixTable &table, quint32 index) |
553 | { |
554 | Q_ASSERT(index < table.size()); |
555 | return tableData[table.offset + index]; |
556 | } |
557 | |
558 | void HuffmanDecoder::setTableEntry(const PrefixTable &table, quint32 index, |
559 | const PrefixTableEntry &entry) |
560 | { |
561 | Q_ASSERT(index < table.size()); |
562 | tableData[table.offset + index] = entry; |
563 | } |
564 | |
565 | bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer) |
566 | { |
567 | Q_ASSERT(outputBuffer); |
568 | |
569 | static HuffmanDecoder decoder; |
570 | return decoder.decodeStream(inputStream, *outputBuffer); |
571 | } |
572 | |
573 | } |
574 | |
575 | QT_END_NAMESPACE |
576 | |