| 1 | /**************************************************************************** |
| 2 | ** |
| 3 | ** Copyright (C) 2016 The Qt Company Ltd. |
| 4 | ** Contact: https://www.qt.io/licensing/ |
| 5 | ** |
| 6 | ** This file is part of the QtNetwork module of the Qt Toolkit. |
| 7 | ** |
| 8 | ** $QT_BEGIN_LICENSE:LGPL$ |
| 9 | ** Commercial License Usage |
| 10 | ** Licensees holding valid commercial Qt licenses may use this file in |
| 11 | ** accordance with the commercial license agreement provided with the |
| 12 | ** Software or, alternatively, in accordance with the terms contained in |
| 13 | ** a written agreement between you and The Qt Company. For licensing terms |
| 14 | ** and conditions see https://www.qt.io/terms-conditions. For further |
| 15 | ** information use the contact form at https://www.qt.io/contact-us. |
| 16 | ** |
| 17 | ** GNU Lesser General Public License Usage |
| 18 | ** Alternatively, this file may be used under the terms of the GNU Lesser |
| 19 | ** General Public License version 3 as published by the Free Software |
| 20 | ** Foundation and appearing in the file LICENSE.LGPL3 included in the |
| 21 | ** packaging of this file. Please review the following information to |
| 22 | ** ensure the GNU Lesser General Public License version 3 requirements |
| 23 | ** will be met: https://www.gnu.org/licenses/lgpl-3.0.html. |
| 24 | ** |
| 25 | ** GNU General Public License Usage |
| 26 | ** Alternatively, this file may be used under the terms of the GNU |
| 27 | ** General Public License version 2.0 or (at your option) the GNU General |
| 28 | ** Public license version 3 or any later version approved by the KDE Free |
| 29 | ** Qt Foundation. The licenses are as published by the Free Software |
| 30 | ** Foundation and appearing in the file LICENSE.GPL2 and LICENSE.GPL3 |
| 31 | ** included in the packaging of this file. Please review the following |
| 32 | ** information to ensure the GNU General Public License requirements will |
| 33 | ** be met: https://www.gnu.org/licenses/gpl-2.0.html and |
| 34 | ** https://www.gnu.org/licenses/gpl-3.0.html. |
| 35 | ** |
| 36 | ** $QT_END_LICENSE$ |
| 37 | ** |
| 38 | ****************************************************************************/ |
| 39 | |
| 40 | #include "bitstreams_p.h" |
| 41 | #include "huffman_p.h" |
| 42 | |
| 43 | #include <QtCore/qbytearray.h> |
| 44 | |
| 45 | #include <algorithm> |
| 46 | #include <limits> |
| 47 | |
| 48 | QT_BEGIN_NAMESPACE |
| 49 | |
| 50 | namespace HPack |
| 51 | { |
| 52 | |
| 53 | /* |
| 54 | The static Huffman code used here was extracted from: |
| 55 | https://http2.github.io/http2-spec/compression.html#huffman.code |
| 56 | |
| 57 | This code was generated from statistics obtained on a large |
| 58 | sample of HTTP headers. It is a canonical Huffman code |
| 59 | with some tweaking to ensure that no symbol has a unique |
| 60 | code length. All codes were left-aligned - for implementation |
| 61 | convenience. |
| 62 | |
| 63 | Using binary trees to implement decoding would be prohibitively |
| 64 | expensive (both memory and time-wise). Instead we use a table-based |
| 65 | approach and any given code itself works as an index into such table(s). |
| 66 | We have 256 possible byte values and code lengths in |
| 67 | a range [5, 26]. This would require a huge table (and most of entries |
| 68 | would be 'wasted', since we only have to encode 256 elements). |
| 69 | Instead we use multi-level tables. The first level table |
| 70 | is using 9-bit length index; some entries in this table are 'terminal', |
| 71 | some reference the next level table(s). |
| 72 | |
| 73 | For example, bytes with values 48 and 49 (ASCII codes for '0' and '1') |
| 74 | both have code length 5, Huffman codes are: 00000 and 00001. They |
| 75 | both are placed in the 'root' table, |
| 76 | the 'root' table has index length == 9: |
| 77 | [00000 | 4 remaining bits] |
| 78 | ... |
| 79 | [00001 | 4 remaining bits] |
| 80 | |
| 81 | All entires with indices between these two will 'point' to value 48 |
| 82 | with bitLength == 5 so that bit stream (for example) 000001010 will be |
| 83 | decoded as: 48 + "put 1010 back into bitstream". |
| 84 | |
| 85 | A good description can be found here: |
| 86 | http://commandlinefanatic.com/cgi-bin/showarticle.cgi?article=art007 |
| 87 | or just google "Efficient Huffman Decoding". |
| 88 | Also see comments below about 'filling holes'. |
| 89 | */ |
| 90 | |
| 91 | namespace |
| 92 | { |
| 93 | |
| 94 | const CodeEntry staticHuffmanCodeTable[] |
| 95 | { |
| 96 | { 0, 0xffc00000ul, 13}, // 11111111|11000 |
| 97 | { 1, 0xffffb000ul, 23}, // 11111111|11111111|1011000 |
| 98 | { 2, 0xfffffe20ul, 28}, // 11111111|11111111|11111110|0010 |
| 99 | { 3, 0xfffffe30ul, 28}, // 11111111|11111111|11111110|0011 |
| 100 | { 4, 0xfffffe40ul, 28}, // 11111111|11111111|11111110|0100 |
| 101 | { 5, 0xfffffe50ul, 28}, // 11111111|11111111|11111110|0101 |
| 102 | { 6, 0xfffffe60ul, 28}, // 11111111|11111111|11111110|0110 |
| 103 | { 7, 0xfffffe70ul, 28}, // 11111111|11111111|11111110|0111 |
| 104 | { 8, 0xfffffe80ul, 28}, // 11111111|11111111|11111110|1000 |
| 105 | { 9, 0xffffea00ul, 24}, // 11111111|11111111|11101010 |
| 106 | { 10, 0xfffffff0ul, 30}, // 11111111|11111111|11111111|111100 |
| 107 | { 11, 0xfffffe90ul, 28}, // 11111111|11111111|11111110|1001 |
| 108 | { 12, 0xfffffea0ul, 28}, // 11111111|11111111|11111110|1010 |
| 109 | { 13, 0xfffffff4ul, 30}, // 11111111|11111111|11111111|111101 |
| 110 | { 14, 0xfffffeb0ul, 28}, // 11111111|11111111|11111110|1011 |
| 111 | { 15, 0xfffffec0ul, 28}, // 11111111|11111111|11111110|1100 |
| 112 | { 16, 0xfffffed0ul, 28}, // 11111111|11111111|11111110|1101 |
| 113 | { 17, 0xfffffee0ul, 28}, // 11111111|11111111|11111110|1110 |
| 114 | { 18, 0xfffffef0ul, 28}, // 11111111|11111111|11111110|1111 |
| 115 | { 19, 0xffffff00ul, 28}, // 11111111|11111111|11111111|0000 |
| 116 | { 20, 0xffffff10ul, 28}, // 11111111|11111111|11111111|0001 |
| 117 | { 21, 0xffffff20ul, 28}, // 11111111|11111111|11111111|0010 |
| 118 | { 22, 0xfffffff8ul, 30}, // 11111111|11111111|11111111|111110 |
| 119 | { 23, 0xffffff30ul, 28}, // 11111111|11111111|11111111|0011 |
| 120 | { 24, 0xffffff40ul, 28}, // 11111111|11111111|11111111|0100 |
| 121 | { 25, 0xffffff50ul, 28}, // 11111111|11111111|11111111|0101 |
| 122 | { 26, 0xffffff60ul, 28}, // 11111111|11111111|11111111|0110 |
| 123 | { 27, 0xffffff70ul, 28}, // 11111111|11111111|11111111|0111 |
| 124 | { 28, 0xffffff80ul, 28}, // 11111111|11111111|11111111|1000 |
| 125 | { 29, 0xffffff90ul, 28}, // 11111111|11111111|11111111|1001 |
| 126 | { 30, 0xffffffa0ul, 28}, // 11111111|11111111|11111111|1010 |
| 127 | { 31, 0xffffffb0ul, 28}, // 11111111|11111111|11111111|1011 |
| 128 | { 32, 0x50000000ul, 6}, // ' ' 010100 |
| 129 | { 33, 0xfe000000ul, 10}, // '!' 11111110|00 |
| 130 | { 34, 0xfe400000ul, 10}, // '"' 11111110|01 |
| 131 | { 35, 0xffa00000ul, 12}, // '#' 11111111|1010 |
| 132 | { 36, 0xffc80000ul, 13}, // '$' 11111111|11001 |
| 133 | { 37, 0x54000000ul, 6}, // '%' 010101 |
| 134 | { 38, 0xf8000000ul, 8}, // '&' 11111000 |
| 135 | { 39, 0xff400000ul, 11}, // ''' 11111111|010 |
| 136 | { 40, 0xfe800000ul, 10}, // '(' 11111110|10 |
| 137 | { 41, 0xfec00000ul, 10}, // ')' 11111110|11 |
| 138 | { 42, 0xf9000000ul, 8}, // '*' 11111001 |
| 139 | { 43, 0xff600000ul, 11}, // '+' 11111111|011 |
| 140 | { 44, 0xfa000000ul, 8}, // ',' 11111010 |
| 141 | { 45, 0x58000000ul, 6}, // '-' 010110 |
| 142 | { 46, 0x5c000000ul, 6}, // '.' 010111 |
| 143 | { 47, 0x60000000ul, 6}, // '/' 011000 |
| 144 | { 48, 0x00000000ul, 5}, // '0' 00000 |
| 145 | { 49, 0x08000000ul, 5}, // '1' 00001 |
| 146 | { 50, 0x10000000ul, 5}, // '2' 00010 |
| 147 | { 51, 0x64000000ul, 6}, // '3' 011001 |
| 148 | { 52, 0x68000000ul, 6}, // '4' 011010 |
| 149 | { 53, 0x6c000000ul, 6}, // '5' 011011 |
| 150 | { 54, 0x70000000ul, 6}, // '6' 011100 |
| 151 | { 55, 0x74000000ul, 6}, // '7' 011101 |
| 152 | { 56, 0x78000000ul, 6}, // '8' 011110 |
| 153 | { 57, 0x7c000000ul, 6}, // '9' 011111 |
| 154 | { 58, 0xb8000000ul, 7}, // ':' 1011100 |
| 155 | { 59, 0xfb000000ul, 8}, // ';' 11111011 |
| 156 | { 60, 0xfff80000ul, 15}, // '<' 11111111|1111100 |
| 157 | { 61, 0x80000000ul, 6}, // '=' 100000 |
| 158 | { 62, 0xffb00000ul, 12}, // '>' 11111111|1011 |
| 159 | { 63, 0xff000000ul, 10}, // '?' 11111111|00 |
| 160 | { 64, 0xffd00000ul, 13}, // '@' 11111111|11010 |
| 161 | { 65, 0x84000000ul, 6}, // 'A' 100001 |
| 162 | { 66, 0xba000000ul, 7}, // 'B' 1011101 |
| 163 | { 67, 0xbc000000ul, 7}, // 'C' 1011110 |
| 164 | { 68, 0xbe000000ul, 7}, // 'D' 1011111 |
| 165 | { 69, 0xc0000000ul, 7}, // 'E' 1100000 |
| 166 | { 70, 0xc2000000ul, 7}, // 'F' 1100001 |
| 167 | { 71, 0xc4000000ul, 7}, // 'G' 1100010 |
| 168 | { 72, 0xc6000000ul, 7}, // 'H' 1100011 |
| 169 | { 73, 0xc8000000ul, 7}, // 'I' 1100100 |
| 170 | { 74, 0xca000000ul, 7}, // 'J' 1100101 |
| 171 | { 75, 0xcc000000ul, 7}, // 'K' 1100110 |
| 172 | { 76, 0xce000000ul, 7}, // 'L' 1100111 |
| 173 | { 77, 0xd0000000ul, 7}, // 'M' 1101000 |
| 174 | { 78, 0xd2000000ul, 7}, // 'N' 1101001 |
| 175 | { 79, 0xd4000000ul, 7}, // 'O' 1101010 |
| 176 | { 80, 0xd6000000ul, 7}, // 'P' 1101011 |
| 177 | { 81, 0xd8000000ul, 7}, // 'Q' 1101100 |
| 178 | { 82, 0xda000000ul, 7}, // 'R' 1101101 |
| 179 | { 83, 0xdc000000ul, 7}, // 'S' 1101110 |
| 180 | { 84, 0xde000000ul, 7}, // 'T' 1101111 |
| 181 | { 85, 0xe0000000ul, 7}, // 'U' 1110000 |
| 182 | { 86, 0xe2000000ul, 7}, // 'V' 1110001 |
| 183 | { 87, 0xe4000000ul, 7}, // 'W' 1110010 |
| 184 | { 88, 0xfc000000ul, 8}, // 'X' 11111100 |
| 185 | { 89, 0xe6000000ul, 7}, // 'Y' 1110011 |
| 186 | { 90, 0xfd000000ul, 8}, // 'Z' 11111101 |
| 187 | { 91, 0xffd80000ul, 13}, // '[' 11111111|11011 |
| 188 | { 92, 0xfffe0000ul, 19}, // '\' 11111111|11111110|000 |
| 189 | { 93, 0xffe00000ul, 13}, // ']' 11111111|11100 |
| 190 | { 94, 0xfff00000ul, 14}, // '^' 11111111|111100 |
| 191 | { 95, 0x88000000ul, 6}, // '_' 100010 |
| 192 | { 96, 0xfffa0000ul, 15}, // '`' 11111111|1111101 |
| 193 | { 97, 0x18000000ul, 5}, // 'a' 00011 |
| 194 | { 98, 0x8c000000ul, 6}, // 'b' 100011 |
| 195 | { 99, 0x20000000ul, 5}, // 'c' 00100 |
| 196 | {100, 0x90000000ul, 6}, // 'd' 100100 |
| 197 | {101, 0x28000000ul, 5}, // 'e' 00101 |
| 198 | {102, 0x94000000ul, 6}, // 'f' 100101 |
| 199 | {103, 0x98000000ul, 6}, // 'g' 100110 |
| 200 | {104, 0x9c000000ul, 6}, // 'h' 100111 |
| 201 | {105, 0x30000000ul, 5}, // 'i' 00110 |
| 202 | {106, 0xe8000000ul, 7}, // 'j' 1110100 |
| 203 | {107, 0xea000000ul, 7}, // 'k' 1110101 |
| 204 | {108, 0xa0000000ul, 6}, // 'l' 101000 |
| 205 | {109, 0xa4000000ul, 6}, // 'm' 101001 |
| 206 | {110, 0xa8000000ul, 6}, // 'n' 101010 |
| 207 | {111, 0x38000000ul, 5}, // 'o' 00111 |
| 208 | {112, 0xac000000ul, 6}, // 'p' 101011 |
| 209 | {113, 0xec000000ul, 7}, // 'q' 1110110 |
| 210 | {114, 0xb0000000ul, 6}, // 'r' 101100 |
| 211 | {115, 0x40000000ul, 5}, // 's' 01000 |
| 212 | {116, 0x48000000ul, 5}, // 't' 01001 |
| 213 | {117, 0xb4000000ul, 6}, // 'u' 101101 |
| 214 | {118, 0xee000000ul, 7}, // 'v' 1110111 |
| 215 | {119, 0xf0000000ul, 7}, // 'w' 1111000 |
| 216 | {120, 0xf2000000ul, 7}, // 'x' 1111001 |
| 217 | {121, 0xf4000000ul, 7}, // 'y' 1111010 |
| 218 | {122, 0xf6000000ul, 7}, // 'z' 1111011 |
| 219 | {123, 0xfffc0000ul, 15}, // '{' 11111111|1111110 |
| 220 | {124, 0xff800000ul, 11}, // '|' 11111111|100 |
| 221 | {125, 0xfff40000ul, 14}, // '}' 11111111|111101 |
| 222 | {126, 0xffe80000ul, 13}, // '~' 11111111|11101 |
| 223 | {127, 0xffffffc0ul, 28}, // 11111111|11111111|11111111|1100 |
| 224 | {128, 0xfffe6000ul, 20}, // 11111111|11111110|0110 |
| 225 | {129, 0xffff4800ul, 22}, // 11111111|11111111|010010 |
| 226 | {130, 0xfffe7000ul, 20}, // 11111111|11111110|0111 |
| 227 | {131, 0xfffe8000ul, 20}, // 11111111|11111110|1000 |
| 228 | {132, 0xffff4c00ul, 22}, // 11111111|11111111|010011 |
| 229 | {133, 0xffff5000ul, 22}, // 11111111|11111111|010100 |
| 230 | {134, 0xffff5400ul, 22}, // 11111111|11111111|010101 |
| 231 | {135, 0xffffb200ul, 23}, // 11111111|11111111|1011001 |
| 232 | {136, 0xffff5800ul, 22}, // 11111111|11111111|010110 |
| 233 | {137, 0xffffb400ul, 23}, // 11111111|11111111|1011010 |
| 234 | {138, 0xffffb600ul, 23}, // 11111111|11111111|1011011 |
| 235 | {139, 0xffffb800ul, 23}, // 11111111|11111111|1011100 |
| 236 | {140, 0xffffba00ul, 23}, // 11111111|11111111|1011101 |
| 237 | {141, 0xffffbc00ul, 23}, // 11111111|11111111|1011110 |
| 238 | {142, 0xffffeb00ul, 24}, // 11111111|11111111|11101011 |
| 239 | {143, 0xffffbe00ul, 23}, // 11111111|11111111|1011111 |
| 240 | {144, 0xffffec00ul, 24}, // 11111111|11111111|11101100 |
| 241 | {145, 0xffffed00ul, 24}, // 11111111|11111111|11101101 |
| 242 | {146, 0xffff5c00ul, 22}, // 11111111|11111111|010111 |
| 243 | {147, 0xffffc000ul, 23}, // 11111111|11111111|1100000 |
| 244 | {148, 0xffffee00ul, 24}, // 11111111|11111111|11101110 |
| 245 | {149, 0xffffc200ul, 23}, // 11111111|11111111|1100001 |
| 246 | {150, 0xffffc400ul, 23}, // 11111111|11111111|1100010 |
| 247 | {151, 0xffffc600ul, 23}, // 11111111|11111111|1100011 |
| 248 | {152, 0xffffc800ul, 23}, // 11111111|11111111|1100100 |
| 249 | {153, 0xfffee000ul, 21}, // 11111111|11111110|11100 |
| 250 | {154, 0xffff6000ul, 22}, // 11111111|11111111|011000 |
| 251 | {155, 0xffffca00ul, 23}, // 11111111|11111111|1100101 |
| 252 | {156, 0xffff6400ul, 22}, // 11111111|11111111|011001 |
| 253 | {157, 0xffffcc00ul, 23}, // 11111111|11111111|1100110 |
| 254 | {158, 0xffffce00ul, 23}, // 11111111|11111111|1100111 |
| 255 | {159, 0xffffef00ul, 24}, // 11111111|11111111|11101111 |
| 256 | {160, 0xffff6800ul, 22}, // 11111111|11111111|011010 |
| 257 | {161, 0xfffee800ul, 21}, // 11111111|11111110|11101 |
| 258 | {162, 0xfffe9000ul, 20}, // 11111111|11111110|1001 |
| 259 | {163, 0xffff6c00ul, 22}, // 11111111|11111111|011011 |
| 260 | {164, 0xffff7000ul, 22}, // 11111111|11111111|011100 |
| 261 | {165, 0xffffd000ul, 23}, // 11111111|11111111|1101000 |
| 262 | {166, 0xffffd200ul, 23}, // 11111111|11111111|1101001 |
| 263 | {167, 0xfffef000ul, 21}, // 11111111|11111110|11110 |
| 264 | {168, 0xffffd400ul, 23}, // 11111111|11111111|1101010 |
| 265 | {169, 0xffff7400ul, 22}, // 11111111|11111111|011101 |
| 266 | {170, 0xffff7800ul, 22}, // 11111111|11111111|011110 |
| 267 | {171, 0xfffff000ul, 24}, // 11111111|11111111|11110000 |
| 268 | {172, 0xfffef800ul, 21}, // 11111111|11111110|11111 |
| 269 | {173, 0xffff7c00ul, 22}, // 11111111|11111111|011111 |
| 270 | {174, 0xffffd600ul, 23}, // 11111111|11111111|1101011 |
| 271 | {175, 0xffffd800ul, 23}, // 11111111|11111111|1101100 |
| 272 | {176, 0xffff0000ul, 21}, // 11111111|11111111|00000 |
| 273 | {177, 0xffff0800ul, 21}, // 11111111|11111111|00001 |
| 274 | {178, 0xffff8000ul, 22}, // 11111111|11111111|100000 |
| 275 | {179, 0xffff1000ul, 21}, // 11111111|11111111|00010 |
| 276 | {180, 0xffffda00ul, 23}, // 11111111|11111111|1101101 |
| 277 | {181, 0xffff8400ul, 22}, // 11111111|11111111|100001 |
| 278 | {182, 0xffffdc00ul, 23}, // 11111111|11111111|1101110 |
| 279 | {183, 0xffffde00ul, 23}, // 11111111|11111111|1101111 |
| 280 | {184, 0xfffea000ul, 20}, // 11111111|11111110|1010 |
| 281 | {185, 0xffff8800ul, 22}, // 11111111|11111111|100010 |
| 282 | {186, 0xffff8c00ul, 22}, // 11111111|11111111|100011 |
| 283 | {187, 0xffff9000ul, 22}, // 11111111|11111111|100100 |
| 284 | {188, 0xffffe000ul, 23}, // 11111111|11111111|1110000 |
| 285 | {189, 0xffff9400ul, 22}, // 11111111|11111111|100101 |
| 286 | {190, 0xffff9800ul, 22}, // 11111111|11111111|100110 |
| 287 | {191, 0xffffe200ul, 23}, // 11111111|11111111|1110001 |
| 288 | {192, 0xfffff800ul, 26}, // 11111111|11111111|11111000|00 |
| 289 | {193, 0xfffff840ul, 26}, // 11111111|11111111|11111000|01 |
| 290 | {194, 0xfffeb000ul, 20}, // 11111111|11111110|1011 |
| 291 | {195, 0xfffe2000ul, 19}, // 11111111|11111110|001 |
| 292 | {196, 0xffff9c00ul, 22}, // 11111111|11111111|100111 |
| 293 | {197, 0xffffe400ul, 23}, // 11111111|11111111|1110010 |
| 294 | {198, 0xffffa000ul, 22}, // 11111111|11111111|101000 |
| 295 | {199, 0xfffff600ul, 25}, // 11111111|11111111|11110110|0 |
| 296 | {200, 0xfffff880ul, 26}, // 11111111|11111111|11111000|10 |
| 297 | {201, 0xfffff8c0ul, 26}, // 11111111|11111111|11111000|11 |
| 298 | {202, 0xfffff900ul, 26}, // 11111111|11111111|11111001|00 |
| 299 | {203, 0xfffffbc0ul, 27}, // 11111111|11111111|11111011|110 |
| 300 | {204, 0xfffffbe0ul, 27}, // 11111111|11111111|11111011|111 |
| 301 | {205, 0xfffff940ul, 26}, // 11111111|11111111|11111001|01 |
| 302 | {206, 0xfffff100ul, 24}, // 11111111|11111111|11110001 |
| 303 | {207, 0xfffff680ul, 25}, // 11111111|11111111|11110110|1 |
| 304 | {208, 0xfffe4000ul, 19}, // 11111111|11111110|010 |
| 305 | {209, 0xffff1800ul, 21}, // 11111111|11111111|00011 |
| 306 | {210, 0xfffff980ul, 26}, // 11111111|11111111|11111001|10 |
| 307 | {211, 0xfffffc00ul, 27}, // 11111111|11111111|11111100|000 |
| 308 | {212, 0xfffffc20ul, 27}, // 11111111|11111111|11111100|001 |
| 309 | {213, 0xfffff9c0ul, 26}, // 11111111|11111111|11111001|11 |
| 310 | {214, 0xfffffc40ul, 27}, // 11111111|11111111|11111100|010 |
| 311 | {215, 0xfffff200ul, 24}, // 11111111|11111111|11110010 |
| 312 | {216, 0xffff2000ul, 21}, // 11111111|11111111|00100 |
| 313 | {217, 0xffff2800ul, 21}, // 11111111|11111111|00101 |
| 314 | {218, 0xfffffa00ul, 26}, // 11111111|11111111|11111010|00 |
| 315 | {219, 0xfffffa40ul, 26}, // 11111111|11111111|11111010|01 |
| 316 | {220, 0xffffffd0ul, 28}, // 11111111|11111111|11111111|1101 |
| 317 | {221, 0xfffffc60ul, 27}, // 11111111|11111111|11111100|011 |
| 318 | {222, 0xfffffc80ul, 27}, // 11111111|11111111|11111100|100 |
| 319 | {223, 0xfffffca0ul, 27}, // 11111111|11111111|11111100|101 |
| 320 | {224, 0xfffec000ul, 20}, // 11111111|11111110|1100 |
| 321 | {225, 0xfffff300ul, 24}, // 11111111|11111111|11110011 |
| 322 | {226, 0xfffed000ul, 20}, // 11111111|11111110|1101 |
| 323 | {227, 0xffff3000ul, 21}, // 11111111|11111111|00110 |
| 324 | {228, 0xffffa400ul, 22}, // 11111111|11111111|101001 |
| 325 | {229, 0xffff3800ul, 21}, // 11111111|11111111|00111 |
| 326 | {230, 0xffff4000ul, 21}, // 11111111|11111111|01000 |
| 327 | {231, 0xffffe600ul, 23}, // 11111111|11111111|1110011 |
| 328 | {232, 0xffffa800ul, 22}, // 11111111|11111111|101010 |
| 329 | {233, 0xffffac00ul, 22}, // 11111111|11111111|101011 |
| 330 | {234, 0xfffff700ul, 25}, // 11111111|11111111|11110111|0 |
| 331 | {235, 0xfffff780ul, 25}, // 11111111|11111111|11110111|1 |
| 332 | {236, 0xfffff400ul, 24}, // 11111111|11111111|11110100 |
| 333 | {237, 0xfffff500ul, 24}, // 11111111|11111111|11110101 |
| 334 | {238, 0xfffffa80ul, 26}, // 11111111|11111111|11111010|10 |
| 335 | {239, 0xffffe800ul, 23}, // 11111111|11111111|1110100 |
| 336 | {240, 0xfffffac0ul, 26}, // 11111111|11111111|11111010|11 |
| 337 | {241, 0xfffffcc0ul, 27}, // 11111111|11111111|11111100|110 |
| 338 | {242, 0xfffffb00ul, 26}, // 11111111|11111111|11111011|00 |
| 339 | {243, 0xfffffb40ul, 26}, // 11111111|11111111|11111011|01 |
| 340 | {244, 0xfffffce0ul, 27}, // 11111111|11111111|11111100|111 |
| 341 | {245, 0xfffffd00ul, 27}, // 11111111|11111111|11111101|000 |
| 342 | {246, 0xfffffd20ul, 27}, // 11111111|11111111|11111101|001 |
| 343 | {247, 0xfffffd40ul, 27}, // 11111111|11111111|11111101|010 |
| 344 | {248, 0xfffffd60ul, 27}, // 11111111|11111111|11111101|011 |
| 345 | {249, 0xffffffe0ul, 28}, // 11111111|11111111|11111111|1110 |
| 346 | {250, 0xfffffd80ul, 27}, // 11111111|11111111|11111101|100 |
| 347 | {251, 0xfffffda0ul, 27}, // 11111111|11111111|11111101|101 |
| 348 | {252, 0xfffffdc0ul, 27}, // 11111111|11111111|11111101|110 |
| 349 | {253, 0xfffffde0ul, 27}, // 11111111|11111111|11111101|111 |
| 350 | {254, 0xfffffe00ul, 27}, // 11111111|11111111|11111110|000 |
| 351 | {255, 0xfffffb80ul, 26}, // 11111111|11111111|11111011|10 |
| 352 | {256, 0xfffffffcul, 30} // EOS 11111111|11111111|11111111|111111 |
| 353 | }; |
| 354 | |
| 355 | void write_huffman_code(BitOStream &outputStream, const CodeEntry &code) |
| 356 | { |
| 357 | // Append octet by octet. |
| 358 | auto bitLength = code.bitLength; |
| 359 | const auto hc = code.huffmanCode >> (32 - bitLength); |
| 360 | |
| 361 | if (bitLength > 24) { |
| 362 | outputStream.writeBits(uchar(hc >> 24), bitLength - 24); |
| 363 | bitLength = 24; |
| 364 | } |
| 365 | |
| 366 | if (bitLength > 16) { |
| 367 | outputStream.writeBits(uchar(hc >> 16), bitLength - 16); |
| 368 | bitLength = 16; |
| 369 | } |
| 370 | |
| 371 | if (bitLength > 8) { |
| 372 | outputStream.writeBits(uchar(hc >> 8), bitLength - 8); |
| 373 | bitLength = 8; |
| 374 | } |
| 375 | |
| 376 | outputStream.writeBits(uchar(hc), bitLength); |
| 377 | } |
| 378 | |
| 379 | } |
| 380 | |
| 381 | // That's from HPACK's specs - we deal with octets. |
| 382 | static_assert(std::numeric_limits<uchar>::digits == 8, "octets expected" ); |
| 383 | |
| 384 | quint64 huffman_encoded_bit_length(const QByteArray &inputData) |
| 385 | { |
| 386 | quint64 bitLength = 0; |
| 387 | for (int i = 0, e = inputData.size(); i < e; ++i) |
| 388 | bitLength += staticHuffmanCodeTable[int(inputData[i])].bitLength; |
| 389 | |
| 390 | return bitLength; |
| 391 | } |
| 392 | |
| 393 | void huffman_encode_string(const QByteArray &inputData, BitOStream &outputStream) |
| 394 | { |
| 395 | for (int i = 0, e = inputData.size(); i < e; ++i) { |
| 396 | const auto value = uchar(inputData[i]); |
| 397 | write_huffman_code(outputStream, staticHuffmanCodeTable[value]); |
| 398 | } |
| 399 | |
| 400 | // Pad bits ... |
| 401 | if (outputStream.bitLength() % 8) |
| 402 | outputStream.writeBits(0xff, 8 - outputStream.bitLength() % 8); |
| 403 | } |
| 404 | |
| 405 | bool padding_is_valid(quint32 chunk, quint32 nBits) |
| 406 | { |
| 407 | Q_ASSERT(nBits); |
| 408 | |
| 409 | // HPACK, 5.2: "A padding strictly longer than 7 bits MUST be |
| 410 | // treated as a decoding error." |
| 411 | if (nBits > 7) |
| 412 | return false; |
| 413 | // HPACK, 5.2: |
| 414 | // "A padding not corresponding to the most significant bits |
| 415 | // of the code for the EOS symbol MUST be treated as a decoding error." |
| 416 | return (chunk >> (32 - nBits)) == quint32((1 << nBits) - 1); |
| 417 | } |
| 418 | |
| 419 | HuffmanDecoder::HuffmanDecoder() |
| 420 | : minCodeLength() |
| 421 | { |
| 422 | const auto nCodes = sizeof staticHuffmanCodeTable / sizeof staticHuffmanCodeTable[0]; |
| 423 | |
| 424 | std::vector<CodeEntry> symbols(staticHuffmanCodeTable, staticHuffmanCodeTable + nCodes); |
| 425 | // Now we sort: by bit length first (in the descending order) and by the symbol |
| 426 | // value (descending). Descending order: to make sure we do not create prefix tables with |
| 427 | // short 'indexLength' first and having longer codes that do not fit into such tables later. |
| 428 | std::sort(symbols.begin(), symbols.end(), [](const CodeEntry &code1, const CodeEntry &code2) { |
| 429 | if (code1.bitLength == code2.bitLength) |
| 430 | return code1.byteValue > code2.byteValue; |
| 431 | return code1.bitLength > code2.bitLength; |
| 432 | }); |
| 433 | |
| 434 | minCodeLength = symbols.back().bitLength; // The shortest one, currently it's 5. |
| 435 | |
| 436 | // TODO: add a verification - Huffman codes |
| 437 | // within a given bit length range also |
| 438 | // should be in descending order. |
| 439 | |
| 440 | // Initialize 'prefixTables' and 'tableData'. |
| 441 | addTable(0, quint32(BitConstants::rootPrefix)); // 'root' table. |
| 442 | |
| 443 | for (const auto &s : symbols) { |
| 444 | quint32 tableIndex = 0; |
| 445 | while (true) { |
| 446 | Q_ASSERT(tableIndex < prefixTables.size()); |
| 447 | // Note, by value - since prefixTables will be updated in between. |
| 448 | const auto table = prefixTables[tableIndex]; |
| 449 | // We skip prefixed bits (if any) and use indexed bits only: |
| 450 | const auto entryIndex = s.huffmanCode << table.prefixLength >> (32 - table.indexLength); |
| 451 | // Again, by value. |
| 452 | PrefixTableEntry entry = tableEntry(table, entryIndex); |
| 453 | // How many bits were coded by previous tables and this table: |
| 454 | const auto codedLength = table.prefixLength + table.indexLength; |
| 455 | if (codedLength < s.bitLength) { |
| 456 | // We have to add a new prefix table ... (if it's not done yet). |
| 457 | if (!entry.bitLength) { |
| 458 | entry.nextTable = addTable(codedLength, std::min<quint32>(quint32(BitConstants::childPrefix), |
| 459 | s.bitLength - codedLength)); |
| 460 | entry.bitLength = s.bitLength; |
| 461 | entry.byteValue = s.byteValue; |
| 462 | setTableEntry(table, entryIndex, entry); |
| 463 | } |
| 464 | tableIndex = entry.nextTable; |
| 465 | } else { |
| 466 | // We found the slot for our code (terminal entry): |
| 467 | entry.byteValue = s.byteValue; |
| 468 | entry.bitLength = s.bitLength; |
| 469 | // Refer to our own table as 'nextTable': |
| 470 | entry.nextTable = tableIndex; |
| 471 | setTableEntry(table, entryIndex, entry); |
| 472 | break; |
| 473 | } |
| 474 | } |
| 475 | } |
| 476 | |
| 477 | // Now, we have a table(s) and have to fill 'holes' to |
| 478 | // 'fix' terminal entries. |
| 479 | for (const auto &table : prefixTables) { |
| 480 | const quint32 codedLength = table.prefixLength + table.indexLength; |
| 481 | for (quint32 j = 0; j < table.size();) { |
| 482 | const PrefixTableEntry &entry = tableEntry(table, j); |
| 483 | if (entry.bitLength && entry.bitLength < codedLength) { |
| 484 | const quint32 range = 1 << (codedLength - entry.bitLength); |
| 485 | for (quint32 k = 1; k < range; ++k) |
| 486 | setTableEntry(table, j + k, entry); |
| 487 | j += range; |
| 488 | } else { |
| 489 | ++j; |
| 490 | } |
| 491 | } |
| 492 | } |
| 493 | } |
| 494 | |
| 495 | bool HuffmanDecoder::decodeStream(BitIStream &inputStream, QByteArray &outputBuffer) |
| 496 | { |
| 497 | while (true) { |
| 498 | quint32 chunk = 0; |
| 499 | const quint32 readBits = inputStream.peekBits(inputStream.streamOffset(), 32, &chunk); |
| 500 | if (!readBits) |
| 501 | return !inputStream.hasMoreBits(); |
| 502 | |
| 503 | if (readBits < minCodeLength) { |
| 504 | inputStream.skipBits(readBits); |
| 505 | return padding_is_valid(chunk, readBits); |
| 506 | } |
| 507 | |
| 508 | quint32 tableIndex = 0; |
| 509 | const PrefixTable *table = &prefixTables[tableIndex]; |
| 510 | quint32 entryIndex = chunk >> (32 - table->indexLength); |
| 511 | PrefixTableEntry entry = tableEntry(*table, entryIndex); |
| 512 | |
| 513 | while (true) { |
| 514 | if (entry.nextTable == tableIndex) |
| 515 | break; |
| 516 | |
| 517 | tableIndex = entry.nextTable; |
| 518 | table = &prefixTables[tableIndex]; |
| 519 | entryIndex = chunk << table->prefixLength >> (32 - table->indexLength); |
| 520 | entry = tableEntry(*table, entryIndex); |
| 521 | } |
| 522 | |
| 523 | if (entry.bitLength > readBits) { |
| 524 | inputStream.skipBits(readBits); |
| 525 | return padding_is_valid(chunk, readBits); |
| 526 | } |
| 527 | |
| 528 | if (!entry.bitLength || entry.byteValue == 256) { |
| 529 | //EOS (256) == compression error (HPACK). |
| 530 | inputStream.skipBits(readBits); |
| 531 | return false; |
| 532 | } |
| 533 | |
| 534 | outputBuffer.append(entry.byteValue); |
| 535 | inputStream.skipBits(entry.bitLength); |
| 536 | } |
| 537 | |
| 538 | return false; |
| 539 | } |
| 540 | |
| 541 | quint32 HuffmanDecoder::addTable(quint32 prefix, quint32 index) |
| 542 | { |
| 543 | PrefixTable newTable{prefix, index}; |
| 544 | newTable.offset = quint32(tableData.size()); |
| 545 | prefixTables.push_back(newTable); |
| 546 | // Add entries for this table: |
| 547 | tableData.resize(tableData.size() + newTable.size()); |
| 548 | |
| 549 | return quint32(prefixTables.size() - 1); |
| 550 | } |
| 551 | |
| 552 | PrefixTableEntry HuffmanDecoder::tableEntry(const PrefixTable &table, quint32 index) |
| 553 | { |
| 554 | Q_ASSERT(index < table.size()); |
| 555 | return tableData[table.offset + index]; |
| 556 | } |
| 557 | |
| 558 | void HuffmanDecoder::setTableEntry(const PrefixTable &table, quint32 index, |
| 559 | const PrefixTableEntry &entry) |
| 560 | { |
| 561 | Q_ASSERT(index < table.size()); |
| 562 | tableData[table.offset + index] = entry; |
| 563 | } |
| 564 | |
| 565 | bool huffman_decode_string(BitIStream &inputStream, QByteArray *outputBuffer) |
| 566 | { |
| 567 | Q_ASSERT(outputBuffer); |
| 568 | |
| 569 | static HuffmanDecoder decoder; |
| 570 | return decoder.decodeStream(inputStream, *outputBuffer); |
| 571 | } |
| 572 | |
| 573 | } |
| 574 | |
| 575 | QT_END_NAMESPACE |
| 576 | |