1/* Copyright 2017 Google Inc. All Rights Reserved.
2
3 Distributed under MIT license.
4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7/* Shared Dictionary definition and utilities. */
8
9#include <brotli/shared_dictionary.h>
10
11#include <memory.h>
12#include <stdlib.h> /* malloc, free */
13#include <stdio.h>
14
15#include "dictionary.h"
16#include "platform.h"
17#include "shared_dictionary_internal.h"
18
19#if defined(__cplusplus) || defined(c_plusplus)
20extern "C" {
21#endif
22
23#define BROTLI_NUM_ENCODED_LENGTHS (SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH \
24 - SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH + 1)
25
26/* Max allowed by spec */
27#define BROTLI_MAX_SIZE_BITS 15u
28
29/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
30static BROTLI_BOOL ReadBool(const uint8_t* encoded, size_t size, size_t* pos,
31 BROTLI_BOOL* result) {
32 uint8_t value;
33 size_t position = *pos;
34 if (position >= size) return BROTLI_FALSE; /* past file end */
35 value = encoded[position++];
36 if (value > 1) return BROTLI_FALSE; /* invalid bool */
37 *result = TO_BROTLI_BOOL(value);
38 *pos = position;
39 return BROTLI_TRUE; /* success */
40}
41
42/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
43static BROTLI_BOOL ReadUint8(const uint8_t* encoded, size_t size, size_t* pos,
44 uint8_t* result) {
45 size_t position = *pos;
46 if (position + sizeof(uint8_t) > size) return BROTLI_FALSE;
47 *result = encoded[position++];
48 *pos = position;
49 return BROTLI_TRUE;
50}
51
52/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
53static BROTLI_BOOL ReadUint16(const uint8_t* encoded, size_t size, size_t* pos,
54 uint16_t* result) {
55 size_t position = *pos;
56 if (position + sizeof(uint16_t) > size) return BROTLI_FALSE;
57 *result = BROTLI_UNALIGNED_LOAD16LE(&encoded[position]);
58 position += 2;
59 *pos = position;
60 return BROTLI_TRUE;
61}
62
63/* Reads a varint into a uint32_t, and returns error if it's too large */
64/* Returns BROTLI_TRUE on success, BROTLI_FALSE on failure. */
65static BROTLI_BOOL ReadVarint32(const uint8_t* encoded, size_t size,
66 size_t* pos, uint32_t* result) {
67 int num = 0;
68 uint8_t byte;
69 *result = 0;
70 for (;;) {
71 if (*pos >= size) return BROTLI_FALSE;
72 byte = encoded[(*pos)++];
73 if (num == 4 && byte > 15) return BROTLI_FALSE;
74 *result |= (uint32_t)(byte & 127) << (num * 7);
75 if (byte < 128) return BROTLI_TRUE;
76 num++;
77 }
78}
79
80/* Returns the total length of word list. */
81static size_t BrotliSizeBitsToOffsets(const uint8_t* size_bits_by_length,
82 uint32_t* offsets_by_length) {
83 uint32_t pos = 0;
84 uint32_t i;
85 for (i = 0; i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
86 offsets_by_length[i] = pos;
87 if (size_bits_by_length[i] != 0) {
88 pos += i << size_bits_by_length[i];
89 }
90 }
91 return pos;
92}
93
94static BROTLI_BOOL ParseWordList(size_t size, const uint8_t* encoded,
95 size_t* pos, BrotliDictionary* out) {
96 size_t offset;
97 size_t i;
98 size_t position = *pos;
99 if (position + BROTLI_NUM_ENCODED_LENGTHS > size) {
100 return BROTLI_FALSE;
101 }
102
103 memset(out->size_bits_by_length, 0, SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH);
104 memcpy(out->size_bits_by_length + SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH,
105 &encoded[position], BROTLI_NUM_ENCODED_LENGTHS);
106 for (i = SHARED_BROTLI_MIN_DICTIONARY_WORD_LENGTH;
107 i <= SHARED_BROTLI_MAX_DICTIONARY_WORD_LENGTH; i++) {
108 if (out->size_bits_by_length[i] > BROTLI_MAX_SIZE_BITS) {
109 return BROTLI_FALSE;
110 }
111 }
112 position += BROTLI_NUM_ENCODED_LENGTHS;
113 offset = BrotliSizeBitsToOffsets(
114 out->size_bits_by_length, out->offsets_by_length);
115
116 out->data = &encoded[position];
117 out->data_size = offset;
118 position += offset;
119 if (position > size) return BROTLI_FALSE;
120 *pos = position;
121 return BROTLI_TRUE;
122}
123
124/* Computes the cutOffTransforms of a BrotliTransforms which already has the
125 transforms data correctly filled in. */
126static void ComputeCutoffTransforms(BrotliTransforms* transforms) {
127 uint32_t i;
128 for (i = 0; i < BROTLI_TRANSFORMS_MAX_CUT_OFF + 1; i++) {
129 transforms->cutOffTransforms[i] = -1;
130 }
131 for (i = 0; i < transforms->num_transforms; i++) {
132 const uint8_t* prefix = BROTLI_TRANSFORM_PREFIX(transforms, i);
133 uint8_t type = BROTLI_TRANSFORM_TYPE(transforms, i);
134 const uint8_t* suffix = BROTLI_TRANSFORM_SUFFIX(transforms, i);
135 if (type <= BROTLI_TRANSFORM_OMIT_LAST_9 && *prefix == 0 && *suffix == 0 &&
136 transforms->cutOffTransforms[type] == -1) {
137 transforms->cutOffTransforms[type] = (int16_t)i;
138 }
139 }
140}
141
142static BROTLI_BOOL ParsePrefixSuffixTable(size_t size, const uint8_t* encoded,
143 size_t* pos, BrotliTransforms* out, uint16_t* out_table,
144 size_t* out_table_size) {
145 size_t position = *pos;
146 size_t offset = 0;
147 size_t stringlet_count = 0; /* NUM_PREFIX_SUFFIX */
148 size_t data_length = 0;
149
150 /* PREFIX_SUFFIX_LENGTH */
151 if (!ReadUint16(encoded, size, &position, &out->prefix_suffix_size)) {
152 return BROTLI_FALSE;
153 }
154 data_length = out->prefix_suffix_size;
155
156 /* Must at least have space for null terminator. */
157 if (data_length < 1) return BROTLI_FALSE;
158 out->prefix_suffix = &encoded[position];
159 if (position + data_length >= size) return BROTLI_FALSE;
160 while (BROTLI_TRUE) {
161 /* STRING_LENGTH */
162 size_t stringlet_len = encoded[position + offset];
163 out_table[stringlet_count] = (uint16_t)offset;
164 stringlet_count++;
165 offset++;
166 if (stringlet_len == 0) {
167 if (offset == data_length) {
168 break;
169 } else {
170 return BROTLI_FALSE;
171 }
172 }
173 if (stringlet_count > 255) return BROTLI_FALSE;
174 offset += stringlet_len;
175 if (offset >= data_length) return BROTLI_FALSE;
176 }
177
178 position += data_length;
179 *pos = position;
180 *out_table_size = (uint16_t)stringlet_count;
181 return BROTLI_TRUE;
182}
183
184static BROTLI_BOOL ParseTransformsList(size_t size, const uint8_t* encoded,
185 size_t* pos, BrotliTransforms* out, uint16_t* prefix_suffix_table,
186 size_t* prefix_suffix_count) {
187 uint32_t i;
188 BROTLI_BOOL has_params = BROTLI_FALSE;
189 BROTLI_BOOL prefix_suffix_ok = BROTLI_FALSE;
190 size_t position = *pos;
191 size_t stringlet_cnt = 0;
192 if (position >= size) return BROTLI_FALSE;
193
194 prefix_suffix_ok = ParsePrefixSuffixTable(
195 size, encoded, &position, out, prefix_suffix_table, &stringlet_cnt);
196 if (!prefix_suffix_ok) return BROTLI_FALSE;
197 out->prefix_suffix_map = prefix_suffix_table;
198 *prefix_suffix_count = stringlet_cnt;
199
200 out->num_transforms = encoded[position++];
201 out->transforms = &encoded[position];
202 position += (size_t)out->num_transforms * 3;
203 if (position > size) return BROTLI_FALSE;
204 /* Check for errors and read extra parameters. */
205 for (i = 0; i < out->num_transforms; i++) {
206 uint8_t prefix_id = BROTLI_TRANSFORM_PREFIX_ID(out, i);
207 uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
208 uint8_t suffix_id = BROTLI_TRANSFORM_SUFFIX_ID(out, i);
209 if (prefix_id >= stringlet_cnt) return BROTLI_FALSE;
210 if (type >= BROTLI_NUM_TRANSFORM_TYPES) return BROTLI_FALSE;
211 if (suffix_id >= stringlet_cnt) return BROTLI_FALSE;
212 if (type == BROTLI_TRANSFORM_SHIFT_FIRST ||
213 type == BROTLI_TRANSFORM_SHIFT_ALL) {
214 has_params = BROTLI_TRUE;
215 }
216 }
217 if (has_params) {
218 out->params = &encoded[position];
219 position += (size_t)out->num_transforms * 2;
220 if (position > size) return BROTLI_FALSE;
221 for (i = 0; i < out->num_transforms; i++) {
222 uint8_t type = BROTLI_TRANSFORM_TYPE(out, i);
223 if (type != BROTLI_TRANSFORM_SHIFT_FIRST &&
224 type != BROTLI_TRANSFORM_SHIFT_ALL) {
225 if (out->params[i * 2] != 0 || out->params[i * 2 + 1] != 0) {
226 return BROTLI_FALSE;
227 }
228 }
229 }
230 } else {
231 out->params = NULL;
232 }
233 ComputeCutoffTransforms(out);
234 *pos = position;
235 return BROTLI_TRUE;
236}
237
238static BROTLI_BOOL DryParseDictionary(const uint8_t* encoded,
239 size_t size, uint32_t* num_prefix, BROTLI_BOOL* is_custom_static_dict) {
240 size_t pos = 0;
241 uint32_t chunk_size = 0;
242 uint8_t num_word_lists;
243 uint8_t num_transform_lists;
244 *is_custom_static_dict = BROTLI_FALSE;
245 *num_prefix = 0;
246
247 /* Skip magic header bytes. */
248 pos += 2;
249
250 /* LZ77_DICTIONARY_LENGTH */
251 if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
252 if (chunk_size != 0) {
253 /* This limitation is not specified but the 32-bit Brotli decoder for now */
254 if (chunk_size > 1073741823) return BROTLI_FALSE;
255 *num_prefix = 1;
256 if (pos + chunk_size > size) return BROTLI_FALSE;
257 pos += chunk_size;
258 }
259
260 if (!ReadUint8(encoded, size, &pos, &num_word_lists)) {
261 return BROTLI_FALSE;
262 }
263 if (!ReadUint8(encoded, size, &pos, &num_transform_lists)) {
264 return BROTLI_FALSE;
265 }
266
267 if (num_word_lists > 0 || num_transform_lists > 0) {
268 *is_custom_static_dict = BROTLI_TRUE;
269 }
270
271 return BROTLI_TRUE;
272}
273
274static BROTLI_BOOL ParseDictionary(const uint8_t* encoded, size_t size,
275 BrotliSharedDictionary* dict) {
276 uint32_t i;
277 size_t pos = 0;
278 uint32_t chunk_size = 0;
279 size_t total_prefix_suffix_count = 0;
280 size_t trasform_list_start[SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS];
281 uint16_t temporary_prefix_suffix_table[256];
282
283 /* Skip magic header bytes. */
284 pos += 2;
285
286 /* LZ77_DICTIONARY_LENGTH */
287 if (!ReadVarint32(encoded, size, &pos, &chunk_size)) return BROTLI_FALSE;
288 if (chunk_size != 0) {
289 if (pos + chunk_size > size) return BROTLI_FALSE;
290 dict->prefix_size[dict->num_prefix] = chunk_size;
291 dict->prefix[dict->num_prefix] = &encoded[pos];
292 dict->num_prefix++;
293 /* LZ77_DICTIONARY_LENGTH bytes. */
294 pos += chunk_size;
295 }
296
297 /* NUM_WORD_LISTS */
298 if (!ReadUint8(encoded, size, &pos, &dict->num_word_lists)) {
299 return BROTLI_FALSE;
300 }
301 if (dict->num_word_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
302 return BROTLI_FALSE;
303 }
304
305 if (dict->num_word_lists != 0) {
306 dict->words_instances = (BrotliDictionary*)dict->alloc_func(
307 dict->memory_manager_opaque,
308 dict->num_word_lists * sizeof(*dict->words_instances));
309 if (!dict->words_instances) return BROTLI_FALSE; /* OOM */
310 }
311 for (i = 0; i < dict->num_word_lists; i++) {
312 if (!ParseWordList(size, encoded, &pos, &dict->words_instances[i])) {
313 return BROTLI_FALSE;
314 }
315 }
316
317 /* NUM_TRANSFORM_LISTS */
318 if (!ReadUint8(encoded, size, &pos, &dict->num_transform_lists)) {
319 return BROTLI_FALSE;
320 }
321 if (dict->num_transform_lists > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
322 return BROTLI_FALSE;
323 }
324
325 if (dict->num_transform_lists != 0) {
326 dict->transforms_instances = (BrotliTransforms*)dict->alloc_func(
327 dict->memory_manager_opaque,
328 dict->num_transform_lists * sizeof(*dict->transforms_instances));
329 if (!dict->transforms_instances) return BROTLI_FALSE; /* OOM */
330 }
331 for (i = 0; i < dict->num_transform_lists; i++) {
332 BROTLI_BOOL ok = BROTLI_FALSE;
333 size_t prefix_suffix_count = 0;
334 trasform_list_start[i] = pos;
335 dict->transforms_instances[i].prefix_suffix_map =
336 temporary_prefix_suffix_table;
337 ok = ParseTransformsList(
338 size, encoded, &pos, &dict->transforms_instances[i],
339 temporary_prefix_suffix_table, &prefix_suffix_count);
340 if (!ok) return BROTLI_FALSE;
341 total_prefix_suffix_count += prefix_suffix_count;
342 }
343 if (total_prefix_suffix_count != 0) {
344 dict->prefix_suffix_maps = (uint16_t*)dict->alloc_func(
345 dict->memory_manager_opaque,
346 total_prefix_suffix_count * sizeof(*dict->prefix_suffix_maps));
347 if (!dict->prefix_suffix_maps) return BROTLI_FALSE; /* OOM */
348 }
349 total_prefix_suffix_count = 0;
350 for (i = 0; i < dict->num_transform_lists; i++) {
351 size_t prefix_suffix_count = 0;
352 size_t position = trasform_list_start[i];
353 uint16_t* prefix_suffix_map =
354 &dict->prefix_suffix_maps[total_prefix_suffix_count];
355 BROTLI_BOOL ok = ParsePrefixSuffixTable(
356 size, encoded, &position, &dict->transforms_instances[i],
357 prefix_suffix_map, &prefix_suffix_count);
358 if (!ok) return BROTLI_FALSE;
359 dict->transforms_instances[i].prefix_suffix_map = prefix_suffix_map;
360 total_prefix_suffix_count += prefix_suffix_count;
361 }
362
363 if (dict->num_word_lists != 0 || dict->num_transform_lists != 0) {
364 if (!ReadUint8(encoded, size, &pos, &dict->num_dictionaries)) {
365 return BROTLI_FALSE;
366 }
367 if (dict->num_dictionaries == 0 ||
368 dict->num_dictionaries > SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS) {
369 return BROTLI_FALSE;
370 }
371 for (i = 0; i < dict->num_dictionaries; i++) {
372 uint8_t words_index;
373 uint8_t transforms_index;
374 if (!ReadUint8(encoded, size, &pos, &words_index)) {
375 return BROTLI_FALSE;
376 }
377 if (words_index > dict->num_word_lists) return BROTLI_FALSE;
378 if (!ReadUint8(encoded, size, &pos, &transforms_index)) {
379 return BROTLI_FALSE;
380 }
381 if (transforms_index > dict->num_transform_lists) return BROTLI_FALSE;
382 dict->words[i] = words_index == dict->num_word_lists ?
383 BrotliGetDictionary() : &dict->words_instances[words_index];
384 dict->transforms[i] = transforms_index == dict->num_transform_lists ?
385 BrotliGetTransforms(): &dict->transforms_instances[transforms_index];
386 }
387 /* CONTEXT_ENABLED */
388 if (!ReadBool(encoded, size, &pos, &dict->context_based)) {
389 return BROTLI_FALSE;
390 }
391
392 /* CONTEXT_MAP */
393 if (dict->context_based) {
394 for (i = 0; i < SHARED_BROTLI_NUM_DICTIONARY_CONTEXTS; i++) {
395 if (!ReadUint8(encoded, size, &pos, &dict->context_map[i])) {
396 return BROTLI_FALSE;
397 }
398 if (dict->context_map[i] >= dict->num_dictionaries) {
399 return BROTLI_FALSE;
400 }
401 }
402 }
403 } else {
404 dict->context_based = BROTLI_FALSE;
405 dict->num_dictionaries = 1;
406 dict->words[0] = BrotliGetDictionary();
407 dict->transforms[0] = BrotliGetTransforms();
408 }
409
410 return BROTLI_TRUE;
411}
412
413/* Decodes shared dictionary and verifies correctness.
414 Returns BROTLI_TRUE if dictionary is valid, BROTLI_FALSE otherwise.
415 The BrotliSharedDictionary must already have been initialized. If the
416 BrotliSharedDictionary already contains data, compound dictionaries
417 will be appended, but an error will be returned if it already has
418 custom words or transforms.
419 TODO(lode): link to RFC for shared brotli once published. */
420static BROTLI_BOOL DecodeSharedDictionary(
421 const uint8_t* encoded, size_t size, BrotliSharedDictionary* dict) {
422 uint32_t num_prefix = 0;
423 BROTLI_BOOL is_custom_static_dict = BROTLI_FALSE;
424 BROTLI_BOOL has_custom_static_dict =
425 dict->num_word_lists > 0 || dict->num_transform_lists > 0;
426
427 /* Check magic header bytes. */
428 if (size < 2) return BROTLI_FALSE;
429 if (encoded[0] != 0x91 || encoded[1] != 0) return BROTLI_FALSE;
430
431 if (!DryParseDictionary(encoded, size, &num_prefix, &is_custom_static_dict)) {
432 return BROTLI_FALSE;
433 }
434
435 if (num_prefix + dict->num_prefix > SHARED_BROTLI_MAX_COMPOUND_DICTS) {
436 return BROTLI_FALSE;
437 }
438
439 /* Cannot combine different static dictionaries, only prefix dictionaries */
440 if (has_custom_static_dict && is_custom_static_dict) return BROTLI_FALSE;
441
442 return ParseDictionary(encoded, size, dict);
443}
444
445void BrotliSharedDictionaryDestroyInstance(
446 BrotliSharedDictionary* dict) {
447 if (!dict) {
448 return;
449 } else {
450 brotli_free_func free_func = dict->free_func;
451 void* opaque = dict->memory_manager_opaque;
452 /* Cleanup. */
453 free_func(opaque, dict->words_instances);
454 free_func(opaque, dict->transforms_instances);
455 free_func(opaque, dict->prefix_suffix_maps);
456 /* Self-destruction. */
457 free_func(opaque, dict);
458 }
459}
460
461BROTLI_BOOL BrotliSharedDictionaryAttach(
462 BrotliSharedDictionary* dict, BrotliSharedDictionaryType type,
463 size_t data_size, const uint8_t data[BROTLI_ARRAY_PARAM(data_size)]) {
464 if (!dict) {
465 return BROTLI_FALSE;
466 }
467 if (type == BROTLI_SHARED_DICTIONARY_SERIALIZED) {
468 return DecodeSharedDictionary(data, data_size, dict);
469 } else if (type == BROTLI_SHARED_DICTIONARY_RAW) {
470 if (dict->num_prefix >= SHARED_BROTLI_MAX_COMPOUND_DICTS) {
471 return BROTLI_FALSE;
472 }
473 dict->prefix_size[dict->num_prefix] = data_size;
474 dict->prefix[dict->num_prefix] = data;
475 dict->num_prefix++;
476 return BROTLI_TRUE;
477 } else {
478 return BROTLI_FALSE;
479 }
480}
481
482BrotliSharedDictionary* BrotliSharedDictionaryCreateInstance(
483 brotli_alloc_func alloc_func, brotli_free_func free_func, void* opaque) {
484 BrotliSharedDictionary* dict = 0;
485 if (!alloc_func && !free_func) {
486 dict = (BrotliSharedDictionary*)malloc(sizeof(BrotliSharedDictionary));
487 } else if (alloc_func && free_func) {
488 dict = (BrotliSharedDictionary*)alloc_func(
489 opaque, sizeof(BrotliSharedDictionary));
490 }
491 if (dict == 0) {
492 return 0;
493 }
494
495 /* TODO(eustas): explicitly initialize all the fields? */
496 memset(dict, 0, sizeof(BrotliSharedDictionary));
497
498 dict->context_based = BROTLI_FALSE;
499 dict->num_dictionaries = 1;
500 dict->num_word_lists = 0;
501 dict->num_transform_lists = 0;
502
503 dict->words[0] = BrotliGetDictionary();
504 dict->transforms[0] = BrotliGetTransforms();
505
506 dict->alloc_func = alloc_func ? alloc_func : BrotliDefaultAllocFunc;
507 dict->free_func = free_func ? free_func : BrotliDefaultFreeFunc;
508 dict->memory_manager_opaque = opaque;
509
510 return dict;
511}
512
513#if defined(__cplusplus) || defined(c_plusplus)
514} /* extern "C" */
515#endif
516