1 | /* |
2 | * Copyright (c) Meta Platforms, Inc. and affiliates. |
3 | * All rights reserved. |
4 | * |
5 | * This source code is licensed under both the BSD-style license (found in the |
6 | * LICENSE file in the root directory of this source tree) and the GPLv2 (found |
7 | * in the COPYING file in the root directory of this source tree). |
8 | * You may select, at your option, one of the above-listed licenses. |
9 | */ |
10 | |
11 | /* zstd_ddict.c : |
12 | * concentrates all logic that needs to know the internals of ZSTD_DDict object */ |
13 | |
14 | /*-******************************************************* |
15 | * Dependencies |
16 | *********************************************************/ |
17 | #include "../common/allocations.h" /* ZSTD_customMalloc, ZSTD_customFree */ |
18 | #include "../common/zstd_deps.h" /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */ |
19 | #include "../common/cpu.h" /* bmi2 */ |
20 | #include "../common/mem.h" /* low level memory routines */ |
21 | #define FSE_STATIC_LINKING_ONLY |
22 | #include "../common/fse.h" |
23 | #include "../common/huf.h" |
24 | #include "zstd_decompress_internal.h" |
25 | #include "zstd_ddict.h" |
26 | |
27 | #if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT>=1) |
28 | # include "../legacy/zstd_legacy.h" |
29 | #endif |
30 | |
31 | |
32 | |
33 | /*-******************************************************* |
34 | * Types |
35 | *********************************************************/ |
36 | struct ZSTD_DDict_s { |
37 | void* dictBuffer; |
38 | const void* dictContent; |
39 | size_t dictSize; |
40 | ZSTD_entropyDTables_t entropy; |
41 | U32 dictID; |
42 | U32 entropyPresent; |
43 | ZSTD_customMem cMem; |
44 | }; /* typedef'd to ZSTD_DDict within "zstd.h" */ |
45 | |
46 | const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict) |
47 | { |
48 | assert(ddict != NULL); |
49 | return ddict->dictContent; |
50 | } |
51 | |
52 | size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict) |
53 | { |
54 | assert(ddict != NULL); |
55 | return ddict->dictSize; |
56 | } |
57 | |
58 | void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict) |
59 | { |
60 | DEBUGLOG(4, "ZSTD_copyDDictParameters" ); |
61 | assert(dctx != NULL); |
62 | assert(ddict != NULL); |
63 | dctx->dictID = ddict->dictID; |
64 | dctx->prefixStart = ddict->dictContent; |
65 | dctx->virtualStart = ddict->dictContent; |
66 | dctx->dictEnd = (const BYTE*)ddict->dictContent + ddict->dictSize; |
67 | dctx->previousDstEnd = dctx->dictEnd; |
68 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
69 | dctx->dictContentBeginForFuzzing = dctx->prefixStart; |
70 | dctx->dictContentEndForFuzzing = dctx->previousDstEnd; |
71 | #endif |
72 | if (ddict->entropyPresent) { |
73 | dctx->litEntropy = 1; |
74 | dctx->fseEntropy = 1; |
75 | dctx->LLTptr = ddict->entropy.LLTable; |
76 | dctx->MLTptr = ddict->entropy.MLTable; |
77 | dctx->OFTptr = ddict->entropy.OFTable; |
78 | dctx->HUFptr = ddict->entropy.hufTable; |
79 | dctx->entropy.rep[0] = ddict->entropy.rep[0]; |
80 | dctx->entropy.rep[1] = ddict->entropy.rep[1]; |
81 | dctx->entropy.rep[2] = ddict->entropy.rep[2]; |
82 | } else { |
83 | dctx->litEntropy = 0; |
84 | dctx->fseEntropy = 0; |
85 | } |
86 | } |
87 | |
88 | |
89 | static size_t |
90 | ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict, |
91 | ZSTD_dictContentType_e dictContentType) |
92 | { |
93 | ddict->dictID = 0; |
94 | ddict->entropyPresent = 0; |
95 | if (dictContentType == ZSTD_dct_rawContent) return 0; |
96 | |
97 | if (ddict->dictSize < 8) { |
98 | if (dictContentType == ZSTD_dct_fullDict) |
99 | return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ |
100 | return 0; /* pure content mode */ |
101 | } |
102 | { U32 const magic = MEM_readLE32(ddict->dictContent); |
103 | if (magic != ZSTD_MAGIC_DICTIONARY) { |
104 | if (dictContentType == ZSTD_dct_fullDict) |
105 | return ERROR(dictionary_corrupted); /* only accept specified dictionaries */ |
106 | return 0; /* pure content mode */ |
107 | } |
108 | } |
109 | ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE); |
110 | |
111 | /* load entropy tables */ |
112 | RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy( |
113 | &ddict->entropy, ddict->dictContent, ddict->dictSize)), |
114 | dictionary_corrupted, "" ); |
115 | ddict->entropyPresent = 1; |
116 | return 0; |
117 | } |
118 | |
119 | |
120 | static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict, |
121 | const void* dict, size_t dictSize, |
122 | ZSTD_dictLoadMethod_e dictLoadMethod, |
123 | ZSTD_dictContentType_e dictContentType) |
124 | { |
125 | if ((dictLoadMethod == ZSTD_dlm_byRef) || (!dict) || (!dictSize)) { |
126 | ddict->dictBuffer = NULL; |
127 | ddict->dictContent = dict; |
128 | if (!dict) dictSize = 0; |
129 | } else { |
130 | void* const internalBuffer = ZSTD_customMalloc(dictSize, ddict->cMem); |
131 | ddict->dictBuffer = internalBuffer; |
132 | ddict->dictContent = internalBuffer; |
133 | if (!internalBuffer) return ERROR(memory_allocation); |
134 | ZSTD_memcpy(internalBuffer, dict, dictSize); |
135 | } |
136 | ddict->dictSize = dictSize; |
137 | ddict->entropy.hufTable[0] = (HUF_DTable)((ZSTD_HUFFDTABLE_CAPACITY_LOG)*0x1000001); /* cover both little and big endian */ |
138 | |
139 | /* parse dictionary content */ |
140 | FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "" ); |
141 | |
142 | return 0; |
143 | } |
144 | |
145 | ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize, |
146 | ZSTD_dictLoadMethod_e dictLoadMethod, |
147 | ZSTD_dictContentType_e dictContentType, |
148 | ZSTD_customMem customMem) |
149 | { |
150 | if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL; |
151 | |
152 | { ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem); |
153 | if (ddict == NULL) return NULL; |
154 | ddict->cMem = customMem; |
155 | { size_t const initResult = ZSTD_initDDict_internal(ddict, |
156 | dict, dictSize, |
157 | dictLoadMethod, dictContentType); |
158 | if (ZSTD_isError(initResult)) { |
159 | ZSTD_freeDDict(ddict); |
160 | return NULL; |
161 | } } |
162 | return ddict; |
163 | } |
164 | } |
165 | |
166 | /*! ZSTD_createDDict() : |
167 | * Create a digested dictionary, to start decompression without startup delay. |
168 | * `dict` content is copied inside DDict. |
169 | * Consequently, `dict` can be released after `ZSTD_DDict` creation */ |
170 | ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize) |
171 | { |
172 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
173 | return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator); |
174 | } |
175 | |
176 | /*! ZSTD_createDDict_byReference() : |
177 | * Create a digested dictionary, to start decompression without startup delay. |
178 | * Dictionary content is simply referenced, it will be accessed during decompression. |
179 | * Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */ |
180 | ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize) |
181 | { |
182 | ZSTD_customMem const allocator = { NULL, NULL, NULL }; |
183 | return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator); |
184 | } |
185 | |
186 | |
187 | const ZSTD_DDict* ZSTD_initStaticDDict( |
188 | void* sBuffer, size_t sBufferSize, |
189 | const void* dict, size_t dictSize, |
190 | ZSTD_dictLoadMethod_e dictLoadMethod, |
191 | ZSTD_dictContentType_e dictContentType) |
192 | { |
193 | size_t const neededSpace = sizeof(ZSTD_DDict) |
194 | + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); |
195 | ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer; |
196 | assert(sBuffer != NULL); |
197 | assert(dict != NULL); |
198 | if ((size_t)sBuffer & 7) return NULL; /* 8-aligned */ |
199 | if (sBufferSize < neededSpace) return NULL; |
200 | if (dictLoadMethod == ZSTD_dlm_byCopy) { |
201 | ZSTD_memcpy(ddict+1, dict, dictSize); /* local copy */ |
202 | dict = ddict+1; |
203 | } |
204 | if (ZSTD_isError( ZSTD_initDDict_internal(ddict, |
205 | dict, dictSize, |
206 | ZSTD_dlm_byRef, dictContentType) )) |
207 | return NULL; |
208 | return ddict; |
209 | } |
210 | |
211 | |
212 | size_t ZSTD_freeDDict(ZSTD_DDict* ddict) |
213 | { |
214 | if (ddict==NULL) return 0; /* support free on NULL */ |
215 | { ZSTD_customMem const cMem = ddict->cMem; |
216 | ZSTD_customFree(ddict->dictBuffer, cMem); |
217 | ZSTD_customFree(ddict, cMem); |
218 | return 0; |
219 | } |
220 | } |
221 | |
222 | /*! ZSTD_estimateDDictSize() : |
223 | * Estimate amount of memory that will be needed to create a dictionary for decompression. |
224 | * Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */ |
225 | size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod) |
226 | { |
227 | return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize); |
228 | } |
229 | |
230 | size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict) |
231 | { |
232 | if (ddict==NULL) return 0; /* support sizeof on NULL */ |
233 | return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ; |
234 | } |
235 | |
236 | /*! ZSTD_getDictID_fromDDict() : |
237 | * Provides the dictID of the dictionary loaded into `ddict`. |
238 | * If @return == 0, the dictionary is not conformant to Zstandard specification, or empty. |
239 | * Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */ |
240 | unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict) |
241 | { |
242 | if (ddict==NULL) return 0; |
243 | return ddict->dictID; |
244 | } |
245 | |