1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under both the BSD-style license (found in the
6 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7 * in the COPYING file in the root directory of this source tree).
8 * You may select, at your option, one of the above-listed licenses.
9 */
10
11 /*-*************************************
12 * Dependencies
13 ***************************************/
14#include "zstd_compress_literals.h"
15
16
17/* **************************************************************
18* Debug Traces
19****************************************************************/
20#if DEBUGLEVEL >= 2
21
22static size_t showHexa(const void* src, size_t srcSize)
23{
24 const BYTE* const ip = (const BYTE*)src;
25 size_t u;
26 for (u=0; u<srcSize; u++) {
27 RAWLOG(5, " %02X", ip[u]); (void)ip;
28 }
29 RAWLOG(5, " \n");
30 return srcSize;
31}
32
33#endif
34
35
36/* **************************************************************
37* Literals compression - special cases
38****************************************************************/
39size_t ZSTD_noCompressLiterals (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
40{
41 BYTE* const ostart = (BYTE*)dst;
42 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
43
44 DEBUGLOG(5, "ZSTD_noCompressLiterals: srcSize=%zu, dstCapacity=%zu", srcSize, dstCapacity);
45
46 RETURN_ERROR_IF(srcSize + flSize > dstCapacity, dstSize_tooSmall, "");
47
48 switch(flSize)
49 {
50 case 1: /* 2 - 1 - 5 */
51 ostart[0] = (BYTE)((U32)set_basic + (srcSize<<3));
52 break;
53 case 2: /* 2 - 2 - 12 */
54 MEM_writeLE16(ostart, (U16)((U32)set_basic + (1<<2) + (srcSize<<4)));
55 break;
56 case 3: /* 2 - 2 - 20 */
57 MEM_writeLE32(ostart, (U32)((U32)set_basic + (3<<2) + (srcSize<<4)));
58 break;
59 default: /* not necessary : flSize is {1,2,3} */
60 assert(0);
61 }
62
63 ZSTD_memcpy(ostart + flSize, src, srcSize);
64 DEBUGLOG(5, "Raw (uncompressed) literals: %u -> %u", (U32)srcSize, (U32)(srcSize + flSize));
65 return srcSize + flSize;
66}
67
68static int allBytesIdentical(const void* src, size_t srcSize)
69{
70 assert(srcSize >= 1);
71 assert(src != NULL);
72 { const BYTE b = ((const BYTE*)src)[0];
73 size_t p;
74 for (p=1; p<srcSize; p++) {
75 if (((const BYTE*)src)[p] != b) return 0;
76 }
77 return 1;
78 }
79}
80
81size_t ZSTD_compressRleLiteralsBlock (void* dst, size_t dstCapacity, const void* src, size_t srcSize)
82{
83 BYTE* const ostart = (BYTE*)dst;
84 U32 const flSize = 1 + (srcSize>31) + (srcSize>4095);
85
86 assert(dstCapacity >= 4); (void)dstCapacity;
87 assert(allBytesIdentical(src, srcSize));
88
89 switch(flSize)
90 {
91 case 1: /* 2 - 1 - 5 */
92 ostart[0] = (BYTE)((U32)set_rle + (srcSize<<3));
93 break;
94 case 2: /* 2 - 2 - 12 */
95 MEM_writeLE16(ostart, (U16)((U32)set_rle + (1<<2) + (srcSize<<4)));
96 break;
97 case 3: /* 2 - 2 - 20 */
98 MEM_writeLE32(ostart, (U32)((U32)set_rle + (3<<2) + (srcSize<<4)));
99 break;
100 default: /* not necessary : flSize is {1,2,3} */
101 assert(0);
102 }
103
104 ostart[flSize] = *(const BYTE*)src;
105 DEBUGLOG(5, "RLE : Repeated Literal (%02X: %u times) -> %u bytes encoded", ((const BYTE*)src)[0], (U32)srcSize, (U32)flSize + 1);
106 return flSize+1;
107}
108
109/* ZSTD_minLiteralsToCompress() :
110 * returns minimal amount of literals
111 * for literal compression to even be attempted.
112 * Minimum is made tighter as compression strategy increases.
113 */
114static size_t
115ZSTD_minLiteralsToCompress(ZSTD_strategy strategy, HUF_repeat huf_repeat)
116{
117 assert((int)strategy >= 0);
118 assert((int)strategy <= 9);
119 /* btultra2 : min 8 bytes;
120 * then 2x larger for each successive compression strategy
121 * max threshold 64 bytes */
122 { int const shift = MIN(9-(int)strategy, 3);
123 size_t const mintc = (huf_repeat == HUF_repeat_valid) ? 6 : (size_t)8 << shift;
124 DEBUGLOG(7, "minLiteralsToCompress = %zu", mintc);
125 return mintc;
126 }
127}
128
129size_t ZSTD_compressLiterals (
130 void* dst, size_t dstCapacity,
131 const void* src, size_t srcSize,
132 void* entropyWorkspace, size_t entropyWorkspaceSize,
133 const ZSTD_hufCTables_t* prevHuf,
134 ZSTD_hufCTables_t* nextHuf,
135 ZSTD_strategy strategy,
136 int disableLiteralCompression,
137 int suspectUncompressible,
138 int bmi2)
139{
140 size_t const lhSize = 3 + (srcSize >= 1 KB) + (srcSize >= 16 KB);
141 BYTE* const ostart = (BYTE*)dst;
142 U32 singleStream = srcSize < 256;
143 symbolEncodingType_e hType = set_compressed;
144 size_t cLitSize;
145
146 DEBUGLOG(5,"ZSTD_compressLiterals (disableLiteralCompression=%i, srcSize=%u, dstCapacity=%zu)",
147 disableLiteralCompression, (U32)srcSize, dstCapacity);
148
149 DEBUGLOG(6, "Completed literals listing (%zu bytes)", showHexa(src, srcSize));
150
151 /* Prepare nextEntropy assuming reusing the existing table */
152 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
153
154 if (disableLiteralCompression)
155 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
156
157 /* if too small, don't even attempt compression (speed opt) */
158 if (srcSize < ZSTD_minLiteralsToCompress(strategy, prevHuf->repeatMode))
159 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
160
161 RETURN_ERROR_IF(dstCapacity < lhSize+1, dstSize_tooSmall, "not enough space for compression");
162 { HUF_repeat repeat = prevHuf->repeatMode;
163 int const flags = 0
164 | (bmi2 ? HUF_flags_bmi2 : 0)
165 | (strategy < ZSTD_lazy && srcSize <= 1024 ? HUF_flags_preferRepeat : 0)
166 | (strategy >= HUF_OPTIMAL_DEPTH_THRESHOLD ? HUF_flags_optimalDepth : 0)
167 | (suspectUncompressible ? HUF_flags_suspectUncompressible : 0);
168
169 typedef size_t (*huf_compress_f)(void*, size_t, const void*, size_t, unsigned, unsigned, void*, size_t, HUF_CElt*, HUF_repeat*, int);
170 huf_compress_f huf_compress;
171 if (repeat == HUF_repeat_valid && lhSize == 3) singleStream = 1;
172 huf_compress = singleStream ? HUF_compress1X_repeat : HUF_compress4X_repeat;
173 cLitSize = huf_compress(ostart+lhSize, dstCapacity-lhSize,
174 src, srcSize,
175 HUF_SYMBOLVALUE_MAX, LitHufLog,
176 entropyWorkspace, entropyWorkspaceSize,
177 (HUF_CElt*)nextHuf->CTable,
178 &repeat, flags);
179 DEBUGLOG(5, "%zu literals compressed into %zu bytes (before header)", srcSize, cLitSize);
180 if (repeat != HUF_repeat_none) {
181 /* reused the existing table */
182 DEBUGLOG(5, "reusing statistics from previous huffman block");
183 hType = set_repeat;
184 }
185 }
186
187 { size_t const minGain = ZSTD_minGain(srcSize, strategy);
188 if ((cLitSize==0) || (cLitSize >= srcSize - minGain) || ERR_isError(cLitSize)) {
189 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
190 return ZSTD_noCompressLiterals(dst, dstCapacity, src, srcSize);
191 } }
192 if (cLitSize==1) {
193 /* A return value of 1 signals that the alphabet consists of a single symbol.
194 * However, in some rare circumstances, it could be the compressed size (a single byte).
195 * For that outcome to have a chance to happen, it's necessary that `srcSize < 8`.
196 * (it's also necessary to not generate statistics).
197 * Therefore, in such a case, actively check that all bytes are identical. */
198 if ((srcSize >= 8) || allBytesIdentical(src, srcSize)) {
199 ZSTD_memcpy(nextHuf, prevHuf, sizeof(*prevHuf));
200 return ZSTD_compressRleLiteralsBlock(dst, dstCapacity, src, srcSize);
201 } }
202
203 if (hType == set_compressed) {
204 /* using a newly constructed table */
205 nextHuf->repeatMode = HUF_repeat_check;
206 }
207
208 /* Build header */
209 switch(lhSize)
210 {
211 case 3: /* 2 - 2 - 10 - 10 */
212 if (!singleStream) assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
213 { U32 const lhc = hType + ((U32)(!singleStream) << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<14);
214 MEM_writeLE24(ostart, lhc);
215 break;
216 }
217 case 4: /* 2 - 2 - 14 - 14 */
218 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
219 { U32 const lhc = hType + (2 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<18);
220 MEM_writeLE32(ostart, lhc);
221 break;
222 }
223 case 5: /* 2 - 2 - 18 - 18 */
224 assert(srcSize >= MIN_LITERALS_FOR_4_STREAMS);
225 { U32 const lhc = hType + (3 << 2) + ((U32)srcSize<<4) + ((U32)cLitSize<<22);
226 MEM_writeLE32(ostart, lhc);
227 ostart[4] = (BYTE)(cLitSize >> 10);
228 break;
229 }
230 default: /* not possible : lhSize is {3,4,5} */
231 assert(0);
232 }
233 DEBUGLOG(5, "Compressed literals: %u -> %u", (U32)srcSize, (U32)(lhSize+cLitSize));
234 return lhSize+cLitSize;
235}
236