| 1 | /* |
| 2 | lz4opt.h - Optimal Mode of LZ4 |
| 3 | Copyright (C) 2015-2017, Przemyslaw Skibinski <inikep@gmail.com> |
| 4 | Note : this file is intended to be included within lz4hc.c |
| 5 | |
| 6 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) |
| 7 | |
| 8 | Redistribution and use in source and binary forms, with or without |
| 9 | modification, are permitted provided that the following conditions are |
| 10 | met: |
| 11 | |
| 12 | * Redistributions of source code must retain the above copyright |
| 13 | notice, this list of conditions and the following disclaimer. |
| 14 | * Redistributions in binary form must reproduce the above |
| 15 | copyright notice, this list of conditions and the following disclaimer |
| 16 | in the documentation and/or other materials provided with the |
| 17 | distribution. |
| 18 | |
| 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | |
| 31 | You can contact the author at : |
| 32 | - LZ4 source repository : https://github.com/lz4/lz4 |
| 33 | - LZ4 public forum : https://groups.google.com/forum/#!forum/lz4c |
| 34 | */ |
| 35 | |
| 36 | #define LZ4_OPT_NUM (1<<12) |
| 37 | |
| 38 | typedef struct { |
| 39 | int price; |
| 40 | int off; |
| 41 | int mlen; |
| 42 | int litlen; |
| 43 | } LZ4HC_optimal_t; |
| 44 | |
| 45 | |
| 46 | /* price in bytes */ |
| 47 | LZ4_FORCE_INLINE int LZ4HC_literalsPrice(int const litlen) |
| 48 | { |
| 49 | int price = litlen; |
| 50 | if (litlen >= (int)RUN_MASK) |
| 51 | price += 1 + (litlen-RUN_MASK)/255; |
| 52 | return price; |
| 53 | } |
| 54 | |
| 55 | |
| 56 | /* requires mlen >= MINMATCH */ |
| 57 | LZ4_FORCE_INLINE int LZ4HC_sequencePrice(int litlen, int mlen) |
| 58 | { |
| 59 | int price = 1 + 2 ; /* token + 16-bit offset */ |
| 60 | |
| 61 | price += LZ4HC_literalsPrice(litlen); |
| 62 | |
| 63 | if (mlen >= (int)(ML_MASK+MINMATCH)) |
| 64 | price += 1 + (mlen-(ML_MASK+MINMATCH))/255; |
| 65 | |
| 66 | return price; |
| 67 | } |
| 68 | |
| 69 | |
| 70 | /*-************************************* |
| 71 | * Match finder |
| 72 | ***************************************/ |
| 73 | typedef struct { |
| 74 | int off; |
| 75 | int len; |
| 76 | } LZ4HC_match_t; |
| 77 | |
| 78 | LZ4_FORCE_INLINE |
| 79 | LZ4HC_match_t LZ4HC_FindLongerMatch(LZ4HC_CCtx_internal* const ctx, |
| 80 | const BYTE* ip, const BYTE* const iHighLimit, |
| 81 | int minLen, int nbSearches) |
| 82 | { |
| 83 | LZ4HC_match_t match = { 0 , 0 }; |
| 84 | const BYTE* matchPtr = NULL; |
| 85 | /* note : LZ4HC_InsertAndGetWiderMatch() is able to modify the starting position of a match (*startpos), |
| 86 | * but this won't be the case here, as we define iLowLimit==ip, |
| 87 | * so LZ4HC_InsertAndGetWiderMatch() won't be allowed to search past ip */ |
| 88 | int const matchLength = LZ4HC_InsertAndGetWiderMatch(ctx, ip, ip, iHighLimit, minLen, &matchPtr, &ip, nbSearches); |
| 89 | if (matchLength <= minLen) return match; |
| 90 | match.len = matchLength; |
| 91 | match.off = (int)(ip-matchPtr); |
| 92 | return match; |
| 93 | } |
| 94 | |
| 95 | |
| 96 | static int LZ4HC_compress_optimal ( |
| 97 | LZ4HC_CCtx_internal* ctx, |
| 98 | const char* const source, |
| 99 | char* dst, |
| 100 | int inputSize, |
| 101 | int dstCapacity, |
| 102 | limitedOutput_directive limit, |
| 103 | int const nbSearches, |
| 104 | size_t sufficient_len, |
| 105 | int const fullUpdate |
| 106 | ) |
| 107 | { |
| 108 | #define TRAILING_LITERALS 3 |
| 109 | LZ4HC_optimal_t opt[LZ4_OPT_NUM + TRAILING_LITERALS]; /* this uses a bit too much stack memory to my taste ... */ |
| 110 | |
| 111 | const BYTE* ip = (const BYTE*) source; |
| 112 | const BYTE* anchor = ip; |
| 113 | const BYTE* const iend = ip + inputSize; |
| 114 | const BYTE* const mflimit = iend - MFLIMIT; |
| 115 | const BYTE* const matchlimit = iend - LASTLITERALS; |
| 116 | BYTE* op = (BYTE*) dst; |
| 117 | BYTE* const oend = op + dstCapacity; |
| 118 | |
| 119 | /* init */ |
| 120 | DEBUGLOG(5, "LZ4HC_compress_optimal" ); |
| 121 | if (sufficient_len >= LZ4_OPT_NUM) sufficient_len = LZ4_OPT_NUM-1; |
| 122 | |
| 123 | /* Main Loop */ |
| 124 | assert(ip - anchor < LZ4_MAX_INPUT_SIZE); |
| 125 | while (ip < mflimit) { |
| 126 | int const llen = (int)(ip - anchor); |
| 127 | int best_mlen, best_off; |
| 128 | int cur, last_match_pos = 0; |
| 129 | |
| 130 | LZ4HC_match_t const firstMatch = LZ4HC_FindLongerMatch(ctx, ip, matchlimit, MINMATCH-1, nbSearches); |
| 131 | if (firstMatch.len==0) { ip++; continue; } |
| 132 | |
| 133 | if ((size_t)firstMatch.len > sufficient_len) { |
| 134 | /* good enough solution : immediate encoding */ |
| 135 | int const firstML = firstMatch.len; |
| 136 | const BYTE* const matchPos = ip - firstMatch.off; |
| 137 | if ( LZ4HC_encodeSequence(&ip, &op, &anchor, firstML, matchPos, limit, oend) ) /* updates ip, op and anchor */ |
| 138 | return 0; /* error */ |
| 139 | continue; |
| 140 | } |
| 141 | |
| 142 | /* set prices for first positions (literals) */ |
| 143 | { int rPos; |
| 144 | for (rPos = 0 ; rPos < MINMATCH ; rPos++) { |
| 145 | int const cost = LZ4HC_literalsPrice(llen + rPos); |
| 146 | opt[rPos].mlen = 1; |
| 147 | opt[rPos].off = 0; |
| 148 | opt[rPos].litlen = llen + rPos; |
| 149 | opt[rPos].price = cost; |
| 150 | DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup" , |
| 151 | rPos, cost, opt[rPos].litlen); |
| 152 | } } |
| 153 | /* set prices using initial match */ |
| 154 | { int mlen = MINMATCH; |
| 155 | int const matchML = firstMatch.len; /* necessarily < sufficient_len < LZ4_OPT_NUM */ |
| 156 | int const offset = firstMatch.off; |
| 157 | assert(matchML < LZ4_OPT_NUM); |
| 158 | for ( ; mlen <= matchML ; mlen++) { |
| 159 | int const cost = LZ4HC_sequencePrice(llen, mlen); |
| 160 | opt[mlen].mlen = mlen; |
| 161 | opt[mlen].off = offset; |
| 162 | opt[mlen].litlen = llen; |
| 163 | opt[mlen].price = cost; |
| 164 | DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i) -- initial setup" , |
| 165 | mlen, cost, mlen); |
| 166 | } } |
| 167 | last_match_pos = firstMatch.len; |
| 168 | { int addLit; |
| 169 | for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { |
| 170 | opt[last_match_pos+addLit].mlen = 1; /* literal */ |
| 171 | opt[last_match_pos+addLit].off = 0; |
| 172 | opt[last_match_pos+addLit].litlen = addLit; |
| 173 | opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); |
| 174 | DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i) -- initial setup" , |
| 175 | last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); |
| 176 | } } |
| 177 | |
| 178 | /* check further positions */ |
| 179 | for (cur = 1; cur < last_match_pos; cur++) { |
| 180 | const BYTE* const curPtr = ip + cur; |
| 181 | LZ4HC_match_t newMatch; |
| 182 | |
| 183 | if (curPtr >= mflimit) break; |
| 184 | DEBUGLOG(7, "rPos:%u[%u] vs [%u]%u" , |
| 185 | cur, opt[cur].price, opt[cur+1].price, cur+1); |
| 186 | if (fullUpdate) { |
| 187 | /* not useful to search here if next position has same (or lower) cost */ |
| 188 | if ( (opt[cur+1].price <= opt[cur].price) |
| 189 | /* in some cases, next position has same cost, but cost rises sharply after, so a small match would still be beneficial */ |
| 190 | && (opt[cur+MINMATCH].price < opt[cur].price + 3/*min seq price*/) ) |
| 191 | continue; |
| 192 | } else { |
| 193 | /* not useful to search here if next position has same (or lower) cost */ |
| 194 | if (opt[cur+1].price <= opt[cur].price) continue; |
| 195 | } |
| 196 | |
| 197 | DEBUGLOG(7, "search at rPos:%u" , cur); |
| 198 | if (fullUpdate) |
| 199 | newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, MINMATCH-1, nbSearches); |
| 200 | else |
| 201 | /* only test matches of minimum length; slightly faster, but misses a few bytes */ |
| 202 | newMatch = LZ4HC_FindLongerMatch(ctx, curPtr, matchlimit, last_match_pos - cur, nbSearches); |
| 203 | if (!newMatch.len) continue; |
| 204 | |
| 205 | if ( ((size_t)newMatch.len > sufficient_len) |
| 206 | || (newMatch.len + cur >= LZ4_OPT_NUM) ) { |
| 207 | /* immediate encoding */ |
| 208 | best_mlen = newMatch.len; |
| 209 | best_off = newMatch.off; |
| 210 | last_match_pos = cur + 1; |
| 211 | goto encode; |
| 212 | } |
| 213 | |
| 214 | /* before match : set price with literals at beginning */ |
| 215 | { int const baseLitlen = opt[cur].litlen; |
| 216 | int litlen; |
| 217 | for (litlen = 1; litlen < MINMATCH; litlen++) { |
| 218 | int const price = opt[cur].price - LZ4HC_literalsPrice(baseLitlen) + LZ4HC_literalsPrice(baseLitlen+litlen); |
| 219 | int const pos = cur + litlen; |
| 220 | if (price < opt[pos].price) { |
| 221 | opt[pos].mlen = 1; /* literal */ |
| 222 | opt[pos].off = 0; |
| 223 | opt[pos].litlen = baseLitlen+litlen; |
| 224 | opt[pos].price = price; |
| 225 | DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)" , |
| 226 | pos, price, opt[pos].litlen); |
| 227 | } } } |
| 228 | |
| 229 | /* set prices using match at position = cur */ |
| 230 | { int const matchML = newMatch.len; |
| 231 | int ml = MINMATCH; |
| 232 | |
| 233 | assert(cur + newMatch.len < LZ4_OPT_NUM); |
| 234 | for ( ; ml <= matchML ; ml++) { |
| 235 | int const pos = cur + ml; |
| 236 | int const offset = newMatch.off; |
| 237 | int price; |
| 238 | int ll; |
| 239 | DEBUGLOG(7, "testing price rPos %i (last_match_pos=%i)" , |
| 240 | pos, last_match_pos); |
| 241 | if (opt[cur].mlen == 1) { |
| 242 | ll = opt[cur].litlen; |
| 243 | price = ((cur > ll) ? opt[cur - ll].price : 0) |
| 244 | + LZ4HC_sequencePrice(ll, ml); |
| 245 | } else { |
| 246 | ll = 0; |
| 247 | price = opt[cur].price + LZ4HC_sequencePrice(0, ml); |
| 248 | } |
| 249 | |
| 250 | if (pos > last_match_pos+TRAILING_LITERALS || price <= opt[pos].price) { |
| 251 | DEBUGLOG(7, "rPos:%3i => price:%3i (matchlen=%i)" , |
| 252 | pos, price, ml); |
| 253 | assert(pos < LZ4_OPT_NUM); |
| 254 | if ( (ml == matchML) /* last pos of last match */ |
| 255 | && (last_match_pos < pos) ) |
| 256 | last_match_pos = pos; |
| 257 | opt[pos].mlen = ml; |
| 258 | opt[pos].off = offset; |
| 259 | opt[pos].litlen = ll; |
| 260 | opt[pos].price = price; |
| 261 | } } } |
| 262 | /* complete following positions with literals */ |
| 263 | { int addLit; |
| 264 | for (addLit = 1; addLit <= TRAILING_LITERALS; addLit ++) { |
| 265 | opt[last_match_pos+addLit].mlen = 1; /* literal */ |
| 266 | opt[last_match_pos+addLit].off = 0; |
| 267 | opt[last_match_pos+addLit].litlen = addLit; |
| 268 | opt[last_match_pos+addLit].price = opt[last_match_pos].price + LZ4HC_literalsPrice(addLit); |
| 269 | DEBUGLOG(7, "rPos:%3i => price:%3i (litlen=%i)" , last_match_pos+addLit, opt[last_match_pos+addLit].price, addLit); |
| 270 | } } |
| 271 | } /* for (cur = 1; cur <= last_match_pos; cur++) */ |
| 272 | |
| 273 | best_mlen = opt[last_match_pos].mlen; |
| 274 | best_off = opt[last_match_pos].off; |
| 275 | cur = last_match_pos - best_mlen; |
| 276 | |
| 277 | encode: /* cur, last_match_pos, best_mlen, best_off must be set */ |
| 278 | assert(cur < LZ4_OPT_NUM); |
| 279 | assert(last_match_pos >= 1); /* == 1 when only one candidate */ |
| 280 | DEBUGLOG(6, "reverse traversal, looking for shortest path" ) |
| 281 | DEBUGLOG(6, "last_match_pos = %i" , last_match_pos); |
| 282 | { int candidate_pos = cur; |
| 283 | int selected_matchLength = best_mlen; |
| 284 | int selected_offset = best_off; |
| 285 | while (1) { /* from end to beginning */ |
| 286 | int const next_matchLength = opt[candidate_pos].mlen; /* can be 1, means literal */ |
| 287 | int const next_offset = opt[candidate_pos].off; |
| 288 | DEBUGLOG(6, "pos %i: sequence length %i" , candidate_pos, selected_matchLength); |
| 289 | opt[candidate_pos].mlen = selected_matchLength; |
| 290 | opt[candidate_pos].off = selected_offset; |
| 291 | selected_matchLength = next_matchLength; |
| 292 | selected_offset = next_offset; |
| 293 | if (next_matchLength > candidate_pos) break; /* last match elected, first match to encode */ |
| 294 | assert(next_matchLength > 0); /* can be 1, means literal */ |
| 295 | candidate_pos -= next_matchLength; |
| 296 | } } |
| 297 | |
| 298 | /* encode all recorded sequences in order */ |
| 299 | { int rPos = 0; /* relative position (to ip) */ |
| 300 | while (rPos < last_match_pos) { |
| 301 | int const ml = opt[rPos].mlen; |
| 302 | int const offset = opt[rPos].off; |
| 303 | if (ml == 1) { ip++; rPos++; continue; } /* literal; note: can end up with several literals, in which case, skip them */ |
| 304 | rPos += ml; |
| 305 | assert(ml >= MINMATCH); |
| 306 | assert((offset >= 1) && (offset <= MAX_DISTANCE)); |
| 307 | if ( LZ4HC_encodeSequence(&ip, &op, &anchor, ml, ip - offset, limit, oend) ) /* updates ip, op and anchor */ |
| 308 | return 0; /* error */ |
| 309 | } } |
| 310 | } /* while (ip < mflimit) */ |
| 311 | |
| 312 | /* Encode Last Literals */ |
| 313 | { int lastRun = (int)(iend - anchor); |
| 314 | if ( (limit) |
| 315 | && (((char*)op - dst) + lastRun + 1 + ((lastRun+255-RUN_MASK)/255) > (U32)dstCapacity)) |
| 316 | return 0; /* Check output limit */ |
| 317 | if (lastRun >= (int)RUN_MASK) { |
| 318 | *op++=(RUN_MASK<<ML_BITS); |
| 319 | lastRun-=RUN_MASK; |
| 320 | for (; lastRun > 254 ; lastRun-=255) *op++ = 255; |
| 321 | *op++ = (BYTE) lastRun; |
| 322 | } else *op++ = (BYTE)(lastRun<<ML_BITS); |
| 323 | memcpy(op, anchor, iend - anchor); |
| 324 | op += iend-anchor; |
| 325 | } |
| 326 | |
| 327 | /* End */ |
| 328 | return (int) ((char*)op-dst); |
| 329 | } |
| 330 | |