| 1 | // SPDX-License-Identifier: MIT OR MPL-2.0 OR LGPL-2.1-or-later OR GPL-2.0-or-later |
| 2 | // Copyright 2015, SIL International, All rights reserved. |
| 3 | |
| 4 | #include <cassert> |
| 5 | |
| 6 | #include "inc/Decompressor.h" |
| 7 | #include "inc/Compression.h" |
| 8 | |
| 9 | using namespace lz4; |
| 10 | |
| 11 | namespace { |
| 12 | |
| 13 | inline |
| 14 | u32 read_literal(u8 const * &s, u8 const * const e, u32 l) { |
| 15 | if (l == 15 && s != e) |
| 16 | { |
| 17 | u8 b = 0; |
| 18 | do { l += b = *s++; } while(b==0xff && s != e); |
| 19 | } |
| 20 | return l; |
| 21 | } |
| 22 | |
| 23 | bool read_sequence(u8 const * &src, u8 const * const end, u8 const * &literal, |
| 24 | u32 & literal_len, u32 & match_len, u32 & match_dist) |
| 25 | { |
| 26 | u8 const token = *src++; |
| 27 | |
| 28 | literal_len = read_literal(src, end, token >> 4); |
| 29 | literal = src; |
| 30 | src += literal_len; |
| 31 | |
| 32 | // Normal exit for end of stream, wrap arround check and parital match check. |
| 33 | if (src > end - sizeof(u16) || src < literal) |
| 34 | return false; |
| 35 | |
| 36 | match_dist = *src++; |
| 37 | match_dist |= *src++ << 8; |
| 38 | match_len = read_literal(src, end, token & 0xf) + MINMATCH; |
| 39 | |
| 40 | // Malformed stream check. |
| 41 | return src <= end-MINCODA; |
| 42 | } |
| 43 | |
| 44 | } |
| 45 | |
| 46 | int lz4::decompress(void const *in, size_t in_size, void *out, size_t out_size) |
| 47 | { |
| 48 | if (out_size <= in_size || in_size < MINSRCSIZE) |
| 49 | return -1; |
| 50 | |
| 51 | u8 const * src = static_cast<u8 const *>(in), |
| 52 | * literal = 0, |
| 53 | * const src_end = src + in_size; |
| 54 | |
| 55 | u8 * dst = static_cast<u8*>(out), |
| 56 | * const dst_end = dst + out_size; |
| 57 | |
| 58 | // Check the in and out size hasn't wrapped around. |
| 59 | if (src >= src_end || dst >= dst_end) |
| 60 | return -1; |
| 61 | |
| 62 | u32 literal_len = 0, |
| 63 | match_len = 0, |
| 64 | match_dist = 0; |
| 65 | |
| 66 | while (read_sequence(src, src_end, literal, literal_len, match_len, |
| 67 | match_dist)) |
| 68 | { |
| 69 | if (literal_len != 0) |
| 70 | { |
| 71 | // Copy in literal. At this point the a minimal literal + minminal |
| 72 | // match plus the coda (1 + 2 + 5) must be 8 bytes or more allowing |
| 73 | // us to remain within the src buffer for an overrun_copy on |
| 74 | // machines upto 64 bits. |
| 75 | if (align(literal_len) > out_size) |
| 76 | return -1; |
| 77 | dst = overrun_copy(dst, literal, literal_len); |
| 78 | out_size -= literal_len; |
| 79 | } |
| 80 | |
| 81 | // Copy, possibly repeating, match from earlier in the |
| 82 | // decoded output. |
| 83 | u8 const * const pcpy = dst - match_dist; |
| 84 | if (pcpy < static_cast<u8*>(out) |
| 85 | || match_len > unsigned(out_size - LASTLITERALS) |
| 86 | // Wrap around checks: |
| 87 | || out_size < LASTLITERALS || pcpy >= dst) |
| 88 | return -1; |
| 89 | if (dst > pcpy+sizeof(unsigned long) |
| 90 | && align(match_len) <= out_size) |
| 91 | dst = overrun_copy(dst, pcpy, match_len); |
| 92 | else |
| 93 | dst = safe_copy(dst, pcpy, match_len); |
| 94 | out_size -= match_len; |
| 95 | } |
| 96 | |
| 97 | if (literal > src_end - literal_len || literal_len > out_size) |
| 98 | return -1; |
| 99 | dst = fast_copy(dst, literal, literal_len); |
| 100 | |
| 101 | return int(dst - (u8*)out); |
| 102 | } |
| 103 | |