1/**
2 * \file lzma/container.h
3 * \brief File formats
4 */
5
6/*
7 * Author: Lasse Collin
8 *
9 * This file has been put into the public domain.
10 * You can do whatever you want with this file.
11 *
12 * See ../lzma.h for information about liblzma as a whole.
13 */
14
15#ifndef LZMA_H_INTERNAL
16# error Never include this file directly. Use <lzma.h> instead.
17#endif
18
19
20/************
21 * Encoding *
22 ************/
23
24/**
25 * \brief Default compression preset
26 *
27 * It's not straightforward to recommend a default preset, because in some
28 * cases keeping the resource usage relatively low is more important than
29 * getting the maximum compression ratio.
30 */
31#define LZMA_PRESET_DEFAULT UINT32_C(6)
32
33
34/**
35 * \brief Mask for preset level
36 *
37 * This is useful only if you need to extract the level from the preset
38 * variable (preset & LZMA_PRESET_LEVEL_MASK). That should be rare.
39 */
40#define LZMA_PRESET_LEVEL_MASK UINT32_C(0x1F)
41
42
43/*
44 * Preset flags
45 *
46 * Currently only one flag is defined.
47 */
48
49/**
50 * \brief Extreme compression preset
51 *
52 * This flag modifies the preset to make the encoding significantly slower
53 * while improving the compression ratio only marginally. This is useful
54 * when you don't mind wasting time to get as small a result as possible.
55 *
56 * This flag doesn't affect the memory usage requirements of the decoder (at
57 * least not significantly). The memory usage of the encoder may be increased
58 * a little but only at the lowest preset levels (0-3).
59 */
60#define LZMA_PRESET_EXTREME (UINT32_C(1) << 31)
61
62
63/**
64 * \brief Multithreading options
65 */
66typedef struct {
67 /**
68 * \brief Flags
69 *
70 * Set this to zero if no flags are wanted.
71 *
72 * No flags are currently supported.
73 */
74 uint32_t flags;
75
76 /**
77 * \brief Number of worker threads to use
78 */
79 uint32_t threads;
80
81 /**
82 * \brief Maximum uncompressed size of a Block
83 *
84 * The encoder will start a new .xz Block every block_size bytes.
85 * Using LZMA_FULL_FLUSH or LZMA_FULL_BARRIER with lzma_code()
86 * the caller may tell liblzma to start a new Block earlier.
87 *
88 * With LZMA2, a recommended block size is 2-4 times the LZMA2
89 * dictionary size. With very small dictionaries, it is recommended
90 * to use at least 1 MiB block size for good compression ratio, even
91 * if this is more than four times the dictionary size. Note that
92 * these are only recommendations for typical use cases; feel free
93 * to use other values. Just keep in mind that using a block size
94 * less than the LZMA2 dictionary size is a waste of RAM.
95 *
96 * Set this to 0 to let liblzma choose the block size depending
97 * on the compression options. For LZMA2 it will be 3*dict_size
98 * or 1 MiB, whichever is more.
99 *
100 * For each thread, about 3 * block_size bytes of memory will be
101 * allocated. This may change in later liblzma versions. If so,
102 * the memory usage will probably be reduced, not increased.
103 */
104 uint64_t block_size;
105
106 /**
107 * \brief Timeout to allow lzma_code() to return early
108 *
109 * Multithreading can make liblzma consume input and produce
110 * output in a very bursty way: it may first read a lot of input
111 * to fill internal buffers, then no input or output occurs for
112 * a while.
113 *
114 * In single-threaded mode, lzma_code() won't return until it has
115 * either consumed all the input or filled the output buffer. If
116 * this is done in multithreaded mode, it may cause a call to
117 * lzma_code() to take even tens of seconds, which isn't acceptable
118 * in all applications.
119 *
120 * To avoid very long blocking times in lzma_code(), a timeout
121 * (in milliseconds) may be set here. If lzma_code() would block
122 * longer than this number of milliseconds, it will return with
123 * LZMA_OK. Reasonable values are 100 ms or more. The xz command
124 * line tool uses 300 ms.
125 *
126 * If long blocking times are fine for you, set timeout to a special
127 * value of 0, which will disable the timeout mechanism and will make
128 * lzma_code() block until all the input is consumed or the output
129 * buffer has been filled.
130 *
131 * \note Even with a timeout, lzma_code() might sometimes take
132 * a somewhat long time to return. No timing guarantees
133 * are made.
134 */
135 uint32_t timeout;
136
137 /**
138 * \brief Compression preset (level and possible flags)
139 *
140 * The preset is set just like with lzma_easy_encoder().
141 * The preset is ignored if filters below is non-NULL.
142 */
143 uint32_t preset;
144
145 /**
146 * \brief Filter chain (alternative to a preset)
147 *
148 * If this is NULL, the preset above is used. Otherwise the preset
149 * is ignored and the filter chain specified here is used.
150 */
151 const lzma_filter *filters;
152
153 /**
154 * \brief Integrity check type
155 *
156 * See check.h for available checks. The xz command line tool
157 * defaults to LZMA_CHECK_CRC64, which is a good choice if you
158 * are unsure.
159 */
160 lzma_check check;
161
162 /*
163 * Reserved space to allow possible future extensions without
164 * breaking the ABI. You should not touch these, because the names
165 * of these variables may change. These are not used, and never will
166 * be, with the currently supported options, so it is safe to leave
167 * these uninitialized.
168 */
169 lzma_reserved_enum reserved_enum1;
170 lzma_reserved_enum reserved_enum2;
171 lzma_reserved_enum reserved_enum3;
172 uint32_t reserved_int1;
173 uint32_t reserved_int2;
174 uint32_t reserved_int3;
175 uint32_t reserved_int4;
176 uint64_t reserved_int5;
177 uint64_t reserved_int6;
178 uint64_t reserved_int7;
179 uint64_t reserved_int8;
180 void *reserved_ptr1;
181 void *reserved_ptr2;
182 void *reserved_ptr3;
183 void *reserved_ptr4;
184
185} lzma_mt;
186
187
188/**
189 * \brief Calculate approximate memory usage of easy encoder
190 *
191 * This function is a wrapper for lzma_raw_encoder_memusage().
192 *
193 * \param preset Compression preset (level and possible flags)
194 *
195 * \return Number of bytes of memory required for the given
196 * preset when encoding. If an error occurs, for example
197 * due to an unsupported preset, UINT64_MAX is returned.
198 */
199extern LZMA_API(uint64_t) lzma_easy_encoder_memusage(uint32_t preset)
200 lzma_nothrow lzma_attr_pure;
201
202
203/**
204 * \brief Calculate approximate decoder memory usage of a preset
205 *
206 * This function is a wrapper for lzma_raw_decoder_memusage().
207 *
208 * \param preset Compression preset (level and possible flags)
209 *
210 * \return Number of bytes of memory required to decompress a file
211 * that was compressed using the given preset. If an error
212 * occurs, for example due to an unsupported preset, UINT64_MAX
213 * is returned.
214 */
215extern LZMA_API(uint64_t) lzma_easy_decoder_memusage(uint32_t preset)
216 lzma_nothrow lzma_attr_pure;
217
218
219/**
220 * \brief Initialize .xz Stream encoder using a preset number
221 *
222 * This function is intended for those who just want to use the basic features
223 * of liblzma (that is, most developers out there).
224 *
225 * \param strm Pointer to lzma_stream that is at least initialized
226 * with LZMA_STREAM_INIT.
227 * \param preset Compression preset to use. A preset consists of a level
228 * number and zero or more flags. Usually flags aren't
229 * used, so preset is simply a number [0, 9] which matches
230 * the options -0 ... -9 of the xz command line tool.
231 * Additional flags can be set using bitwise-or with
232 * the preset level number, e.g. 6 | LZMA_PRESET_EXTREME.
233 * \param check Integrity check type to use. See check.h for available
234 * checks. The xz command line tool defaults to
235 * LZMA_CHECK_CRC64, which is a good choice if you are
236 * unsure. LZMA_CHECK_CRC32 is good too as long as the
237 * uncompressed file is not many gigabytes.
238 *
239 * \return - LZMA_OK: Initialization succeeded. Use lzma_code() to
240 * encode your data.
241 * - LZMA_MEM_ERROR: Memory allocation failed.
242 * - LZMA_OPTIONS_ERROR: The given compression preset is not
243 * supported by this build of liblzma.
244 * - LZMA_UNSUPPORTED_CHECK: The given check type is not
245 * supported by this liblzma build.
246 * - LZMA_PROG_ERROR: One or more of the parameters have values
247 * that will never be valid. For example, strm == NULL.
248 *
249 * If initialization fails (return value is not LZMA_OK), all the memory
250 * allocated for *strm by liblzma is always freed. Thus, there is no need
251 * to call lzma_end() after failed initialization.
252 *
253 * If initialization succeeds, use lzma_code() to do the actual encoding.
254 * Valid values for `action' (the second argument of lzma_code()) are
255 * LZMA_RUN, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, and LZMA_FINISH. In the future,
256 * there may be compression levels or flags that don't support LZMA_SYNC_FLUSH.
257 */
258extern LZMA_API(lzma_ret) lzma_easy_encoder(
259 lzma_stream *strm, uint32_t preset, lzma_check check)
260 lzma_nothrow lzma_attr_warn_unused_result;
261
262
263/**
264 * \brief Single-call .xz Stream encoding using a preset number
265 *
266 * The maximum required output buffer size can be calculated with
267 * lzma_stream_buffer_bound().
268 *
269 * \param preset Compression preset to use. See the description
270 * in lzma_easy_encoder().
271 * \param check Type of the integrity check to calculate from
272 * uncompressed data.
273 * \param allocator lzma_allocator for custom allocator functions.
274 * Set to NULL to use malloc() and free().
275 * \param in Beginning of the input buffer
276 * \param in_size Size of the input buffer
277 * \param out Beginning of the output buffer
278 * \param out_pos The next byte will be written to out[*out_pos].
279 * *out_pos is updated only if encoding succeeds.
280 * \param out_size Size of the out buffer; the first byte into
281 * which no data is written is out[out_size].
282 *
283 * \return - LZMA_OK: Encoding was successful.
284 * - LZMA_BUF_ERROR: Not enough output buffer space.
285 * - LZMA_UNSUPPORTED_CHECK
286 * - LZMA_OPTIONS_ERROR
287 * - LZMA_MEM_ERROR
288 * - LZMA_DATA_ERROR
289 * - LZMA_PROG_ERROR
290 */
291extern LZMA_API(lzma_ret) lzma_easy_buffer_encode(
292 uint32_t preset, lzma_check check,
293 const lzma_allocator *allocator,
294 const uint8_t *in, size_t in_size,
295 uint8_t *out, size_t *out_pos, size_t out_size) lzma_nothrow;
296
297
298/**
299 * \brief Initialize .xz Stream encoder using a custom filter chain
300 *
301 * \param strm Pointer to a properly prepared lzma_stream
302 * \param filters Array of filters. This must be terminated with
303 * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h for
304 * more information.
305 * \param check Type of the integrity check to calculate from
306 * uncompressed data.
307 *
308 * \return - LZMA_OK: Initialization was successful.
309 * - LZMA_MEM_ERROR
310 * - LZMA_UNSUPPORTED_CHECK
311 * - LZMA_OPTIONS_ERROR
312 * - LZMA_PROG_ERROR
313 */
314extern LZMA_API(lzma_ret) lzma_stream_encoder(lzma_stream *strm,
315 const lzma_filter *filters, lzma_check check)
316 lzma_nothrow lzma_attr_warn_unused_result;
317
318
319/**
320 * \brief Calculate approximate memory usage of multithreaded .xz encoder
321 *
322 * Since doing the encoding in threaded mode doesn't affect the memory
323 * requirements of a single-threaded decompressor, you can use
324 * lzma_easy_decoder_memusage(options->preset) or
325 * lzma_raw_decoder_memusage(options->filters) to calculate
326 * the decompressor memory requirements.
327 *
328 * \param options Compression options
329 *
330 * \return Number of bytes of memory required for encoding with the
331 * given options. If an error occurs, for example due to an
332 * unsupported preset or filter chain, UINT64_MAX is returned.
333 */
334extern LZMA_API(uint64_t) lzma_stream_encoder_mt_memusage(
335 const lzma_mt *options) lzma_nothrow lzma_attr_pure;
336
337
338/**
339 * \brief Initialize multithreaded .xz Stream encoder
340 *
341 * This provides the functionality of lzma_easy_encoder() and
342 * lzma_stream_encoder() as a single function for multithreaded use.
343 *
344 * The supported actions for lzma_code() are LZMA_RUN, LZMA_FULL_FLUSH,
345 * LZMA_FULL_BARRIER, and LZMA_FINISH. Support for LZMA_SYNC_FLUSH might be
346 * added in the future.
347 *
348 * \param strm Pointer to a properly prepared lzma_stream
349 * \param options Pointer to the multithreaded compression options
350 *
351 * \return - LZMA_OK
352 * - LZMA_MEM_ERROR
353 * - LZMA_UNSUPPORTED_CHECK
354 * - LZMA_OPTIONS_ERROR
355 * - LZMA_PROG_ERROR
356 */
357extern LZMA_API(lzma_ret) lzma_stream_encoder_mt(
358 lzma_stream *strm, const lzma_mt *options)
359 lzma_nothrow lzma_attr_warn_unused_result;
360
361
362/**
363 * \brief Initialize .lzma encoder (legacy file format)
364 *
365 * The .lzma format is sometimes called the LZMA_Alone format, which is the
366 * reason for the name of this function. The .lzma format supports only the
367 * LZMA1 filter. There is no support for integrity checks like CRC32.
368 *
369 * Use this function if and only if you need to create files readable by
370 * legacy LZMA tools such as LZMA Utils 4.32.x. Moving to the .xz format
371 * is strongly recommended.
372 *
373 * The valid action values for lzma_code() are LZMA_RUN and LZMA_FINISH.
374 * No kind of flushing is supported, because the file format doesn't make
375 * it possible.
376 *
 * \param strm Pointer to a properly prepared lzma_stream
 * \param options Pointer to the encoder options (lzma_options_lzma)
 *
377 * \return - LZMA_OK
378 * - LZMA_MEM_ERROR
379 * - LZMA_OPTIONS_ERROR
380 * - LZMA_PROG_ERROR
381 */
382extern LZMA_API(lzma_ret) lzma_alone_encoder(
383 lzma_stream *strm, const lzma_options_lzma *options)
384 lzma_nothrow lzma_attr_warn_unused_result;
385
386
387/**
388 * \brief Calculate output buffer size for single-call Stream encoder
389 *
390 * When trying to compress uncompressible data, the encoded size will be
391 * slightly bigger than the input data. This function calculates how much
392 * output buffer space is required to be sure that lzma_stream_buffer_encode()
393 * doesn't return LZMA_BUF_ERROR.
394 *
395 * The calculated value is not exact, but it is guaranteed to be big enough.
396 * The actual maximum output space required may be slightly smaller (up to
397 * about 100 bytes). This should not be a problem in practice.
398 *
399 * If the calculated maximum size doesn't fit into size_t or would make the
400 * Stream grow past LZMA_VLI_MAX (which should never happen in practice),
401 * zero is returned to indicate the error.
402 *
403 * \note The limit calculated by this function applies only to
404 * single-call encoding. Multi-call encoding may (and probably
405 * will) have larger maximum expansion when encoding
406 * uncompressible data. Currently there is no function to
407 * calculate the maximum expansion of multi-call encoding.
 *
 * \param uncompressed_size Size of the data to be encoded with
 * the single-call encoder.
 *
 * \return Output buffer size that is guaranteed to be big enough
 * for single-call encoding, or zero if the size cannot be
 * represented (see above).
408 */
409extern LZMA_API(size_t) lzma_stream_buffer_bound(size_t uncompressed_size)
410 lzma_nothrow;
411
412
413/**
414 * \brief Single-call .xz Stream encoder
415 *
416 * \param filters Array of filters. This must be terminated with
417 * filters[n].id = LZMA_VLI_UNKNOWN. See filter.h
418 * for more information.
419 * \param check Type of the integrity check to calculate from
420 * uncompressed data.
421 * \param allocator lzma_allocator for custom allocator functions.
422 * Set to NULL to use malloc() and free().
423 * \param in Beginning of the input buffer
424 * \param in_size Size of the input buffer
425 * \param out Beginning of the output buffer
426 * \param out_pos The next byte will be written to out[*out_pos].
427 * *out_pos is updated only if encoding succeeds.
428 * \param out_size Size of the out buffer; the first byte into
429 * which no data is written is out[out_size].
430 *
431 * \return - LZMA_OK: Encoding was successful.
432 * - LZMA_BUF_ERROR: Not enough output buffer space.
433 * - LZMA_UNSUPPORTED_CHECK
434 * - LZMA_OPTIONS_ERROR
435 * - LZMA_MEM_ERROR
436 * - LZMA_DATA_ERROR
437 * - LZMA_PROG_ERROR
438 */
439extern LZMA_API(lzma_ret) lzma_stream_buffer_encode(
440 lzma_filter *filters, lzma_check check,
441 const lzma_allocator *allocator,
442 const uint8_t *in, size_t in_size,
443 uint8_t *out, size_t *out_pos, size_t out_size)
444 lzma_nothrow lzma_attr_warn_unused_result;
445
446
447/************
448 * Decoding *
449 ************/
450
451/**
452 * This flag makes lzma_code() return LZMA_NO_CHECK if the input stream
453 * being decoded has no integrity check. Note that when this flag is
454 * used with lzma_auto_decoder(), all .lzma files will trigger
455 * LZMA_NO_CHECK.
456 */
457#define LZMA_TELL_NO_CHECK UINT32_C(0x01)
458
459
460/**
461 * This flag makes lzma_code() return LZMA_UNSUPPORTED_CHECK if the input
462 * stream has an integrity check, but the type of the integrity check is not
463 * supported by this liblzma version or build. Such files can still be
464 * decoded, but the integrity check cannot be verified.
465 */
466#define LZMA_TELL_UNSUPPORTED_CHECK UINT32_C(0x02)
467
468
469/**
470 * This flag makes lzma_code() return LZMA_GET_CHECK as soon as the type
471 * of the integrity check is known. The type can then be obtained with
472 * lzma_get_check().
473 */
474#define LZMA_TELL_ANY_CHECK UINT32_C(0x04)
475
476
477/**
478 * This flag makes lzma_code() not calculate and verify the integrity check
479 * of the compressed data in .xz files. This means that invalid integrity
480 * check values won't be detected and LZMA_DATA_ERROR won't be returned in
481 * such cases.
482 *
483 * This flag only affects the checks of the compressed data itself; the CRC32
484 * values in the .xz headers will still be verified normally.
485 *
486 * Don't use this flag unless you know what you are doing. Possible reasons
487 * to use this flag:
488 *
489 * - Trying to recover data from a corrupt .xz file.
490 *
491 * - Speeding up decompression, which matters mostly with SHA-256
492 * or with files that have compressed extremely well. It's recommended
493 * to not use this flag for this purpose unless the file integrity is
494 * verified externally in some other way.
495 *
496 * Support for this flag was added in liblzma 5.1.4beta.
497 */
498#define LZMA_IGNORE_CHECK UINT32_C(0x10)
499
500
501/**
502 * This flag enables decoding of concatenated files with file formats that
503 * allow concatenating compressed files as is. Of the formats currently
504 * supported by liblzma, only the .xz format allows concatenated files.
505 * Concatenated files are not allowed with the legacy .lzma format.
506 *
507 * This flag also affects the usage of the `action' argument for lzma_code().
508 * When LZMA_CONCATENATED is used, lzma_code() won't return LZMA_STREAM_END
509 * unless LZMA_FINISH is used as `action'. Thus, the application has to set
510 * LZMA_FINISH in the same way as it does when encoding.
511 *
512 * If LZMA_CONCATENATED is not used, the decoders still accept LZMA_FINISH
513 * as `action' for lzma_code(), but the usage of LZMA_FINISH isn't required.
514 */
515#define LZMA_CONCATENATED UINT32_C(0x08)
516
517
518/**
519 * \brief Initialize .xz Stream decoder
520 *
521 * \param strm Pointer to a properly prepared lzma_stream
522 * \param memlimit Memory usage limit in bytes. Use UINT64_MAX
523 * to effectively disable the limiter.
524 * \param flags Bitwise-or of zero or more of the decoder flags:
525 * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
526 * LZMA_TELL_ANY_CHECK, LZMA_CONCATENATED
527 *
528 * \return - LZMA_OK: Initialization was successful.
529 * - LZMA_MEM_ERROR: Cannot allocate memory.
530 * - LZMA_OPTIONS_ERROR: Unsupported flags
531 * - LZMA_PROG_ERROR
532 */
533extern LZMA_API(lzma_ret) lzma_stream_decoder(
534 lzma_stream *strm, uint64_t memlimit, uint32_t flags)
535 lzma_nothrow lzma_attr_warn_unused_result;
536
537
538/**
539 * \brief Decode .xz Streams and .lzma files with autodetection
540 *
541 * This decoder autodetects between the .xz and .lzma file formats, and
542 * calls lzma_stream_decoder() or lzma_alone_decoder() once the type
543 * of the input file has been detected.
544 *
545 * \param strm Pointer to a properly prepared lzma_stream
546 * \param memlimit Memory usage limit in bytes. Use UINT64_MAX
547 * to effectively disable the limiter.
548 * \param flags Bitwise-or of flags, or zero for no flags.
549 *
550 * \return - LZMA_OK: Initialization was successful.
551 * - LZMA_MEM_ERROR: Cannot allocate memory.
552 * - LZMA_OPTIONS_ERROR: Unsupported flags
553 * - LZMA_PROG_ERROR
554 */
555extern LZMA_API(lzma_ret) lzma_auto_decoder(
556 lzma_stream *strm, uint64_t memlimit, uint32_t flags)
557 lzma_nothrow lzma_attr_warn_unused_result;
558
559
560/**
561 * \brief Initialize .lzma decoder (legacy file format)
562 *
563 * Valid `action' arguments to lzma_code() are LZMA_RUN and LZMA_FINISH.
564 * There is no need to use LZMA_FINISH, but allowing it may simplify
565 * certain types of applications.
566 *
 * \param strm Pointer to a properly prepared lzma_stream
 * \param memlimit Memory usage limit in bytes
 *
567 * \return - LZMA_OK
568 * - LZMA_MEM_ERROR
569 * - LZMA_PROG_ERROR
570 */
571extern LZMA_API(lzma_ret) lzma_alone_decoder(
572 lzma_stream *strm, uint64_t memlimit)
573 lzma_nothrow lzma_attr_warn_unused_result;
574
575
576/**
577 * \brief Single-call .xz Stream decoder
578 *
579 * \param memlimit Pointer to how much memory the decoder is allowed
580 * to allocate. The value pointed to by this pointer is
581 * modified if and only if LZMA_MEMLIMIT_ERROR is
582 * returned.
583 * \param flags Bitwise-or of zero or more of the decoder flags:
584 * LZMA_TELL_NO_CHECK, LZMA_TELL_UNSUPPORTED_CHECK,
585 * LZMA_CONCATENATED. Note that LZMA_TELL_ANY_CHECK
586 * is not allowed and will return LZMA_PROG_ERROR.
587 * \param allocator lzma_allocator for custom allocator functions.
588 * Set to NULL to use malloc() and free().
589 * \param in Beginning of the input buffer
590 * \param in_pos The next byte will be read from in[*in_pos].
591 * *in_pos is updated only if decoding succeeds.
592 * \param in_size Size of the input buffer; the first byte that
593 * won't be read is in[in_size].
594 * \param out Beginning of the output buffer
595 * \param out_pos The next byte will be written to out[*out_pos].
596 * *out_pos is updated only if decoding succeeds.
597 * \param out_size Size of the out buffer; the first byte into
598 * which no data is written is out[out_size].
599 *
600 * \return - LZMA_OK: Decoding was successful.
601 * - LZMA_FORMAT_ERROR
602 * - LZMA_OPTIONS_ERROR
603 * - LZMA_DATA_ERROR
604 * - LZMA_NO_CHECK: This can be returned only if using
605 * the LZMA_TELL_NO_CHECK flag.
606 * - LZMA_UNSUPPORTED_CHECK: This can be returned only if using
607 * the LZMA_TELL_UNSUPPORTED_CHECK flag.
608 * - LZMA_MEM_ERROR
609 * - LZMA_MEMLIMIT_ERROR: Memory usage limit was reached.
610 * The minimum required memlimit value was stored to *memlimit.
611 * - LZMA_BUF_ERROR: Output buffer was too small.
612 * - LZMA_PROG_ERROR
613 */
614extern LZMA_API(lzma_ret) lzma_stream_buffer_decode(
615 uint64_t *memlimit, uint32_t flags,
616 const lzma_allocator *allocator,
617 const uint8_t *in, size_t *in_pos, size_t in_size,
618 uint8_t *out, size_t *out_pos, size_t out_size)
619 lzma_nothrow lzma_attr_warn_unused_result;
620