1/**
2 * \file lzma/base.h
3 * \brief Data types and functions used in many places in liblzma API
4 */
5
6/*
7 * Author: Lasse Collin
8 *
9 * This file has been put into the public domain.
10 * You can do whatever you want with this file.
11 *
12 * See ../lzma.h for information about liblzma as a whole.
13 */
14
15#ifndef LZMA_H_INTERNAL
16# error Never include this file directly. Use <lzma.h> instead.
17#endif
18
19
/**
 * \brief       Boolean
 *
 * This type exists because C89 doesn't have stdbool.h. Being an
 * unsigned char, lzma_bool always occupies exactly one byte. To set a value
 * for variables having type lzma_bool, you can use
 *   - C99's `true' and `false' from stdbool.h;
 *   - C++'s built-in `true' and `false'; or
 *   - integers one (true) and zero (false).
 */
typedef unsigned char lzma_bool;
30
31
/**
 * \brief       Type of reserved enumeration variable in structures
 *
 * To avoid breaking the library ABI when new features are added, several
 * structures contain extra variables that may be used in the future. Since
 * sizeof(enum) can be different from sizeof(int), and sizeof(enum) may
 * even vary depending on the range of enumeration constants, we specify
 * a separate type to be used for reserved enumeration variables. All
 * enumeration constants in liblzma API will be non-negative and less
 * than 128, which should guarantee that the ABI won't break even when
 * new constants are added to existing enumerations.
 */
typedef enum {
	LZMA_RESERVED_ENUM      = 0
} lzma_reserved_enum;
47
48
/**
 * \brief       Return values used by several functions in liblzma
 *
 * Check the descriptions of specific functions to find out which return
 * values they can return. With some functions the return values may have
 * more specific meanings than described here; those differences are
 * described on a per-function basis.
 */
typedef enum {
	LZMA_OK                 = 0,
		/**<
		 * \brief       Operation completed successfully
		 */

	LZMA_STREAM_END         = 1,
		/**<
		 * \brief       End of stream was reached
		 *
		 * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or
		 * LZMA_FINISH was finished. In decoder, this indicates
		 * that all the data was successfully decoded.
		 *
		 * In all cases, when LZMA_STREAM_END is returned, the last
		 * output bytes should be picked from strm->next_out.
		 */

	LZMA_NO_CHECK           = 2,
		/**<
		 * \brief       Input stream has no integrity check
		 *
		 * This return value can be returned only if the
		 * LZMA_TELL_NO_CHECK flag was used when initializing
		 * the decoder. LZMA_NO_CHECK is just a warning, and
		 * the decoding can be continued normally.
		 *
		 * It is possible to call lzma_get_check() immediately after
		 * lzma_code() has returned LZMA_NO_CHECK. The result will
		 * naturally be LZMA_CHECK_NONE, but the possibility to call
		 * lzma_get_check() may be convenient in some applications.
		 */

	LZMA_UNSUPPORTED_CHECK  = 3,
		/**<
		 * \brief       Cannot calculate the integrity check
		 *
		 * The usage of this return value is different in encoders
		 * and decoders.
		 *
		 * Encoders can return this value only from the initialization
		 * function. If initialization fails with this value, the
		 * encoding cannot be done, because there's no way to produce
		 * output with the correct integrity check.
		 *
		 * Decoders can return this value only from lzma_code() and
		 * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when
		 * initializing the decoder. The decoding can still be
		 * continued normally even if the check type is unsupported,
		 * but naturally the check will not be validated, and possible
		 * errors may go undetected.
		 *
		 * With decoder, it is possible to call lzma_get_check()
		 * immediately after lzma_code() has returned
		 * LZMA_UNSUPPORTED_CHECK. This way it is possible to find
		 * out what the unsupported Check ID was.
		 */

	LZMA_GET_CHECK          = 4,
		/**<
		 * \brief       Integrity check type is now available
		 *
		 * This value can be returned only by the lzma_code() function
		 * and only if the decoder was initialized with the
		 * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the
		 * application that it may now call lzma_get_check() to find
		 * out the Check ID. This can be used, for example, to
		 * implement a decoder that accepts only files that have
		 * strong enough integrity check.
		 */

	LZMA_MEM_ERROR          = 5,
		/**<
		 * \brief       Cannot allocate memory
		 *
		 * Memory allocation failed, or the size of the allocation
		 * would be greater than SIZE_MAX.
		 *
		 * Due to internal implementation reasons, the coding cannot
		 * be continued even if more memory were made available after
		 * LZMA_MEM_ERROR.
		 */

	LZMA_MEMLIMIT_ERROR     = 6,
		/**<
		 * \brief       Memory usage limit was reached
		 *
		 * Decoder would need more memory than allowed by the
		 * specified memory usage limit. To continue decoding,
		 * the memory usage limit has to be increased with
		 * lzma_memlimit_set().
		 */

	LZMA_FORMAT_ERROR       = 7,
		/**<
		 * \brief       File format not recognized
		 *
		 * The decoder did not recognize the input as supported file
		 * format. This error can occur, for example, when trying to
		 * decode .lzma format file with lzma_stream_decoder,
		 * because lzma_stream_decoder accepts only the .xz format.
		 */

	LZMA_OPTIONS_ERROR      = 8,
		/**<
		 * \brief       Invalid or unsupported options
		 *
		 * Invalid or unsupported options, for example
		 *  - unsupported filter(s) or filter options; or
		 *  - reserved bits set in headers (decoder only).
		 *
		 * Rebuilding liblzma with more features enabled, or
		 * upgrading to a newer version of liblzma may help.
		 */

	LZMA_DATA_ERROR         = 9,
		/**<
		 * \brief       Data is corrupt
		 *
		 * The usage of this return value is different in encoders
		 * and decoders. In both encoder and decoder, the coding
		 * cannot continue after this error.
		 *
		 * Encoders return this if size limits of the target file
		 * format would be exceeded. These limits are huge, thus
		 * getting this error from an encoder is mostly theoretical.
		 * For example, the maximum compressed and uncompressed
		 * size of a .xz Stream is roughly 8 EiB (2^63 bytes).
		 *
		 * Decoders return this error if the input data is corrupt.
		 * This can mean, for example, invalid CRC32 in headers
		 * or invalid check of uncompressed data.
		 */

	LZMA_BUF_ERROR          = 10,
		/**<
		 * \brief       No progress is possible
		 *
		 * This error code is returned when the coder can neither
		 * consume any new input nor produce any new output. The most
		 * common reason for this error is that the input stream being
		 * decoded is truncated or corrupt.
		 *
		 * This error is not fatal. Coding can be continued normally
		 * by providing more input and/or more output space, if
		 * possible.
		 *
		 * Typically the first call to lzma_code() that can make no
		 * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only
		 * the second consecutive call making no progress will return
		 * LZMA_BUF_ERROR. This is intentional.
		 *
		 * With zlib, Z_BUF_ERROR may be returned even if the
		 * application is doing nothing wrong, so apps will need
		 * to handle Z_BUF_ERROR specially. The above hack
		 * guarantees that liblzma never returns LZMA_BUF_ERROR
		 * to properly written applications unless the input file
		 * is truncated or corrupt. This should simplify the
		 * applications a little.
		 */

	LZMA_PROG_ERROR         = 11,
		/**<
		 * \brief       Programming error
		 *
		 * This indicates that the arguments given to the function are
		 * invalid or the internal state of the decoder is corrupt.
		 *   - Function arguments are invalid or the structures
		 *     pointed to by the argument pointers are invalid
		 *     e.g. if strm->next_out has been set to NULL and
		 *     strm->avail_out > 0 when calling lzma_code().
		 *   - lzma_* functions have been called in wrong order
		 *     e.g. lzma_code() was called right after lzma_end().
		 *   - If errors occur randomly, the reason might be flaky
		 *     hardware.
		 *
		 * If you think that your code is correct, this error code
		 * can be a sign of a bug in liblzma. See the documentation
		 * how to report bugs.
		 */
} lzma_ret;
238
239
/**
 * \brief       The `action' argument for lzma_code()
 *
 * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, LZMA_FULL_BARRIER,
 * or LZMA_FINISH, the same `action' must be used until lzma_code() returns
 * LZMA_STREAM_END. Also, the amount of input (that is, strm->avail_in) must
 * not be modified by the application until lzma_code() returns
 * LZMA_STREAM_END. Changing the `action' or modifying the amount of input
 * will make lzma_code() return LZMA_PROG_ERROR.
 */
typedef enum {
	LZMA_RUN = 0,
		/**<
		 * \brief       Continue coding
		 *
		 * Encoder: Encode as much input as possible. Some internal
		 * buffering will probably be done (depends on the filter
		 * chain in use), which causes latency: the input used won't
		 * usually be decodable from the output of the same
		 * lzma_code() call.
		 *
		 * Decoder: Decode as much input as possible and produce as
		 * much output as possible.
		 */

	LZMA_SYNC_FLUSH = 1,
		/**<
		 * \brief       Make all the input available at output
		 *
		 * Normally the encoder introduces some latency.
		 * LZMA_SYNC_FLUSH forces all the buffered data to be
		 * available at output without resetting the internal
		 * state of the encoder. This way it is possible to use
		 * compressed stream for example for communication over
		 * network.
		 *
		 * Only some filters support LZMA_SYNC_FLUSH. Trying to use
		 * LZMA_SYNC_FLUSH with filters that don't support it will
		 * make lzma_code() return LZMA_OPTIONS_ERROR. For example,
		 * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does.
		 *
		 * Using LZMA_SYNC_FLUSH very often can dramatically reduce
		 * the compression ratio. With some filters (for example,
		 * LZMA2), fine-tuning the compression options may help
		 * mitigate this problem significantly (for example,
		 * match finder with LZMA2).
		 *
		 * Decoders don't support LZMA_SYNC_FLUSH.
		 */

	LZMA_FULL_FLUSH = 2,
		/**<
		 * \brief       Finish encoding of the current Block
		 *
		 * All the input data going to the current Block must have
		 * been given to the encoder (the last bytes can still be
		 * pending in *next_in). Call lzma_code() with LZMA_FULL_FLUSH
		 * until it returns LZMA_STREAM_END. Then continue normally
		 * with LZMA_RUN or finish the Stream with LZMA_FINISH.
		 *
		 * This action is currently supported only by Stream encoder
		 * and easy encoder (which uses Stream encoder). If there is
		 * no unfinished Block, no empty Block is created.
		 */

	/* Note: the numeric value 4 is listed before LZMA_FINISH (3) so that
	 * the two Block-finishing actions are documented next to each other.
	 */
	LZMA_FULL_BARRIER = 4,
		/**<
		 * \brief       Finish encoding of the current Block
		 *
		 * This is like LZMA_FULL_FLUSH except that this doesn't
		 * necessarily wait until all the input has been made
		 * available via the output buffer. That is, lzma_code()
		 * might return LZMA_STREAM_END as soon as all the input
		 * has been consumed (avail_in == 0).
		 *
		 * LZMA_FULL_BARRIER is useful with a threaded encoder if
		 * one wants to split the .xz Stream into Blocks at specific
		 * offsets but doesn't care if the output isn't flushed
		 * immediately. Using LZMA_FULL_BARRIER allows keeping
		 * the threads busy while LZMA_FULL_FLUSH would make
		 * lzma_code() wait until all the threads have finished
		 * before more data could be passed to the encoder.
		 *
		 * With a lzma_stream initialized with the single-threaded
		 * lzma_stream_encoder() or lzma_easy_encoder(),
		 * LZMA_FULL_BARRIER is an alias for LZMA_FULL_FLUSH.
		 */

	LZMA_FINISH = 3
		/**<
		 * \brief       Finish the coding operation
		 *
		 * All the input data must have been given to the encoder
		 * (the last bytes can still be pending in next_in).
		 * Call lzma_code() with LZMA_FINISH until it returns
		 * LZMA_STREAM_END. Once LZMA_FINISH has been used,
		 * the amount of input must no longer be changed by
		 * the application.
		 *
		 * When decoding, using LZMA_FINISH is optional unless the
		 * LZMA_CONCATENATED flag was used when the decoder was
		 * initialized. When LZMA_CONCATENATED was not used, the only
		 * effect of LZMA_FINISH is that the amount of input must not
		 * be changed just like in the encoder.
		 */
} lzma_action;
346
347
/**
 * \brief       Custom functions for memory handling
 *
 * A pointer to lzma_allocator may be passed via lzma_stream structure
 * to liblzma, and some advanced functions take a pointer to lzma_allocator
 * as a separate function argument. The library will use the functions
 * specified in lzma_allocator for memory handling instead of the default
 * malloc() and free(). C++ users should note that the custom memory
 * handling functions must not throw exceptions.
 *
 * Single-threaded mode only: liblzma doesn't make an internal copy of
 * lzma_allocator. Thus, it is OK to change these function pointers in
 * the middle of the coding process, but obviously it must be done
 * carefully to make sure that the replacement `free' can deallocate
 * memory allocated by the earlier `alloc' function(s).
 *
 * Multithreaded mode: liblzma might internally store pointers to the
 * lzma_allocator given via the lzma_stream structure. The application
 * must not change the allocator pointer in lzma_stream or the contents
 * of the pointed lzma_allocator structure until lzma_end() has been used
 * to free the memory associated with that lzma_stream. The allocation
 * functions might be called simultaneously from multiple threads, and
 * thus they must be thread safe.
 */
typedef struct {
	/**
	 * \brief       Pointer to a custom memory allocation function
	 *
	 * If you don't want a custom allocator, but still want
	 * custom free(), set this to NULL and liblzma will use
	 * the standard malloc().
	 *
	 * \param       opaque  lzma_allocator.opaque (see below)
	 * \param       nmemb   Number of elements like in calloc(). liblzma
	 *                      will always set nmemb to 1, so it is safe to
	 *                      ignore nmemb in a custom allocator if you like.
	 *                      The nmemb argument exists only for
	 *                      compatibility with zlib and libbzip2.
	 * \param       size    Size of an element in bytes.
	 *                      liblzma never sets this to zero.
	 *
	 * \return      Pointer to the beginning of a memory block of
	 *              `size' bytes, or NULL if allocation fails
	 *              for some reason. When allocation fails, functions
	 *              of liblzma return LZMA_MEM_ERROR.
	 *
	 * The allocator should not waste time zeroing the allocated buffers.
	 * This is not only about speed, but also memory usage, since the
	 * operating system kernel doesn't necessarily allocate the requested
	 * memory in physical memory until it is actually used. With small
	 * input files, liblzma may actually need only a fraction of the
	 * memory that it requested for allocation.
	 *
	 * \note        LZMA_MEM_ERROR is also used when the size of the
	 *              allocation would be greater than SIZE_MAX. Thus,
	 *              don't assume that the custom allocator must have
	 *              returned NULL if some function from liblzma
	 *              returns LZMA_MEM_ERROR.
	 */
	void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size);

	/**
	 * \brief       Pointer to a custom memory freeing function
	 *
	 * If you don't want a custom freeing function, but still
	 * want a custom allocator, set this to NULL and liblzma
	 * will use the standard free().
	 *
	 * \param       opaque  lzma_allocator.opaque (see below)
	 * \param       ptr     Pointer returned by lzma_allocator.alloc(),
	 *                      or when it is set to NULL, a pointer returned
	 *                      by the standard malloc().
	 */
	void (LZMA_API_CALL *free)(void *opaque, void *ptr);

	/**
	 * \brief       Pointer passed to .alloc() and .free()
	 *
	 * opaque is passed as the first argument to lzma_allocator.alloc()
	 * and lzma_allocator.free(). This is intended to ease implementing
	 * custom memory allocation functions for use with liblzma.
	 *
	 * If you don't need this, you should set this to NULL.
	 */
	void *opaque;

} lzma_allocator;
435
436
/**
 * \brief       Internal data structure
 *
 * The contents of this structure are not visible outside the library;
 * only the incomplete type is declared here, so applications can hold
 * pointers to it but cannot access its members.
 */
typedef struct lzma_internal_s lzma_internal;
443
444
/**
 * \brief       Passing data to and from liblzma
 *
 * The lzma_stream structure is used for
 *  - passing pointers to input and output buffers to liblzma;
 *  - defining custom memory handler functions; and
 *  - holding a pointer to coder-specific internal data structures.
 *
 * Typical usage:
 *
 *  - After allocating lzma_stream (on stack or with malloc()), it must be
 *    initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details).
 *
 *  - Initialize a coder to the lzma_stream, for example by using
 *    lzma_easy_encoder() or lzma_auto_decoder(). Some notes:
 *      - In contrast to zlib, strm->next_in and strm->next_out are
 *        ignored by all initialization functions, thus it is safe
 *        to not initialize them yet.
 *      - The initialization functions always set strm->total_in and
 *        strm->total_out to zero.
 *      - If the initialization function fails, no memory is left allocated
 *        that would require freeing with lzma_end() even if some memory was
 *        associated with the lzma_stream structure when the initialization
 *        function was called.
 *
 *  - Use lzma_code() to do the actual work.
 *
 *  - Once the coding has been finished, the existing lzma_stream can be
 *    reused. It is OK to reuse lzma_stream with different initialization
 *    function without calling lzma_end() first. Old allocations are
 *    automatically freed.
 *
 *  - Finally, use lzma_end() to free the allocated memory. lzma_end() never
 *    frees the lzma_stream structure itself.
 *
 * Application may modify the values of total_in and total_out as it wants.
 * They are updated by liblzma to match the amount of data read and
 * written but aren't used for anything else except as possible return
 * values from lzma_get_progress().
 */
typedef struct {
	const uint8_t *next_in; /**< Pointer to the next input byte. */
	size_t avail_in;    /**< Number of available input bytes in next_in. */
	uint64_t total_in;  /**< Total number of bytes read by liblzma. */

	uint8_t *next_out;  /**< Pointer to the next output position. */
	size_t avail_out;   /**< Amount of free space in next_out. */
	uint64_t total_out; /**< Total number of bytes written by liblzma. */

	/**
	 * \brief       Custom memory allocation functions
	 *
	 * In most cases this is NULL which makes liblzma use
	 * the standard malloc() and free().
	 *
	 * \note        In 5.0.x this is not a const pointer.
	 */
	const lzma_allocator *allocator;

	/** Internal state is not visible to applications. */
	lzma_internal *internal;

	/*
	 * Reserved space to allow possible future extensions without
	 * breaking the ABI. Excluding the initialization of this structure,
	 * you should not touch these, because the names of these variables
	 * may change.
	 */
	void *reserved_ptr1;
	void *reserved_ptr2;
	void *reserved_ptr3;
	void *reserved_ptr4;
	uint64_t reserved_int1;
	uint64_t reserved_int2;
	size_t reserved_int3;
	size_t reserved_int4;
	lzma_reserved_enum reserved_enum1;
	lzma_reserved_enum reserved_enum2;

} lzma_stream;
525
526
/**
 * \brief       Initialization for lzma_stream
 *
 * When you declare an instance of lzma_stream, you can immediately
 * initialize it so that initialization functions know that no memory
 * has been allocated yet:
 *
 *     lzma_stream strm = LZMA_STREAM_INIT;
 *
 * If you need to initialize a dynamically allocated lzma_stream, you can use
 * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this
 * violates the C standard since NULL may have different internal
 * representation than zero, but it should be portable enough in practice.
 * Anyway, for maximum portability, you can use something like this:
 *
 *     lzma_stream tmp = LZMA_STREAM_INIT;
 *     *strm = tmp;
 *
 * The initializer is positional: its 18 values correspond, in order, to the
 * 18 members of lzma_stream (the six buffer/total members, allocator,
 * internal, and then the reserved pointers, integers, and enumerations).
 * It must be kept in sync with the member order of the structure.
 */
#define LZMA_STREAM_INIT \
	{ NULL, 0, 0, NULL, 0, 0, NULL, NULL, \
	NULL, NULL, NULL, NULL, 0, 0, 0, 0, \
	LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM }
549
550
/**
 * \brief       Encode or decode data
 *
 * Once the lzma_stream has been successfully initialized (e.g. with
 * lzma_stream_encoder()), the actual encoding or decoding is done
 * using this function. The application has to update strm->next_in,
 * strm->avail_in, strm->next_out, and strm->avail_out to pass input
 * to and get output from liblzma.
 *
 * See the description of the coder-specific initialization function to find
 * out what `action' values are supported by the coder.
 *
 * \param       strm    Pointer to a lzma_stream that has been successfully
 *                      initialized with a coder.
 * \param       action  What the coder should do; see lzma_action.
 *
 * \return      One of the lzma_ret values. See lzma_ret and the description
 *              of the coder-specific initialization function for the values
 *              that are possible and their exact meaning.
 */
extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action)
		lzma_nothrow lzma_attr_warn_unused_result;
565
566
/**
 * \brief       Free memory allocated for the coder data structures
 *
 * \param       strm    Pointer to lzma_stream that is at least initialized
 *                      with LZMA_STREAM_INIT.
 *
 * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other
 * members of the lzma_stream structure are touched. The lzma_stream
 * structure itself is never freed by this function.
 *
 * \note        zlib indicates an error if application end()s unfinished
 *              stream structure. liblzma doesn't do this, and assumes that
 *              application knows what it is doing.
 */
extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow;
581
582
/**
 * \brief       Get progress information
 *
 * In single-threaded mode, applications can get progress information from
 * strm->total_in and strm->total_out. In multi-threaded mode this is less
 * useful because a significant amount of both input and output data gets
 * buffered internally by liblzma. This makes total_in and total_out give
 * misleading information and also makes the progress indicator updates
 * non-smooth.
 *
 * This function gives realistic progress information also in multi-threaded
 * mode by taking into account the progress made by each thread. In
 * single-threaded mode *progress_in and *progress_out are set to
 * strm->total_in and strm->total_out, respectively.
 *
 * \param       strm          Pointer to the lzma_stream whose progress
 *                            is queried.
 * \param[out]  progress_in   Receives the input-side progress.
 * \param[out]  progress_out  Receives the output-side progress.
 */
extern LZMA_API(void) lzma_get_progress(lzma_stream *strm,
		uint64_t *progress_in, uint64_t *progress_out) lzma_nothrow;
600
601
/**
 * \brief       Get the memory usage of decoder filter chain
 *
 * This function is currently supported only when *strm has been initialized
 * with a function that takes a memlimit argument. With other functions, you
 * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage()
 * to estimate the memory requirements.
 *
 * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big
 * the memory usage limit should have been to decode the input. Note that
 * this may give misleading information if decoding .xz Streams that have
 * multiple Blocks, because each Block can have different memory requirements.
 *
 * \param       strm    Pointer to a lzma_stream that has been initialized
 *                      with a function that takes a memlimit argument.
 *
 * \return      How much memory is currently allocated for the filter
 *              decoders. If no filter chain is currently allocated,
 *              some non-zero value is still returned, which is less than
 *              or equal to what any filter chain would indicate as its
 *              memory requirement.
 *
 *              If this function isn't supported by *strm or some other error
 *              occurs, zero is returned.
 */
extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm)
		lzma_nothrow lzma_attr_pure;
626
627
/**
 * \brief       Get the current memory usage limit
 *
 * This function is supported only when *strm has been initialized with
 * a function that takes a memlimit argument.
 *
 * \param       strm    Pointer to a lzma_stream that has been initialized
 *                      with a function that takes a memlimit argument.
 *
 * \return      On success, the current memory usage limit is returned
 *              (always non-zero). On error, zero is returned.
 */
extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
		lzma_nothrow lzma_attr_pure;
639
640
/**
 * \brief       Set the memory usage limit
 *
 * This function is supported only when *strm has been initialized with
 * a function that takes a memlimit argument.
 *
 * \param       strm      Pointer to a lzma_stream that has been initialized
 *                        with a function that takes a memlimit argument.
 * \param       memlimit  New memory usage limit. Must be non-zero.
 *
 * \return      - LZMA_OK: New memory usage limit successfully set.
 *              - LZMA_MEMLIMIT_ERROR: The new limit is too small.
 *                The limit was not changed.
 *              - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't
 *                support memory usage limit or memlimit was zero.
 */
extern LZMA_API(lzma_ret) lzma_memlimit_set(
		lzma_stream *strm, uint64_t memlimit) lzma_nothrow;
655