/* -*- mode: C; c-basic-offset: 4; indent-tabs-mode: nil -*- */
// vim: expandtab:ts=8:sw=4:softtabstop=4:
/**
 * \file        lzma/base.h
 * \brief       Data types and functions used in many places in liblzma API
 */
7
/*
 * Author: Lasse Collin
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 *
 * See ../lzma.h for information about liblzma as a whole.
 */
16
/*
 * Refuse direct inclusion. The check only passes when LZMA_H_INTERNAL is
 * already defined; per the error text, <lzma.h> is the supported way to
 * get this header.
 */
#ifndef LZMA_H_INTERNAL
#   error Never include this file directly. Use <lzma.h> instead.
#endif
20
21
/**
 * \brief       Boolean
 *
 * This type exists because C89 doesn't have stdbool.h. To set a value for
 * a variable of type lzma_bool, you can use
 *   - C99's `true' and `false' from stdbool.h;
 *   - C++'s built-in `true' and `false'; or
 *   - the integers one (true) and zero (false).
 */
typedef unsigned char lzma_bool;
32
33
/**
 * \brief       Type of reserved enumeration variable in structures
 *
 * To avoid breaking the library ABI when new features are added, several
 * structures contain extra variables that may be used in the future. Since
 * sizeof(enum) can be different than sizeof(int), and sizeof(enum) may
 * even vary depending on the range of enumeration constants, we specify
 * a separate type to be used for reserved enumeration variables. All
 * enumeration constants in liblzma API will be non-negative and less
 * than 128, which should guarantee that the ABI won't break even when
 * new constants are added to existing enumerations.
 */
typedef enum {
    LZMA_RESERVED_ENUM      = 0
} lzma_reserved_enum;
49
50
/**
 * \brief       Return values used by several functions in liblzma
 *
 * Check the descriptions of specific functions to find out which return
 * values they can return. With some functions the return values may have
 * more specific meanings than described here; those differences are
 * described on a per-function basis.
 */
typedef enum {
    LZMA_OK                 = 0,
        /**<
         * \brief       Operation completed successfully
         */

    LZMA_STREAM_END         = 1,
        /**<
         * \brief       End of stream was reached
         *
         * In encoder, LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or
         * LZMA_FINISH was finished. In decoder, this indicates
         * that all the data was successfully decoded.
         *
         * In all cases, when LZMA_STREAM_END is returned, the last
         * output bytes should be picked from strm->next_out.
         */

    LZMA_NO_CHECK           = 2,
        /**<
         * \brief       Input stream has no integrity check
         *
         * This return value can be returned only if the
         * LZMA_TELL_NO_CHECK flag was used when initializing
         * the decoder. LZMA_NO_CHECK is just a warning, and
         * the decoding can be continued normally.
         *
         * It is possible to call lzma_get_check() immediately after
         * lzma_code() has returned LZMA_NO_CHECK. The result will
         * naturally be LZMA_CHECK_NONE, but the possibility to call
         * lzma_get_check() may be convenient in some applications.
         */

    LZMA_UNSUPPORTED_CHECK  = 3,
        /**<
         * \brief       Cannot calculate the integrity check
         *
         * The usage of this return value is different in encoders
         * and decoders.
         *
         * Encoders can return this value only from the initialization
         * function. If initialization fails with this value, the
         * encoding cannot be done, because there's no way to produce
         * output with the correct integrity check.
         *
         * Decoders can return this value only from lzma_code() and
         * only if the LZMA_TELL_UNSUPPORTED_CHECK flag was used when
         * initializing the decoder. The decoding can still be
         * continued normally even if the check type is unsupported,
         * but naturally the check will not be validated, and possible
         * errors may go undetected.
         *
         * With decoder, it is possible to call lzma_get_check()
         * immediately after lzma_code() has returned
         * LZMA_UNSUPPORTED_CHECK. This way it is possible to find
         * out what the unsupported Check ID was.
         */

    LZMA_GET_CHECK          = 4,
        /**<
         * \brief       Integrity check type is now available
         *
         * This value can be returned only by the lzma_code() function
         * and only if the decoder was initialized with the
         * LZMA_TELL_ANY_CHECK flag. LZMA_GET_CHECK tells the
         * application that it may now call lzma_get_check() to find
         * out the Check ID. This can be used, for example, to
         * implement a decoder that accepts only files that have
         * strong enough integrity check.
         */

    LZMA_MEM_ERROR          = 5,
        /**<
         * \brief       Cannot allocate memory
         *
         * Memory allocation failed, or the size of the allocation
         * would be greater than SIZE_MAX.
         *
         * Due to internal implementation reasons, the coding cannot
         * be continued even if more memory were made available after
         * LZMA_MEM_ERROR.
         */

    LZMA_MEMLIMIT_ERROR     = 6,
        /**<
         * \brief       Memory usage limit was reached
         *
         * Decoder would need more memory than allowed by the
         * specified memory usage limit. To continue decoding,
         * the memory usage limit has to be increased with
         * lzma_memlimit_set().
         */

    LZMA_FORMAT_ERROR       = 7,
        /**<
         * \brief       File format not recognized
         *
         * The decoder did not recognize the input as supported file
         * format. This error can occur, for example, when trying to
         * decode .lzma format file with lzma_stream_decoder,
         * because lzma_stream_decoder accepts only the .xz format.
         */

    LZMA_OPTIONS_ERROR      = 8,
        /**<
         * \brief       Invalid or unsupported options
         *
         * Invalid or unsupported options, for example
         *  - unsupported filter(s) or filter options; or
         *  - reserved bits set in headers (decoder only).
         *
         * Rebuilding liblzma with more features enabled, or
         * upgrading to a newer version of liblzma may help.
         */

    LZMA_DATA_ERROR         = 9,
        /**<
         * \brief       Data is corrupt
         *
         * The usage of this return value is different in encoders
         * and decoders. In both encoder and decoder, the coding
         * cannot continue after this error.
         *
         * Encoders return this if size limits of the target file
         * format would be exceeded. These limits are huge, thus
         * getting this error from an encoder is mostly theoretical.
         * For example, the maximum compressed and uncompressed
         * size of a .xz Stream is roughly 8 EiB (2^63 bytes).
         *
         * Decoders return this error if the input data is corrupt.
         * This can mean, for example, invalid CRC32 in headers
         * or invalid check of uncompressed data.
         */

    LZMA_BUF_ERROR          = 10,
        /**<
         * \brief       No progress is possible
         *
         * This error code is returned when the coder cannot consume
         * any new input and produce any new output. The most common
         * reason for this error is that the input stream being
         * decoded is truncated or corrupt.
         *
         * This error is not fatal. Coding can be continued normally
         * by providing more input and/or more output space, if
         * possible.
         *
         * Typically the first call to lzma_code() that can do no
         * progress returns LZMA_OK instead of LZMA_BUF_ERROR. Only
         * the second consecutive call doing no progress will return
         * LZMA_BUF_ERROR. This is intentional.
         *
         * With zlib, Z_BUF_ERROR may be returned even if the
         * application is doing nothing wrong, so apps will need
         * to handle Z_BUF_ERROR specially. The above hack
         * guarantees that liblzma never returns LZMA_BUF_ERROR
         * to properly written applications unless the input file
         * is truncated or corrupt. This should simplify the
         * applications a little.
         */

    LZMA_PROG_ERROR         = 11,
        /**<
         * \brief       Programming error
         *
         * This indicates that the arguments given to the function are
         * invalid or the internal state of the decoder is corrupt.
         *   - Function arguments are invalid or the structures
         *     pointed by the argument pointers are invalid
         *     e.g. if strm->next_out has been set to NULL and
         *     strm->avail_out > 0 when calling lzma_code().
         *   - lzma_* functions have been called in wrong order
         *     e.g. lzma_code() was called right after lzma_end().
         *   - If errors occur randomly, the reason might be flaky
         *     hardware.
         *
         * If you think that your code is correct, this error code
         * can be a sign of a bug in liblzma. See the documentation
         * how to report bugs.
         */
} lzma_ret;
240
241
/**
 * \brief       The `action' argument for lzma_code()
 *
 * After the first use of LZMA_SYNC_FLUSH, LZMA_FULL_FLUSH, or LZMA_FINISH,
 * the same `action' must be used until lzma_code() returns LZMA_STREAM_END.
 * Also, the amount of input (that is, strm->avail_in) must not be modified
 * by the application until lzma_code() returns LZMA_STREAM_END. Changing the
 * `action' or modifying the amount of input will make lzma_code() return
 * LZMA_PROG_ERROR.
 */
typedef enum {
    LZMA_RUN                = 0,
        /**<
         * \brief       Continue coding
         *
         * Encoder: Encode as much input as possible. Some internal
         * buffering will probably be done (depends on the filter
         * chain in use), which causes latency: the input used won't
         * usually be decodable from the output of the same
         * lzma_code() call.
         *
         * Decoder: Decode as much input as possible and produce as
         * much output as possible.
         */

    LZMA_SYNC_FLUSH         = 1,
        /**<
         * \brief       Make all the input available at output
         *
         * Normally the encoder introduces some latency.
         * LZMA_SYNC_FLUSH forces all the buffered data to be
         * available at output without resetting the internal
         * state of the encoder. This way it is possible to use
         * compressed stream for example for communication over
         * network.
         *
         * Only some filters support LZMA_SYNC_FLUSH. Trying to use
         * LZMA_SYNC_FLUSH with filters that don't support it will
         * make lzma_code() return LZMA_OPTIONS_ERROR. For example,
         * LZMA1 doesn't support LZMA_SYNC_FLUSH but LZMA2 does.
         *
         * Using LZMA_SYNC_FLUSH very often can dramatically reduce
         * the compression ratio. With some filters (for example,
         * LZMA2), fine-tuning the compression options may help
         * mitigate this problem significantly.
         *
         * Decoders don't support LZMA_SYNC_FLUSH.
         */

    LZMA_FULL_FLUSH         = 2,
        /**<
         * \brief       Finish encoding of the current Block
         *
         * Finish encoding of the current Block, which makes all the
         * input given so far available at output. All the input
         * data going to the current Block must have been given
         * to the encoder (the last bytes can still be pending in
         * next_in). Call lzma_code() with LZMA_FULL_FLUSH until
         * it returns LZMA_STREAM_END. Then continue normally with
         * LZMA_RUN or finish the Stream with LZMA_FINISH.
         *
         * This action is currently supported only by Stream encoder
         * and easy encoder (which uses Stream encoder). If there is
         * no unfinished Block, no empty Block is created.
         */

    LZMA_FINISH             = 3
        /**<
         * \brief       Finish the coding operation
         *
         * Finishes the coding operation. All the input data must
         * have been given to the encoder (the last bytes can still
         * be pending in next_in). Call lzma_code() with LZMA_FINISH
         * until it returns LZMA_STREAM_END. Once LZMA_FINISH has
         * been used, the amount of input must no longer be changed
         * by the application.
         *
         * When decoding, using LZMA_FINISH is optional unless the
         * LZMA_CONCATENATED flag was used when the decoder was
         * initialized. When LZMA_CONCATENATED was not used, the only
         * effect of LZMA_FINISH is that the amount of input must not
         * be changed just like in the encoder.
         */
} lzma_action;
325
326
327/**
328 * \brief Custom functions for memory handling
329 *
330 * A pointer to lzma_allocator may be passed via lzma_stream structure
331 * to liblzma, and some advanced functions take a pointer to lzma_allocator
332 * as a separate function argument. The library will use the functions
333 * specified in lzma_allocator for memory handling instead of the default
334 * malloc() and free(). C++ users should note that the custom memory
335 * handling functions must not throw exceptions.
336 *
337 * liblzma doesn't make an internal copy of lzma_allocator. Thus, it is
338 * OK to change these function pointers in the middle of the coding
339 * process, but obviously it must be done carefully to make sure that the
340 * replacement `free' can deallocate memory allocated by the earlier
341 * `alloc' function(s).
342 */
343typedef struct {
344 /**
345 * \brief Pointer to a custom memory allocation function
346 *
347 * If you don't want a custom allocator, but still want
348 * custom free(), set this to NULL and liblzma will use
349 * the standard malloc().
350 *
351 * \param opaque lzma_allocator.opaque (see below)
352 * \param nmemb Number of elements like in calloc(). liblzma
353 * will always set nmemb to 1, so it is safe to
354 * ignore nmemb in a custom allocator if you like.
355 * The nmemb argument exists only for
356 * compatibility with zlib and libbzip2.
357 * \param size Size of an element in bytes.
358 * liblzma never sets this to zero.
359 *
360 * \return Pointer to the beginning of a memory block of
361 * `size' bytes, or NULL if allocation fails
362 * for some reason. When allocation fails, functions
363 * of liblzma return LZMA_MEM_ERROR.
364 *
365 * The allocator should not waste time zeroing the allocated buffers.
366 * This is not only about speed, but also memory usage, since the
367 * operating system kernel doesn't necessarily allocate the requested
368 * memory in physical memory until it is actually used. With small
369 * input files, liblzma may actually need only a fraction of the
370 * memory that it requested for allocation.
371 *
372 * \note LZMA_MEM_ERROR is also used when the size of the
373 * allocation would be greater than SIZE_MAX. Thus,
374 * don't assume that the custom allocator must have
375 * returned NULL if some function from liblzma
376 * returns LZMA_MEM_ERROR.
377 */
378 void *(LZMA_API_CALL *alloc)(void *opaque, size_t nmemb, size_t size);
379
380 /**
381 * \brief Pointer to a custom memory freeing function
382 *
383 * If you don't want a custom freeing function, but still
384 * want a custom allocator, set this to NULL and liblzma
385 * will use the standard free().
386 *
387 * \param opaque lzma_allocator.opaque (see below)
388 * \param ptr Pointer returned by lzma_allocator.alloc(),
389 * or when it is set to NULL, a pointer returned
390 * by the standard malloc().
391 */
392 void (LZMA_API_CALL *free)(void *opaque, void *ptr);
393
394 /**
395 * \brief Pointer passed to .alloc() and .free()
396 *
397 * opaque is passed as the first argument to lzma_allocator.alloc()
398 * and lzma_allocator.free(). This intended to ease implementing
399 * custom memory allocation functions for use with liblzma.
400 *
401 * If you don't need this, you should set this to NULL.
402 */
403 void *opaque;
404
405} lzma_allocator;
406
407
/**
 * \brief       Internal data structure
 *
 * The contents of this structure are not visible outside the library;
 * only this incomplete type is declared here, so applications can hold
 * a pointer to it but cannot access its members.
 */
typedef struct lzma_internal_s lzma_internal;
414
415
416/**
417 * \brief Passing data to and from liblzma
418 *
419 * The lzma_stream structure is used for
420 * - passing pointers to input and output buffers to liblzma;
421 * - defining custom memory hander functions; and
422 * - holding a pointer to coder-specific internal data structures.
423 *
424 * Typical usage:
425 *
426 * - After allocating lzma_stream (on stack or with malloc()), it must be
427 * initialized to LZMA_STREAM_INIT (see LZMA_STREAM_INIT for details).
428 *
429 * - Initialize a coder to the lzma_stream, for example by using
430 * lzma_easy_encoder() or lzma_auto_decoder(). Some notes:
431 * - In contrast to zlib, strm->next_in and strm->next_out are
432 * ignored by all initialization functions, thus it is safe
433 * to not initialize them yet.
434 * - The initialization functions always set strm->total_in and
435 * strm->total_out to zero.
436 * - If the initialization function fails, no memory is left allocated
437 * that would require freeing with lzma_end() even if some memory was
438 * associated with the lzma_stream structure when the initialization
439 * function was called.
440 *
441 * - Use lzma_code() to do the actual work.
442 *
443 * - Once the coding has been finished, the existing lzma_stream can be
444 * reused. It is OK to reuse lzma_stream with different initialization
445 * function without calling lzma_end() first. Old allocations are
446 * automatically freed.
447 *
448 * - Finally, use lzma_end() to free the allocated memory. lzma_end() never
449 * frees the lzma_stream structure itself.
450 *
451 * Application may modify the values of total_in and total_out as it wants.
452 * They are updated by liblzma to match the amount of data read and
453 * written, but aren't used for anything else.
454 */
455typedef struct {
456 const uint8_t *next_in; /**< Pointer to the next input byte. */
457 size_t avail_in; /**< Number of available input bytes in next_in. */
458 uint64_t total_in; /**< Total number of bytes read by liblzma. */
459
460 uint8_t *next_out; /**< Pointer to the next output position. */
461 size_t avail_out; /**< Amount of free space in next_out. */
462 uint64_t total_out; /**< Total number of bytes written by liblzma. */
463
464 /**
465 * \brief Custom memory allocation functions
466 *
467 * In most cases this is NULL which makes liblzma use
468 * the standard malloc() and free().
469 */
470 lzma_allocator *allocator;
471
472 /** Internal state is not visible to applications. */
473 lzma_internal *internal;
474
475 /*
476 * Reserved space to allow possible future extensions without
477 * breaking the ABI. Excluding the initialization of this structure,
478 * you should not touch these, because the names of these variables
479 * may change.
480 */
481 void *reserved_ptr1;
482 void *reserved_ptr2;
483 uint64_t reserved_int1;
484 uint64_t reserved_int2;
485 lzma_reserved_enum reserved_enum1;
486 lzma_reserved_enum reserved_enum2;
487
488} lzma_stream;
489
490
/**
 * \brief       Initialization for lzma_stream
 *
 * When you declare an instance of lzma_stream, you can immediately
 * initialize it so that initialization functions know that no memory
 * has been allocated yet:
 *
 *     lzma_stream strm = LZMA_STREAM_INIT;
 *
 * If you need to initialize a dynamically allocated lzma_stream, you can use
 * memset(strm_pointer, 0, sizeof(lzma_stream)). Strictly speaking, this
 * violates the C standard since NULL may have different internal
 * representation than zero, but it should be portable enough in practice.
 * Anyway, for maximum portability, you can use something like this:
 *
 *     lzma_stream tmp = LZMA_STREAM_INIT;
 *     *strm = tmp;
 *
 * The initializers are positional and follow the member order of
 * lzma_stream: next_in, avail_in, total_in, next_out, avail_out, total_out,
 * allocator, internal, then the reserved pointers, integers and enums.
 */
#define LZMA_STREAM_INIT \
    { NULL, 0, 0, NULL, 0, 0, NULL, NULL, \
    NULL, NULL, 0, 0, LZMA_RESERVED_ENUM, LZMA_RESERVED_ENUM }
512
513
514/**
515 * \brief Encode or decode data
516 *
517 * Once the lzma_stream has been successfully initialized (e.g. with
518 * lzma_stream_encoder()), the actual encoding or decoding is done
519 * using this function. The application has to update strm->next_in,
520 * strm->avail_in, strm->next_out, and strm->avail_out to pass input
521 * to and get output from liblzma.
522 *
523 * See the description of the coder-specific initialization function to find
524 * out what `action' values are supported by the coder.
525 */
526extern LZMA_API(lzma_ret) lzma_code(lzma_stream *strm, lzma_action action)
527 lzma_nothrow lzma_attr_warn_unused_result;
528
529
530/**
531 * \brief Free memory allocated for the coder data structures
532 *
533 * \param strm Pointer to lzma_stream that is at least initialized
534 * with LZMA_STREAM_INIT.
535 *
536 * After lzma_end(strm), strm->internal is guaranteed to be NULL. No other
537 * members of the lzma_stream structure are touched.
538 *
539 * \note zlib indicates an error if application end()s unfinished
540 * stream structure. liblzma doesn't do this, and assumes that
541 * application knows what it is doing.
542 */
543extern LZMA_API(void) lzma_end(lzma_stream *strm) lzma_nothrow;
544
545
546/**
547 * \brief Get the memory usage of decoder filter chain
548 *
549 * This function is currently supported only when *strm has been initialized
550 * with a function that takes a memlimit argument. With other functions, you
551 * should use e.g. lzma_raw_encoder_memusage() or lzma_raw_decoder_memusage()
552 * to estimate the memory requirements.
553 *
554 * This function is useful e.g. after LZMA_MEMLIMIT_ERROR to find out how big
555 * the memory usage limit should have been to decode the input. Note that
556 * this may give misleading information if decoding .xz Streams that have
557 * multiple Blocks, because each Block can have different memory requirements.
558 *
559 * \return Rough estimate of how much memory is currently allocated
560 * for the filter decoders. If no filter chain is currently
561 * allocated, some non-zero value is still returned, which is
562 * less than or equal to what any filter chain would indicate
563 * as its memory requirement.
564 *
565 * If this function isn't supported by *strm or some other error
566 * occurs, zero is returned.
567 */
568extern LZMA_API(uint64_t) lzma_memusage(const lzma_stream *strm)
569 lzma_nothrow lzma_attr_pure;
570
571
572/**
573 * \brief Get the current memory usage limit
574 *
575 * This function is supported only when *strm has been initialized with
576 * a function that takes a memlimit argument.
577 *
578 * \return On success, the current memory usage limit is returned
579 * (always non-zero). On error, zero is returned.
580 */
581extern LZMA_API(uint64_t) lzma_memlimit_get(const lzma_stream *strm)
582 lzma_nothrow lzma_attr_pure;
583
584
585/**
586 * \brief Set the memory usage limit
587 *
588 * This function is supported only when *strm has been initialized with
589 * a function that takes a memlimit argument.
590 *
591 * \return - LZMA_OK: New memory usage limit successfully set.
592 * - LZMA_MEMLIMIT_ERROR: The new limit is too small.
593 * The limit was not changed.
594 * - LZMA_PROG_ERROR: Invalid arguments, e.g. *strm doesn't
595 * support memory usage limit or memlimit was zero.
596 */
597extern LZMA_API(lzma_ret) lzma_memlimit_set(
598 lzma_stream *strm, uint64_t memlimit) lzma_nothrow;
599