// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
17
18#ifndef ARROW_UTIL_COMPRESSION_H
19#define ARROW_UTIL_COMPRESSION_H
20
21#include <cstdint>
22#include <memory>
23
24#include "arrow/util/visibility.h"
25
26namespace arrow {
27
28class Status;
29
/// \brief Holder for the compression format identifiers.
///
/// The unscoped enum is nested in a struct, so callers spell the type as
/// Compression::type and the values as Compression::SNAPPY, Compression::GZIP, ...
struct Compression {
  /// Identifier passed to util::Codec::Create() to select a codec.
  ///
  /// Enumerators are numbered implicitly from 0 in declaration order, so
  /// reordering or inserting entries changes their numeric values.
  enum type { UNCOMPRESSED, SNAPPY, GZIP, BROTLI, ZSTD, LZ4, LZO, BZ2 };
};
33
34namespace util {
35
36/// \brief Streaming compressor interface
37///
38class ARROW_EXPORT Compressor {
39 public:
40 virtual ~Compressor();
41
42 /// \brief Compress some input.
43 ///
44 /// If bytes_read is 0 on return, then a larger output buffer should be supplied.
45 virtual Status Compress(int64_t input_len, const uint8_t* input, int64_t output_len,
46 uint8_t* output, int64_t* bytes_read,
47 int64_t* bytes_written) = 0;
48
49 /// \brief Flush part of the compressed output.
50 ///
51 /// If should_retry is true on return, Flush() should be called again
52 /// with a larger buffer.
53 virtual Status Flush(int64_t output_len, uint8_t* output, int64_t* bytes_written,
54 bool* should_retry) = 0;
55
56 /// \brief End compressing, doing whatever is necessary to end the stream.
57 ///
58 /// If should_retry is true on return, End() should be called again
59 /// with a larger buffer. Otherwise, the Compressor should not be used anymore.
60 ///
61 /// End() implies Flush().
62 virtual Status End(int64_t output_len, uint8_t* output, int64_t* bytes_written,
63 bool* should_retry) = 0;
64
65 // XXX add methods for buffer size heuristics?
66};
67
68/// \brief Streaming decompressor interface
69///
70class ARROW_EXPORT Decompressor {
71 public:
72 virtual ~Decompressor();
73
74 /// \brief Decompress some input.
75 ///
76 /// If need_more_output is true on return, a larger output buffer needs
77 /// to be supplied.
78 /// XXX is need_more_output necessary? (Brotli?)
79 virtual Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len,
80 uint8_t* output, int64_t* bytes_read, int64_t* bytes_written,
81 bool* need_more_output) = 0;
82
83 /// \brief Return whether the compressed stream is finished.
84 ///
85 /// This is a heuristic. If true is returned, then it is guaranteed
86 /// that the stream is finished. If false is returned, however, it may
87 /// simply be that the underlying library isn't able to provide the information.
88 virtual bool IsFinished() = 0;
89
90 // XXX add methods for buffer size heuristics?
91};
92
93class ARROW_EXPORT Codec {
94 public:
95 virtual ~Codec();
96
97 static Status Create(Compression::type codec, std::unique_ptr<Codec>* out);
98
99 /// \brief One-shot decompression function
100 ///
101 /// output_buffer_len must be correct and therefore be obtained in advance.
102 ///
103 /// \note One-shot decompression is not always compatible with streaming
104 /// compression. Depending on the codec (e.g. LZ4), different formats may
105 /// be used.
106 virtual Status Decompress(int64_t input_len, const uint8_t* input,
107 int64_t output_buffer_len, uint8_t* output_buffer) = 0;
108
109 /// \brief One-shot decompression function that also returns the
110 /// actual decompressed size.
111 ///
112 /// \param[in] input_len the number of bytes of compressed data.
113 /// \param[in] input the compressed data.
114 /// \param[in] output_buffer_len the number of bytes of buffer for
115 /// decompressed data.
116 /// \param[in] output_buffer the buffer for decompressed data.
117 /// \param[out] output_len the actual decompressed size.
118 ///
119 /// \note One-shot decompression is not always compatible with streaming
120 /// compression. Depending on the codec (e.g. LZ4), different formats may
121 /// be used.
122 virtual Status Decompress(int64_t input_len, const uint8_t* input,
123 int64_t output_buffer_len, uint8_t* output_buffer,
124 int64_t* output_len) = 0;
125
126 /// \brief One-shot compression function
127 ///
128 /// output_buffer_len must first have been computed using MaxCompressedLen().
129 ///
130 /// \note One-shot compression is not always compatible with streaming
131 /// decompression. Depending on the codec (e.g. LZ4), different formats may
132 /// be used.
133 virtual Status Compress(int64_t input_len, const uint8_t* input,
134 int64_t output_buffer_len, uint8_t* output_buffer,
135 int64_t* output_len) = 0;
136
137 virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0;
138
139 // XXX Should be able to choose compression level, or presets? ("fast", etc.)
140
141 /// \brief Create a streaming compressor instance
142 virtual Status MakeCompressor(std::shared_ptr<Compressor>* out) = 0;
143
144 /// \brief Create a streaming decompressor instance
145 virtual Status MakeDecompressor(std::shared_ptr<Decompressor>* out) = 0;
146
147 virtual const char* name() const = 0;
148};
149
150} // namespace util
151} // namespace arrow
152
153#endif
154