1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #ifndef ARROW_UTIL_COMPRESSION_H |
19 | #define ARROW_UTIL_COMPRESSION_H |
20 | |
21 | #include <cstdint> |
22 | #include <memory> |
23 | |
24 | #include "arrow/util/visibility.h" |
25 | |
26 | namespace arrow { |
27 | |
28 | class Status; |
29 | |
/// \brief Scope holder for the compression codec identifiers
///
/// The enum is unscoped and nested in a struct, so its values are spelled
/// Compression::SNAPPY etc. and the enum type itself is Compression::type.
struct Compression {
  /// No enumerator has an explicit value, so they are numbered
  /// consecutively from 0 in declaration order.
  enum type {
    UNCOMPRESSED,
    SNAPPY,
    GZIP,
    BROTLI,
    ZSTD,
    LZ4,
    LZO,
    BZ2
  };
};
33 | |
34 | namespace util { |
35 | |
36 | /// \brief Streaming compressor interface |
37 | /// |
38 | class ARROW_EXPORT Compressor { |
39 | public: |
40 | virtual ~Compressor(); |
41 | |
42 | /// \brief Compress some input. |
43 | /// |
44 | /// If bytes_read is 0 on return, then a larger output buffer should be supplied. |
45 | virtual Status Compress(int64_t input_len, const uint8_t* input, int64_t output_len, |
46 | uint8_t* output, int64_t* bytes_read, |
47 | int64_t* bytes_written) = 0; |
48 | |
49 | /// \brief Flush part of the compressed output. |
50 | /// |
51 | /// If should_retry is true on return, Flush() should be called again |
52 | /// with a larger buffer. |
53 | virtual Status Flush(int64_t output_len, uint8_t* output, int64_t* bytes_written, |
54 | bool* should_retry) = 0; |
55 | |
56 | /// \brief End compressing, doing whatever is necessary to end the stream. |
57 | /// |
58 | /// If should_retry is true on return, End() should be called again |
59 | /// with a larger buffer. Otherwise, the Compressor should not be used anymore. |
60 | /// |
61 | /// End() implies Flush(). |
62 | virtual Status End(int64_t output_len, uint8_t* output, int64_t* bytes_written, |
63 | bool* should_retry) = 0; |
64 | |
65 | // XXX add methods for buffer size heuristics? |
66 | }; |
67 | |
68 | /// \brief Streaming decompressor interface |
69 | /// |
70 | class ARROW_EXPORT Decompressor { |
71 | public: |
72 | virtual ~Decompressor(); |
73 | |
74 | /// \brief Decompress some input. |
75 | /// |
76 | /// If need_more_output is true on return, a larger output buffer needs |
77 | /// to be supplied. |
78 | /// XXX is need_more_output necessary? (Brotli?) |
79 | virtual Status Decompress(int64_t input_len, const uint8_t* input, int64_t output_len, |
80 | uint8_t* output, int64_t* bytes_read, int64_t* bytes_written, |
81 | bool* need_more_output) = 0; |
82 | |
83 | /// \brief Return whether the compressed stream is finished. |
84 | /// |
85 | /// This is a heuristic. If true is returned, then it is guaranteed |
86 | /// that the stream is finished. If false is returned, however, it may |
87 | /// simply be that the underlying library isn't able to provide the information. |
88 | virtual bool IsFinished() = 0; |
89 | |
90 | // XXX add methods for buffer size heuristics? |
91 | }; |
92 | |
93 | class ARROW_EXPORT Codec { |
94 | public: |
95 | virtual ~Codec(); |
96 | |
97 | static Status Create(Compression::type codec, std::unique_ptr<Codec>* out); |
98 | |
99 | /// \brief One-shot decompression function |
100 | /// |
101 | /// output_buffer_len must be correct and therefore be obtained in advance. |
102 | /// |
103 | /// \note One-shot decompression is not always compatible with streaming |
104 | /// compression. Depending on the codec (e.g. LZ4), different formats may |
105 | /// be used. |
106 | virtual Status Decompress(int64_t input_len, const uint8_t* input, |
107 | int64_t output_buffer_len, uint8_t* output_buffer) = 0; |
108 | |
109 | /// \brief One-shot decompression function that also returns the |
110 | /// actual decompressed size. |
111 | /// |
112 | /// \param[in] input_len the number of bytes of compressed data. |
113 | /// \param[in] input the compressed data. |
114 | /// \param[in] output_buffer_len the number of bytes of buffer for |
115 | /// decompressed data. |
116 | /// \param[in] output_buffer the buffer for decompressed data. |
117 | /// \param[out] output_len the actual decompressed size. |
118 | /// |
119 | /// \note One-shot decompression is not always compatible with streaming |
120 | /// compression. Depending on the codec (e.g. LZ4), different formats may |
121 | /// be used. |
122 | virtual Status Decompress(int64_t input_len, const uint8_t* input, |
123 | int64_t output_buffer_len, uint8_t* output_buffer, |
124 | int64_t* output_len) = 0; |
125 | |
126 | /// \brief One-shot compression function |
127 | /// |
128 | /// output_buffer_len must first have been computed using MaxCompressedLen(). |
129 | /// |
130 | /// \note One-shot compression is not always compatible with streaming |
131 | /// decompression. Depending on the codec (e.g. LZ4), different formats may |
132 | /// be used. |
133 | virtual Status Compress(int64_t input_len, const uint8_t* input, |
134 | int64_t output_buffer_len, uint8_t* output_buffer, |
135 | int64_t* output_len) = 0; |
136 | |
137 | virtual int64_t MaxCompressedLen(int64_t input_len, const uint8_t* input) = 0; |
138 | |
139 | // XXX Should be able to choose compression level, or presets? ("fast", etc.) |
140 | |
141 | /// \brief Create a streaming compressor instance |
142 | virtual Status MakeCompressor(std::shared_ptr<Compressor>* out) = 0; |
143 | |
144 | /// \brief Create a streaming decompressor instance |
145 | virtual Status MakeDecompressor(std::shared_ptr<Decompressor>* out) = 0; |
146 | |
147 | virtual const char* name() const = 0; |
148 | }; |
149 | |
150 | } // namespace util |
151 | } // namespace arrow |
152 | |
153 | #endif |
154 | |