1 | /** |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | */ |
18 | |
19 | #ifndef ORC_BYTE_RLE_HH |
20 | #define ORC_BYTE_RLE_HH |
21 | |
22 | #include <memory> |
23 | |
24 | #include "io/InputStream.hh" |
25 | #include "io/OutputStream.hh" |
26 | |
27 | namespace orc { |
28 | |
29 | class ByteRleEncoder { |
30 | public: |
31 | virtual ~ByteRleEncoder(); |
32 | |
33 | /** |
34 | * Encode the next batch of values |
35 | * @param data to be encoded |
36 | * @param numValues the number of values to be encoded |
37 | * @param notNull If the pointer is null, all values are read. If the |
38 | * pointer is not null, positions that are false are skipped. |
39 | */ |
40 | virtual void add(const char* data, uint64_t numValues, |
41 | const char* notNull) = 0; |
42 | |
43 | /** |
44 | * Get size of buffer used so far. |
45 | */ |
46 | virtual uint64_t getBufferSize() const = 0; |
47 | |
48 | /** |
49 | * Flushing underlying output stream |
50 | */ |
51 | virtual uint64_t flush() = 0; |
52 | |
53 | /** |
54 | * record current position |
55 | * @param recorder use the recorder to record current positions |
56 | */ |
57 | virtual void recordPosition(PositionRecorder* recorder) const = 0; |
58 | }; |
59 | |
60 | class ByteRleDecoder { |
61 | public: |
62 | virtual ~ByteRleDecoder(); |
63 | |
64 | /** |
65 | * Seek to a particular spot. |
66 | */ |
67 | virtual void seek(PositionProvider&) = 0; |
68 | |
69 | /** |
70 | * Seek over a given number of values. |
71 | */ |
72 | virtual void skip(uint64_t numValues) = 0; |
73 | |
74 | /** |
75 | * Read a number of values into the batch. |
76 | * @param data the array to read into |
77 | * @param numValues the number of values to read |
78 | * @param notNull If the pointer is null, all values are read. If the |
79 | * pointer is not null, positions that are false are skipped. |
80 | */ |
81 | virtual void next(char* data, uint64_t numValues, char* notNull) = 0; |
82 | }; |
83 | |
84 | /** |
85 | * Create a byte RLE encoder. |
86 | * @param output the output stream to write to |
87 | */ |
88 | std::unique_ptr<ByteRleEncoder> createByteRleEncoder |
89 | (std::unique_ptr<BufferedOutputStream> output); |
90 | |
91 | /** |
92 | * Create a boolean RLE encoder. |
93 | * @param output the output stream to write to |
94 | */ |
95 | std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder |
96 | (std::unique_ptr<BufferedOutputStream> output); |
97 | |
98 | /** |
99 | * Create a byte RLE decoder. |
100 | * @param input the input stream to read from |
101 | */ |
102 | std::unique_ptr<ByteRleDecoder> createByteRleDecoder |
103 | (std::unique_ptr<SeekableInputStream> input); |
104 | |
105 | /** |
106 | * Create a boolean RLE decoder. |
107 | * |
108 | * Unlike the other RLE decoders, the boolean decoder sets the data to 0 |
109 | * if the value is masked by notNull. This is required for the notNull stream |
110 | * processing to properly apply multiple masks from nested types. |
111 | * @param input the input stream to read from |
112 | */ |
113 | std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder |
114 | (std::unique_ptr<SeekableInputStream> input); |
115 | } |
116 | |
117 | #endif |
118 | |