/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
18
19#ifndef ORC_BYTE_RLE_HH
20#define ORC_BYTE_RLE_HH
21
22#include <memory>
23
24#include "io/InputStream.hh"
25#include "io/OutputStream.hh"
26
27namespace orc {
28
29 class ByteRleEncoder {
30 public:
31 virtual ~ByteRleEncoder();
32
33 /**
34 * Encode the next batch of values
35 * @param data to be encoded
36 * @param numValues the number of values to be encoded
37 * @param notNull If the pointer is null, all values are read. If the
38 * pointer is not null, positions that are false are skipped.
39 */
40 virtual void add(const char* data, uint64_t numValues,
41 const char* notNull) = 0;
42
43 /**
44 * Get size of buffer used so far.
45 */
46 virtual uint64_t getBufferSize() const = 0;
47
48 /**
49 * Flushing underlying output stream
50 */
51 virtual uint64_t flush() = 0;
52
53 /**
54 * record current position
55 * @param recorder use the recorder to record current positions
56 */
57 virtual void recordPosition(PositionRecorder* recorder) const = 0;
58 };
59
60 class ByteRleDecoder {
61 public:
62 virtual ~ByteRleDecoder();
63
64 /**
65 * Seek to a particular spot.
66 */
67 virtual void seek(PositionProvider&) = 0;
68
69 /**
70 * Seek over a given number of values.
71 */
72 virtual void skip(uint64_t numValues) = 0;
73
74 /**
75 * Read a number of values into the batch.
76 * @param data the array to read into
77 * @param numValues the number of values to read
78 * @param notNull If the pointer is null, all values are read. If the
79 * pointer is not null, positions that are false are skipped.
80 */
81 virtual void next(char* data, uint64_t numValues, char* notNull) = 0;
82 };
83
84 /**
85 * Create a byte RLE encoder.
86 * @param output the output stream to write to
87 */
88 std::unique_ptr<ByteRleEncoder> createByteRleEncoder
89 (std::unique_ptr<BufferedOutputStream> output);
90
91 /**
92 * Create a boolean RLE encoder.
93 * @param output the output stream to write to
94 */
95 std::unique_ptr<ByteRleEncoder> createBooleanRleEncoder
96 (std::unique_ptr<BufferedOutputStream> output);
97
98 /**
99 * Create a byte RLE decoder.
100 * @param input the input stream to read from
101 */
102 std::unique_ptr<ByteRleDecoder> createByteRleDecoder
103 (std::unique_ptr<SeekableInputStream> input);
104
105 /**
106 * Create a boolean RLE decoder.
107 *
108 * Unlike the other RLE decoders, the boolean decoder sets the data to 0
109 * if the value is masked by notNull. This is required for the notNull stream
110 * processing to properly apply multiple masks from nested types.
111 * @param input the input stream to read from
112 */
113 std::unique_ptr<ByteRleDecoder> createBooleanRleDecoder
114 (std::unique_ptr<SeekableInputStream> input);
115}
116
117#endif
118