1/**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19#ifndef ORC_RLE_HH
20#define ORC_RLE_HH
21
22#include "io/InputStream.hh"
23#include "io/OutputStream.hh"
24
25#include <memory>
26
27namespace orc {
28
/**
 * Selects which run-length-encoding variant an encoder or decoder uses.
 * The numeric values are spelled out explicitly; they match the implicit
 * numbering (0, 1) of the enumerators.
 */
enum RleVersion {
  RleVersion_1 = 0,  // first RLE variant
  RleVersion_2 = 1   // second RLE variant
};
33
/**
 * Zig-zag encode a signed 64-bit value so that values of small magnitude,
 * positive or negative, map to small non-negative codes:
 * 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
 *
 * @param value the signed value to encode
 * @return the zig-zag code, carried in an int64_t (same bit pattern as the
 *         unsigned code)
 */
inline int64_t zigZag(int64_t value) {
  // Work on the unsigned bit pattern: left-shifting a negative signed value
  // is undefined behaviour before C++20, and right-shifting one is
  // implementation-defined.
  const uint64_t bits = static_cast<uint64_t>(value);
  // All ones when the sign bit is set, zero otherwise (unsigned wrap-around).
  const uint64_t signMask = uint64_t{0} - (bits >> 63);
  return static_cast<int64_t>((bits << 1) ^ signMask);
}
37
/**
 * Decode a zig-zag code back into the signed value it represents:
 * 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...
 *
 * @param value the zig-zag code
 * @return the decoded signed value
 */
inline int64_t unZigZag(uint64_t value) {
  const uint64_t magnitude = value >> 1;
  const bool lowBitSet = (value & 1) != 0;
  // XOR with an all-ones mask is a bitwise NOT, so an odd code yields the
  // complement of the shifted value and an even code yields it unchanged.
  const uint64_t decoded = lowBitSet ? ~magnitude : magnitude;
  return static_cast<int64_t>(decoded);
}
41
42 class RleEncoder {
43 public:
44 // must be non-inline!
45 virtual ~RleEncoder();
46
47 /**
48 * Encode the next batch of values.
49 * @param data the array to read from
50 * @param numValues the number of values to write
51 * @param notNull If the pointer is null, all values are read. If the
52 * pointer is not null, positions that are false are skipped.
53 */
54 virtual void add(const int64_t* data, uint64_t numValues,
55 const char* notNull) = 0;
56
57 /**
58 * Get size of buffer used so far.
59 */
60 virtual uint64_t getBufferSize() const = 0;
61
62 /**
63 * Flushing underlying BufferedOutputStream
64 */
65 virtual uint64_t flush() = 0;
66
67 /**
68 * record current position
69 * @param recorder use the recorder to record current positions
70 */
71 virtual void recordPosition(PositionRecorder* recorder) const = 0;
72 };
73
74 class RleDecoder {
75 public:
76 // must be non-inline!
77 virtual ~RleDecoder();
78
79 /**
80 * Seek to a particular spot.
81 */
82 virtual void seek(PositionProvider&) = 0;
83
84 /**
85 * Seek over a given number of values.
86 */
87 virtual void skip(uint64_t numValues) = 0;
88
89 /**
90 * Read a number of values into the batch.
91 * @param data the array to read into
92 * @param numValues the number of values to read
93 * @param notNull If the pointer is null, all values are read. If the
94 * pointer is not null, positions that are false are skipped.
95 */
96 virtual void next(int64_t* data, uint64_t numValues,
97 const char* notNull) = 0;
98 };
99
100 /**
101 * Create an RLE encoder.
102 * @param output the output stream to write to
103 * @param isSigned true if the number sequence is signed
104 * @param version version of RLE decoding to do
105 * @param pool memory pool to use for allocation
106 */
107 std::unique_ptr<RleEncoder> createRleEncoder
108 (std::unique_ptr<BufferedOutputStream> output,
109 bool isSigned,
110 RleVersion version,
111 MemoryPool& pool);
112
113 /**
114 * Create an RLE decoder.
115 * @param input the input stream to read from
116 * @param isSigned true if the number sequence is signed
117 * @param version version of RLE decoding to do
118 * @param pool memory pool to use for allocation
119 */
120 std::unique_ptr<RleDecoder> createRleDecoder
121 (std::unique_ptr<SeekableInputStream> input,
122 bool isSigned,
123 RleVersion version,
124 MemoryPool& pool);
125
126} // namespace orc
127
128#endif // ORC_RLE_HH
129