1 | /** |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | */ |
18 | |
19 | #ifndef ORC_RLEV1_HH |
20 | #define ORC_RLEV1_HH |
21 | |
22 | #include "Adaptor.hh" |
23 | #include "RLE.hh" |
24 | |
25 | #include <memory> |
26 | |
27 | namespace orc { |
28 | |
29 | class RleEncoderV1 : public RleEncoder { |
30 | public: |
31 | RleEncoderV1(std::unique_ptr<BufferedOutputStream> outStream, |
32 | bool hasSigned); |
33 | ~RleEncoderV1() override; |
34 | |
35 | /** |
36 | * Encode the next batch of values. |
37 | * @param data the array to be written |
38 | * @param numValues the number of values to write |
39 | * @param notNull If the pointer is null, all values are writen. If the |
40 | * pointer is not null, positions that are false are skipped. |
41 | */ |
42 | void add(const int64_t* data, uint64_t numValues, |
43 | const char* notNull) override; |
44 | |
45 | /** |
46 | * Get size of buffer used so far. |
47 | */ |
48 | uint64_t getBufferSize() const override { |
49 | return outputStream->getSize(); |
50 | } |
51 | |
52 | /** |
53 | * Flushing underlying BufferedOutputStream |
54 | */ |
55 | uint64_t flush() override; |
56 | |
57 | /** |
58 | * record current position |
59 | * @param recorder use the recorder to record current positions |
60 | */ |
61 | virtual void recordPosition(PositionRecorder* recorder) const override; |
62 | |
63 | private: |
64 | std::unique_ptr<BufferedOutputStream> outputStream; |
65 | bool isSigned; |
66 | int64_t* literals; |
67 | int numLiterals; |
68 | int64_t delta; |
69 | bool repeat; |
70 | int tailRunLength; |
71 | int bufferPosition; |
72 | int bufferLength; |
73 | char* buffer; |
74 | |
75 | void write(int64_t val); |
76 | void writeByte(char c); |
77 | void writeVulong(int64_t val); |
78 | void writeVslong(int64_t val); |
79 | void writeValues(); |
80 | }; |
81 | |
82 | class RleDecoderV1 : public RleDecoder { |
83 | public: |
84 | RleDecoderV1(std::unique_ptr<SeekableInputStream> input, |
85 | bool isSigned); |
86 | |
87 | /** |
88 | * Seek to a particular spot. |
89 | */ |
90 | void seek(PositionProvider&) override; |
91 | |
92 | /** |
93 | * Seek over a given number of values. |
94 | */ |
95 | void skip(uint64_t numValues) override; |
96 | |
97 | /** |
98 | * Read a number of values into the batch. |
99 | */ |
100 | void next(int64_t* data, uint64_t numValues, |
101 | const char* notNull) override; |
102 | |
103 | private: |
104 | inline signed char readByte(); |
105 | |
106 | inline void (); |
107 | |
108 | inline uint64_t readLong(); |
109 | |
110 | inline void skipLongs(uint64_t numValues); |
111 | |
112 | const std::unique_ptr<SeekableInputStream> inputStream; |
113 | const bool isSigned; |
114 | uint64_t remainingValues; |
115 | int64_t value; |
116 | const char* bufferStart; |
117 | const char* bufferEnd; |
118 | int64_t delta; |
119 | bool repeating; |
120 | }; |
121 | } // namespace orc |
122 | |
123 | #endif // ORC_RLEV1_HH |
124 | |