1 | /** |
2 | * Licensed to the Apache Software Foundation (ASF) under one |
3 | * or more contributor license agreements. See the NOTICE file |
4 | * distributed with this work for additional information |
5 | * regarding copyright ownership. The ASF licenses this file |
6 | * to you under the Apache License, Version 2.0 (the |
7 | * "License"); you may not use this file except in compliance |
8 | * with the License. You may obtain a copy of the License at |
9 | * |
10 | * http://www.apache.org/licenses/LICENSE-2.0 |
11 | * |
12 | * Unless required by applicable law or agreed to in writing, software |
13 | * distributed under the License is distributed on an "AS IS" BASIS, |
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | * See the License for the specific language governing permissions and |
16 | * limitations under the License. |
17 | */ |
18 | |
19 | #ifndef ORC_COLUMN_READER_HH |
20 | #define ORC_COLUMN_READER_HH |
21 | |
22 | #include "orc/Vector.hh" |
23 | #include "ByteRLE.hh" |
24 | #include "Compression.hh" |
25 | #include "Timezone.hh" |
26 | #include "wrap/orc-proto-wrapper.hh" |
27 | |
28 | namespace orc { |
29 | |
30 | class StripeStreams { |
31 | public: |
32 | virtual ~StripeStreams(); |
33 | |
34 | /** |
35 | * Get the array of booleans for which columns are selected. |
36 | * @return the address of an array which contains true at the index of |
37 | * each columnId is selected. |
38 | */ |
39 | virtual const std::vector<bool> getSelectedColumns() const = 0; |
40 | |
41 | /** |
42 | * Get the encoding for the given column for this stripe. |
43 | */ |
44 | virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const = 0; |
45 | |
46 | /** |
47 | * Get the stream for the given column/kind in this stripe. |
48 | * @param columnId the id of the column |
49 | * @param kind the kind of the stream |
50 | * @param shouldStream should the reading page the stream in |
51 | * @return the new stream |
52 | */ |
53 | virtual std::unique_ptr<SeekableInputStream> |
54 | getStream(uint64_t columnId, |
55 | proto::Stream_Kind kind, |
56 | bool shouldStream) const = 0; |
57 | |
58 | /** |
59 | * Get the memory pool for this reader. |
60 | */ |
61 | virtual MemoryPool& getMemoryPool() const = 0; |
62 | |
63 | /** |
64 | * Get the writer's timezone, so that we can convert their dates correctly. |
65 | */ |
66 | virtual const Timezone& getWriterTimezone() const = 0; |
67 | |
68 | /** |
69 | * Get the error stream. |
70 | * @return a pointer to the stream that should get error messages |
71 | */ |
72 | virtual std::ostream* getErrorStream() const = 0; |
73 | |
74 | /** |
75 | * Should the reader throw when the scale overflows when reading Hive 0.11 |
76 | * decimals. |
77 | * @return true if it should throw |
78 | */ |
79 | virtual bool getThrowOnHive11DecimalOverflow() const = 0; |
80 | |
81 | /** |
82 | * What is the scale forced on the Hive 0.11 decimals? |
83 | * @return the number of scale digits |
84 | */ |
85 | virtual int32_t getForcedScaleOnHive11Decimal() const = 0; |
86 | }; |
87 | |
88 | /** |
89 | * The interface for reading ORC data types. |
90 | */ |
91 | class ColumnReader { |
92 | protected: |
93 | std::unique_ptr<ByteRleDecoder> notNullDecoder; |
94 | uint64_t columnId; |
95 | MemoryPool& memoryPool; |
96 | |
97 | public: |
98 | ColumnReader(const Type& type, StripeStreams& stipe); |
99 | |
100 | virtual ~ColumnReader(); |
101 | |
102 | /** |
103 | * Skip number of specified rows. |
104 | * @param numValues the number of values to skip |
105 | * @return the number of non-null values skipped |
106 | */ |
107 | virtual uint64_t skip(uint64_t numValues); |
108 | |
109 | /** |
110 | * Read the next group of values into this rowBatch. |
111 | * @param rowBatch the memory to read into. |
112 | * @param numValues the number of values to read |
113 | * @param notNull if null, all values are not null. Otherwise, it is |
114 | * a mask (with at least numValues bytes) for which values to |
115 | * set. |
116 | */ |
117 | virtual void next(ColumnVectorBatch& rowBatch, |
118 | uint64_t numValues, |
119 | char* notNull); |
120 | }; |
121 | |
122 | /** |
123 | * Create a reader for the given stripe. |
124 | */ |
125 | std::unique_ptr<ColumnReader> buildReader(const Type& type, |
126 | StripeStreams& stripe); |
127 | } |
128 | |
129 | #endif |
130 | |