1/**
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 */
18
19#ifndef ORC_COLUMN_READER_HH
20#define ORC_COLUMN_READER_HH
21
22#include "orc/Vector.hh"
23#include "ByteRLE.hh"
24#include "Compression.hh"
25#include "Timezone.hh"
26#include "wrap/orc-proto-wrapper.hh"
27
28namespace orc {
29
30 class StripeStreams {
31 public:
32 virtual ~StripeStreams();
33
34 /**
35 * Get the array of booleans for which columns are selected.
36 * @return the address of an array which contains true at the index of
37 * each columnId is selected.
38 */
39 virtual const std::vector<bool> getSelectedColumns() const = 0;
40
41 /**
42 * Get the encoding for the given column for this stripe.
43 */
44 virtual proto::ColumnEncoding getEncoding(uint64_t columnId) const = 0;
45
46 /**
47 * Get the stream for the given column/kind in this stripe.
48 * @param columnId the id of the column
49 * @param kind the kind of the stream
50 * @param shouldStream should the reading page the stream in
51 * @return the new stream
52 */
53 virtual std::unique_ptr<SeekableInputStream>
54 getStream(uint64_t columnId,
55 proto::Stream_Kind kind,
56 bool shouldStream) const = 0;
57
58 /**
59 * Get the memory pool for this reader.
60 */
61 virtual MemoryPool& getMemoryPool() const = 0;
62
63 /**
64 * Get the writer's timezone, so that we can convert their dates correctly.
65 */
66 virtual const Timezone& getWriterTimezone() const = 0;
67
68 /**
69 * Get the error stream.
70 * @return a pointer to the stream that should get error messages
71 */
72 virtual std::ostream* getErrorStream() const = 0;
73
74 /**
75 * Should the reader throw when the scale overflows when reading Hive 0.11
76 * decimals.
77 * @return true if it should throw
78 */
79 virtual bool getThrowOnHive11DecimalOverflow() const = 0;
80
81 /**
82 * What is the scale forced on the Hive 0.11 decimals?
83 * @return the number of scale digits
84 */
85 virtual int32_t getForcedScaleOnHive11Decimal() const = 0;
86 };
87
88 /**
89 * The interface for reading ORC data types.
90 */
91 class ColumnReader {
92 protected:
93 std::unique_ptr<ByteRleDecoder> notNullDecoder;
94 uint64_t columnId;
95 MemoryPool& memoryPool;
96
97 public:
98 ColumnReader(const Type& type, StripeStreams& stipe);
99
100 virtual ~ColumnReader();
101
102 /**
103 * Skip number of specified rows.
104 * @param numValues the number of values to skip
105 * @return the number of non-null values skipped
106 */
107 virtual uint64_t skip(uint64_t numValues);
108
109 /**
110 * Read the next group of values into this rowBatch.
111 * @param rowBatch the memory to read into.
112 * @param numValues the number of values to read
113 * @param notNull if null, all values are not null. Otherwise, it is
114 * a mask (with at least numValues bytes) for which values to
115 * set.
116 */
117 virtual void next(ColumnVectorBatch& rowBatch,
118 uint64_t numValues,
119 char* notNull);
120 };
121
122 /**
123 * Create a reader for the given stripe.
124 */
125 std::unique_ptr<ColumnReader> buildReader(const Type& type,
126 StripeStreams& stripe);
127}
128
129#endif
130