// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
17 | |
18 | #ifndef PARQUET_RECORD_READER_H |
19 | #define PARQUET_RECORD_READER_H |
20 | |
21 | #include <cstdint> |
22 | #include <memory> |
23 | #include <vector> |
24 | |
25 | #include "arrow/memory_pool.h" |
26 | |
27 | #include "parquet/util/macros.h" |
28 | #include "parquet/util/memory.h" |
29 | |
namespace arrow {

// Forward declaration only: this header names Array solely as the element
// type of std::shared_ptr<::arrow::Array>, which does not require the
// complete type.
class Array;

}  // namespace arrow
35 | |
36 | namespace parquet { |
37 | |
38 | class ColumnDescriptor; |
39 | class ; |
40 | |
41 | namespace internal { |
42 | |
43 | /// \brief Stateful column reader that delimits semantic records for both flat |
44 | /// and nested columns |
45 | /// |
46 | /// \note API EXPERIMENTAL |
47 | /// \since 1.3.0 |
48 | class RecordReader { |
49 | public: |
50 | // So that we can create subclasses |
51 | class RecordReaderImpl; |
52 | |
53 | static std::shared_ptr<RecordReader> Make( |
54 | const ColumnDescriptor* descr, |
55 | ::arrow::MemoryPool* pool = ::arrow::default_memory_pool()); |
56 | |
57 | virtual ~RecordReader(); |
58 | |
59 | /// \brief Decoded definition levels |
60 | const int16_t* def_levels() const; |
61 | |
62 | /// \brief Decoded repetition levels |
63 | const int16_t* rep_levels() const; |
64 | |
65 | /// \brief Decoded values, including nulls, if any |
66 | const uint8_t* values() const; |
67 | |
68 | /// \brief Attempt to read indicated number of records from column chunk |
69 | /// \return number of records read |
70 | int64_t ReadRecords(int64_t num_records); |
71 | |
72 | /// \brief Pre-allocate space for data. Results in better flat read performance |
73 | void Reserve(int64_t num_values); |
74 | |
75 | /// \brief Clear consumed values and repetition/definition levels as the |
76 | /// result of calling ReadRecords |
77 | void Reset(); |
78 | |
79 | std::shared_ptr<ResizableBuffer> ReleaseValues(); |
80 | std::shared_ptr<ResizableBuffer> ReleaseIsValid(); |
81 | |
82 | /// \brief Number of values written including nulls (if any) |
83 | int64_t values_written() const; |
84 | |
85 | /// \brief Number of definition / repetition levels (from those that have |
86 | /// been decoded) that have been consumed inside the reader. |
87 | int64_t levels_position() const; |
88 | |
89 | /// \brief Number of definition / repetition levels that have been written |
90 | /// internally in the reader |
91 | int64_t levels_written() const; |
92 | |
93 | /// \brief Number of nulls in the leaf |
94 | int64_t null_count() const; |
95 | |
96 | /// \brief True if the leaf values are nullable |
97 | bool nullable_values() const; |
98 | |
99 | /// \brief Return true if the record reader has more internal data yet to |
100 | /// process |
101 | bool HasMoreData() const; |
102 | |
103 | /// \brief Advance record reader to the next row group |
104 | /// \param[in] reader obtained from RowGroupReader::GetColumnPageReader |
105 | void (std::unique_ptr<PageReader> reader); |
106 | |
107 | void DebugPrintState(); |
108 | |
109 | // For BYTE_ARRAY, FIXED_LEN_BYTE_ARRAY types that may have chunked output |
110 | std::vector<std::shared_ptr<::arrow::Array>> GetBuilderChunks(); |
111 | |
112 | private: |
113 | std::unique_ptr<RecordReaderImpl> impl_; |
114 | explicit RecordReader(RecordReaderImpl* impl); |
115 | }; |
116 | |
117 | } // namespace internal |
118 | } // namespace parquet |
119 | |
120 | #endif // PARQUET_RECORD_READER_H |
121 | |