// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
17 | |
// Public API for the "Feather" file format, originally created at
// http://github.com/wesm/feather
20 | |
21 | #ifndef ARROW_IPC_FEATHER_H |
22 | #define ARROW_IPC_FEATHER_H |
23 | |
24 | #include <cstdint> |
25 | #include <memory> |
26 | #include <string> |
27 | #include <vector> |
28 | |
29 | #include "arrow/util/visibility.h" |
30 | |
31 | namespace arrow { |
32 | |
// Forward declarations of the Arrow types that appear in the signatures
// below. They are only used by pointer, reference or as a declared return
// type in this header, so their full definitions are not needed here.
class Array;
class Column;
class Status;
class Table;
37 | |
namespace io {

// Forward declarations of the stream types named in the Open() factories
// below; this header only refers to them through std::shared_ptr.
class OutputStream;
class RandomAccessFile;

}  // namespace io
44 | |
45 | namespace ipc { |
46 | namespace feather { |
47 | |
/// \brief Feather version number constant declared by this header.
///
/// `constexpr` already makes the variable `const`, and a const-qualified
/// namespace-scope variable has internal linkage, so the extra `static` and
/// `const` specifiers are unnecessary.
// NOTE(review): presumably the version that TableReader::version() reports for
// files produced by this library -- confirm against the implementation.
constexpr int kFeatherVersion = 2;
49 | |
// ----------------------------------------------------------------------
// Table reader and writer classes
52 | |
53 | /// \class TableReader |
54 | /// \brief An interface for reading columns from Feather files |
55 | class ARROW_EXPORT TableReader { |
56 | public: |
57 | TableReader(); |
58 | ~TableReader(); |
59 | |
60 | /// \brief Open a Feather file from a RandomAccessFile interface |
61 | /// |
62 | /// \param[in] source a RandomAccessFile instance |
63 | /// \param[out] out the table reader |
64 | static Status Open(const std::shared_ptr<io::RandomAccessFile>& source, |
65 | std::unique_ptr<TableReader>* out); |
66 | |
67 | /// \brief Optional table description |
68 | /// |
69 | /// This does not return a const std::string& because a string has to be |
70 | /// copied from the flatbuffer to be able to return a non-flatbuffer type |
71 | std::string GetDescription() const; |
72 | |
73 | /// \brief Return true if the table has a description field populated |
74 | bool HasDescription() const; |
75 | |
76 | /// \brief Return the version number of the Feather file |
77 | int version() const; |
78 | |
79 | /// \brief Return the number of rows in the file |
80 | int64_t num_rows() const; |
81 | |
82 | /// \brief Return the number of columns in the file |
83 | int64_t num_columns() const; |
84 | |
85 | std::string GetColumnName(int i) const; |
86 | |
87 | /// \brief Read a column from the file as an arrow::Column. |
88 | /// |
89 | /// \param[in] i the column index to read |
90 | /// \param[out] out the returned column |
91 | /// \return Status |
92 | /// |
93 | /// This function is zero-copy if the file source supports zero-copy reads |
94 | Status GetColumn(int i, std::shared_ptr<Column>* out); |
95 | |
96 | /// \brief Read all columns from the file as an arrow::Table. |
97 | /// |
98 | /// \param[out] out the returned table |
99 | /// \return Status |
100 | /// |
101 | /// This function is zero-copy if the file source supports zero-copy reads |
102 | Status Read(std::shared_ptr<Table>* out); |
103 | |
104 | /// \brief Read only the specified columns from the file as an arrow::Table. |
105 | /// |
106 | /// \param[in] indices the column indices to read |
107 | /// \param[out] out the returned table |
108 | /// \return Status |
109 | /// |
110 | /// This function is zero-copy if the file source supports zero-copy reads |
111 | Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out); |
112 | |
113 | /// \brief Read only the specified columns from the file as an arrow::Table. |
114 | /// |
115 | /// \param[in] names the column names to read |
116 | /// \param[out] out the returned table |
117 | /// \return Status |
118 | /// |
119 | /// This function is zero-copy if the file source supports zero-copy reads |
120 | Status Read(const std::vector<std::string>& names, std::shared_ptr<Table>* out); |
121 | |
122 | private: |
123 | class ARROW_NO_EXPORT TableReaderImpl; |
124 | std::unique_ptr<TableReaderImpl> impl_; |
125 | }; |
126 | |
127 | /// \class TableWriter |
128 | /// \brief Interface for writing Feather files |
129 | class ARROW_EXPORT TableWriter { |
130 | public: |
131 | ~TableWriter(); |
132 | |
133 | /// \brief Create a new TableWriter that writes to an OutputStream |
134 | /// \param[in] stream an output stream |
135 | /// \param[out] out the returned table writer |
136 | /// \return Status |
137 | static Status Open(const std::shared_ptr<io::OutputStream>& stream, |
138 | std::unique_ptr<TableWriter>* out); |
139 | |
140 | /// \brief Set the description field in the file metadata |
141 | void SetDescription(const std::string& desc); |
142 | |
143 | /// \brief Set the number of rows in the file |
144 | void SetNumRows(int64_t num_rows); |
145 | |
146 | /// \brief Append a column to the file |
147 | /// |
148 | /// \param[in] name the column name |
149 | /// \param[in] values the column values as a contiguous arrow::Array |
150 | /// \return Status |
151 | Status Append(const std::string& name, const Array& values); |
152 | |
153 | /// \brief Write a table to the file |
154 | /// |
155 | /// \param[in] table the table to be written |
156 | /// \return Status |
157 | Status Write(const Table& table); |
158 | |
159 | /// \brief Finalize the file by writing the file metadata and footer |
160 | /// \return Status |
161 | Status Finalize(); |
162 | |
163 | private: |
164 | TableWriter(); |
165 | class ARROW_NO_EXPORT TableWriterImpl; |
166 | std::unique_ptr<TableWriterImpl> impl_; |
167 | }; |
168 | |
169 | } // namespace feather |
170 | } // namespace ipc |
171 | } // namespace arrow |
172 | |
173 | #endif // ARROW_IPC_FEATHER_H |
174 | |