1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18// Public API for the "Feather" file format, originally created at
19// http://github.com/wesm/feather
20
21#ifndef ARROW_IPC_FEATHER_H
22#define ARROW_IPC_FEATHER_H
23
24#include <cstdint>
25#include <memory>
26#include <string>
27#include <vector>
28
29#include "arrow/util/visibility.h"
30
31namespace arrow {
32
// Forward declarations only: the declarations in this header name these
// types as return types, references, or smart-pointer template arguments,
// none of which requires the full class definition.
class Array;
class Column;
class Status;
class Table;

namespace io {
class OutputStream;
class RandomAccessFile;
}  // namespace io
44
45namespace ipc {
46namespace feather {
47
/// \brief Feather format version number declared by this header
///
/// A namespace-scope constexpr variable is implicitly const and has internal
/// linkage, so no extra specifiers are needed.
constexpr int kFeatherVersion = 2;
49
50// ----------------------------------------------------------------------
51// Metadata accessor classes
52
53/// \class TableReader
54/// \brief An interface for reading columns from Feather files
55class ARROW_EXPORT TableReader {
56 public:
57 TableReader();
58 ~TableReader();
59
60 /// \brief Open a Feather file from a RandomAccessFile interface
61 ///
62 /// \param[in] source a RandomAccessFile instance
63 /// \param[out] out the table reader
64 static Status Open(const std::shared_ptr<io::RandomAccessFile>& source,
65 std::unique_ptr<TableReader>* out);
66
67 /// \brief Optional table description
68 ///
69 /// This does not return a const std::string& because a string has to be
70 /// copied from the flatbuffer to be able to return a non-flatbuffer type
71 std::string GetDescription() const;
72
73 /// \brief Return true if the table has a description field populated
74 bool HasDescription() const;
75
76 /// \brief Return the version number of the Feather file
77 int version() const;
78
79 /// \brief Return the number of rows in the file
80 int64_t num_rows() const;
81
82 /// \brief Return the number of columns in the file
83 int64_t num_columns() const;
84
85 std::string GetColumnName(int i) const;
86
87 /// \brief Read a column from the file as an arrow::Column.
88 ///
89 /// \param[in] i the column index to read
90 /// \param[out] out the returned column
91 /// \return Status
92 ///
93 /// This function is zero-copy if the file source supports zero-copy reads
94 Status GetColumn(int i, std::shared_ptr<Column>* out);
95
96 /// \brief Read all columns from the file as an arrow::Table.
97 ///
98 /// \param[out] out the returned table
99 /// \return Status
100 ///
101 /// This function is zero-copy if the file source supports zero-copy reads
102 Status Read(std::shared_ptr<Table>* out);
103
104 /// \brief Read only the specified columns from the file as an arrow::Table.
105 ///
106 /// \param[in] indices the column indices to read
107 /// \param[out] out the returned table
108 /// \return Status
109 ///
110 /// This function is zero-copy if the file source supports zero-copy reads
111 Status Read(const std::vector<int>& indices, std::shared_ptr<Table>* out);
112
113 /// \brief Read only the specified columns from the file as an arrow::Table.
114 ///
115 /// \param[in] names the column names to read
116 /// \param[out] out the returned table
117 /// \return Status
118 ///
119 /// This function is zero-copy if the file source supports zero-copy reads
120 Status Read(const std::vector<std::string>& names, std::shared_ptr<Table>* out);
121
122 private:
123 class ARROW_NO_EXPORT TableReaderImpl;
124 std::unique_ptr<TableReaderImpl> impl_;
125};
126
127/// \class TableWriter
128/// \brief Interface for writing Feather files
129class ARROW_EXPORT TableWriter {
130 public:
131 ~TableWriter();
132
133 /// \brief Create a new TableWriter that writes to an OutputStream
134 /// \param[in] stream an output stream
135 /// \param[out] out the returned table writer
136 /// \return Status
137 static Status Open(const std::shared_ptr<io::OutputStream>& stream,
138 std::unique_ptr<TableWriter>* out);
139
140 /// \brief Set the description field in the file metadata
141 void SetDescription(const std::string& desc);
142
143 /// \brief Set the number of rows in the file
144 void SetNumRows(int64_t num_rows);
145
146 /// \brief Append a column to the file
147 ///
148 /// \param[in] name the column name
149 /// \param[in] values the column values as a contiguous arrow::Array
150 /// \return Status
151 Status Append(const std::string& name, const Array& values);
152
153 /// \brief Write a table to the file
154 ///
155 /// \param[in] table the table to be written
156 /// \return Status
157 Status Write(const Table& table);
158
159 /// \brief Finalize the file by writing the file metadata and footer
160 /// \return Status
161 Status Finalize();
162
163 private:
164 TableWriter();
165 class ARROW_NO_EXPORT TableWriterImpl;
166 std::unique_ptr<TableWriterImpl> impl_;
167};
168
169} // namespace feather
170} // namespace ipc
171} // namespace arrow
172
173#endif // ARROW_IPC_FEATHER_H
174