1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// Internal metadata helpers for the "Feather" file format, originally created
// at http://github.com/wesm/feather
20
21#ifndef ARROW_IPC_FEATHER_INTERNAL_H
22#define ARROW_IPC_FEATHER_INTERNAL_H
23
24#include <cstdint>
25#include <iostream>
26#include <memory>
27#include <string>
28#include <vector>
29
30#include "flatbuffers/flatbuffers.h"
31
32#include "arrow/buffer.h"
33#include "arrow/ipc/feather.h"
34#include "arrow/ipc/feather_generated.h"
35#include "arrow/type.h"
36
37namespace arrow {
38namespace ipc {
39namespace feather {
40
41typedef std::vector<flatbuffers::Offset<fbs::Column>> ColumnVector;
42typedef flatbuffers::FlatBufferBuilder FBB;
43typedef flatbuffers::Offset<flatbuffers::String> FBString;
44
45struct ARROW_EXPORT ColumnType {
46 enum type { PRIMITIVE, CATEGORY, TIMESTAMP, DATE, TIME };
47};
48
49struct ARROW_EXPORT ArrayMetadata {
50 ArrayMetadata() {}
51
52 ArrayMetadata(fbs::Type type, int64_t offset, int64_t length, int64_t null_count,
53 int64_t total_bytes)
54 : type(type),
55 offset(offset),
56 length(length),
57 null_count(null_count),
58 total_bytes(total_bytes) {}
59
60 bool Equals(const ArrayMetadata& other) const {
61 return this->type == other.type && this->offset == other.offset &&
62 this->length == other.length && this->null_count == other.null_count &&
63 this->total_bytes == other.total_bytes;
64 }
65
66 fbs::Type type;
67 int64_t offset;
68 int64_t length;
69 int64_t null_count;
70 int64_t total_bytes;
71};
72
73struct ARROW_EXPORT CategoryMetadata {
74 ArrayMetadata levels;
75 bool ordered;
76};
77
78struct ARROW_EXPORT TimestampMetadata {
79 TimeUnit::type unit;
80
81 // A timezone name known to the Olson timezone database. For display purposes
82 // because the actual data is all UTC
83 std::string timezone;
84};
85
86struct ARROW_EXPORT TimeMetadata {
87 TimeUnit::type unit;
88};
89
// Four-character magic string identifying the Feather format.
static constexpr char const* kFeatherMagicBytes = "FEA1";

// Default alignment used by Feather (presumably in bytes -- the users of this
// constant are not visible in this header).
static constexpr int kFeatherDefaultAlignment = 8;
92
93class ColumnBuilder;
94
95class ARROW_EXPORT TableBuilder {
96 public:
97 explicit TableBuilder(int64_t num_rows);
98 ~TableBuilder() = default;
99
100 FBB& fbb();
101 Status Finish();
102 std::shared_ptr<Buffer> GetBuffer() const;
103
104 std::unique_ptr<ColumnBuilder> AddColumn(const std::string& name);
105 void SetDescription(const std::string& description);
106 void SetNumRows(int64_t num_rows);
107 void add_column(const flatbuffers::Offset<fbs::Column>& col);
108
109 private:
110 flatbuffers::FlatBufferBuilder fbb_;
111 ColumnVector columns_;
112
113 friend class ColumnBuilder;
114
115 bool finished_;
116 std::string description_;
117 int64_t num_rows_;
118};
119
120class ARROW_EXPORT TableMetadata {
121 public:
122 TableMetadata() : table_(NULLPTR) {}
123 ~TableMetadata() = default;
124
125 Status Open(const std::shared_ptr<Buffer>& buffer) {
126 metadata_buffer_ = buffer;
127 table_ = fbs::GetCTable(buffer->data());
128
129 if (table_->version() < kFeatherVersion) {
130 std::cout << "This Feather file is old"
131 << " and will not be readable beyond the 0.3.0 release" << std::endl;
132 }
133 return Status::OK();
134 }
135
136 bool HasDescription() const { return table_->description() != 0; }
137
138 std::string GetDescription() const {
139 if (!HasDescription()) {
140 return std::string("");
141 }
142 return table_->description()->str();
143 }
144
145 int version() const { return table_->version(); }
146 int64_t num_rows() const { return table_->num_rows(); }
147 int64_t num_columns() const { return table_->columns()->size(); }
148
149 const fbs::Column* column(int i) { return table_->columns()->Get(i); }
150
151 private:
152 std::shared_ptr<Buffer> metadata_buffer_;
153 const fbs::CTable* table_;
154};
155
156static inline flatbuffers::Offset<fbs::PrimitiveArray> GetPrimitiveArray(
157 FBB& fbb, const ArrayMetadata& array) {
158 return fbs::CreatePrimitiveArray(fbb, array.type, fbs::Encoding_PLAIN, array.offset,
159 array.length, array.null_count, array.total_bytes);
160}
161
162static inline fbs::TimeUnit ToFlatbufferEnum(TimeUnit::type unit) {
163 return static_cast<fbs::TimeUnit>(static_cast<int>(unit));
164}
165
166static inline TimeUnit::type FromFlatbufferEnum(fbs::TimeUnit unit) {
167 return static_cast<TimeUnit::type>(static_cast<int>(unit));
168}
169
170// Convert Feather enums to Flatbuffer enums
171
172const fbs::TypeMetadata COLUMN_TYPE_ENUM_MAPPING[] = {
173 fbs::TypeMetadata_NONE, // PRIMITIVE
174 fbs::TypeMetadata_CategoryMetadata, // CATEGORY
175 fbs::TypeMetadata_TimestampMetadata, // TIMESTAMP
176 fbs::TypeMetadata_DateMetadata, // DATE
177 fbs::TypeMetadata_TimeMetadata // TIME
178};
179
180static inline fbs::TypeMetadata ToFlatbufferEnum(ColumnType::type column_type) {
181 return COLUMN_TYPE_ENUM_MAPPING[column_type];
182}
183
184static inline void FromFlatbuffer(const fbs::PrimitiveArray* values, ArrayMetadata* out) {
185 out->type = values->type();
186 out->offset = values->offset();
187 out->length = values->length();
188 out->null_count = values->null_count();
189 out->total_bytes = values->total_bytes();
190}
191
192class ARROW_EXPORT ColumnBuilder {
193 public:
194 ColumnBuilder(TableBuilder* parent, const std::string& name);
195 ~ColumnBuilder() = default;
196
197 flatbuffers::Offset<void> CreateColumnMetadata();
198
199 Status Finish();
200 void SetValues(const ArrayMetadata& values);
201 void SetUserMetadata(const std::string& data);
202 void SetCategory(const ArrayMetadata& levels, bool ordered = false);
203 void SetTimestamp(TimeUnit::type unit);
204 void SetTimestamp(TimeUnit::type unit, const std::string& timezone);
205 void SetDate();
206 void SetTime(TimeUnit::type unit);
207 FBB& fbb();
208
209 private:
210 TableBuilder* parent_;
211
212 std::string name_;
213 ArrayMetadata values_;
214 std::string user_metadata_;
215
216 // Column metadata
217
218 // Is this a primitive type, or one of the types having metadata? Default is
219 // primitive
220 ColumnType::type type_;
221
222 // Type-specific metadata union
223 CategoryMetadata meta_category_;
224 TimeMetadata meta_time_;
225
226 TimestampMetadata meta_timestamp_;
227
228 FBB* fbb_;
229};
230
231} // namespace feather
232} // namespace ipc
233} // namespace arrow
234
235#endif // ARROW_IPC_FEATHER_INTERNAL_H
236