1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef ARROW_TABLE_BUILDER_H
19#define ARROW_TABLE_BUILDER_H
20
21#include <cstdint>
22#include <memory>
23#include <vector>
24
25#include "arrow/builder.h"
26#include "arrow/status.h"
27#include "arrow/type.h"
28#include "arrow/util/checked_cast.h"
29#include "arrow/util/macros.h"
30#include "arrow/util/visibility.h"
31
32namespace arrow {
33
34class MemoryPool;
35class RecordBatch;
36
37/// \class RecordBatchBuilder
38/// \brief Helper class for creating record batches iteratively given a known
39/// schema
40class ARROW_EXPORT RecordBatchBuilder {
41 public:
42 /// \brief Create an initialize a RecordBatchBuilder
43 /// \param[in] schema The schema for the record batch
44 /// \param[in] pool A MemoryPool to use for allocations
45 /// \param[in] builder the created builder instance
46 static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
47 std::unique_ptr<RecordBatchBuilder>* builder);
48
49 /// \brief Create an initialize a RecordBatchBuilder
50 /// \param[in] schema The schema for the record batch
51 /// \param[in] pool A MemoryPool to use for allocations
52 /// \param[in] initial_capacity The initial capacity for the builders
53 /// \param[in] builder the created builder instance
54 static Status Make(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
55 int64_t initial_capacity,
56 std::unique_ptr<RecordBatchBuilder>* builder);
57
58 /// \brief Get base pointer to field builder
59 /// \param i the field index
60 /// \return pointer to ArrayBuilder
61 ArrayBuilder* GetField(int i) { return raw_field_builders_[i]; }
62
63 /// \brief Return field builder casted to indicated specific builder type
64 /// \param i the field index
65 /// \return pointer to template type
66 template <typename T>
67 T* GetFieldAs(int i) {
68 return internal::checked_cast<T*>(raw_field_builders_[i]);
69 }
70
71 /// \brief Finish current batch and optionally reset
72 /// \param[in] reset_builders the resulting RecordBatch
73 /// \param[out] batch the resulting RecordBatch
74 /// \return Status
75 Status Flush(bool reset_builders, std::shared_ptr<RecordBatch>* batch);
76
77 /// \brief Finish current batch and reset
78 /// \param[out] batch the resulting RecordBatch
79 /// \return Status
80 Status Flush(std::shared_ptr<RecordBatch>* batch);
81
82 /// \brief Set the initial capacity for new builders
83 void SetInitialCapacity(int64_t capacity);
84
85 /// \brief The initial capacity for builders
86 int64_t initial_capacity() const { return initial_capacity_; }
87
88 /// \brief The number of fields in the schema
89 int num_fields() const { return schema_->num_fields(); }
90
91 /// \brief The number of fields in the schema
92 std::shared_ptr<Schema> schema() const { return schema_; }
93
94 private:
95 ARROW_DISALLOW_COPY_AND_ASSIGN(RecordBatchBuilder);
96
97 RecordBatchBuilder(const std::shared_ptr<Schema>& schema, MemoryPool* pool,
98 int64_t initial_capacity);
99
100 Status CreateBuilders();
101 Status InitBuilders();
102
103 std::shared_ptr<Schema> schema_;
104 int64_t initial_capacity_;
105 MemoryPool* pool_;
106
107 std::vector<std::unique_ptr<ArrayBuilder>> field_builders_;
108 std::vector<ArrayBuilder*> raw_field_builders_;
109};
110
111} // namespace arrow
112
113#endif // ARROW_TABLE_BUILDER_H
114