1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#pragma once
19
20#include <memory>
21
22#include "arrow/array/builder_adaptive.h" // IWYU pragma: export
23#include "arrow/array/builder_base.h" // IWYU pragma: export
24
25namespace arrow {
26
27// ----------------------------------------------------------------------
28// Dictionary builder
29
30namespace internal {
31
/// \brief Select the scalar type used for values of a given value type
///
/// The primary template forwards the nested `c_type` of the value type.
/// DictionaryBuilder uses the resulting `type` as its `Scalar` alias.
template <typename ValueType>
struct DictionaryScalar {
  using type = typename ValueType::c_type;
};
36
37template <>
38struct DictionaryScalar<BinaryType> {
39 using type = util::string_view;
40};
41
42template <>
43struct DictionaryScalar<StringType> {
44 using type = util::string_view;
45};
46
47template <>
48struct DictionaryScalar<FixedSizeBinaryType> {
49 using type = util::string_view;
50};
51
52} // namespace internal
53
54/// \brief Array builder for created encoded DictionaryArray from dense array
55///
56/// Unlike other builders, dictionary builder does not completely reset the state
57/// on Finish calls. The arrays built after the initial Finish call will reuse
58/// the previously created encoding and build a delta dictionary when new terms
59/// occur.
60///
61/// data
62template <typename T>
63class ARROW_EXPORT DictionaryBuilder : public ArrayBuilder {
64 public:
65 using Scalar = typename internal::DictionaryScalar<T>::type;
66
67 // WARNING: the type given below is the value type, not the DictionaryType.
68 // The DictionaryType is instantiated on the Finish() call.
69 DictionaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
70
71 template <typename T1 = T>
72 explicit DictionaryBuilder(
73 typename std::enable_if<TypeTraits<T1>::is_parameter_free, MemoryPool*>::type pool)
74 : DictionaryBuilder<T1>(TypeTraits<T1>::type_singleton(), pool) {}
75
76 ~DictionaryBuilder() override;
77
78 /// \brief Append a scalar value
79 Status Append(const Scalar& value);
80
81 /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
82 template <typename T1 = T>
83 Status Append(typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T1>::value,
84 const uint8_t*>::type value) {
85 return Append(util::string_view(reinterpret_cast<const char*>(value), byte_width_));
86 }
87
88 /// \brief Append a fixed-width string (only for FixedSizeBinaryType)
89 template <typename T1 = T>
90 Status Append(typename std::enable_if<std::is_base_of<FixedSizeBinaryType, T1>::value,
91 const char*>::type value) {
92 return Append(util::string_view(value, byte_width_));
93 }
94
95 /// \brief Append a scalar null value
96 Status AppendNull();
97
98 /// \brief Append a whole dense array to the builder
99 Status AppendArray(const Array& array);
100
101 void Reset() override;
102 Status Resize(int64_t capacity) override;
103 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
104
105 /// is the dictionary builder in the delta building mode
106 bool is_building_delta() { return delta_offset_ > 0; }
107
108 protected:
109 class MemoTableImpl;
110 std::unique_ptr<MemoTableImpl> memo_table_;
111
112 int32_t delta_offset_;
113 // Only used for FixedSizeBinaryType
114 int32_t byte_width_;
115
116 AdaptiveIntBuilder values_builder_;
117};
118
119template <>
120class ARROW_EXPORT DictionaryBuilder<NullType> : public ArrayBuilder {
121 public:
122 DictionaryBuilder(const std::shared_ptr<DataType>& type, MemoryPool* pool);
123 explicit DictionaryBuilder(MemoryPool* pool);
124
125 /// \brief Append a scalar null value
126 Status AppendNull();
127
128 /// \brief Append a whole dense array to the builder
129 Status AppendArray(const Array& array);
130
131 Status Resize(int64_t capacity) override;
132 Status FinishInternal(std::shared_ptr<ArrayData>* out) override;
133
134 protected:
135 AdaptiveIntBuilder values_builder_;
136};
137
138class ARROW_EXPORT BinaryDictionaryBuilder : public DictionaryBuilder<BinaryType> {
139 public:
140 using DictionaryBuilder::Append;
141 using DictionaryBuilder::DictionaryBuilder;
142
143 Status Append(const uint8_t* value, int32_t length) {
144 return Append(reinterpret_cast<const char*>(value), length);
145 }
146
147 Status Append(const char* value, int32_t length) {
148 return Append(util::string_view(value, length));
149 }
150};
151
152/// \brief Dictionary array builder with convenience methods for strings
153class ARROW_EXPORT StringDictionaryBuilder : public DictionaryBuilder<StringType> {
154 public:
155 using DictionaryBuilder::Append;
156 using DictionaryBuilder::DictionaryBuilder;
157
158 Status Append(const uint8_t* value, int32_t length) {
159 return Append(reinterpret_cast<const char*>(value), length);
160 }
161
162 Status Append(const char* value, int32_t length) {
163 return Append(util::string_view(value, length));
164 }
165};
166
167} // namespace arrow
168