1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef ARROW_COMPUTE_KERNEL_H
19#define ARROW_COMPUTE_KERNEL_H
20
21#include <memory>
22#include <utility>
23#include <vector>
24
25#include "arrow/array.h"
26#include "arrow/record_batch.h"
27#include "arrow/table.h"
28#include "arrow/util/macros.h"
29#include "arrow/util/variant.h" // IWYU pragma: export
30#include "arrow/util/visibility.h"
31
32namespace arrow {
33namespace compute {
34
35class FunctionContext;
36
37/// \class OpKernel
38/// \brief Base class for operator kernels
39class ARROW_EXPORT OpKernel {
40 public:
41 virtual ~OpKernel() = default;
42};
43
44/// \brief Placeholder for Scalar values until we implement these
45struct ARROW_EXPORT Scalar {
46 ~Scalar() {}
47
48 ARROW_DISALLOW_COPY_AND_ASSIGN(Scalar);
49};
50
51/// \class Datum
52/// \brief Variant type for various Arrow C++ data structures
53struct ARROW_EXPORT Datum {
54 enum type { NONE, SCALAR, ARRAY, CHUNKED_ARRAY, RECORD_BATCH, TABLE, COLLECTION };
55
56 util::variant<decltype(NULLPTR), std::shared_ptr<Scalar>, std::shared_ptr<ArrayData>,
57 std::shared_ptr<ChunkedArray>, std::shared_ptr<RecordBatch>,
58 std::shared_ptr<Table>, std::vector<Datum>>
59 value;
60
61 /// \brief Empty datum, to be populated elsewhere
62 Datum() : value(NULLPTR) {}
63
64 Datum(const std::shared_ptr<Scalar>& value) // NOLINT implicit conversion
65 : value(value) {}
66 Datum(const std::shared_ptr<ArrayData>& value) // NOLINT implicit conversion
67 : value(value) {}
68
69 Datum(const std::shared_ptr<Array>& value) // NOLINT implicit conversion
70 : Datum(value ? value->data() : NULLPTR) {}
71
72 Datum(const std::shared_ptr<ChunkedArray>& value) // NOLINT implicit conversion
73 : value(value) {}
74 Datum(const std::shared_ptr<RecordBatch>& value) // NOLINT implicit conversion
75 : value(value) {}
76 Datum(const std::shared_ptr<Table>& value) // NOLINT implicit conversion
77 : value(value) {}
78 Datum(const std::vector<Datum>& value) // NOLINT implicit conversion
79 : value(value) {}
80
81 // Cast from subtypes of Array to Datum
82 template <typename T,
83 typename = typename std::enable_if<std::is_base_of<Array, T>::value>::type>
84 Datum(const std::shared_ptr<T>& value) // NOLINT implicit conversion
85 : Datum(std::shared_ptr<Array>(value)) {}
86
87 ~Datum() {}
88
89 Datum(const Datum& other) noexcept { this->value = other.value; }
90
91 // Define move constructor and move assignment, for better performance
92 Datum(Datum&& other) noexcept : value(std::move(other.value)) {}
93
94 Datum& operator=(Datum&& other) noexcept {
95 value = std::move(other.value);
96 return *this;
97 }
98
99 Datum::type kind() const {
100 switch (this->value.which()) {
101 case 0:
102 return Datum::NONE;
103 case 1:
104 return Datum::SCALAR;
105 case 2:
106 return Datum::ARRAY;
107 case 3:
108 return Datum::CHUNKED_ARRAY;
109 case 4:
110 return Datum::RECORD_BATCH;
111 case 5:
112 return Datum::TABLE;
113 case 6:
114 return Datum::COLLECTION;
115 default:
116 return Datum::NONE;
117 }
118 }
119
120 std::shared_ptr<ArrayData> array() const {
121 return util::get<std::shared_ptr<ArrayData>>(this->value);
122 }
123
124 std::shared_ptr<Array> make_array() const {
125 return MakeArray(util::get<std::shared_ptr<ArrayData>>(this->value));
126 }
127
128 std::shared_ptr<ChunkedArray> chunked_array() const {
129 return util::get<std::shared_ptr<ChunkedArray>>(this->value);
130 }
131
132 const std::vector<Datum> collection() const {
133 return util::get<std::vector<Datum>>(this->value);
134 }
135
136 bool is_arraylike() const {
137 return this->kind() == Datum::ARRAY || this->kind() == Datum::CHUNKED_ARRAY;
138 }
139
140 /// \brief The value type of the variant, if any
141 ///
142 /// \return nullptr if no type
143 std::shared_ptr<DataType> type() const {
144 if (this->kind() == Datum::ARRAY) {
145 return util::get<std::shared_ptr<ArrayData>>(this->value)->type;
146 } else if (this->kind() == Datum::CHUNKED_ARRAY) {
147 return util::get<std::shared_ptr<ChunkedArray>>(this->value)->type();
148 }
149 return NULLPTR;
150 }
151};
152
153/// \class UnaryKernel
154/// \brief An function of a single input argument.
155///
156/// Note to implementors: Try to avoid making kernels that allocate memory if
157/// the output size is a deterministic function of the Input Datum's metadata.
158/// Instead separate the logic of the kernel and allocations necessary into
159/// two different kernels. Some reusable kernels that allocate buffers
160/// and delegate computation to another kernel are available in util-internal.h.
161class ARROW_EXPORT UnaryKernel : public OpKernel {
162 public:
163 /// \brief Executes the kernel.
164 ///
165 /// \param[out] out The output of the function. Each implementation of this
166 /// function might assume different things about the existing contents of out
167 /// (e.g. which buffers are preallocated). In the future it is expected that
168 /// there will be a more generic mechansim for understanding the necessary
169 /// contracts.
170 virtual Status Call(FunctionContext* ctx, const Datum& input, Datum* out) = 0;
171};
172
173/// \class BinaryKernel
174/// \brief An array-valued function of a two input arguments
175class ARROW_EXPORT BinaryKernel : public OpKernel {
176 public:
177 virtual Status Call(FunctionContext* ctx, const Datum& left, const Datum& right,
178 Datum* out) = 0;
179};
180
181} // namespace compute
182} // namespace arrow
183
184#endif // ARROW_COMPUTE_KERNEL_H
185