1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef ARROW_COMPUTE_KERNELS_HASH_H
19#define ARROW_COMPUTE_KERNELS_HASH_H
20
21#include <memory>
22
23#include "arrow/compute/kernel.h"
24#include "arrow/status.h"
25#include "arrow/util/visibility.h"
26
27namespace arrow {
28
29class Array;
30class DataType;
31struct ArrayData;
32
33namespace compute {
34
35class FunctionContext;
36
37/// \brief Invoke hash table kernel on input array, returning any output
38/// values. Implementations should be thread-safe
39class ARROW_EXPORT HashKernel : public UnaryKernel {
40 public:
41 // XXX why are those methods exposed?
42 virtual Status Reset() = 0;
43 virtual Status Append(FunctionContext* ctx, const ArrayData& input) = 0;
44 virtual Status Flush(Datum* out) = 0;
45 virtual Status GetDictionary(std::shared_ptr<ArrayData>* out) = 0;
46};
47
48/// \since 0.8.0
49/// \note API not yet finalized
50ARROW_EXPORT
51Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr<DataType>& type,
52 std::unique_ptr<HashKernel>* kernel);
53
54ARROW_EXPORT
55Status GetDictionaryEncodeKernel(FunctionContext* ctx,
56 const std::shared_ptr<DataType>& type,
57 std::unique_ptr<HashKernel>* kernel);
58
59/// \brief Compute unique elements from an array-like object
60/// \param[in] context the FunctionContext
61/// \param[in] datum array-like input
62/// \param[out] out result as Array
63///
64/// \since 0.8.0
65/// \note API not yet finalized
66ARROW_EXPORT
67Status Unique(FunctionContext* context, const Datum& datum, std::shared_ptr<Array>* out);
68
69/// \brief Dictionary-encode values in an array-like object
70/// \param[in] context the FunctionContext
71/// \param[in] data array-like input
72/// \param[out] out result with same shape and type as input
73///
74/// \since 0.8.0
75/// \note API not yet finalized
76ARROW_EXPORT
77Status DictionaryEncode(FunctionContext* context, const Datum& data, Datum* out);
78
79// TODO(wesm): Define API for incremental dictionary encoding
80
81// TODO(wesm): Define API for regularizing DictionaryArray objects with
82// different dictionaries
83
// class DictionaryEncoder {
//  public:
//   virtual Status Encode(const Datum& data, Datum* out) = 0;
// };
88
89//
90// ARROW_EXPORT
91// Status DictionaryEncode(FunctionContext* context, const Datum& data,
92// const Array& prior_dictionary, Datum* out);
93
94// TODO(wesm): Implement these next
95// ARROW_EXPORT
96// Status Match(FunctionContext* context, const Datum& values, const Datum& member_set,
97// Datum* out);
98
99// ARROW_EXPORT
100// Status IsIn(FunctionContext* context, const Datum& values, const Datum& member_set,
101// Datum* out);
102
103// ARROW_EXPORT
104// Status CountValues(FunctionContext* context, const Datum& values,
105// std::shared_ptr<Array>* out_uniques,
106// std::shared_ptr<Array>* out_counts);
107
108} // namespace compute
109} // namespace arrow
110
111#endif // ARROW_COMPUTE_KERNELS_HASH_H
112