1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #ifndef ARROW_COMPUTE_KERNELS_HASH_H |
19 | #define ARROW_COMPUTE_KERNELS_HASH_H |
20 | |
21 | #include <memory> |
22 | |
23 | #include "arrow/compute/kernel.h" |
24 | #include "arrow/status.h" |
25 | #include "arrow/util/visibility.h" |
26 | |
27 | namespace arrow { |
28 | |
29 | class Array; |
30 | class DataType; |
31 | struct ArrayData; |
32 | |
33 | namespace compute { |
34 | |
35 | class FunctionContext; |
36 | |
37 | /// \brief Invoke hash table kernel on input array, returning any output |
38 | /// values. Implementations should be thread-safe |
39 | class ARROW_EXPORT HashKernel : public UnaryKernel { |
40 | public: |
41 | // XXX why are those methods exposed? |
42 | virtual Status Reset() = 0; |
43 | virtual Status Append(FunctionContext* ctx, const ArrayData& input) = 0; |
44 | virtual Status Flush(Datum* out) = 0; |
45 | virtual Status GetDictionary(std::shared_ptr<ArrayData>* out) = 0; |
46 | }; |
47 | |
48 | /// \since 0.8.0 |
49 | /// \note API not yet finalized |
50 | ARROW_EXPORT |
51 | Status GetUniqueKernel(FunctionContext* ctx, const std::shared_ptr<DataType>& type, |
52 | std::unique_ptr<HashKernel>* kernel); |
53 | |
54 | ARROW_EXPORT |
55 | Status GetDictionaryEncodeKernel(FunctionContext* ctx, |
56 | const std::shared_ptr<DataType>& type, |
57 | std::unique_ptr<HashKernel>* kernel); |
58 | |
59 | /// \brief Compute unique elements from an array-like object |
60 | /// \param[in] context the FunctionContext |
61 | /// \param[in] datum array-like input |
62 | /// \param[out] out result as Array |
63 | /// |
64 | /// \since 0.8.0 |
65 | /// \note API not yet finalized |
66 | ARROW_EXPORT |
67 | Status Unique(FunctionContext* context, const Datum& datum, std::shared_ptr<Array>* out); |
68 | |
69 | /// \brief Dictionary-encode values in an array-like object |
70 | /// \param[in] context the FunctionContext |
71 | /// \param[in] data array-like input |
72 | /// \param[out] out result with same shape and type as input |
73 | /// |
74 | /// \since 0.8.0 |
75 | /// \note API not yet finalized |
76 | ARROW_EXPORT |
77 | Status DictionaryEncode(FunctionContext* context, const Datum& data, Datum* out); |
78 | |
79 | // TODO(wesm): Define API for incremental dictionary encoding |
80 | |
81 | // TODO(wesm): Define API for regularizing DictionaryArray objects with |
82 | // different dictionaries |
83 | |
// class DictionaryEncoder {
//  public:
//   virtual Status Encode(const Datum& data, Datum* out) = 0;
// };
88 | |
89 | // |
90 | // ARROW_EXPORT |
91 | // Status DictionaryEncode(FunctionContext* context, const Datum& data, |
92 | // const Array& prior_dictionary, Datum* out); |
93 | |
94 | // TODO(wesm): Implement these next |
95 | // ARROW_EXPORT |
96 | // Status Match(FunctionContext* context, const Datum& values, const Datum& member_set, |
97 | // Datum* out); |
98 | |
99 | // ARROW_EXPORT |
100 | // Status IsIn(FunctionContext* context, const Datum& values, const Datum& member_set, |
101 | // Datum* out); |
102 | |
103 | // ARROW_EXPORT |
104 | // Status CountValues(FunctionContext* context, const Datum& values, |
105 | // std::shared_ptr<Array>* out_uniques, |
106 | // std::shared_ptr<Array>* out_counts); |
107 | |
108 | } // namespace compute |
109 | } // namespace arrow |
110 | |
111 | #endif // ARROW_COMPUTE_KERNELS_HASH_H |
112 | |