1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
// Unit tests for SparseTensor and its indices (SparseCOOIndex, SparseCSRIndex)
19 | |
20 | #include <cstdint> |
21 | #include <memory> |
22 | #include <string> |
23 | #include <vector> |
24 | |
25 | #include <iostream> |
26 | |
27 | #include <gtest/gtest.h> |
28 | |
29 | #include "arrow/sparse_tensor.h" |
30 | #include "arrow/test-util.h" |
31 | #include "arrow/type.h" |
32 | |
33 | namespace arrow { |
34 | |
35 | static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected, |
36 | const SparseTensor& sparse_tensor) { |
37 | ASSERT_EQ(expected, sparse_tensor.format_id()); |
38 | ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id()); |
39 | } |
40 | |
41 | static inline void AssertCOOIndex( |
42 | const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth, |
43 | const std::vector<int64_t>& expected_values) { |
44 | int64_t n = static_cast<int64_t>(expected_values.size()); |
45 | for (int64_t i = 0; i < n; ++i) { |
46 | ASSERT_EQ(expected_values[i], sidx->Value({nth, i})); |
47 | } |
48 | } |
49 | |
50 | TEST(TestSparseCOOTensor, CreationEmptyTensor) { |
51 | std::vector<int64_t> shape = {2, 3, 4}; |
52 | SparseTensorImpl<SparseCOOIndex> st1(int64(), shape); |
53 | |
54 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
55 | SparseTensorImpl<SparseCOOIndex> st2(int64(), shape, dim_names); |
56 | |
57 | ASSERT_EQ(0, st1.non_zero_length()); |
58 | ASSERT_EQ(0, st2.non_zero_length()); |
59 | |
60 | ASSERT_EQ(24, st1.size()); |
61 | ASSERT_EQ(24, st2.size()); |
62 | |
63 | ASSERT_EQ("foo" , st2.dim_name(0)); |
64 | ASSERT_EQ("bar" , st2.dim_name(1)); |
65 | ASSERT_EQ("baz" , st2.dim_name(2)); |
66 | |
67 | ASSERT_EQ("" , st1.dim_name(0)); |
68 | ASSERT_EQ("" , st1.dim_name(1)); |
69 | ASSERT_EQ("" , st1.dim_name(2)); |
70 | } |
71 | |
72 | TEST(TestSparseCOOTensor, CreationFromNumericTensor) { |
73 | std::vector<int64_t> shape = {2, 3, 4}; |
74 | std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, |
75 | 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; |
76 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
77 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
78 | NumericTensor<Int64Type> tensor1(buffer, shape); |
79 | NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names); |
80 | SparseTensorImpl<SparseCOOIndex> st1(tensor1); |
81 | SparseTensorImpl<SparseCOOIndex> st2(tensor2); |
82 | |
83 | CheckSparseIndexFormatType(SparseTensorFormat::COO, st1); |
84 | |
85 | ASSERT_EQ(12, st1.non_zero_length()); |
86 | ASSERT_TRUE(st1.is_mutable()); |
87 | |
88 | ASSERT_EQ("foo" , st2.dim_name(0)); |
89 | ASSERT_EQ("bar" , st2.dim_name(1)); |
90 | ASSERT_EQ("baz" , st2.dim_name(2)); |
91 | |
92 | ASSERT_EQ("" , st1.dim_name(0)); |
93 | ASSERT_EQ("" , st1.dim_name(1)); |
94 | ASSERT_EQ("" , st1.dim_name(2)); |
95 | |
96 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data()); |
97 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
98 | |
99 | const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index()); |
100 | ASSERT_EQ(std::string("SparseCOOIndex" ), si.ToString()); |
101 | |
102 | std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices(); |
103 | ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape()); |
104 | ASSERT_TRUE(sidx->is_column_major()); |
105 | |
106 | AssertCOOIndex(sidx, 0, {0, 0, 0}); |
107 | AssertCOOIndex(sidx, 1, {0, 0, 2}); |
108 | AssertCOOIndex(sidx, 2, {0, 1, 1}); |
109 | AssertCOOIndex(sidx, 10, {1, 2, 1}); |
110 | AssertCOOIndex(sidx, 11, {1, 2, 3}); |
111 | } |
112 | |
113 | TEST(TestSparseCOOTensor, CreationFromTensor) { |
114 | std::vector<int64_t> shape = {2, 3, 4}; |
115 | std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, |
116 | 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; |
117 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
118 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
119 | Tensor tensor1(int64(), buffer, shape); |
120 | Tensor tensor2(int64(), buffer, shape, {}, dim_names); |
121 | SparseTensorImpl<SparseCOOIndex> st1(tensor1); |
122 | SparseTensorImpl<SparseCOOIndex> st2(tensor2); |
123 | |
124 | ASSERT_EQ(12, st1.non_zero_length()); |
125 | ASSERT_TRUE(st1.is_mutable()); |
126 | |
127 | ASSERT_EQ("foo" , st2.dim_name(0)); |
128 | ASSERT_EQ("bar" , st2.dim_name(1)); |
129 | ASSERT_EQ("baz" , st2.dim_name(2)); |
130 | |
131 | ASSERT_EQ("" , st1.dim_name(0)); |
132 | ASSERT_EQ("" , st1.dim_name(1)); |
133 | ASSERT_EQ("" , st1.dim_name(2)); |
134 | |
135 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data()); |
136 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
137 | |
138 | const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index()); |
139 | std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices(); |
140 | ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape()); |
141 | ASSERT_TRUE(sidx->is_column_major()); |
142 | |
143 | AssertCOOIndex(sidx, 0, {0, 0, 0}); |
144 | AssertCOOIndex(sidx, 1, {0, 0, 2}); |
145 | AssertCOOIndex(sidx, 2, {0, 1, 1}); |
146 | AssertCOOIndex(sidx, 10, {1, 2, 1}); |
147 | AssertCOOIndex(sidx, 11, {1, 2, 3}); |
148 | } |
149 | |
150 | TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) { |
151 | std::vector<int64_t> shape = {2, 3, 4}; |
152 | std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, |
153 | 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0, |
154 | 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0}; |
155 | std::vector<int64_t> strides = {192, 64, 16}; |
156 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
157 | Tensor tensor(int64(), buffer, shape, strides); |
158 | SparseTensorImpl<SparseCOOIndex> st(tensor); |
159 | |
160 | ASSERT_EQ(12, st.non_zero_length()); |
161 | ASSERT_TRUE(st.is_mutable()); |
162 | |
163 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data()); |
164 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
165 | |
166 | const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index()); |
167 | std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices(); |
168 | ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape()); |
169 | ASSERT_TRUE(sidx->is_column_major()); |
170 | |
171 | AssertCOOIndex(sidx, 0, {0, 0, 0}); |
172 | AssertCOOIndex(sidx, 1, {0, 0, 2}); |
173 | AssertCOOIndex(sidx, 2, {0, 1, 1}); |
174 | AssertCOOIndex(sidx, 10, {1, 2, 1}); |
175 | AssertCOOIndex(sidx, 11, {1, 2, 3}); |
176 | } |
177 | |
178 | TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) { |
179 | std::vector<int64_t> shape = {6, 4}; |
180 | std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, |
181 | 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; |
182 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
183 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
184 | NumericTensor<Int64Type> tensor1(buffer, shape); |
185 | NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names); |
186 | |
187 | SparseTensorImpl<SparseCSRIndex> st1(tensor1); |
188 | SparseTensorImpl<SparseCSRIndex> st2(tensor2); |
189 | |
190 | CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1); |
191 | |
192 | ASSERT_EQ(12, st1.non_zero_length()); |
193 | ASSERT_TRUE(st1.is_mutable()); |
194 | |
195 | ASSERT_EQ("foo" , st2.dim_name(0)); |
196 | ASSERT_EQ("bar" , st2.dim_name(1)); |
197 | ASSERT_EQ("baz" , st2.dim_name(2)); |
198 | |
199 | ASSERT_EQ("" , st1.dim_name(0)); |
200 | ASSERT_EQ("" , st1.dim_name(1)); |
201 | ASSERT_EQ("" , st1.dim_name(2)); |
202 | |
203 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data()); |
204 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
205 | |
206 | const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index()); |
207 | ASSERT_EQ(std::string("SparseCSRIndex" ), si.ToString()); |
208 | ASSERT_EQ(1, si.indptr()->ndim()); |
209 | ASSERT_EQ(1, si.indices()->ndim()); |
210 | |
211 | const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data()); |
212 | std::vector<int64_t> indptr_values(indptr_begin, |
213 | indptr_begin + si.indptr()->shape()[0]); |
214 | |
215 | ASSERT_EQ(7, indptr_values.size()); |
216 | ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values); |
217 | |
218 | const int64_t* indices_begin = |
219 | reinterpret_cast<const int64_t*>(si.indices()->raw_data()); |
220 | std::vector<int64_t> indices_values(indices_begin, |
221 | indices_begin + si.indices()->shape()[0]); |
222 | |
223 | ASSERT_EQ(12, indices_values.size()); |
224 | ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values); |
225 | } |
226 | |
227 | TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { |
228 | std::vector<int64_t> shape = {6, 4}; |
229 | std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, |
230 | 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0, |
231 | 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0}; |
232 | std::vector<int64_t> strides = {64, 16}; |
233 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
234 | Tensor tensor(int64(), buffer, shape, strides); |
235 | SparseTensorImpl<SparseCSRIndex> st(tensor); |
236 | |
237 | ASSERT_EQ(12, st.non_zero_length()); |
238 | ASSERT_TRUE(st.is_mutable()); |
239 | |
240 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data()); |
241 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
242 | |
243 | const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index()); |
244 | ASSERT_EQ(1, si.indptr()->ndim()); |
245 | ASSERT_EQ(1, si.indices()->ndim()); |
246 | |
247 | const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data()); |
248 | std::vector<int64_t> indptr_values(indptr_begin, |
249 | indptr_begin + si.indptr()->shape()[0]); |
250 | |
251 | ASSERT_EQ(7, indptr_values.size()); |
252 | ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values); |
253 | |
254 | const int64_t* indices_begin = |
255 | reinterpret_cast<const int64_t*>(si.indices()->raw_data()); |
256 | std::vector<int64_t> indices_values(indices_begin, |
257 | indices_begin + si.indices()->shape()[0]); |
258 | |
259 | ASSERT_EQ(12, indices_values.size()); |
260 | ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values); |
261 | } |
262 | |
263 | } // namespace arrow |
264 | |