1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// Unit tests for SparseTensor and its sparse-index implementations
// (SparseCOOIndex, SparseCSRIndex)
19
20#include <cstdint>
21#include <memory>
22#include <string>
23#include <vector>
24
25#include <iostream>
26
27#include <gtest/gtest.h>
28
29#include "arrow/sparse_tensor.h"
30#include "arrow/test-util.h"
31#include "arrow/type.h"
32
33namespace arrow {
34
35static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected,
36 const SparseTensor& sparse_tensor) {
37 ASSERT_EQ(expected, sparse_tensor.format_id());
38 ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id());
39}
40
41static inline void AssertCOOIndex(
42 const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth,
43 const std::vector<int64_t>& expected_values) {
44 int64_t n = static_cast<int64_t>(expected_values.size());
45 for (int64_t i = 0; i < n; ++i) {
46 ASSERT_EQ(expected_values[i], sidx->Value({nth, i}));
47 }
48}
49
50TEST(TestSparseCOOTensor, CreationEmptyTensor) {
51 std::vector<int64_t> shape = {2, 3, 4};
52 SparseTensorImpl<SparseCOOIndex> st1(int64(), shape);
53
54 std::vector<std::string> dim_names = {"foo", "bar", "baz"};
55 SparseTensorImpl<SparseCOOIndex> st2(int64(), shape, dim_names);
56
57 ASSERT_EQ(0, st1.non_zero_length());
58 ASSERT_EQ(0, st2.non_zero_length());
59
60 ASSERT_EQ(24, st1.size());
61 ASSERT_EQ(24, st2.size());
62
63 ASSERT_EQ("foo", st2.dim_name(0));
64 ASSERT_EQ("bar", st2.dim_name(1));
65 ASSERT_EQ("baz", st2.dim_name(2));
66
67 ASSERT_EQ("", st1.dim_name(0));
68 ASSERT_EQ("", st1.dim_name(1));
69 ASSERT_EQ("", st1.dim_name(2));
70}
71
72TEST(TestSparseCOOTensor, CreationFromNumericTensor) {
73 std::vector<int64_t> shape = {2, 3, 4};
74 std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
75 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
76 std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
77 std::vector<std::string> dim_names = {"foo", "bar", "baz"};
78 NumericTensor<Int64Type> tensor1(buffer, shape);
79 NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
80 SparseTensorImpl<SparseCOOIndex> st1(tensor1);
81 SparseTensorImpl<SparseCOOIndex> st2(tensor2);
82
83 CheckSparseIndexFormatType(SparseTensorFormat::COO, st1);
84
85 ASSERT_EQ(12, st1.non_zero_length());
86 ASSERT_TRUE(st1.is_mutable());
87
88 ASSERT_EQ("foo", st2.dim_name(0));
89 ASSERT_EQ("bar", st2.dim_name(1));
90 ASSERT_EQ("baz", st2.dim_name(2));
91
92 ASSERT_EQ("", st1.dim_name(0));
93 ASSERT_EQ("", st1.dim_name(1));
94 ASSERT_EQ("", st1.dim_name(2));
95
96 const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
97 AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
98
99 const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
100 ASSERT_EQ(std::string("SparseCOOIndex"), si.ToString());
101
102 std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
103 ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
104 ASSERT_TRUE(sidx->is_column_major());
105
106 AssertCOOIndex(sidx, 0, {0, 0, 0});
107 AssertCOOIndex(sidx, 1, {0, 0, 2});
108 AssertCOOIndex(sidx, 2, {0, 1, 1});
109 AssertCOOIndex(sidx, 10, {1, 2, 1});
110 AssertCOOIndex(sidx, 11, {1, 2, 3});
111}
112
113TEST(TestSparseCOOTensor, CreationFromTensor) {
114 std::vector<int64_t> shape = {2, 3, 4};
115 std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
116 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
117 std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
118 std::vector<std::string> dim_names = {"foo", "bar", "baz"};
119 Tensor tensor1(int64(), buffer, shape);
120 Tensor tensor2(int64(), buffer, shape, {}, dim_names);
121 SparseTensorImpl<SparseCOOIndex> st1(tensor1);
122 SparseTensorImpl<SparseCOOIndex> st2(tensor2);
123
124 ASSERT_EQ(12, st1.non_zero_length());
125 ASSERT_TRUE(st1.is_mutable());
126
127 ASSERT_EQ("foo", st2.dim_name(0));
128 ASSERT_EQ("bar", st2.dim_name(1));
129 ASSERT_EQ("baz", st2.dim_name(2));
130
131 ASSERT_EQ("", st1.dim_name(0));
132 ASSERT_EQ("", st1.dim_name(1));
133 ASSERT_EQ("", st1.dim_name(2));
134
135 const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
136 AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
137
138 const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index());
139 std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
140 ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
141 ASSERT_TRUE(sidx->is_column_major());
142
143 AssertCOOIndex(sidx, 0, {0, 0, 0});
144 AssertCOOIndex(sidx, 1, {0, 0, 2});
145 AssertCOOIndex(sidx, 2, {0, 1, 1});
146 AssertCOOIndex(sidx, 10, {1, 2, 1});
147 AssertCOOIndex(sidx, 11, {1, 2, 3});
148}
149
150TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) {
151 std::vector<int64_t> shape = {2, 3, 4};
152 std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
153 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
154 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
155 std::vector<int64_t> strides = {192, 64, 16};
156 std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
157 Tensor tensor(int64(), buffer, shape, strides);
158 SparseTensorImpl<SparseCOOIndex> st(tensor);
159
160 ASSERT_EQ(12, st.non_zero_length());
161 ASSERT_TRUE(st.is_mutable());
162
163 const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
164 AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
165
166 const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index());
167 std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices();
168 ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape());
169 ASSERT_TRUE(sidx->is_column_major());
170
171 AssertCOOIndex(sidx, 0, {0, 0, 0});
172 AssertCOOIndex(sidx, 1, {0, 0, 2});
173 AssertCOOIndex(sidx, 2, {0, 1, 1});
174 AssertCOOIndex(sidx, 10, {1, 2, 1});
175 AssertCOOIndex(sidx, 11, {1, 2, 3});
176}
177
178TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) {
179 std::vector<int64_t> shape = {6, 4};
180 std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0,
181 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16};
182 std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
183 std::vector<std::string> dim_names = {"foo", "bar", "baz"};
184 NumericTensor<Int64Type> tensor1(buffer, shape);
185 NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names);
186
187 SparseTensorImpl<SparseCSRIndex> st1(tensor1);
188 SparseTensorImpl<SparseCSRIndex> st2(tensor2);
189
190 CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1);
191
192 ASSERT_EQ(12, st1.non_zero_length());
193 ASSERT_TRUE(st1.is_mutable());
194
195 ASSERT_EQ("foo", st2.dim_name(0));
196 ASSERT_EQ("bar", st2.dim_name(1));
197 ASSERT_EQ("baz", st2.dim_name(2));
198
199 ASSERT_EQ("", st1.dim_name(0));
200 ASSERT_EQ("", st1.dim_name(1));
201 ASSERT_EQ("", st1.dim_name(2));
202
203 const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data());
204 AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
205
206 const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index());
207 ASSERT_EQ(std::string("SparseCSRIndex"), si.ToString());
208 ASSERT_EQ(1, si.indptr()->ndim());
209 ASSERT_EQ(1, si.indices()->ndim());
210
211 const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
212 std::vector<int64_t> indptr_values(indptr_begin,
213 indptr_begin + si.indptr()->shape()[0]);
214
215 ASSERT_EQ(7, indptr_values.size());
216 ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
217
218 const int64_t* indices_begin =
219 reinterpret_cast<const int64_t*>(si.indices()->raw_data());
220 std::vector<int64_t> indices_values(indices_begin,
221 indices_begin + si.indices()->shape()[0]);
222
223 ASSERT_EQ(12, indices_values.size());
224 ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
225}
226
227TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) {
228 std::vector<int64_t> shape = {6, 4};
229 std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0,
230 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0,
231 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0};
232 std::vector<int64_t> strides = {64, 16};
233 std::shared_ptr<Buffer> buffer = Buffer::Wrap(values);
234 Tensor tensor(int64(), buffer, shape, strides);
235 SparseTensorImpl<SparseCSRIndex> st(tensor);
236
237 ASSERT_EQ(12, st.non_zero_length());
238 ASSERT_TRUE(st.is_mutable());
239
240 const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data());
241 AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16});
242
243 const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index());
244 ASSERT_EQ(1, si.indptr()->ndim());
245 ASSERT_EQ(1, si.indices()->ndim());
246
247 const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data());
248 std::vector<int64_t> indptr_values(indptr_begin,
249 indptr_begin + si.indptr()->shape()[0]);
250
251 ASSERT_EQ(7, indptr_values.size());
252 ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values);
253
254 const int64_t* indices_begin =
255 reinterpret_cast<const int64_t*>(si.indices()->raw_data());
256 std::vector<int64_t> indices_values(indices_begin,
257 indices_begin + si.indices()->shape()[0]);
258
259 ASSERT_EQ(12, indices_values.size());
260 ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values);
261}
262
263} // namespace arrow
264