1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
// Unit tests for SparseTensor and its indices (SparseCOOIndex, SparseCSRIndex)
19 | |
20 | #include <cstdint> |
21 | #include <memory> |
22 | #include <string> |
23 | #include <vector> |
24 | |
25 | #include <iostream> |
26 | |
27 | #include <gtest/gtest.h> |
28 | |
29 | #include "arrow/sparse_tensor.h" |
30 | #include "arrow/test-util.h" |
31 | #include "arrow/type.h" |
32 | |
33 | namespace arrow { |
34 | |
35 | static inline void CheckSparseIndexFormatType(SparseTensorFormat::type expected, |
36 | const SparseTensor& sparse_tensor) { |
37 | ASSERT_EQ(expected, sparse_tensor.format_id()); |
38 | ASSERT_EQ(expected, sparse_tensor.sparse_index()->format_id()); |
39 | } |
40 | |
41 | static inline void AssertCOOIndex( |
42 | const std::shared_ptr<SparseCOOIndex::CoordsTensor>& sidx, const int64_t nth, |
43 | const std::vector<int64_t>& expected_values) { |
44 | int64_t n = static_cast<int64_t>(expected_values.size()); |
45 | for (int64_t i = 0; i < n; ++i) { |
46 | ASSERT_EQ(expected_values[i], sidx->Value({nth, i})); |
47 | } |
48 | } |
49 | |
50 | TEST(TestSparseCOOTensor, CreationEmptyTensor) { |
51 | std::vector<int64_t> shape = {2, 3, 4}; |
52 | SparseTensorImpl<SparseCOOIndex> st1(int64(), shape); |
53 | |
54 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
55 | SparseTensorImpl<SparseCOOIndex> st2(int64(), shape, dim_names); |
56 | |
57 | ASSERT_EQ(0, st1.non_zero_length()); |
58 | ASSERT_EQ(0, st2.non_zero_length()); |
59 | |
60 | ASSERT_EQ(24, st1.size()); |
61 | ASSERT_EQ(24, st2.size()); |
62 | |
63 | ASSERT_EQ("foo" , st2.dim_name(0)); |
64 | ASSERT_EQ("bar" , st2.dim_name(1)); |
65 | ASSERT_EQ("baz" , st2.dim_name(2)); |
66 | |
67 | ASSERT_EQ("" , st1.dim_name(0)); |
68 | ASSERT_EQ("" , st1.dim_name(1)); |
69 | ASSERT_EQ("" , st1.dim_name(2)); |
70 | } |
71 | |
72 | TEST(TestSparseCOOTensor, CreationFromNumericTensor) { |
73 | std::vector<int64_t> shape = {2, 3, 4}; |
74 | std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, |
75 | 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; |
76 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
77 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
78 | NumericTensor<Int64Type> tensor1(buffer, shape); |
79 | NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names); |
80 | SparseTensorImpl<SparseCOOIndex> st1(tensor1); |
81 | SparseTensorImpl<SparseCOOIndex> st2(tensor2); |
82 | |
83 | CheckSparseIndexFormatType(SparseTensorFormat::COO, st1); |
84 | |
85 | ASSERT_EQ(12, st1.non_zero_length()); |
86 | ASSERT_TRUE(st1.is_mutable()); |
87 | |
88 | ASSERT_EQ("foo" , st2.dim_name(0)); |
89 | ASSERT_EQ("bar" , st2.dim_name(1)); |
90 | ASSERT_EQ("baz" , st2.dim_name(2)); |
91 | |
92 | ASSERT_EQ("" , st1.dim_name(0)); |
93 | ASSERT_EQ("" , st1.dim_name(1)); |
94 | ASSERT_EQ("" , st1.dim_name(2)); |
95 | |
96 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data()); |
97 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
98 | |
99 | const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index()); |
100 | ASSERT_EQ(std::string("SparseCOOIndex" ), si.ToString()); |
101 | |
102 | std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices(); |
103 | ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape()); |
104 | ASSERT_TRUE(sidx->is_column_major()); |
105 | |
106 | AssertCOOIndex(sidx, 0, {0, 0, 0}); |
107 | AssertCOOIndex(sidx, 1, {0, 0, 2}); |
108 | AssertCOOIndex(sidx, 2, {0, 1, 1}); |
109 | AssertCOOIndex(sidx, 10, {1, 2, 1}); |
110 | AssertCOOIndex(sidx, 11, {1, 2, 3}); |
111 | } |
112 | |
113 | TEST(TestSparseCOOTensor, CreationFromTensor) { |
114 | std::vector<int64_t> shape = {2, 3, 4}; |
115 | std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, |
116 | 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; |
117 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
118 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
119 | Tensor tensor1(int64(), buffer, shape); |
120 | Tensor tensor2(int64(), buffer, shape, {}, dim_names); |
121 | SparseTensorImpl<SparseCOOIndex> st1(tensor1); |
122 | SparseTensorImpl<SparseCOOIndex> st2(tensor2); |
123 | |
124 | ASSERT_EQ(12, st1.non_zero_length()); |
125 | ASSERT_TRUE(st1.is_mutable()); |
126 | |
127 | ASSERT_EQ("foo" , st2.dim_name(0)); |
128 | ASSERT_EQ("bar" , st2.dim_name(1)); |
129 | ASSERT_EQ("baz" , st2.dim_name(2)); |
130 | |
131 | ASSERT_EQ("" , st1.dim_name(0)); |
132 | ASSERT_EQ("" , st1.dim_name(1)); |
133 | ASSERT_EQ("" , st1.dim_name(2)); |
134 | |
135 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data()); |
136 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
137 | |
138 | const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st1.sparse_index()); |
139 | std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices(); |
140 | ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape()); |
141 | ASSERT_TRUE(sidx->is_column_major()); |
142 | |
143 | AssertCOOIndex(sidx, 0, {0, 0, 0}); |
144 | AssertCOOIndex(sidx, 1, {0, 0, 2}); |
145 | AssertCOOIndex(sidx, 2, {0, 1, 1}); |
146 | AssertCOOIndex(sidx, 10, {1, 2, 1}); |
147 | AssertCOOIndex(sidx, 11, {1, 2, 3}); |
148 | } |
149 | |
150 | TEST(TestSparseCOOTensor, CreationFromNonContiguousTensor) { |
151 | std::vector<int64_t> shape = {2, 3, 4}; |
152 | std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, |
153 | 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0, |
154 | 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0}; |
155 | std::vector<int64_t> strides = {192, 64, 16}; |
156 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
157 | Tensor tensor(int64(), buffer, shape, strides); |
158 | SparseTensorImpl<SparseCOOIndex> st(tensor); |
159 | |
160 | ASSERT_EQ(12, st.non_zero_length()); |
161 | ASSERT_TRUE(st.is_mutable()); |
162 | |
163 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data()); |
164 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
165 | |
166 | const auto& si = internal::checked_cast<const SparseCOOIndex&>(*st.sparse_index()); |
167 | std::shared_ptr<SparseCOOIndex::CoordsTensor> sidx = si.indices(); |
168 | ASSERT_EQ(std::vector<int64_t>({12, 3}), sidx->shape()); |
169 | ASSERT_TRUE(sidx->is_column_major()); |
170 | |
171 | AssertCOOIndex(sidx, 0, {0, 0, 0}); |
172 | AssertCOOIndex(sidx, 1, {0, 0, 2}); |
173 | AssertCOOIndex(sidx, 2, {0, 1, 1}); |
174 | AssertCOOIndex(sidx, 10, {1, 2, 1}); |
175 | AssertCOOIndex(sidx, 11, {1, 2, 3}); |
176 | } |
177 | |
178 | TEST(TestSparseCSRMatrix, CreationFromNumericTensor2D) { |
179 | std::vector<int64_t> shape = {6, 4}; |
180 | std::vector<int64_t> values = {1, 0, 2, 0, 0, 3, 0, 4, 5, 0, 6, 0, |
181 | 0, 11, 0, 12, 13, 0, 14, 0, 0, 15, 0, 16}; |
182 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
183 | std::vector<std::string> dim_names = {"foo" , "bar" , "baz" }; |
184 | NumericTensor<Int64Type> tensor1(buffer, shape); |
185 | NumericTensor<Int64Type> tensor2(buffer, shape, {}, dim_names); |
186 | |
187 | SparseTensorImpl<SparseCSRIndex> st1(tensor1); |
188 | SparseTensorImpl<SparseCSRIndex> st2(tensor2); |
189 | |
190 | CheckSparseIndexFormatType(SparseTensorFormat::CSR, st1); |
191 | |
192 | ASSERT_EQ(12, st1.non_zero_length()); |
193 | ASSERT_TRUE(st1.is_mutable()); |
194 | |
195 | ASSERT_EQ("foo" , st2.dim_name(0)); |
196 | ASSERT_EQ("bar" , st2.dim_name(1)); |
197 | ASSERT_EQ("baz" , st2.dim_name(2)); |
198 | |
199 | ASSERT_EQ("" , st1.dim_name(0)); |
200 | ASSERT_EQ("" , st1.dim_name(1)); |
201 | ASSERT_EQ("" , st1.dim_name(2)); |
202 | |
203 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st1.raw_data()); |
204 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
205 | |
206 | const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st1.sparse_index()); |
207 | ASSERT_EQ(std::string("SparseCSRIndex" ), si.ToString()); |
208 | ASSERT_EQ(1, si.indptr()->ndim()); |
209 | ASSERT_EQ(1, si.indices()->ndim()); |
210 | |
211 | const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data()); |
212 | std::vector<int64_t> indptr_values(indptr_begin, |
213 | indptr_begin + si.indptr()->shape()[0]); |
214 | |
215 | ASSERT_EQ(7, indptr_values.size()); |
216 | ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values); |
217 | |
218 | const int64_t* indices_begin = |
219 | reinterpret_cast<const int64_t*>(si.indices()->raw_data()); |
220 | std::vector<int64_t> indices_values(indices_begin, |
221 | indices_begin + si.indices()->shape()[0]); |
222 | |
223 | ASSERT_EQ(12, indices_values.size()); |
224 | ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values); |
225 | } |
226 | |
227 | TEST(TestSparseCSRMatrix, CreationFromNonContiguousTensor) { |
228 | std::vector<int64_t> shape = {6, 4}; |
229 | std::vector<int64_t> values = {1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 0, 4, 0, |
230 | 5, 0, 0, 0, 6, 0, 0, 0, 0, 0, 11, 0, 0, 0, 12, 0, |
231 | 13, 0, 0, 0, 14, 0, 0, 0, 0, 0, 15, 0, 0, 0, 16, 0}; |
232 | std::vector<int64_t> strides = {64, 16}; |
233 | std::shared_ptr<Buffer> buffer = Buffer::Wrap(values); |
234 | Tensor tensor(int64(), buffer, shape, strides); |
235 | SparseTensorImpl<SparseCSRIndex> st(tensor); |
236 | |
237 | ASSERT_EQ(12, st.non_zero_length()); |
238 | ASSERT_TRUE(st.is_mutable()); |
239 | |
240 | const int64_t* raw_data = reinterpret_cast<const int64_t*>(st.raw_data()); |
241 | AssertNumericDataEqual(raw_data, {1, 2, 3, 4, 5, 6, 11, 12, 13, 14, 15, 16}); |
242 | |
243 | const auto& si = internal::checked_cast<const SparseCSRIndex&>(*st.sparse_index()); |
244 | ASSERT_EQ(1, si.indptr()->ndim()); |
245 | ASSERT_EQ(1, si.indices()->ndim()); |
246 | |
247 | const int64_t* indptr_begin = reinterpret_cast<const int64_t*>(si.indptr()->raw_data()); |
248 | std::vector<int64_t> indptr_values(indptr_begin, |
249 | indptr_begin + si.indptr()->shape()[0]); |
250 | |
251 | ASSERT_EQ(7, indptr_values.size()); |
252 | ASSERT_EQ(std::vector<int64_t>({0, 2, 4, 6, 8, 10, 12}), indptr_values); |
253 | |
254 | const int64_t* indices_begin = |
255 | reinterpret_cast<const int64_t*>(si.indices()->raw_data()); |
256 | std::vector<int64_t> indices_values(indices_begin, |
257 | indices_begin + si.indices()->shape()[0]); |
258 | |
259 | ASSERT_EQ(12, indices_values.size()); |
260 | ASSERT_EQ(std::vector<int64_t>({0, 2, 1, 3, 0, 2, 1, 3, 0, 2, 1, 3}), indices_values); |
261 | } |
262 | |
263 | } // namespace arrow |
264 | |