1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#ifndef PARQUET_UTIL_TEST_COMMON_H
19#define PARQUET_UTIL_TEST_COMMON_H
20
21#include <chrono>
22#include <iostream>
23#include <limits>
24#include <random>
25#include <vector>
26
27#include "parquet/exception.h"
28#include "parquet/types.h"
29
30using std::vector;
31
32namespace parquet {
33
34namespace test {
35
36typedef ::testing::Types<BooleanType, Int32Type, Int64Type, Int96Type, FloatType,
37 DoubleType, ByteArrayType, FLBAType>
38 ParquetTypes;
39
40class ParquetTestException : public parquet::ParquetException {
41 using ParquetException::ParquetException;
42};
43
44const char* get_data_dir() {
45 const auto result = std::getenv("PARQUET_TEST_DATA");
46 if (!result || !result[0]) {
47 throw ParquetTestException(
48 "Please point the PARQUET_TEST_DATA environment "
49 "variable to the test data directory");
50 }
51 return result;
52}
53
// Asserts, via ASSERT_EQ, that both vectors have the same number of elements
// and that the elements at every position compare equal. When an element
// comparison fails, the offending index is streamed into the failure message.
template <typename T>
static inline void assert_vector_equal(const std::vector<T>& left,
                                       const std::vector<T>& right) {
  ASSERT_EQ(left.size(), right.size());

  const size_t count = left.size();
  for (size_t idx = 0; idx != count; ++idx) {
    ASSERT_EQ(left[idx], right[idx]) << idx;
  }
}
62
// Returns true when both vectors have the same size and every pair of elements
// at the same position compares equal (using operator!=).
//
// A size mismatch returns false silently. For an element mismatch, the index
// and both values of the first differing position are written to std::cerr
// before returning false.
template <typename T>
static inline bool vector_equal(const std::vector<T>& left, const std::vector<T>& right) {
  const size_t count = left.size();
  if (count != right.size()) {
    return false;
  }

  // Advance to the first position where the two vectors disagree.
  size_t pos = 0;
  while (pos < count && !(left[pos] != right[pos])) {
    ++pos;
  }
  if (pos == count) {
    return true;
  }

  std::cerr << "index " << pos << " left was " << left[pos] << " right was "
            << right[pos] << std::endl;
  return false;
}
79
// Returns a copy of the elements of `values` in the half-open index range
// [start, end).
//
// The bounds are clamped to [0, values.size()] before copying, so a negative
// `start` or an `end` past the last element yields the overlapping part of the
// range instead of reading outside the vector. An empty vector is returned
// when the (clamped) range is empty, including when end < start.
template <typename T>
static std::vector<T> slice(const std::vector<T>& values, int start, int end) {
  using Offset = typename std::vector<T>::difference_type;

  const Offset size = static_cast<Offset>(values.size());
  const Offset first = start < 0 ? Offset(0) : static_cast<Offset>(start);
  const Offset last = end > size ? size : static_cast<Offset>(end);
  if (last <= first) {
    return std::vector<T>();
  }

  // Copy-construct straight from the source range rather than
  // default-constructing the output and assigning element by element.
  return std::vector<T>(values.begin() + first, values.begin() + last);
}
92
// Returns `n` Bernoulli draws, each true with probability `p`, produced in
// order from a std::default_random_engine seeded with `seed`. The same
// arguments therefore reproduce the same sequence for a given standard library
// implementation.
static inline std::vector<bool> flip_coins_seed(int n, double p, uint32_t seed) {
  std::default_random_engine engine(seed);
  std::bernoulli_distribution coin(p);

  std::vector<bool> draws(n);
  // vector<bool> yields proxy references, hence auto&& rather than bool&.
  for (auto&& flip : draws) {
    flip = coin(engine);
  }
  return draws;
}
103
104static inline vector<bool> flip_coins(int n, double p) {
105 uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count();
106 return flip_coins_seed(n, p, static_cast<uint32_t>(seed));
107}
108
// Resizes `*out` to `n` elements and overwrites each with a value drawn
// uniformly from [0, 255], using a std::default_random_engine seeded with
// `seed`. Any previous contents of `*out` are discarded.
//
// Declared `static inline`, like the other helpers in this header, so that
// including the header from more than one translation unit of the same binary
// does not produce multiple-definition link errors.
static inline void random_bytes(int n, uint32_t seed, std::vector<uint8_t>* out) {
  std::default_random_engine gen(seed);
  // uniform_int_distribution is not specified for 8-bit types, so draw ints
  // and narrow each one.
  std::uniform_int_distribution<int> d(0, 255);

  out->resize(n);
  for (uint8_t& byte : *out) {
    byte = static_cast<uint8_t>(d(gen));
  }
}
118
// Writes `n` Bernoulli draws, each true with probability `p`, to out[0..n),
// using a std::default_random_engine seeded with `seed`. `out` must point to
// at least `n` writable elements; nothing is written when n <= 0.
//
// Declared `static inline`, like the other helpers in this header, so that
// including the header from more than one translation unit of the same binary
// does not produce multiple-definition link errors.
static inline void random_bools(int n, double p, uint32_t seed, bool* out) {
  std::default_random_engine gen(seed);
  std::bernoulli_distribution d(p);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}
126
// Writes `n` pseudo-random values to out[0..n), using a
// std::default_random_engine seeded with `seed`. `out` must point to at least
// `n` writable elements; nothing is written when n <= 0.
//
// Primary template: draws from std::uniform_int_distribution<T> over the
// closed range [min_value, max_value], so T must be an integer type that
// distribution accepts.
template <typename T>
void random_numbers(int n, uint32_t seed, T min_value, T max_value, T* out) {
  std::default_random_engine gen(seed);
  std::uniform_int_distribution<T> d(min_value, max_value);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}

// float specialization: draws from std::uniform_real_distribution<float> over
// [min_value, max_value).
//
// Explicit specializations are ordinary functions, not templates, and are not
// implicitly inline. It is marked `inline` so that including this header from
// several translation units does not produce multiple-definition link errors.
template <>
inline void random_numbers(int n, uint32_t seed, float min_value, float max_value,
                           float* out) {
  std::default_random_engine gen(seed);
  std::uniform_real_distribution<float> d(min_value, max_value);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}

// double specialization: draws from std::uniform_real_distribution<double>
// over [min_value, max_value). Marked `inline` for the same reason as the
// float specialization.
template <>
inline void random_numbers(int n, uint32_t seed, double min_value, double max_value,
                           double* out) {
  std::default_random_engine gen(seed);
  std::uniform_real_distribution<double> d(min_value, max_value);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}
154
155void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value,
156 Int96* out) {
157 std::default_random_engine gen(seed);
158 std::uniform_int_distribution<int32_t> d(min_value, max_value);
159 for (int i = 0; i < n; ++i) {
160 out[i].value[0] = d(gen);
161 out[i].value[1] = d(gen);
162 out[i].value[2] = d(gen);
163 }
164}
165
166void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out) {
167 std::default_random_engine gen(seed);
168 std::uniform_int_distribution<int> d(0, 255);
169 for (int i = 0; i < n; ++i) {
170 out[i].ptr = buf;
171 for (int j = 0; j < len; ++j) {
172 buf[j] = static_cast<uint8_t>(d(gen));
173 }
174 buf += len;
175 }
176}
177
178void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size,
179 int max_size) {
180 std::default_random_engine gen(seed);
181 std::uniform_int_distribution<int> d1(min_size, max_size);
182 std::uniform_int_distribution<int> d2(0, 255);
183 for (int i = 0; i < n; ++i) {
184 int len = d1(gen);
185 out[i].len = len;
186 out[i].ptr = buf;
187 for (int j = 0; j < len; ++j) {
188 buf[j] = static_cast<uint8_t>(d2(gen));
189 }
190 buf += len;
191 }
192}
193
194void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int max_size) {
195 random_byte_array(n, seed, buf, out, 0, max_size);
196}
197
198} // namespace test
199} // namespace parquet
200
201#endif // PARQUET_UTIL_TEST_COMMON_H
202