1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #ifndef PARQUET_UTIL_TEST_COMMON_H |
19 | #define PARQUET_UTIL_TEST_COMMON_H |
20 | |
#include <algorithm>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <random>
#include <vector>
26 | |
27 | #include "parquet/exception.h" |
28 | #include "parquet/types.h" |
29 | |
30 | using std::vector; |
31 | |
32 | namespace parquet { |
33 | |
34 | namespace test { |
35 | |
36 | typedef ::testing::Types<BooleanType, Int32Type, Int64Type, Int96Type, FloatType, |
37 | DoubleType, ByteArrayType, FLBAType> |
38 | ParquetTypes; |
39 | |
40 | class ParquetTestException : public parquet::ParquetException { |
41 | using ParquetException::ParquetException; |
42 | }; |
43 | |
44 | const char* get_data_dir() { |
45 | const auto result = std::getenv("PARQUET_TEST_DATA" ); |
46 | if (!result || !result[0]) { |
47 | throw ParquetTestException( |
48 | "Please point the PARQUET_TEST_DATA environment " |
49 | "variable to the test data directory" ); |
50 | } |
51 | return result; |
52 | } |
53 | |
// Test assertion: checks that `left` and `right` have the same length and the
// same element at every index.
//
// Both checks are made with ASSERT_EQ. On an element mismatch the offending
// index is streamed into the assertion message.
template <typename T>
static inline void assert_vector_equal(const std::vector<T>& left,
                                       const std::vector<T>& right) {
  // Sizes are compared first so the element loop below never indexes past the
  // end of `right` (assuming ASSERT_EQ leaves this function on failure, as
  // gtest's fatal assertions do).
  ASSERT_EQ(left.size(), right.size());

  size_t i = 0;
  while (i < left.size()) {
    ASSERT_EQ(left[i], right[i]) << i;
    ++i;
  }
}
62 | |
// Returns true when `left` and `right` have the same length and compare equal
// element by element, false otherwise.
//
// A length mismatch returns false silently. For the first differing element,
// its index and both values are written to std::cerr before returning false.
template <typename T>
static inline bool vector_equal(const std::vector<T>& left, const std::vector<T>& right) {
  const size_t count = left.size();
  if (count != right.size()) return false;

  size_t pos = 0;
  while (pos < count) {
    if (left[pos] != right[pos]) {
      std::cerr << "index " << pos << " left was " << left[pos] << " right was "
                << right[pos] << std::endl;
      return false;
    }
    ++pos;
  }

  return true;
}
79 | |
// Returns a copy of the elements of `values` in the half-open index range
// [start, end).
//
// Both indices are clamped to [0, values.size()] before use, so a negative
// `start` or an `end` past the last element never reads outside the vector.
// If the clamped range is empty (including end < start) an empty vector is
// returned.
template <typename T>
static std::vector<T> slice(const std::vector<T>& values, int start, int end) {
  // Work in ptrdiff_t: it is the iterator difference type and is wide enough
  // that the arithmetic below cannot overflow the way `end - start` on plain
  // ints could.
  const std::ptrdiff_t size = static_cast<std::ptrdiff_t>(values.size());
  const std::ptrdiff_t first =
      std::min<std::ptrdiff_t>(std::max<std::ptrdiff_t>(start, 0), size);
  const std::ptrdiff_t last =
      std::min<std::ptrdiff_t>(std::max<std::ptrdiff_t>(end, 0), size);

  if (last <= first) {
    return std::vector<T>();
  }
  return std::vector<T>(values.begin() + first, values.begin() + last);
}
92 | |
// Draws `n` booleans from a Bernoulli distribution with success probability
// `p`, using a std::default_random_engine seeded with `seed`.
//
// The same (n, p, seed) triple yields the same sequence on a given standard
// library implementation.
static inline std::vector<bool> flip_coins_seed(int n, double p, uint32_t seed) {
  std::default_random_engine engine(seed);
  std::bernoulli_distribution coin(p);

  std::vector<bool> outcomes(n);
  // Fill front to back so the draws are consumed in index order.
  for (std::vector<bool>::iterator it = outcomes.begin(); it != outcomes.end(); ++it) {
    *it = coin(engine);
  }
  return outcomes;
}
103 | |
104 | static inline vector<bool> flip_coins(int n, double p) { |
105 | uint64_t seed = std::chrono::high_resolution_clock::now().time_since_epoch().count(); |
106 | return flip_coins_seed(n, p, static_cast<uint32_t>(seed)); |
107 | } |
108 | |
// Resizes `*out` to `n` elements and overwrites every element with a
// pseudo-random byte in [0, 255].
//
// The generator is a std::default_random_engine seeded with `seed`, so equal
// seeds give equal output on a given standard library implementation.
//
// Marked inline because the definition lives in a header; a non-inline
// definition here is emitted by every including translation unit and breaks
// the link with duplicate symbols.
inline void random_bytes(int n, uint32_t seed, std::vector<uint8_t>* out) {
  std::default_random_engine gen(seed);
  // uniform_int_distribution is not specified for 8-bit types, so draw ints
  // and narrow them.
  std::uniform_int_distribution<int> d(0, 255);

  out->resize(n);
  for (uint8_t& byte : *out) {
    byte = static_cast<uint8_t>(d(gen));
  }
}
118 | |
// Writes `n` pseudo-random booleans to out[0 .. n-1]; each is true with
// probability `p`.
//
// `out` must point to at least `n` writable elements. The generator is a
// std::default_random_engine seeded with `seed`.
//
// Marked inline because the definition lives in a header; without it, every
// including translation unit emits its own external definition and linking
// fails with duplicate symbols.
inline void random_bools(int n, double p, uint32_t seed, bool* out) {
  std::default_random_engine gen(seed);
  std::bernoulli_distribution d(p);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}
126 | |
// Fills out[0 .. n-1] with pseudo-random values drawn uniformly from
// [min_value, max_value] (both ends inclusive), using a
// std::default_random_engine seeded with `seed`.
//
// This primary template uses std::uniform_int_distribution<T>, so T must be an
// integer type that distribution supports. float and double are handled by the
// explicit specializations below. `out` must point to at least `n` writable
// elements.
template <typename T>
void random_numbers(int n, uint32_t seed, T min_value, T max_value, T* out) {
  std::default_random_engine gen(seed);
  std::uniform_int_distribution<T> d(min_value, max_value);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}

// float specialization: draws from std::uniform_real_distribution<float>, whose
// range is the half-open interval [min_value, max_value).
//
// An explicit full specialization is an ordinary function, not a template, so
// it is marked inline to allow this header to be included from several
// translation units without duplicate-symbol link errors.
template <>
inline void random_numbers(int n, uint32_t seed, float min_value, float max_value,
                           float* out) {
  std::default_random_engine gen(seed);
  std::uniform_real_distribution<float> d(min_value, max_value);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}

// double specialization: draws from std::uniform_real_distribution<double>,
// whose range is the half-open interval [min_value, max_value). Marked inline
// for the same header-definition reason as the float specialization.
template <>
inline void random_numbers(int n, uint32_t seed, double min_value, double max_value,
                           double* out) {
  std::default_random_engine gen(seed);
  std::uniform_real_distribution<double> d(min_value, max_value);
  for (int i = 0; i < n; ++i) {
    out[i] = d(gen);
  }
}
154 | |
155 | void random_Int96_numbers(int n, uint32_t seed, int32_t min_value, int32_t max_value, |
156 | Int96* out) { |
157 | std::default_random_engine gen(seed); |
158 | std::uniform_int_distribution<int32_t> d(min_value, max_value); |
159 | for (int i = 0; i < n; ++i) { |
160 | out[i].value[0] = d(gen); |
161 | out[i].value[1] = d(gen); |
162 | out[i].value[2] = d(gen); |
163 | } |
164 | } |
165 | |
166 | void random_fixed_byte_array(int n, uint32_t seed, uint8_t* buf, int len, FLBA* out) { |
167 | std::default_random_engine gen(seed); |
168 | std::uniform_int_distribution<int> d(0, 255); |
169 | for (int i = 0; i < n; ++i) { |
170 | out[i].ptr = buf; |
171 | for (int j = 0; j < len; ++j) { |
172 | buf[j] = static_cast<uint8_t>(d(gen)); |
173 | } |
174 | buf += len; |
175 | } |
176 | } |
177 | |
178 | void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int min_size, |
179 | int max_size) { |
180 | std::default_random_engine gen(seed); |
181 | std::uniform_int_distribution<int> d1(min_size, max_size); |
182 | std::uniform_int_distribution<int> d2(0, 255); |
183 | for (int i = 0; i < n; ++i) { |
184 | int len = d1(gen); |
185 | out[i].len = len; |
186 | out[i].ptr = buf; |
187 | for (int j = 0; j < len; ++j) { |
188 | buf[j] = static_cast<uint8_t>(d2(gen)); |
189 | } |
190 | buf += len; |
191 | } |
192 | } |
193 | |
194 | void random_byte_array(int n, uint32_t seed, uint8_t* buf, ByteArray* out, int max_size) { |
195 | random_byte_array(n, seed, buf, out, 0, max_size); |
196 | } |
197 | |
198 | } // namespace test |
199 | } // namespace parquet |
200 | |
201 | #endif // PARQUET_UTIL_TEST_COMMON_H |
202 | |