1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// This module provides test-only helpers: specializations of InitValues for the
// bool, ByteArray, FLBA and Int96 value types, and the PrimitiveTypedTest
// fixture, which depends on those specializations.
21
22#pragma once
23
24#include <algorithm>
25#include <limits>
26#include <sstream>
27#include <string>
28#include <vector>
29
30#include "parquet/test-util.h"
31
32namespace parquet {
33
34namespace test {
35
36template <>
37void inline InitValues<bool>(int num_values, vector<bool>& values,
38 vector<uint8_t>& buffer) {
39 values = flip_coins(num_values, 0);
40}
41
42template <>
43void inline InitValues<ByteArray>(int num_values, vector<ByteArray>& values,
44 vector<uint8_t>& buffer) {
45 int max_byte_array_len = 12;
46 int num_bytes = static_cast<int>(max_byte_array_len + sizeof(uint32_t));
47 size_t nbytes = num_values * num_bytes;
48 buffer.resize(nbytes);
49 random_byte_array(num_values, 0, buffer.data(), values.data(), max_byte_array_len);
50}
51
52void inline InitWideByteArrayValues(int num_values, vector<ByteArray>& values,
53 vector<uint8_t>& buffer, int min_len, int max_len) {
54 int num_bytes = static_cast<int>(max_len + sizeof(uint32_t));
55 size_t nbytes = num_values * num_bytes;
56 buffer.resize(nbytes);
57 random_byte_array(num_values, 0, buffer.data(), values.data(), min_len, max_len);
58}
59
60template <>
61void inline InitValues<FLBA>(int num_values, vector<FLBA>& values,
62 vector<uint8_t>& buffer) {
63 size_t nbytes = num_values * FLBA_LENGTH;
64 buffer.resize(nbytes);
65 random_fixed_byte_array(num_values, 0, buffer.data(), FLBA_LENGTH, values.data());
66}
67
68template <>
69void inline InitValues<Int96>(int num_values, vector<Int96>& values,
70 vector<uint8_t>& buffer) {
71 random_Int96_numbers(num_values, 0, std::numeric_limits<int32_t>::min(),
72 std::numeric_limits<int32_t>::max(), values.data());
73}
74
// Returns the name used for the i-th test column: "column_" followed by the
// decimal rendering of `i` (e.g. "column_0").
inline std::string TestColumnName(int i) {
  std::ostringstream name_builder;
  name_builder << "column_";
  name_builder << i;
  return name_builder.str();
}
80
81// This class lives here because of its dependency on the InitValues specializations.
82template <typename TestType>
83class PrimitiveTypedTest : public ::testing::Test {
84 public:
85 typedef typename TestType::c_type T;
86
87 void SetUpSchema(Repetition::type repetition, int num_columns = 1) {
88 std::vector<schema::NodePtr> fields;
89
90 for (int i = 0; i < num_columns; ++i) {
91 std::string name = TestColumnName(i);
92 fields.push_back(schema::PrimitiveNode::Make(name, repetition, TestType::type_num,
93 LogicalType::NONE, FLBA_LENGTH));
94 }
95 node_ = schema::GroupNode::Make("schema", Repetition::REQUIRED, fields);
96 schema_.Init(node_);
97 }
98
99 void GenerateData(int64_t num_values);
100 void SetupValuesOut(int64_t num_values);
101 void SyncValuesOut();
102
103 protected:
104 schema::NodePtr node_;
105 SchemaDescriptor schema_;
106
107 // Input buffers
108 std::vector<T> values_;
109
110 std::vector<int16_t> def_levels_;
111
112 std::vector<uint8_t> buffer_;
113 // Pointer to the values, needed as we cannot use vector<bool>::data()
114 T* values_ptr_;
115 std::vector<uint8_t> bool_buffer_;
116
117 // Output buffers
118 std::vector<T> values_out_;
119 std::vector<uint8_t> bool_buffer_out_;
120 T* values_out_ptr_;
121};
122
123template <typename TestType>
124void PrimitiveTypedTest<TestType>::SyncValuesOut() {}
125
126template <>
127void PrimitiveTypedTest<BooleanType>::SyncValuesOut() {
128 std::vector<uint8_t>::const_iterator source_iterator = bool_buffer_out_.begin();
129 std::vector<T>::iterator destination_iterator = values_out_.begin();
130 while (source_iterator != bool_buffer_out_.end()) {
131 *destination_iterator++ = *source_iterator++ != 0;
132 }
133}
134
135template <typename TestType>
136void PrimitiveTypedTest<TestType>::SetupValuesOut(int64_t num_values) {
137 values_out_.clear();
138 values_out_.resize(num_values);
139 values_out_ptr_ = values_out_.data();
140}
141
142template <>
143void PrimitiveTypedTest<BooleanType>::SetupValuesOut(int64_t num_values) {
144 values_out_.clear();
145 values_out_.resize(num_values);
146
147 bool_buffer_out_.clear();
148 bool_buffer_out_.resize(num_values);
149 // Write once to all values so we can copy it without getting Valgrind errors
150 // about uninitialised values.
151 std::fill(bool_buffer_out_.begin(), bool_buffer_out_.end(), true);
152 values_out_ptr_ = reinterpret_cast<bool*>(bool_buffer_out_.data());
153}
154
155template <typename TestType>
156void PrimitiveTypedTest<TestType>::GenerateData(int64_t num_values) {
157 def_levels_.resize(num_values);
158 values_.resize(num_values);
159
160 InitValues<T>(static_cast<int>(num_values), values_, buffer_);
161 values_ptr_ = values_.data();
162
163 std::fill(def_levels_.begin(), def_levels_.end(), 1);
164}
165
166template <>
167void PrimitiveTypedTest<BooleanType>::GenerateData(int64_t num_values) {
168 def_levels_.resize(num_values);
169 values_.resize(num_values);
170
171 InitValues<T>(static_cast<int>(num_values), values_, buffer_);
172 bool_buffer_.resize(num_values);
173 std::copy(values_.begin(), values_.end(), bool_buffer_.begin());
174 values_ptr_ = reinterpret_cast<bool*>(bool_buffer_.data());
175
176 std::fill(def_levels_.begin(), def_levels_.end(), 1);
177}
178} // namespace test
179
180} // namespace parquet
181