1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18#include <gtest/gtest.h>
19
20#include <string>
21
22#include "parquet/types.h"
23
24namespace parquet {
25
26TEST(TestTypeToString, PhysicalTypes) {
27 ASSERT_STREQ("BOOLEAN", TypeToString(Type::BOOLEAN).c_str());
28 ASSERT_STREQ("INT32", TypeToString(Type::INT32).c_str());
29 ASSERT_STREQ("INT64", TypeToString(Type::INT64).c_str());
30 ASSERT_STREQ("INT96", TypeToString(Type::INT96).c_str());
31 ASSERT_STREQ("FLOAT", TypeToString(Type::FLOAT).c_str());
32 ASSERT_STREQ("DOUBLE", TypeToString(Type::DOUBLE).c_str());
33 ASSERT_STREQ("BYTE_ARRAY", TypeToString(Type::BYTE_ARRAY).c_str());
34 ASSERT_STREQ("FIXED_LEN_BYTE_ARRAY", TypeToString(Type::FIXED_LEN_BYTE_ARRAY).c_str());
35}
36
37TEST(TestLogicalTypeToString, LogicalTypes) {
38 ASSERT_STREQ("NONE", LogicalTypeToString(LogicalType::NONE).c_str());
39 ASSERT_STREQ("UTF8", LogicalTypeToString(LogicalType::UTF8).c_str());
40 ASSERT_STREQ("MAP_KEY_VALUE", LogicalTypeToString(LogicalType::MAP_KEY_VALUE).c_str());
41 ASSERT_STREQ("LIST", LogicalTypeToString(LogicalType::LIST).c_str());
42 ASSERT_STREQ("ENUM", LogicalTypeToString(LogicalType::ENUM).c_str());
43 ASSERT_STREQ("DECIMAL", LogicalTypeToString(LogicalType::DECIMAL).c_str());
44 ASSERT_STREQ("DATE", LogicalTypeToString(LogicalType::DATE).c_str());
45 ASSERT_STREQ("TIME_MILLIS", LogicalTypeToString(LogicalType::TIME_MILLIS).c_str());
46 ASSERT_STREQ("TIME_MICROS", LogicalTypeToString(LogicalType::TIME_MICROS).c_str());
47 ASSERT_STREQ("TIMESTAMP_MILLIS",
48 LogicalTypeToString(LogicalType::TIMESTAMP_MILLIS).c_str());
49 ASSERT_STREQ("TIMESTAMP_MICROS",
50 LogicalTypeToString(LogicalType::TIMESTAMP_MICROS).c_str());
51 ASSERT_STREQ("UINT_8", LogicalTypeToString(LogicalType::UINT_8).c_str());
52 ASSERT_STREQ("UINT_16", LogicalTypeToString(LogicalType::UINT_16).c_str());
53 ASSERT_STREQ("UINT_32", LogicalTypeToString(LogicalType::UINT_32).c_str());
54 ASSERT_STREQ("UINT_64", LogicalTypeToString(LogicalType::UINT_64).c_str());
55 ASSERT_STREQ("INT_8", LogicalTypeToString(LogicalType::INT_8).c_str());
56 ASSERT_STREQ("INT_16", LogicalTypeToString(LogicalType::INT_16).c_str());
57 ASSERT_STREQ("INT_32", LogicalTypeToString(LogicalType::INT_32).c_str());
58 ASSERT_STREQ("INT_64", LogicalTypeToString(LogicalType::INT_64).c_str());
59 ASSERT_STREQ("JSON", LogicalTypeToString(LogicalType::JSON).c_str());
60 ASSERT_STREQ("BSON", LogicalTypeToString(LogicalType::BSON).c_str());
61 ASSERT_STREQ("INTERVAL", LogicalTypeToString(LogicalType::INTERVAL).c_str());
62}
63
64TEST(TestCompressionToString, Compression) {
65 ASSERT_STREQ("UNCOMPRESSED", CompressionToString(Compression::UNCOMPRESSED).c_str());
66 ASSERT_STREQ("SNAPPY", CompressionToString(Compression::SNAPPY).c_str());
67 ASSERT_STREQ("GZIP", CompressionToString(Compression::GZIP).c_str());
68 ASSERT_STREQ("LZO", CompressionToString(Compression::LZO).c_str());
69 ASSERT_STREQ("BROTLI", CompressionToString(Compression::BROTLI).c_str());
70 ASSERT_STREQ("LZ4", CompressionToString(Compression::LZ4).c_str());
71 ASSERT_STREQ("ZSTD", CompressionToString(Compression::ZSTD).c_str());
72}
73
// The test that follows intentionally calls the deprecated
// FormatStatValue(.., const char*) overload, so deprecation warnings are
// silenced around it: GCC-style pragmas off Windows/Cygwin, MSVC pragmas when
// building with MSVC.
// Use defined(_MSC_VER) rather than evaluating the bare macro, so that
// Windows toolchains that do not define it (and build with -Wundef) do not
// warn about an undefined identifier in the #elif.
#if !(defined(_WIN32) || defined(__CYGWIN__))
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#elif defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996)
#endif
81
82TEST(TypePrinter, StatisticsTypes) {
83 std::string smin;
84 std::string smax;
85 int32_t int_min = 1024;
86 int32_t int_max = 2048;
87 smin = std::string(reinterpret_cast<char*>(&int_min), sizeof(int32_t));
88 smax = std::string(reinterpret_cast<char*>(&int_max), sizeof(int32_t));
89 ASSERT_STREQ("1024", FormatStatValue(Type::INT32, smin).c_str());
90 ASSERT_STREQ("1024", FormatStatValue(Type::INT32, smin.c_str()).c_str());
91 ASSERT_STREQ("2048", FormatStatValue(Type::INT32, smax).c_str());
92 ASSERT_STREQ("2048", FormatStatValue(Type::INT32, smax.c_str()).c_str());
93
94 int64_t int64_min = 10240000000000;
95 int64_t int64_max = 20480000000000;
96 smin = std::string(reinterpret_cast<char*>(&int64_min), sizeof(int64_t));
97 smax = std::string(reinterpret_cast<char*>(&int64_max), sizeof(int64_t));
98 ASSERT_STREQ("10240000000000", FormatStatValue(Type::INT64, smin).c_str());
99 ASSERT_STREQ("10240000000000", FormatStatValue(Type::INT64, smin.c_str()).c_str());
100 ASSERT_STREQ("20480000000000", FormatStatValue(Type::INT64, smax).c_str());
101 ASSERT_STREQ("20480000000000", FormatStatValue(Type::INT64, smax.c_str()).c_str());
102
103 float float_min = 1.024f;
104 float float_max = 2.048f;
105 smin = std::string(reinterpret_cast<char*>(&float_min), sizeof(float));
106 smax = std::string(reinterpret_cast<char*>(&float_max), sizeof(float));
107 ASSERT_STREQ("1.024", FormatStatValue(Type::FLOAT, smin).c_str());
108 ASSERT_STREQ("1.024", FormatStatValue(Type::FLOAT, smin.c_str()).c_str());
109 ASSERT_STREQ("2.048", FormatStatValue(Type::FLOAT, smax).c_str());
110 ASSERT_STREQ("2.048", FormatStatValue(Type::FLOAT, smax.c_str()).c_str());
111
112 double double_min = 1.0245;
113 double double_max = 2.0489;
114 smin = std::string(reinterpret_cast<char*>(&double_min), sizeof(double));
115 smax = std::string(reinterpret_cast<char*>(&double_max), sizeof(double));
116 ASSERT_STREQ("1.0245", FormatStatValue(Type::DOUBLE, smin).c_str());
117 ASSERT_STREQ("1.0245", FormatStatValue(Type::DOUBLE, smin.c_str()).c_str());
118 ASSERT_STREQ("2.0489", FormatStatValue(Type::DOUBLE, smax).c_str());
119 ASSERT_STREQ("2.0489", FormatStatValue(Type::DOUBLE, smax.c_str()).c_str());
120
121 Int96 Int96_min = {{1024, 2048, 4096}};
122 Int96 Int96_max = {{2048, 4096, 8192}};
123 smin = std::string(reinterpret_cast<char*>(&Int96_min), sizeof(Int96));
124 smax = std::string(reinterpret_cast<char*>(&Int96_max), sizeof(Int96));
125 ASSERT_STREQ("1024 2048 4096", FormatStatValue(Type::INT96, smin).c_str());
126 ASSERT_STREQ("1024 2048 4096", FormatStatValue(Type::INT96, smin.c_str()).c_str());
127 ASSERT_STREQ("2048 4096 8192", FormatStatValue(Type::INT96, smax).c_str());
128 ASSERT_STREQ("2048 4096 8192", FormatStatValue(Type::INT96, smax.c_str()).c_str());
129
130 smin = std::string("abcdef");
131 smax = std::string("ijklmnop");
132 ASSERT_STREQ("abcdef", FormatStatValue(Type::BYTE_ARRAY, smin).c_str());
133 ASSERT_STREQ("abcdef", FormatStatValue(Type::BYTE_ARRAY, smin.c_str()).c_str());
134 ASSERT_STREQ("ijklmnop", FormatStatValue(Type::BYTE_ARRAY, smax).c_str());
135 ASSERT_STREQ("ijklmnop", FormatStatValue(Type::BYTE_ARRAY, smax.c_str()).c_str());
136
137 // PARQUET-1357: FormatStatValue truncates binary statistics on zero character
138 smax.push_back('\0');
139 ASSERT_EQ(smax, FormatStatValue(Type::BYTE_ARRAY, smax));
140 // This fails, thus the call to FormatStatValue(.., const char*) was deprecated.
141 // ASSERT_EQ(smax, FormatStatValue(Type::BYTE_ARRAY, smax.c_str()));
142
143 smin = std::string("abcdefgh");
144 smax = std::string("ijklmnop");
145 ASSERT_STREQ("abcdefgh", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin).c_str());
146 ASSERT_STREQ("abcdefgh",
147 FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str());
148 ASSERT_STREQ("ijklmnop", FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax).c_str());
149 ASSERT_STREQ("ijklmnop",
150 FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str());
151}
152
// Restore the diagnostic state saved before the preceding test, using the same
// toolchain selection as the matching push.
// Use defined(_MSC_VER) rather than evaluating the bare macro, so that
// Windows toolchains that do not define it (and build with -Wundef) do not
// warn about an undefined identifier in the #elif.
#if !(defined(_WIN32) || defined(__CYGWIN__))
#pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#pragma warning(pop)
#endif
158
159} // namespace parquet
160