1 | // Licensed to the Apache Software Foundation (ASF) under one |
2 | // or more contributor license agreements. See the NOTICE file |
3 | // distributed with this work for additional information |
4 | // regarding copyright ownership. The ASF licenses this file |
5 | // to you under the Apache License, Version 2.0 (the |
6 | // "License"); you may not use this file except in compliance |
7 | // with the License. You may obtain a copy of the License at |
8 | // |
9 | // http://www.apache.org/licenses/LICENSE-2.0 |
10 | // |
11 | // Unless required by applicable law or agreed to in writing, |
12 | // software distributed under the License is distributed on an |
13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
14 | // KIND, either express or implied. See the License for the |
15 | // specific language governing permissions and limitations |
16 | // under the License. |
17 | |
18 | #include <gtest/gtest.h> |
19 | |
20 | #include <string> |
21 | |
22 | #include "parquet/types.h" |
23 | |
24 | namespace parquet { |
25 | |
26 | TEST(TestTypeToString, PhysicalTypes) { |
27 | ASSERT_STREQ("BOOLEAN" , TypeToString(Type::BOOLEAN).c_str()); |
28 | ASSERT_STREQ("INT32" , TypeToString(Type::INT32).c_str()); |
29 | ASSERT_STREQ("INT64" , TypeToString(Type::INT64).c_str()); |
30 | ASSERT_STREQ("INT96" , TypeToString(Type::INT96).c_str()); |
31 | ASSERT_STREQ("FLOAT" , TypeToString(Type::FLOAT).c_str()); |
32 | ASSERT_STREQ("DOUBLE" , TypeToString(Type::DOUBLE).c_str()); |
33 | ASSERT_STREQ("BYTE_ARRAY" , TypeToString(Type::BYTE_ARRAY).c_str()); |
34 | ASSERT_STREQ("FIXED_LEN_BYTE_ARRAY" , TypeToString(Type::FIXED_LEN_BYTE_ARRAY).c_str()); |
35 | } |
36 | |
37 | TEST(TestLogicalTypeToString, LogicalTypes) { |
38 | ASSERT_STREQ("NONE" , LogicalTypeToString(LogicalType::NONE).c_str()); |
39 | ASSERT_STREQ("UTF8" , LogicalTypeToString(LogicalType::UTF8).c_str()); |
40 | ASSERT_STREQ("MAP_KEY_VALUE" , LogicalTypeToString(LogicalType::MAP_KEY_VALUE).c_str()); |
41 | ASSERT_STREQ("LIST" , LogicalTypeToString(LogicalType::LIST).c_str()); |
42 | ASSERT_STREQ("ENUM" , LogicalTypeToString(LogicalType::ENUM).c_str()); |
43 | ASSERT_STREQ("DECIMAL" , LogicalTypeToString(LogicalType::DECIMAL).c_str()); |
44 | ASSERT_STREQ("DATE" , LogicalTypeToString(LogicalType::DATE).c_str()); |
45 | ASSERT_STREQ("TIME_MILLIS" , LogicalTypeToString(LogicalType::TIME_MILLIS).c_str()); |
46 | ASSERT_STREQ("TIME_MICROS" , LogicalTypeToString(LogicalType::TIME_MICROS).c_str()); |
47 | ASSERT_STREQ("TIMESTAMP_MILLIS" , |
48 | LogicalTypeToString(LogicalType::TIMESTAMP_MILLIS).c_str()); |
49 | ASSERT_STREQ("TIMESTAMP_MICROS" , |
50 | LogicalTypeToString(LogicalType::TIMESTAMP_MICROS).c_str()); |
51 | ASSERT_STREQ("UINT_8" , LogicalTypeToString(LogicalType::UINT_8).c_str()); |
52 | ASSERT_STREQ("UINT_16" , LogicalTypeToString(LogicalType::UINT_16).c_str()); |
53 | ASSERT_STREQ("UINT_32" , LogicalTypeToString(LogicalType::UINT_32).c_str()); |
54 | ASSERT_STREQ("UINT_64" , LogicalTypeToString(LogicalType::UINT_64).c_str()); |
55 | ASSERT_STREQ("INT_8" , LogicalTypeToString(LogicalType::INT_8).c_str()); |
56 | ASSERT_STREQ("INT_16" , LogicalTypeToString(LogicalType::INT_16).c_str()); |
57 | ASSERT_STREQ("INT_32" , LogicalTypeToString(LogicalType::INT_32).c_str()); |
58 | ASSERT_STREQ("INT_64" , LogicalTypeToString(LogicalType::INT_64).c_str()); |
59 | ASSERT_STREQ("JSON" , LogicalTypeToString(LogicalType::JSON).c_str()); |
60 | ASSERT_STREQ("BSON" , LogicalTypeToString(LogicalType::BSON).c_str()); |
61 | ASSERT_STREQ("INTERVAL" , LogicalTypeToString(LogicalType::INTERVAL).c_str()); |
62 | } |
63 | |
64 | TEST(TestCompressionToString, Compression) { |
65 | ASSERT_STREQ("UNCOMPRESSED" , CompressionToString(Compression::UNCOMPRESSED).c_str()); |
66 | ASSERT_STREQ("SNAPPY" , CompressionToString(Compression::SNAPPY).c_str()); |
67 | ASSERT_STREQ("GZIP" , CompressionToString(Compression::GZIP).c_str()); |
68 | ASSERT_STREQ("LZO" , CompressionToString(Compression::LZO).c_str()); |
69 | ASSERT_STREQ("BROTLI" , CompressionToString(Compression::BROTLI).c_str()); |
70 | ASSERT_STREQ("LZ4" , CompressionToString(Compression::LZ4).c_str()); |
71 | ASSERT_STREQ("ZSTD" , CompressionToString(Compression::ZSTD).c_str()); |
72 | } |
73 | |
// The test that follows calls the deprecated FormatStatValue(.., const char*)
// overload on purpose, so deprecation warnings are silenced around it.
//
// The pragma dialect is chosen by compiler rather than by operating system:
// MSVC-compatible front ends take the `#pragma warning` form (C4996 is the
// deprecation warning), and GCC-compatible compilers take the
// `#pragma GCC diagnostic` form on every platform, including MinGW and Cygwin.
// `defined(...)` is used so that no undefined macro is ever evaluated.
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4996)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#endif
81 | |
82 | TEST(TypePrinter, StatisticsTypes) { |
83 | std::string smin; |
84 | std::string smax; |
85 | int32_t int_min = 1024; |
86 | int32_t int_max = 2048; |
87 | smin = std::string(reinterpret_cast<char*>(&int_min), sizeof(int32_t)); |
88 | smax = std::string(reinterpret_cast<char*>(&int_max), sizeof(int32_t)); |
89 | ASSERT_STREQ("1024" , FormatStatValue(Type::INT32, smin).c_str()); |
90 | ASSERT_STREQ("1024" , FormatStatValue(Type::INT32, smin.c_str()).c_str()); |
91 | ASSERT_STREQ("2048" , FormatStatValue(Type::INT32, smax).c_str()); |
92 | ASSERT_STREQ("2048" , FormatStatValue(Type::INT32, smax.c_str()).c_str()); |
93 | |
94 | int64_t int64_min = 10240000000000; |
95 | int64_t int64_max = 20480000000000; |
96 | smin = std::string(reinterpret_cast<char*>(&int64_min), sizeof(int64_t)); |
97 | smax = std::string(reinterpret_cast<char*>(&int64_max), sizeof(int64_t)); |
98 | ASSERT_STREQ("10240000000000" , FormatStatValue(Type::INT64, smin).c_str()); |
99 | ASSERT_STREQ("10240000000000" , FormatStatValue(Type::INT64, smin.c_str()).c_str()); |
100 | ASSERT_STREQ("20480000000000" , FormatStatValue(Type::INT64, smax).c_str()); |
101 | ASSERT_STREQ("20480000000000" , FormatStatValue(Type::INT64, smax.c_str()).c_str()); |
102 | |
103 | float float_min = 1.024f; |
104 | float float_max = 2.048f; |
105 | smin = std::string(reinterpret_cast<char*>(&float_min), sizeof(float)); |
106 | smax = std::string(reinterpret_cast<char*>(&float_max), sizeof(float)); |
107 | ASSERT_STREQ("1.024" , FormatStatValue(Type::FLOAT, smin).c_str()); |
108 | ASSERT_STREQ("1.024" , FormatStatValue(Type::FLOAT, smin.c_str()).c_str()); |
109 | ASSERT_STREQ("2.048" , FormatStatValue(Type::FLOAT, smax).c_str()); |
110 | ASSERT_STREQ("2.048" , FormatStatValue(Type::FLOAT, smax.c_str()).c_str()); |
111 | |
112 | double double_min = 1.0245; |
113 | double double_max = 2.0489; |
114 | smin = std::string(reinterpret_cast<char*>(&double_min), sizeof(double)); |
115 | smax = std::string(reinterpret_cast<char*>(&double_max), sizeof(double)); |
116 | ASSERT_STREQ("1.0245" , FormatStatValue(Type::DOUBLE, smin).c_str()); |
117 | ASSERT_STREQ("1.0245" , FormatStatValue(Type::DOUBLE, smin.c_str()).c_str()); |
118 | ASSERT_STREQ("2.0489" , FormatStatValue(Type::DOUBLE, smax).c_str()); |
119 | ASSERT_STREQ("2.0489" , FormatStatValue(Type::DOUBLE, smax.c_str()).c_str()); |
120 | |
121 | Int96 Int96_min = {{1024, 2048, 4096}}; |
122 | Int96 Int96_max = {{2048, 4096, 8192}}; |
123 | smin = std::string(reinterpret_cast<char*>(&Int96_min), sizeof(Int96)); |
124 | smax = std::string(reinterpret_cast<char*>(&Int96_max), sizeof(Int96)); |
125 | ASSERT_STREQ("1024 2048 4096" , FormatStatValue(Type::INT96, smin).c_str()); |
126 | ASSERT_STREQ("1024 2048 4096" , FormatStatValue(Type::INT96, smin.c_str()).c_str()); |
127 | ASSERT_STREQ("2048 4096 8192" , FormatStatValue(Type::INT96, smax).c_str()); |
128 | ASSERT_STREQ("2048 4096 8192" , FormatStatValue(Type::INT96, smax.c_str()).c_str()); |
129 | |
130 | smin = std::string("abcdef" ); |
131 | smax = std::string("ijklmnop" ); |
132 | ASSERT_STREQ("abcdef" , FormatStatValue(Type::BYTE_ARRAY, smin).c_str()); |
133 | ASSERT_STREQ("abcdef" , FormatStatValue(Type::BYTE_ARRAY, smin.c_str()).c_str()); |
134 | ASSERT_STREQ("ijklmnop" , FormatStatValue(Type::BYTE_ARRAY, smax).c_str()); |
135 | ASSERT_STREQ("ijklmnop" , FormatStatValue(Type::BYTE_ARRAY, smax.c_str()).c_str()); |
136 | |
137 | // PARQUET-1357: FormatStatValue truncates binary statistics on zero character |
138 | smax.push_back('\0'); |
139 | ASSERT_EQ(smax, FormatStatValue(Type::BYTE_ARRAY, smax)); |
140 | // This fails, thus the call to FormatStatValue(.., const char*) was deprecated. |
141 | // ASSERT_EQ(smax, FormatStatValue(Type::BYTE_ARRAY, smax.c_str())); |
142 | |
143 | smin = std::string("abcdefgh" ); |
144 | smax = std::string("ijklmnop" ); |
145 | ASSERT_STREQ("abcdefgh" , FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin).c_str()); |
146 | ASSERT_STREQ("abcdefgh" , |
147 | FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smin.c_str()).c_str()); |
148 | ASSERT_STREQ("ijklmnop" , FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax).c_str()); |
149 | ASSERT_STREQ("ijklmnop" , |
150 | FormatStatValue(Type::FIXED_LEN_BYTE_ARRAY, smax.c_str()).c_str()); |
151 | } |
152 | |
// Restore the diagnostic state that was saved before the test exercising the
// deprecated FormatStatValue(.., const char*) overload.
//
// The pragma dialect is chosen by compiler rather than by operating system, and
// `defined(...)` is used so that no undefined macro is ever evaluated.
#if defined(_MSC_VER)
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
158 | |
159 | } // namespace parquet |
160 | |