1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements. See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership. The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License. You may obtain a copy of the License at
8//
9// http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied. See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
// This module contains the internal utilities for converting between the flat
// Parquet Thrift schema representation (format::SchemaElement) and the nested
// parquet-cpp schema nodes
20
21#ifndef PARQUET_SCHEMA_INTERNAL_H
22#define PARQUET_SCHEMA_INTERNAL_H
23
24#include <cstdint>
25#include <memory>
26#include <string>
27#include <unordered_set>
28#include <vector>
29
30#include "parquet/platform.h"
31#include "parquet/schema.h"
32#include "parquet/types.h"
33
34namespace parquet {
35
namespace format {

// Thrift schema struct. A forward declaration is enough for this header: the
// declarations below only mention it through pointers, references, and as a
// template argument in function signatures.
class SchemaElement;

}  // namespace format
39
40namespace schema {
41
42// ----------------------------------------------------------------------
43// Conversion from Parquet Thrift metadata
44
45PARQUET_EXPORT
46std::shared_ptr<SchemaDescriptor> FromParquet(
47 const std::vector<format::SchemaElement>& schema);
48
49class PARQUET_EXPORT FlatSchemaConverter {
50 public:
51 FlatSchemaConverter(const format::SchemaElement* elements, int length)
52 : elements_(elements), length_(length), pos_(0), current_id_(0) {}
53
54 std::unique_ptr<Node> Convert();
55
56 private:
57 const format::SchemaElement* elements_;
58 int length_;
59 int pos_;
60 int current_id_;
61
62 int next_id() { return current_id_++; }
63
64 const format::SchemaElement& Next();
65
66 std::unique_ptr<Node> NextNode();
67};
68
69// ----------------------------------------------------------------------
70// Conversion to Parquet Thrift metadata
71
72PARQUET_EXPORT
73void ToParquet(const GroupNode* schema, std::vector<format::SchemaElement>* out);
74
75// Converts nested parquet schema back to a flat vector of Thrift structs
76class PARQUET_EXPORT SchemaFlattener {
77 public:
78 SchemaFlattener(const GroupNode* schema, std::vector<format::SchemaElement>* out);
79
80 void Flatten();
81
82 private:
83 const GroupNode* root_;
84 std::vector<format::SchemaElement>* elements_;
85};
86
87} // namespace schema
88} // namespace parquet
89
90#endif // PARQUET_SCHEMA_INTERNAL_H
91