1//===----------------------------------------------------------------------===//
2// DuckDB
3//
4// duckdb/common/multi_file_reader_options.hpp
5//
6//
7//===----------------------------------------------------------------------===//
8
9#pragma once
10
11#include "duckdb/common/types.hpp"
12#include "duckdb/common/hive_partitioning.hpp"
13#include "re2/re2.h"
14
15namespace duckdb {
16class Serializer;
17class Deserializer;
18struct BindInfo;
19
20struct MultiFileReaderOptions {
21 bool filename = false;
22 bool hive_partitioning = false;
23 bool auto_detect_hive_partitioning = true;
24 bool union_by_name = false;
25
26 DUCKDB_API void Serialize(Serializer &serializer) const;
27 DUCKDB_API static MultiFileReaderOptions Deserialize(Deserializer &source);
28 DUCKDB_API void AddBatchInfo(BindInfo &bind_info) const;
29
30 static bool AutoDetectHivePartitioning(const vector<string> &files) {
31 if (files.empty()) {
32 return false;
33 }
34
35 std::unordered_set<string> uset;
36 idx_t splits_size;
37 {
38 // front file
39 auto splits = StringUtil::Split(input: files.front(), split: FileSystem::PathSeparator());
40 splits_size = splits.size();
41 if (splits.size() < 2) {
42 return false;
43 }
44 for (auto it = splits.begin(); it != std::prev(x: splits.end()); it++) {
45 auto part = StringUtil::Split(input: *it, split: "=");
46 if (part.size() == 2) {
47 uset.insert(x: part.front());
48 }
49 }
50 }
51 if (uset.empty()) {
52 return false;
53 }
54 for (auto &file : files) {
55 auto splits = StringUtil::Split(input: file, split: FileSystem::PathSeparator());
56 if (splits.size() != splits_size) {
57 return false;
58 }
59 for (auto it = splits.begin(); it != std::prev(x: splits.end()); it++) {
60 auto part = StringUtil::Split(input: *it, split: "=");
61 if (part.size() == 2) {
62 if (uset.find(x: part.front()) == uset.end()) {
63 return false;
64 }
65 }
66 }
67 }
68 return true;
69 }
70};
71
72} // namespace duckdb
73