1 | #include "duckdb/catalog/catalog.hpp" |
2 | #include "duckdb/parser/statement/export_statement.hpp" |
3 | #include "duckdb/planner/binder.hpp" |
4 | #include "duckdb/planner/operator/logical_export.hpp" |
5 | #include "duckdb/catalog/catalog_entry/copy_function_catalog_entry.hpp" |
6 | #include "duckdb/catalog/catalog_entry/table_catalog_entry.hpp" |
7 | #include "duckdb/parser/statement/copy_statement.hpp" |
8 | #include "duckdb/main/client_context.hpp" |
9 | #include "duckdb/main/database.hpp" |
10 | #include "duckdb/common/file_system.hpp" |
11 | #include "duckdb/planner/operator/logical_set_operation.hpp" |
12 | #include "duckdb/parser/parsed_data/exported_table_data.hpp" |
13 | #include "duckdb/parser/constraints/foreign_key_constraint.hpp" |
14 | |
15 | #include "duckdb/common/string_util.hpp" |
16 | #include <algorithm> |
17 | |
18 | namespace duckdb { |
19 | |
20 | //! Sanitizes a string to have only low case chars and underscores |
21 | string SanitizeExportIdentifier(const string &str) { |
22 | // Copy the original string to result |
23 | string result(str); |
24 | |
25 | for (idx_t i = 0; i < str.length(); ++i) { |
26 | auto c = str[i]; |
27 | if (c >= 'a' && c <= 'z') { |
28 | // If it is lower case just continue |
29 | continue; |
30 | } |
31 | |
32 | if (c >= 'A' && c <= 'Z') { |
33 | // To lowercase |
34 | result[i] = tolower(c: c); |
35 | } else { |
36 | // Substitute to underscore |
37 | result[i] = '_'; |
38 | } |
39 | } |
40 | |
41 | return result; |
42 | } |
43 | |
44 | bool IsExistMainKeyTable(string &table_name, vector<reference<TableCatalogEntry>> &unordered) { |
45 | for (idx_t i = 0; i < unordered.size(); i++) { |
46 | if (unordered[i].get().name == table_name) { |
47 | return true; |
48 | } |
49 | } |
50 | return false; |
51 | } |
52 | |
53 | void ScanForeignKeyTable(vector<reference<TableCatalogEntry>> &ordered, vector<reference<TableCatalogEntry>> &unordered, |
54 | bool move_only_pk_table) { |
55 | for (auto i = unordered.begin(); i != unordered.end();) { |
56 | auto table_entry = *i; |
57 | bool move_to_ordered = true; |
58 | auto &constraints = table_entry.get().GetConstraints(); |
59 | for (idx_t j = 0; j < constraints.size(); j++) { |
60 | auto &cond = constraints[j]; |
61 | if (cond->type == ConstraintType::FOREIGN_KEY) { |
62 | auto &fk = cond->Cast<ForeignKeyConstraint>(); |
63 | if ((move_only_pk_table && fk.info.type == ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE) || |
64 | (!move_only_pk_table && fk.info.type == ForeignKeyType::FK_TYPE_FOREIGN_KEY_TABLE && |
65 | IsExistMainKeyTable(table_name&: fk.info.table, unordered))) { |
66 | move_to_ordered = false; |
67 | break; |
68 | } |
69 | } |
70 | } |
71 | if (move_to_ordered) { |
72 | ordered.push_back(x: table_entry); |
73 | i = unordered.erase(position: i); |
74 | } else { |
75 | i++; |
76 | } |
77 | } |
78 | } |
79 | |
80 | void ReorderTableEntries(vector<reference<TableCatalogEntry>> &tables) { |
81 | vector<reference<TableCatalogEntry>> ordered; |
82 | vector<reference<TableCatalogEntry>> unordered = tables; |
83 | ScanForeignKeyTable(ordered, unordered, move_only_pk_table: true); |
84 | while (!unordered.empty()) { |
85 | ScanForeignKeyTable(ordered, unordered, move_only_pk_table: false); |
86 | } |
87 | tables = ordered; |
88 | } |
89 | |
90 | string CreateFileName(const string &id_suffix, TableCatalogEntry &table, const string &extension) { |
91 | auto name = SanitizeExportIdentifier(str: table.name); |
92 | if (table.schema.name == DEFAULT_SCHEMA) { |
93 | return StringUtil::Format(fmt_str: "%s%s.%s" , params: name, params: id_suffix, params: extension); |
94 | } |
95 | auto schema = SanitizeExportIdentifier(str: table.schema.name); |
96 | return StringUtil::Format(fmt_str: "%s_%s%s.%s" , params: schema, params: name, params: id_suffix, params: extension); |
97 | } |
98 | |
99 | BoundStatement Binder::Bind(ExportStatement &stmt) { |
100 | // COPY TO a file |
101 | auto &config = DBConfig::GetConfig(context); |
102 | if (!config.options.enable_external_access) { |
103 | throw PermissionException("COPY TO is disabled through configuration" ); |
104 | } |
105 | BoundStatement result; |
106 | result.types = {LogicalType::BOOLEAN}; |
107 | result.names = {"Success" }; |
108 | |
109 | // lookup the format in the catalog |
110 | auto ©_function = |
111 | Catalog::GetEntry<CopyFunctionCatalogEntry>(context, INVALID_CATALOG, DEFAULT_SCHEMA, name: stmt.info->format); |
112 | if (!copy_function.function.copy_to_bind && !copy_function.function.plan) { |
113 | throw NotImplementedException("COPY TO is not supported for FORMAT \"%s\"" , stmt.info->format); |
114 | } |
115 | |
116 | // gather a list of all the tables |
117 | string catalog = stmt.database.empty() ? INVALID_CATALOG : stmt.database; |
118 | vector<reference<TableCatalogEntry>> tables; |
119 | auto schemas = Catalog::GetSchemas(context, catalog_name: catalog); |
120 | for (auto &schema : schemas) { |
121 | schema.get().Scan(context, type: CatalogType::TABLE_ENTRY, callback: [&](CatalogEntry &entry) { |
122 | if (entry.type == CatalogType::TABLE_ENTRY) { |
123 | tables.push_back(x: entry.Cast<TableCatalogEntry>()); |
124 | } |
125 | }); |
126 | } |
127 | |
128 | // reorder tables because of foreign key constraint |
129 | ReorderTableEntries(tables); |
130 | |
131 | // now generate the COPY statements for each of the tables |
132 | auto &fs = FileSystem::GetFileSystem(context); |
133 | unique_ptr<LogicalOperator> child_operator; |
134 | |
135 | BoundExportData exported_tables; |
136 | |
137 | unordered_set<string> table_name_index; |
138 | for (auto &t : tables) { |
139 | auto &table = t.get(); |
140 | auto info = make_uniq<CopyInfo>(); |
141 | // we copy the options supplied to the EXPORT |
142 | info->format = stmt.info->format; |
143 | info->options = stmt.info->options; |
144 | // set up the file name for the COPY TO |
145 | |
146 | idx_t id = 0; |
147 | while (true) { |
148 | string id_suffix = id == 0 ? string() : "_" + to_string(val: id); |
149 | auto name = CreateFileName(id_suffix, table, extension: copy_function.function.extension); |
150 | auto directory = stmt.info->file_path; |
151 | auto full_path = fs.JoinPath(a: directory, path: name); |
152 | info->file_path = full_path; |
153 | auto insert_result = table_name_index.insert(x: info->file_path); |
154 | if (insert_result.second == true) { |
155 | // this name was not yet taken: take it |
156 | break; |
157 | } |
158 | id++; |
159 | } |
160 | info->is_from = false; |
161 | info->catalog = catalog; |
162 | info->schema = table.schema.name; |
163 | info->table = table.name; |
164 | |
165 | // We can not export generated columns |
166 | for (auto &col : table.GetColumns().Physical()) { |
167 | info->select_list.push_back(x: col.GetName()); |
168 | } |
169 | |
170 | ExportedTableData exported_data; |
171 | exported_data.database_name = catalog; |
172 | exported_data.table_name = info->table; |
173 | exported_data.schema_name = info->schema; |
174 | |
175 | exported_data.file_path = info->file_path; |
176 | |
177 | ExportedTableInfo table_info(table, std::move(exported_data)); |
178 | exported_tables.data.push_back(x: table_info); |
179 | id++; |
180 | |
181 | // generate the copy statement and bind it |
182 | CopyStatement copy_stmt; |
183 | copy_stmt.info = std::move(info); |
184 | |
185 | auto copy_binder = Binder::CreateBinder(context, parent: this); |
186 | auto bound_statement = copy_binder->Bind(stmt&: copy_stmt); |
187 | if (child_operator) { |
188 | // use UNION ALL to combine the individual copy statements into a single node |
189 | auto copy_union = |
190 | make_uniq<LogicalSetOperation>(args: GenerateTableIndex(), args: 1, args: std::move(child_operator), |
191 | args: std::move(bound_statement.plan), args: LogicalOperatorType::LOGICAL_UNION); |
192 | child_operator = std::move(copy_union); |
193 | } else { |
194 | child_operator = std::move(bound_statement.plan); |
195 | } |
196 | } |
197 | |
198 | // try to create the directory, if it doesn't exist yet |
199 | // a bit hacky to do it here, but we need to create the directory BEFORE the copy statements run |
200 | if (!fs.DirectoryExists(directory: stmt.info->file_path)) { |
201 | fs.CreateDirectory(directory: stmt.info->file_path); |
202 | } |
203 | |
204 | // create the export node |
205 | auto export_node = make_uniq<LogicalExport>(args&: copy_function.function, args: std::move(stmt.info), args&: exported_tables); |
206 | |
207 | if (child_operator) { |
208 | export_node->children.push_back(x: std::move(child_operator)); |
209 | } |
210 | |
211 | result.plan = std::move(export_node); |
212 | properties.allow_stream_result = false; |
213 | properties.return_type = StatementReturnType::NOTHING; |
214 | return result; |
215 | } |
216 | |
217 | } // namespace duckdb |
218 | |