1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/function/table_function.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/enums/operator_result_type.hpp" |
12 | #include "duckdb/common/optional_ptr.hpp" |
13 | #include "duckdb/execution/execution_context.hpp" |
14 | #include "duckdb/function/function.hpp" |
15 | #include "duckdb/planner/bind_context.hpp" |
16 | #include "duckdb/planner/logical_operator.hpp" |
17 | #include "duckdb/storage/statistics/node_statistics.hpp" |
18 | |
19 | #include <functional> |
20 | |
21 | namespace duckdb { |
22 | |
23 | class BaseStatistics; |
24 | class DependencyList; |
25 | class LogicalGet; |
26 | class TableFilterSet; |
27 | |
28 | struct TableFunctionInfo { |
29 | DUCKDB_API virtual ~TableFunctionInfo(); |
30 | |
31 | template <class TARGET> |
32 | TARGET &Cast() { |
33 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
34 | return reinterpret_cast<TARGET &>(*this); |
35 | } |
36 | template <class TARGET> |
37 | const TARGET &Cast() const { |
38 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
39 | return reinterpret_cast<const TARGET &>(*this); |
40 | } |
41 | }; |
42 | |
43 | struct GlobalTableFunctionState { |
44 | public: |
45 | // value returned from MaxThreads when as many threads as possible should be used |
46 | constexpr static const int64_t MAX_THREADS = 999999999; |
47 | |
48 | public: |
49 | DUCKDB_API virtual ~GlobalTableFunctionState(); |
50 | |
51 | virtual idx_t MaxThreads() const { |
52 | return 1; |
53 | } |
54 | |
55 | template <class TARGET> |
56 | TARGET &Cast() { |
57 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
58 | return reinterpret_cast<TARGET &>(*this); |
59 | } |
60 | template <class TARGET> |
61 | const TARGET &Cast() const { |
62 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
63 | return reinterpret_cast<const TARGET &>(*this); |
64 | } |
65 | }; |
66 | |
67 | struct LocalTableFunctionState { |
68 | DUCKDB_API virtual ~LocalTableFunctionState(); |
69 | |
70 | template <class TARGET> |
71 | TARGET &Cast() { |
72 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
73 | return reinterpret_cast<TARGET &>(*this); |
74 | } |
75 | template <class TARGET> |
76 | const TARGET &Cast() const { |
77 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
78 | return reinterpret_cast<const TARGET &>(*this); |
79 | } |
80 | }; |
81 | |
82 | struct TableFunctionBindInput { |
83 | TableFunctionBindInput(vector<Value> &inputs, named_parameter_map_t &named_parameters, |
84 | vector<LogicalType> &input_table_types, vector<string> &input_table_names, |
85 | optional_ptr<TableFunctionInfo> info) |
86 | : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types), |
87 | input_table_names(input_table_names), info(info) { |
88 | } |
89 | |
90 | vector<Value> &inputs; |
91 | named_parameter_map_t &named_parameters; |
92 | vector<LogicalType> &input_table_types; |
93 | vector<string> &input_table_names; |
94 | optional_ptr<TableFunctionInfo> info; |
95 | }; |
96 | |
97 | struct TableFunctionInitInput { |
98 | TableFunctionInitInput(optional_ptr<const FunctionData> bind_data_p, const vector<column_t> &column_ids_p, |
99 | const vector<idx_t> &projection_ids_p, optional_ptr<TableFilterSet> filters_p) |
100 | : bind_data(bind_data_p), column_ids(column_ids_p), projection_ids(projection_ids_p), filters(filters_p) { |
101 | } |
102 | |
103 | optional_ptr<const FunctionData> bind_data; |
104 | const vector<column_t> &column_ids; |
105 | const vector<idx_t> projection_ids; |
106 | optional_ptr<TableFilterSet> filters; |
107 | |
108 | bool CanRemoveFilterColumns() const { |
109 | if (projection_ids.empty()) { |
110 | // Not set, can't remove filter columns |
111 | return false; |
112 | } else if (projection_ids.size() == column_ids.size()) { |
113 | // Filter column is used in remainder of plan, can't remove |
114 | return false; |
115 | } else { |
116 | // Less columns need to be projected out than that we scan |
117 | return true; |
118 | } |
119 | } |
120 | }; |
121 | |
122 | struct TableFunctionInput { |
123 | public: |
124 | TableFunctionInput(optional_ptr<const FunctionData> bind_data_p, |
125 | optional_ptr<LocalTableFunctionState> local_state_p, |
126 | optional_ptr<GlobalTableFunctionState> global_state_p) |
127 | : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) { |
128 | } |
129 | |
130 | public: |
131 | optional_ptr<const FunctionData> bind_data; |
132 | optional_ptr<LocalTableFunctionState> local_state; |
133 | optional_ptr<GlobalTableFunctionState> global_state; |
134 | }; |
135 | |
//! Kind of scan described by a BindInfo.
//! The explicit values match the implicit numbering of an unscoped enum.
enum ScanType {
	TABLE = 0,
	PARQUET = 1
};
137 | |
138 | struct BindInfo { |
139 | public: |
140 | explicit BindInfo(ScanType type_p) : type(type_p) {}; |
141 | unordered_map<string, Value> options; |
142 | ScanType type; |
143 | void InsertOption(const string &name, Value value) { |
144 | if (options.find(x: name) != options.end()) { |
145 | throw InternalException("This option already exists" ); |
146 | } |
147 | options[name] = std::move(value); |
148 | } |
149 | template <class T> |
150 | T GetOption(const string &name) { |
151 | if (options.find(x: name) == options.end()) { |
152 | throw InternalException("This option does not exist" ); |
153 | } |
154 | return options[name].GetValue<T>(); |
155 | } |
156 | template <class T> |
157 | vector<T> GetOptionList(const string &name) { |
158 | if (options.find(x: name) == options.end()) { |
159 | throw InternalException("This option does not exist" ); |
160 | } |
161 | auto option = options[name]; |
162 | if (option.type().id() != LogicalTypeId::LIST) { |
163 | throw InternalException("This option is not a list" ); |
164 | } |
165 | vector<T> result; |
166 | auto list_children = ListValue::GetChildren(value: option); |
167 | for (auto &child : list_children) { |
168 | result.emplace_back(child.GetValue<T>()); |
169 | } |
170 | return result; |
171 | } |
172 | }; |
173 | |
174 | typedef unique_ptr<FunctionData> (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input, |
175 | vector<LogicalType> &return_types, vector<string> &names); |
176 | typedef unique_ptr<TableRef> (*table_function_bind_replace_t)(ClientContext &context, TableFunctionBindInput &input); |
177 | typedef unique_ptr<GlobalTableFunctionState> (*table_function_init_global_t)(ClientContext &context, |
178 | TableFunctionInitInput &input); |
179 | typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(ExecutionContext &context, |
180 | TableFunctionInitInput &input, |
181 | GlobalTableFunctionState *global_state); |
182 | typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data, |
183 | column_t column_index); |
184 | typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output); |
185 | typedef OperatorResultType (*table_in_out_function_t)(ExecutionContext &context, TableFunctionInput &data, |
186 | DataChunk &input, DataChunk &output); |
187 | typedef OperatorFinalizeResultType (*table_in_out_function_final_t)(ExecutionContext &context, TableFunctionInput &data, |
188 | DataChunk &output); |
189 | typedef idx_t (*table_function_get_batch_index_t)(ClientContext &context, const FunctionData *bind_data, |
190 | LocalTableFunctionState *local_state, |
191 | GlobalTableFunctionState *global_state); |
192 | |
193 | typedef BindInfo (*table_function_get_bind_info)(const FunctionData *bind_data); |
194 | |
195 | typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data, |
196 | const GlobalTableFunctionState *global_state); |
197 | typedef void (*table_function_dependency_t)(DependencyList &dependencies, const FunctionData *bind_data); |
198 | typedef unique_ptr<NodeStatistics> (*table_function_cardinality_t)(ClientContext &context, |
199 | const FunctionData *bind_data); |
200 | typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get, |
201 | FunctionData *bind_data, |
202 | vector<unique_ptr<Expression>> &filters); |
203 | typedef string (*table_function_to_string_t)(const FunctionData *bind_data); |
204 | |
205 | typedef void (*table_function_serialize_t)(FieldWriter &writer, const FunctionData *bind_data, |
206 | const TableFunction &function); |
207 | typedef unique_ptr<FunctionData> (*table_function_deserialize_t)(PlanDeserializationState &context, FieldReader &reader, |
208 | TableFunction &function); |
209 | |
210 | class TableFunction : public SimpleNamedParameterFunction { |
211 | public: |
212 | DUCKDB_API |
213 | TableFunction(string name, vector<LogicalType> arguments, table_function_t function, |
214 | table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr, |
215 | table_function_init_local_t init_local = nullptr); |
216 | DUCKDB_API |
217 | TableFunction(const vector<LogicalType> &arguments, table_function_t function, table_function_bind_t bind = nullptr, |
218 | table_function_init_global_t init_global = nullptr, table_function_init_local_t init_local = nullptr); |
219 | DUCKDB_API TableFunction(); |
220 | |
221 | //! Bind function |
222 | //! This function is used for determining the return type of a table producing function and returning bind data |
223 | //! The returned FunctionData object should be constant and should not be changed during execution. |
224 | table_function_bind_t bind; |
225 | //! (Optional) Bind replace function |
226 | //! This function is called before the regular bind function. It allows returning a TableRef will be used to |
227 | //! to generate a logical plan that replaces the LogicalGet of a regularly bound TableFunction. The BindReplace can |
228 | //! also return a nullptr to indicate a regular bind needs to be performed instead. |
229 | table_function_bind_replace_t bind_replace; |
230 | //! (Optional) global init function |
231 | //! Initialize the global operator state of the function. |
232 | //! The global operator state is used to keep track of the progress in the table function and is shared between |
233 | //! all threads working on the table function. |
234 | table_function_init_global_t init_global; |
235 | //! (Optional) local init function |
236 | //! Initialize the local operator state of the function. |
237 | //! The local operator state is used to keep track of the progress in the table function and is thread-local. |
238 | table_function_init_local_t init_local; |
239 | //! The main function |
240 | table_function_t function; |
241 | //! The table in-out function (if this is an in-out function) |
242 | table_in_out_function_t in_out_function; |
243 | //! The table in-out final function (if this is an in-out function) |
244 | table_in_out_function_final_t in_out_function_final; |
245 | //! (Optional) statistics function |
246 | //! Returns the statistics of a specified column |
247 | table_statistics_t statistics; |
248 | //! (Optional) dependency function |
249 | //! Sets up which catalog entries this table function depend on |
250 | table_function_dependency_t dependency; |
251 | //! (Optional) cardinality function |
252 | //! Returns the expected cardinality of this scan |
253 | table_function_cardinality_t cardinality; |
254 | //! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant |
255 | //! Any functions remaining in the expression list will be pushed as a regular filter after the scan |
256 | table_function_pushdown_complex_filter_t pushdown_complex_filter; |
257 | //! (Optional) function for rendering the operator to a string in profiling output |
258 | table_function_to_string_t to_string; |
259 | //! (Optional) return how much of the table we have scanned up to this point (% of the data) |
260 | table_function_progress_t table_scan_progress; |
261 | //! (Optional) returns the current batch index of the current scan operator |
262 | table_function_get_batch_index_t get_batch_index; |
263 | //! (Optional) returns the extra batch info, currently only used for the substrait extension |
264 | table_function_get_bind_info get_batch_info; |
265 | |
266 | table_function_serialize_t serialize; |
267 | table_function_deserialize_t deserialize; |
268 | bool verify_serialization = true; |
269 | |
270 | //! Whether or not the table function supports projection pushdown. If not supported a projection will be added |
271 | //! that filters out unused columns. |
272 | bool projection_pushdown; |
273 | //! Whether or not the table function supports filter pushdown. If not supported a filter will be added |
274 | //! that applies the table filter directly. |
275 | bool filter_pushdown; |
276 | //! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of |
277 | //! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all |
278 | bool filter_prune; |
279 | //! Additional function info, passed to the bind |
280 | shared_ptr<TableFunctionInfo> function_info; |
281 | |
282 | DUCKDB_API bool Equal(const TableFunction &rhs) const; |
283 | }; |
284 | |
285 | } // namespace duckdb |
286 | |