| 1 | //===----------------------------------------------------------------------===// |
| 2 | // DuckDB |
| 3 | // |
| 4 | // duckdb/function/table_function.hpp |
| 5 | // |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #pragma once |
| 10 | |
| 11 | #include "duckdb/common/enums/operator_result_type.hpp" |
| 12 | #include "duckdb/common/optional_ptr.hpp" |
| 13 | #include "duckdb/execution/execution_context.hpp" |
| 14 | #include "duckdb/function/function.hpp" |
| 15 | #include "duckdb/planner/bind_context.hpp" |
| 16 | #include "duckdb/planner/logical_operator.hpp" |
| 17 | #include "duckdb/storage/statistics/node_statistics.hpp" |
| 18 | |
| 19 | #include <functional> |
| 20 | |
| 21 | namespace duckdb { |
| 22 | |
| 23 | class BaseStatistics; |
| 24 | class DependencyList; |
| 25 | class LogicalGet; |
| 26 | class TableFilterSet; |
| 27 | |
| 28 | struct TableFunctionInfo { |
| 29 | DUCKDB_API virtual ~TableFunctionInfo(); |
| 30 | |
| 31 | template <class TARGET> |
| 32 | TARGET &Cast() { |
| 33 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
| 34 | return reinterpret_cast<TARGET &>(*this); |
| 35 | } |
| 36 | template <class TARGET> |
| 37 | const TARGET &Cast() const { |
| 38 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
| 39 | return reinterpret_cast<const TARGET &>(*this); |
| 40 | } |
| 41 | }; |
| 42 | |
| 43 | struct GlobalTableFunctionState { |
| 44 | public: |
| 45 | // value returned from MaxThreads when as many threads as possible should be used |
| 46 | constexpr static const int64_t MAX_THREADS = 999999999; |
| 47 | |
| 48 | public: |
| 49 | DUCKDB_API virtual ~GlobalTableFunctionState(); |
| 50 | |
| 51 | virtual idx_t MaxThreads() const { |
| 52 | return 1; |
| 53 | } |
| 54 | |
| 55 | template <class TARGET> |
| 56 | TARGET &Cast() { |
| 57 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
| 58 | return reinterpret_cast<TARGET &>(*this); |
| 59 | } |
| 60 | template <class TARGET> |
| 61 | const TARGET &Cast() const { |
| 62 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
| 63 | return reinterpret_cast<const TARGET &>(*this); |
| 64 | } |
| 65 | }; |
| 66 | |
| 67 | struct LocalTableFunctionState { |
| 68 | DUCKDB_API virtual ~LocalTableFunctionState(); |
| 69 | |
| 70 | template <class TARGET> |
| 71 | TARGET &Cast() { |
| 72 | D_ASSERT(dynamic_cast<TARGET *>(this)); |
| 73 | return reinterpret_cast<TARGET &>(*this); |
| 74 | } |
| 75 | template <class TARGET> |
| 76 | const TARGET &Cast() const { |
| 77 | D_ASSERT(dynamic_cast<const TARGET *>(this)); |
| 78 | return reinterpret_cast<const TARGET &>(*this); |
| 79 | } |
| 80 | }; |
| 81 | |
| 82 | struct TableFunctionBindInput { |
| 83 | TableFunctionBindInput(vector<Value> &inputs, named_parameter_map_t &named_parameters, |
| 84 | vector<LogicalType> &input_table_types, vector<string> &input_table_names, |
| 85 | optional_ptr<TableFunctionInfo> info) |
| 86 | : inputs(inputs), named_parameters(named_parameters), input_table_types(input_table_types), |
| 87 | input_table_names(input_table_names), info(info) { |
| 88 | } |
| 89 | |
| 90 | vector<Value> &inputs; |
| 91 | named_parameter_map_t &named_parameters; |
| 92 | vector<LogicalType> &input_table_types; |
| 93 | vector<string> &input_table_names; |
| 94 | optional_ptr<TableFunctionInfo> info; |
| 95 | }; |
| 96 | |
| 97 | struct TableFunctionInitInput { |
| 98 | TableFunctionInitInput(optional_ptr<const FunctionData> bind_data_p, const vector<column_t> &column_ids_p, |
| 99 | const vector<idx_t> &projection_ids_p, optional_ptr<TableFilterSet> filters_p) |
| 100 | : bind_data(bind_data_p), column_ids(column_ids_p), projection_ids(projection_ids_p), filters(filters_p) { |
| 101 | } |
| 102 | |
| 103 | optional_ptr<const FunctionData> bind_data; |
| 104 | const vector<column_t> &column_ids; |
| 105 | const vector<idx_t> projection_ids; |
| 106 | optional_ptr<TableFilterSet> filters; |
| 107 | |
| 108 | bool CanRemoveFilterColumns() const { |
| 109 | if (projection_ids.empty()) { |
| 110 | // Not set, can't remove filter columns |
| 111 | return false; |
| 112 | } else if (projection_ids.size() == column_ids.size()) { |
| 113 | // Filter column is used in remainder of plan, can't remove |
| 114 | return false; |
| 115 | } else { |
| 116 | // Less columns need to be projected out than that we scan |
| 117 | return true; |
| 118 | } |
| 119 | } |
| 120 | }; |
| 121 | |
| 122 | struct TableFunctionInput { |
| 123 | public: |
| 124 | TableFunctionInput(optional_ptr<const FunctionData> bind_data_p, |
| 125 | optional_ptr<LocalTableFunctionState> local_state_p, |
| 126 | optional_ptr<GlobalTableFunctionState> global_state_p) |
| 127 | : bind_data(bind_data_p), local_state(local_state_p), global_state(global_state_p) { |
| 128 | } |
| 129 | |
| 130 | public: |
| 131 | optional_ptr<const FunctionData> bind_data; |
| 132 | optional_ptr<LocalTableFunctionState> local_state; |
| 133 | optional_ptr<GlobalTableFunctionState> global_state; |
| 134 | }; |
| 135 | |
//! The kind of scan described by a BindInfo
enum ScanType {
	TABLE,
	PARQUET
};
| 137 | |
| 138 | struct BindInfo { |
| 139 | public: |
| 140 | explicit BindInfo(ScanType type_p) : type(type_p) {}; |
| 141 | unordered_map<string, Value> options; |
| 142 | ScanType type; |
| 143 | void InsertOption(const string &name, Value value) { |
| 144 | if (options.find(x: name) != options.end()) { |
| 145 | throw InternalException("This option already exists" ); |
| 146 | } |
| 147 | options[name] = std::move(value); |
| 148 | } |
| 149 | template <class T> |
| 150 | T GetOption(const string &name) { |
| 151 | if (options.find(x: name) == options.end()) { |
| 152 | throw InternalException("This option does not exist" ); |
| 153 | } |
| 154 | return options[name].GetValue<T>(); |
| 155 | } |
| 156 | template <class T> |
| 157 | vector<T> GetOptionList(const string &name) { |
| 158 | if (options.find(x: name) == options.end()) { |
| 159 | throw InternalException("This option does not exist" ); |
| 160 | } |
| 161 | auto option = options[name]; |
| 162 | if (option.type().id() != LogicalTypeId::LIST) { |
| 163 | throw InternalException("This option is not a list" ); |
| 164 | } |
| 165 | vector<T> result; |
| 166 | auto list_children = ListValue::GetChildren(value: option); |
| 167 | for (auto &child : list_children) { |
| 168 | result.emplace_back(child.GetValue<T>()); |
| 169 | } |
| 170 | return result; |
| 171 | } |
| 172 | }; |
| 173 | |
| 174 | typedef unique_ptr<FunctionData> (*table_function_bind_t)(ClientContext &context, TableFunctionBindInput &input, |
| 175 | vector<LogicalType> &return_types, vector<string> &names); |
| 176 | typedef unique_ptr<TableRef> (*table_function_bind_replace_t)(ClientContext &context, TableFunctionBindInput &input); |
| 177 | typedef unique_ptr<GlobalTableFunctionState> (*table_function_init_global_t)(ClientContext &context, |
| 178 | TableFunctionInitInput &input); |
| 179 | typedef unique_ptr<LocalTableFunctionState> (*table_function_init_local_t)(ExecutionContext &context, |
| 180 | TableFunctionInitInput &input, |
| 181 | GlobalTableFunctionState *global_state); |
| 182 | typedef unique_ptr<BaseStatistics> (*table_statistics_t)(ClientContext &context, const FunctionData *bind_data, |
| 183 | column_t column_index); |
| 184 | typedef void (*table_function_t)(ClientContext &context, TableFunctionInput &data, DataChunk &output); |
| 185 | typedef OperatorResultType (*table_in_out_function_t)(ExecutionContext &context, TableFunctionInput &data, |
| 186 | DataChunk &input, DataChunk &output); |
| 187 | typedef OperatorFinalizeResultType (*table_in_out_function_final_t)(ExecutionContext &context, TableFunctionInput &data, |
| 188 | DataChunk &output); |
| 189 | typedef idx_t (*table_function_get_batch_index_t)(ClientContext &context, const FunctionData *bind_data, |
| 190 | LocalTableFunctionState *local_state, |
| 191 | GlobalTableFunctionState *global_state); |
| 192 | |
| 193 | typedef BindInfo (*table_function_get_bind_info)(const FunctionData *bind_data); |
| 194 | |
| 195 | typedef double (*table_function_progress_t)(ClientContext &context, const FunctionData *bind_data, |
| 196 | const GlobalTableFunctionState *global_state); |
| 197 | typedef void (*table_function_dependency_t)(DependencyList &dependencies, const FunctionData *bind_data); |
| 198 | typedef unique_ptr<NodeStatistics> (*table_function_cardinality_t)(ClientContext &context, |
| 199 | const FunctionData *bind_data); |
| 200 | typedef void (*table_function_pushdown_complex_filter_t)(ClientContext &context, LogicalGet &get, |
| 201 | FunctionData *bind_data, |
| 202 | vector<unique_ptr<Expression>> &filters); |
| 203 | typedef string (*table_function_to_string_t)(const FunctionData *bind_data); |
| 204 | |
| 205 | typedef void (*table_function_serialize_t)(FieldWriter &writer, const FunctionData *bind_data, |
| 206 | const TableFunction &function); |
| 207 | typedef unique_ptr<FunctionData> (*table_function_deserialize_t)(PlanDeserializationState &context, FieldReader &reader, |
| 208 | TableFunction &function); |
| 209 | |
| 210 | class TableFunction : public SimpleNamedParameterFunction { |
| 211 | public: |
| 212 | DUCKDB_API |
| 213 | TableFunction(string name, vector<LogicalType> arguments, table_function_t function, |
| 214 | table_function_bind_t bind = nullptr, table_function_init_global_t init_global = nullptr, |
| 215 | table_function_init_local_t init_local = nullptr); |
| 216 | DUCKDB_API |
| 217 | TableFunction(const vector<LogicalType> &arguments, table_function_t function, table_function_bind_t bind = nullptr, |
| 218 | table_function_init_global_t init_global = nullptr, table_function_init_local_t init_local = nullptr); |
| 219 | DUCKDB_API TableFunction(); |
| 220 | |
| 221 | //! Bind function |
| 222 | //! This function is used for determining the return type of a table producing function and returning bind data |
| 223 | //! The returned FunctionData object should be constant and should not be changed during execution. |
| 224 | table_function_bind_t bind; |
| 225 | //! (Optional) Bind replace function |
| 226 | //! This function is called before the regular bind function. It allows returning a TableRef will be used to |
| 227 | //! to generate a logical plan that replaces the LogicalGet of a regularly bound TableFunction. The BindReplace can |
| 228 | //! also return a nullptr to indicate a regular bind needs to be performed instead. |
| 229 | table_function_bind_replace_t bind_replace; |
| 230 | //! (Optional) global init function |
| 231 | //! Initialize the global operator state of the function. |
| 232 | //! The global operator state is used to keep track of the progress in the table function and is shared between |
| 233 | //! all threads working on the table function. |
| 234 | table_function_init_global_t init_global; |
| 235 | //! (Optional) local init function |
| 236 | //! Initialize the local operator state of the function. |
| 237 | //! The local operator state is used to keep track of the progress in the table function and is thread-local. |
| 238 | table_function_init_local_t init_local; |
| 239 | //! The main function |
| 240 | table_function_t function; |
| 241 | //! The table in-out function (if this is an in-out function) |
| 242 | table_in_out_function_t in_out_function; |
| 243 | //! The table in-out final function (if this is an in-out function) |
| 244 | table_in_out_function_final_t in_out_function_final; |
| 245 | //! (Optional) statistics function |
| 246 | //! Returns the statistics of a specified column |
| 247 | table_statistics_t statistics; |
| 248 | //! (Optional) dependency function |
| 249 | //! Sets up which catalog entries this table function depend on |
| 250 | table_function_dependency_t dependency; |
| 251 | //! (Optional) cardinality function |
| 252 | //! Returns the expected cardinality of this scan |
| 253 | table_function_cardinality_t cardinality; |
| 254 | //! (Optional) pushdown a set of arbitrary filter expressions, rather than only simple comparisons with a constant |
| 255 | //! Any functions remaining in the expression list will be pushed as a regular filter after the scan |
| 256 | table_function_pushdown_complex_filter_t pushdown_complex_filter; |
| 257 | //! (Optional) function for rendering the operator to a string in profiling output |
| 258 | table_function_to_string_t to_string; |
| 259 | //! (Optional) return how much of the table we have scanned up to this point (% of the data) |
| 260 | table_function_progress_t table_scan_progress; |
| 261 | //! (Optional) returns the current batch index of the current scan operator |
| 262 | table_function_get_batch_index_t get_batch_index; |
| 263 | //! (Optional) returns the extra batch info, currently only used for the substrait extension |
| 264 | table_function_get_bind_info get_batch_info; |
| 265 | |
| 266 | table_function_serialize_t serialize; |
| 267 | table_function_deserialize_t deserialize; |
| 268 | bool verify_serialization = true; |
| 269 | |
| 270 | //! Whether or not the table function supports projection pushdown. If not supported a projection will be added |
| 271 | //! that filters out unused columns. |
| 272 | bool projection_pushdown; |
| 273 | //! Whether or not the table function supports filter pushdown. If not supported a filter will be added |
| 274 | //! that applies the table filter directly. |
| 275 | bool filter_pushdown; |
| 276 | //! Whether or not the table function can immediately prune out filter columns that are unused in the remainder of |
| 277 | //! the query plan, e.g., "SELECT i FROM tbl WHERE j = 42;" - j does not need to leave the table function at all |
| 278 | bool filter_prune; |
| 279 | //! Additional function info, passed to the bind |
| 280 | shared_ptr<TableFunctionInfo> function_info; |
| 281 | |
| 282 | DUCKDB_API bool Equal(const TableFunction &rhs) const; |
| 283 | }; |
| 284 | |
| 285 | } // namespace duckdb |
| 286 | |