1 | #pragma once |
2 | |
#include <functional>
#include <memory>
#include <vector>

#include "config_core.h"
#include <Core/Names.h>
#include <Core/Block.h>
#include <Core/ColumnNumbers.h>
#include <DataTypes/IDataType.h>
10 | |
11 | /// This file contains user interface for functions. |
12 | /// For developer interface (in case you need to implement a new function) see IFunctionImpl.h |
13 | |
/// Forward declarations of LLVM types, so that this generic header can mention them
/// without including the LLVM headers.
namespace llvm
{
    class IRBuilderBase;
    class LLVMContext;
    class Value;
}
20 | |
21 | |
22 | namespace DB |
23 | { |
24 | |
/// Error code constants. They are only declared here (`extern`); the definitions live elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
}
31 | |
/// Forward declaration: this header only refers to Field by const reference.
class Field;
33 | |
34 | /// The simplest executable object. |
35 | /// Motivation: |
36 | /// * Prepare something heavy once before main execution loop instead of doing it for each block. |
37 | /// * Provide const interface for IFunctionBase (later). |
38 | /// * Create one executable function per thread to use caches without synchronization (later). |
39 | class IExecutableFunction |
40 | { |
41 | public: |
42 | virtual ~IExecutableFunction() = default; |
43 | |
44 | /// Get the main function name. |
45 | virtual String getName() const = 0; |
46 | |
47 | virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) = 0; |
48 | |
49 | virtual void createLowCardinalityResultCache(size_t cache_size) = 0; |
50 | }; |
51 | |
52 | using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>; |
53 | |
54 | |
55 | using ValuePlaceholders = std::vector<std::function<llvm::Value * ()>>; |
56 | |
57 | /// Function with known arguments and return type (when the specific overload was chosen). |
58 | /// It is also the point where all function-specific properties are known. |
59 | class IFunctionBase |
60 | { |
61 | public: |
62 | virtual ~IFunctionBase() = default; |
63 | |
64 | /// Get the main function name. |
65 | virtual String getName() const = 0; |
66 | |
67 | virtual const DataTypes & getArgumentTypes() const = 0; |
68 | virtual const DataTypePtr & getReturnType() const = 0; |
69 | |
70 | /// Do preparations and return executable. |
71 | /// sample_block should contain data types of arguments and values of constants, if relevant. |
72 | virtual ExecutableFunctionPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const = 0; |
73 | |
74 | /// TODO: make const |
75 | virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run = false) |
76 | { |
77 | return prepare(block, arguments, result)->execute(block, arguments, result, input_rows_count, dry_run); |
78 | } |
79 | |
80 | #if USE_EMBEDDED_COMPILER |
81 | |
82 | virtual bool isCompilable() const { return false; } |
83 | |
84 | /** Produce LLVM IR code that operates on scalar values. See `toNativeType` in DataTypes/Native.h |
85 | * for supported value types and how they map to LLVM types. |
86 | * |
87 | * NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely |
88 | * downcast it to that type. This method is specified with `IRBuilderBase` because forward-declaring |
89 | * templates with default arguments is impossible and including LLVM in such a generic header |
90 | * as this one is a major pain. |
91 | */ |
92 | virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const |
93 | { |
94 | throw Exception(getName() + " is not JIT-compilable" , ErrorCodes::NOT_IMPLEMENTED); |
95 | } |
96 | |
97 | #endif |
98 | |
99 | virtual bool isStateful() const { return false; } |
100 | |
101 | /** Should we evaluate this function while constant folding, if arguments are constants? |
102 | * Usually this is true. Notable counterexample is function 'sleep'. |
103 | * If we will call it during query analysis, we will sleep extra amount of time. |
104 | */ |
105 | virtual bool isSuitableForConstantFolding() const { return true; } |
106 | |
107 | /** Some functions like ignore(...) or toTypeName(...) always return constant result which doesn't depend on arguments. |
108 | * In this case we can calculate result and assume that it's constant in stream header. |
109 | * There is no need to implement function if it has zero arguments. |
110 | * Must return ColumnConst with single row or nullptr. |
111 | */ |
112 | virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; } |
113 | |
114 | /** Function is called "injective" if it returns different result for different values of arguments. |
115 | * Example: hex, negate, tuple... |
116 | * |
117 | * Function could be injective with some arguments fixed to some constant values. |
118 | * Examples: |
119 | * plus(const, x); |
120 | * multiply(const, x) where x is an integer and constant is not divisible by two; |
121 | * concat(x, 'const'); |
122 | * concat(x, 'const', y) where const contain at least one non-numeric character; |
123 | * concat with FixedString |
124 | * dictGet... functions takes name of dictionary as its argument, |
125 | * and some dictionaries could be explicitly defined as injective. |
126 | * |
127 | * It could be used, for example, to remove useless function applications from GROUP BY. |
128 | * |
129 | * Sometimes, function is not really injective, but considered as injective, for purpose of query optimization. |
130 | * For example, toString function is not injective for Float64 data type, |
131 | * as it returns 'nan' for many different representation of NaNs. |
132 | * But we assume, that it is injective. This could be documented as implementation-specific behaviour. |
133 | * |
134 | * sample_block should contain data types of arguments and values of constants, if relevant. |
135 | */ |
136 | virtual bool isInjective(const Block & /*sample_block*/) { return false; } |
137 | |
138 | /** Function is called "deterministic", if it returns same result for same values of arguments. |
139 | * Most of functions are deterministic. Notable counterexample is rand(). |
140 | * Sometimes, functions are "deterministic" in scope of single query |
141 | * (even for distributed query), but not deterministic it general. |
142 | * Example: now(). Another example: functions that work with periodically updated dictionaries. |
143 | */ |
144 | |
145 | virtual bool isDeterministic() const = 0; |
146 | |
147 | virtual bool isDeterministicInScopeOfQuery() const = 0; |
148 | |
149 | /** Lets you know if the function is monotonic in a range of values. |
150 | * This is used to work with the index in a sorted chunk of data. |
151 | * And allows to use the index not only when it is written, for example `date >= const`, but also, for example, `toMonth(date) >= 11`. |
152 | * All this is considered only for functions of one argument. |
153 | */ |
154 | virtual bool hasInformationAboutMonotonicity() const { return false; } |
155 | |
156 | /// The property of monotonicity for a certain range. |
157 | struct Monotonicity |
158 | { |
159 | bool is_monotonic = false; /// Is the function monotonous (nondecreasing or nonincreasing). |
160 | bool is_positive = true; /// true if the function is nondecreasing, false, if notincreasing. If is_monotonic = false, then it does not matter. |
161 | bool is_always_monotonic = false; /// Is true if function is monotonic on the whole input range I |
162 | |
163 | Monotonicity(bool is_monotonic_ = false, bool is_positive_ = true, bool is_always_monotonic_ = false) |
164 | : is_monotonic(is_monotonic_), is_positive(is_positive_), is_always_monotonic(is_always_monotonic_) {} |
165 | }; |
166 | |
167 | /** Get information about monotonicity on a range of values. Call only if hasInformationAboutMonotonicity. |
168 | * NULL can be passed as one of the arguments. This means that the corresponding range is unlimited on the left or on the right. |
169 | */ |
170 | virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const |
171 | { |
172 | throw Exception("Function " + getName() + " has no information about its monotonicity." , ErrorCodes::NOT_IMPLEMENTED); |
173 | } |
174 | }; |
175 | |
176 | using FunctionBasePtr = std::shared_ptr<IFunctionBase>; |
177 | |
178 | |
179 | /// Creates IFunctionBase from argument types list (chooses one function overload). |
180 | class IFunctionOverloadResolver |
181 | { |
182 | public: |
183 | virtual ~IFunctionOverloadResolver() = default; |
184 | |
185 | /// Get the main function name. |
186 | virtual String getName() const = 0; |
187 | |
188 | /// See the comment for the same method in IFunctionBase |
189 | virtual bool isDeterministic() const = 0; |
190 | virtual bool isDeterministicInScopeOfQuery() const = 0; |
191 | |
192 | /// Override and return true if function needs to depend on the state of the data. |
193 | virtual bool isStateful() const = 0; |
194 | |
195 | /// Override and return true if function could take different number of arguments. |
196 | virtual bool isVariadic() const = 0; |
197 | |
198 | /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). |
199 | virtual size_t getNumberOfArguments() const = 0; |
200 | |
201 | /// Throw if number of arguments is incorrect. |
202 | virtual void checkNumberOfArguments(size_t number_of_arguments) const = 0; |
203 | |
204 | /// Check if arguments are correct and returns IFunctionBase. |
205 | virtual FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const = 0; |
206 | |
207 | /// For higher-order functions (functions, that have lambda expression as at least one argument). |
208 | /// You pass data types with empty DataTypeFunction for lambda arguments. |
209 | /// This function will replace it with DataTypeFunction containing actual types. |
210 | virtual void getLambdaArgumentTypes(DataTypes & arguments) const = 0; |
211 | |
212 | /// Returns indexes of arguments, that must be ColumnConst |
213 | virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const = 0; |
214 | /// Returns indexes if arguments, that can be Nullable without making result of function Nullable |
215 | /// (for functions like isNull(x)) |
216 | virtual ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const = 0; |
217 | }; |
218 | |
219 | using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>; |
220 | |
221 | |
222 | /** Return ColumnNullable of src, with null map as OR-ed null maps of args columns in blocks. |
223 | * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. |
224 | */ |
225 | ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count); |
226 | |
227 | } |
228 | |