| 1 | #pragma once | 
|---|
| 2 |  | 
|---|
| 3 | #include <memory> | 
|---|
| 4 |  | 
|---|
| 5 | #include "config_core.h" | 
|---|
| 6 | #include <Core/Names.h> | 
|---|
| 7 | #include <Core/Block.h> | 
|---|
| 8 | #include <Core/ColumnNumbers.h> | 
|---|
| 9 | #include <DataTypes/IDataType.h> | 
|---|
| 10 |  | 
|---|
| 11 | /// This file contains user interface for functions. | 
|---|
| 12 | /// For developer interface (in case you need to implement a new function) see IFunctionImpl.h | 
|---|
| 13 |  | 
|---|
/// Forward declarations of LLVM types, so that this generic header
/// does not have to include LLVM itself.
namespace llvm
{
    class LLVMContext;
    class Value;
    class IRBuilderBase;
}
|---|
| 20 |  | 
|---|
| 21 |  | 
|---|
| 22 | namespace DB | 
|---|
| 23 | { | 
|---|
| 24 |  | 
|---|
/// Declarations only (`extern`): the error code values themselves are defined elsewhere.
namespace ErrorCodes
{
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int NOT_IMPLEMENTED;
    extern const int LOGICAL_ERROR;
}

/// Forward declaration; Field is only used by reference in this header.
class Field;
|---|
| 33 |  | 
|---|
| 34 | /// The simplest executable object. | 
|---|
| 35 | /// Motivation: | 
|---|
| 36 | ///  * Prepare something heavy once before main execution loop instead of doing it for each block. | 
|---|
| 37 | ///  * Provide const interface for IFunctionBase (later). | 
|---|
| 38 | ///  * Create one executable function per thread to use caches without synchronization (later). | 
|---|
| 39 | class IExecutableFunction | 
|---|
| 40 | { | 
|---|
| 41 | public: | 
|---|
| 42 | virtual ~IExecutableFunction() = default; | 
|---|
| 43 |  | 
|---|
| 44 | /// Get the main function name. | 
|---|
| 45 | virtual String getName() const = 0; | 
|---|
| 46 |  | 
|---|
| 47 | virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run) = 0; | 
|---|
| 48 |  | 
|---|
| 49 | virtual void createLowCardinalityResultCache(size_t cache_size) = 0; | 
|---|
| 50 | }; | 
|---|
| 51 |  | 
|---|
| 52 | using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>; | 
|---|
| 53 |  | 
|---|
| 54 |  | 
|---|
| 55 | using ValuePlaceholders = std::vector<std::function<llvm::Value * ()>>; | 
|---|
| 56 |  | 
|---|
| 57 | /// Function with known arguments and return type (when the specific overload was chosen). | 
|---|
| 58 | /// It is also the point where all function-specific properties are known. | 
|---|
| 59 | class IFunctionBase | 
|---|
| 60 | { | 
|---|
| 61 | public: | 
|---|
| 62 | virtual ~IFunctionBase() = default; | 
|---|
| 63 |  | 
|---|
| 64 | /// Get the main function name. | 
|---|
| 65 | virtual String getName() const = 0; | 
|---|
| 66 |  | 
|---|
| 67 | virtual const DataTypes & getArgumentTypes() const = 0; | 
|---|
| 68 | virtual const DataTypePtr & getReturnType() const = 0; | 
|---|
| 69 |  | 
|---|
| 70 | /// Do preparations and return executable. | 
|---|
| 71 | /// sample_block should contain data types of arguments and values of constants, if relevant. | 
|---|
| 72 | virtual ExecutableFunctionPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const = 0; | 
|---|
| 73 |  | 
|---|
| 74 | /// TODO: make const | 
|---|
| 75 | virtual void execute(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count, bool dry_run = false) | 
|---|
| 76 | { | 
|---|
| 77 | return prepare(block, arguments, result)->execute(block, arguments, result, input_rows_count, dry_run); | 
|---|
| 78 | } | 
|---|
| 79 |  | 
|---|
| 80 | #if USE_EMBEDDED_COMPILER | 
|---|
| 81 |  | 
|---|
| 82 | virtual bool isCompilable() const { return false; } | 
|---|
| 83 |  | 
|---|
| 84 | /** Produce LLVM IR code that operates on scalar values. See `toNativeType` in DataTypes/Native.h | 
|---|
| 85 | * for supported value types and how they map to LLVM types. | 
|---|
| 86 | * | 
|---|
| 87 | * NOTE: the builder is actually guaranteed to be exactly `llvm::IRBuilder<>`, so you may safely | 
|---|
| 88 | *       downcast it to that type. This method is specified with `IRBuilderBase` because forward-declaring | 
|---|
| 89 | *       templates with default arguments is impossible and including LLVM in such a generic header | 
|---|
| 90 | *       as this one is a major pain. | 
|---|
| 91 | */ | 
|---|
| 92 | virtual llvm::Value * compile(llvm::IRBuilderBase & /*builder*/, ValuePlaceholders /*values*/) const | 
|---|
| 93 | { | 
|---|
| 94 | throw Exception(getName() + " is not JIT-compilable", ErrorCodes::NOT_IMPLEMENTED); | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | #endif | 
|---|
| 98 |  | 
|---|
| 99 | virtual bool isStateful() const { return false; } | 
|---|
| 100 |  | 
|---|
| 101 | /** Should we evaluate this function while constant folding, if arguments are constants? | 
|---|
| 102 | * Usually this is true. Notable counterexample is function 'sleep'. | 
|---|
| 103 | * If we will call it during query analysis, we will sleep extra amount of time. | 
|---|
| 104 | */ | 
|---|
| 105 | virtual bool isSuitableForConstantFolding() const { return true; } | 
|---|
| 106 |  | 
|---|
| 107 | /** Some functions like ignore(...) or toTypeName(...) always return constant result which doesn't depend on arguments. | 
|---|
| 108 | * In this case we can calculate result and assume that it's constant in stream header. | 
|---|
| 109 | * There is no need to implement function if it has zero arguments. | 
|---|
| 110 | * Must return ColumnConst with single row or nullptr. | 
|---|
| 111 | */ | 
|---|
| 112 | virtual ColumnPtr getResultIfAlwaysReturnsConstantAndHasArguments(const Block & /*block*/, const ColumnNumbers & /*arguments*/) const { return nullptr; } | 
|---|
| 113 |  | 
|---|
| 114 | /** Function is called "injective" if it returns different result for different values of arguments. | 
|---|
| 115 | * Example: hex, negate, tuple... | 
|---|
| 116 | * | 
|---|
| 117 | * Function could be injective with some arguments fixed to some constant values. | 
|---|
| 118 | * Examples: | 
|---|
| 119 | *  plus(const, x); | 
|---|
| 120 | *  multiply(const, x) where x is an integer and constant is not divisible by two; | 
|---|
| 121 | *  concat(x, 'const'); | 
|---|
| 122 | *  concat(x, 'const', y) where const contain at least one non-numeric character; | 
|---|
| 123 | *  concat with FixedString | 
|---|
| 124 | *  dictGet... functions takes name of dictionary as its argument, | 
|---|
| 125 | *   and some dictionaries could be explicitly defined as injective. | 
|---|
| 126 | * | 
|---|
| 127 | * It could be used, for example, to remove useless function applications from GROUP BY. | 
|---|
| 128 | * | 
|---|
| 129 | * Sometimes, function is not really injective, but considered as injective, for purpose of query optimization. | 
|---|
| 130 | * For example, toString function is not injective for Float64 data type, | 
|---|
| 131 | *  as it returns 'nan' for many different representation of NaNs. | 
|---|
| 132 | * But we assume, that it is injective. This could be documented as implementation-specific behaviour. | 
|---|
| 133 | * | 
|---|
| 134 | * sample_block should contain data types of arguments and values of constants, if relevant. | 
|---|
| 135 | */ | 
|---|
| 136 | virtual bool isInjective(const Block & /*sample_block*/) { return false; } | 
|---|
| 137 |  | 
|---|
| 138 | /** Function is called "deterministic", if it returns same result for same values of arguments. | 
|---|
| 139 | * Most of functions are deterministic. Notable counterexample is rand(). | 
|---|
| 140 | * Sometimes, functions are "deterministic" in scope of single query | 
|---|
| 141 | *  (even for distributed query), but not deterministic it general. | 
|---|
| 142 | * Example: now(). Another example: functions that work with periodically updated dictionaries. | 
|---|
| 143 | */ | 
|---|
| 144 |  | 
|---|
| 145 | virtual bool isDeterministic() const = 0; | 
|---|
| 146 |  | 
|---|
| 147 | virtual bool isDeterministicInScopeOfQuery() const = 0; | 
|---|
| 148 |  | 
|---|
| 149 | /** Lets you know if the function is monotonic in a range of values. | 
|---|
| 150 | * This is used to work with the index in a sorted chunk of data. | 
|---|
| 151 | * And allows to use the index not only when it is written, for example `date >= const`, but also, for example, `toMonth(date) >= 11`. | 
|---|
| 152 | * All this is considered only for functions of one argument. | 
|---|
| 153 | */ | 
|---|
| 154 | virtual bool hasInformationAboutMonotonicity() const { return false; } | 
|---|
| 155 |  | 
|---|
| 156 | /// The property of monotonicity for a certain range. | 
|---|
| 157 | struct Monotonicity | 
|---|
| 158 | { | 
|---|
| 159 | bool is_monotonic = false;    /// Is the function monotonous (nondecreasing or nonincreasing). | 
|---|
| 160 | bool is_positive = true;    /// true if the function is nondecreasing, false, if notincreasing. If is_monotonic = false, then it does not matter. | 
|---|
| 161 | bool is_always_monotonic = false; /// Is true if function is monotonic on the whole input range I | 
|---|
| 162 |  | 
|---|
| 163 | Monotonicity(bool is_monotonic_ = false, bool is_positive_ = true, bool is_always_monotonic_ = false) | 
|---|
| 164 | : is_monotonic(is_monotonic_), is_positive(is_positive_), is_always_monotonic(is_always_monotonic_) {} | 
|---|
| 165 | }; | 
|---|
| 166 |  | 
|---|
| 167 | /** Get information about monotonicity on a range of values. Call only if hasInformationAboutMonotonicity. | 
|---|
| 168 | * NULL can be passed as one of the arguments. This means that the corresponding range is unlimited on the left or on the right. | 
|---|
| 169 | */ | 
|---|
| 170 | virtual Monotonicity getMonotonicityForRange(const IDataType & /*type*/, const Field & /*left*/, const Field & /*right*/) const | 
|---|
| 171 | { | 
|---|
| 172 | throw Exception( "Function "+ getName() + " has no information about its monotonicity.", ErrorCodes::NOT_IMPLEMENTED); | 
|---|
| 173 | } | 
|---|
| 174 | }; | 
|---|
| 175 |  | 
|---|
| 176 | using FunctionBasePtr = std::shared_ptr<IFunctionBase>; | 
|---|
| 177 |  | 
|---|
| 178 |  | 
|---|
| 179 | /// Creates IFunctionBase from argument types list (chooses one function overload). | 
|---|
| 180 | class IFunctionOverloadResolver | 
|---|
| 181 | { | 
|---|
| 182 | public: | 
|---|
| 183 | virtual ~IFunctionOverloadResolver() = default; | 
|---|
| 184 |  | 
|---|
| 185 | /// Get the main function name. | 
|---|
| 186 | virtual String getName() const = 0; | 
|---|
| 187 |  | 
|---|
| 188 | /// See the comment for the same method in IFunctionBase | 
|---|
| 189 | virtual bool isDeterministic() const = 0; | 
|---|
| 190 | virtual bool isDeterministicInScopeOfQuery() const = 0; | 
|---|
| 191 |  | 
|---|
| 192 | /// Override and return true if function needs to depend on the state of the data. | 
|---|
| 193 | virtual bool isStateful() const = 0; | 
|---|
| 194 |  | 
|---|
| 195 | /// Override and return true if function could take different number of arguments. | 
|---|
| 196 | virtual bool isVariadic() const = 0; | 
|---|
| 197 |  | 
|---|
| 198 | /// For non-variadic functions, return number of arguments; otherwise return zero (that should be ignored). | 
|---|
| 199 | virtual size_t getNumberOfArguments() const = 0; | 
|---|
| 200 |  | 
|---|
| 201 | /// Throw if number of arguments is incorrect. | 
|---|
| 202 | virtual void checkNumberOfArguments(size_t number_of_arguments) const = 0; | 
|---|
| 203 |  | 
|---|
| 204 | /// Check if arguments are correct and returns IFunctionBase. | 
|---|
| 205 | virtual FunctionBasePtr build(const ColumnsWithTypeAndName & arguments) const = 0; | 
|---|
| 206 |  | 
|---|
| 207 | /// For higher-order functions (functions, that have lambda expression as at least one argument). | 
|---|
| 208 | /// You pass data types with empty DataTypeFunction for lambda arguments. | 
|---|
| 209 | /// This function will replace it with DataTypeFunction containing actual types. | 
|---|
| 210 | virtual void getLambdaArgumentTypes(DataTypes & arguments) const = 0; | 
|---|
| 211 |  | 
|---|
| 212 | /// Returns indexes of arguments, that must be ColumnConst | 
|---|
| 213 | virtual ColumnNumbers getArgumentsThatAreAlwaysConstant() const = 0; | 
|---|
| 214 | /// Returns indexes if arguments, that can be Nullable without making result of function Nullable | 
|---|
| 215 | /// (for functions like isNull(x)) | 
|---|
| 216 | virtual ColumnNumbers getArgumentsThatDontImplyNullableReturnType(size_t number_of_arguments) const = 0; | 
|---|
| 217 | }; | 
|---|
| 218 |  | 
|---|
| 219 | using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>; | 
|---|
| 220 |  | 
|---|
| 221 |  | 
|---|
| 222 | /** Return ColumnNullable of src, with null map as OR-ed null maps of args columns in blocks. | 
|---|
| 223 | * Or ColumnConst(ColumnNullable) if the result is always NULL or if the result is constant and always not NULL. | 
|---|
| 224 | */ | 
|---|
| 225 | ColumnPtr wrapInNullable(const ColumnPtr & src, const Block & block, const ColumnNumbers & args, size_t result, size_t input_rows_count); | 
|---|
| 226 |  | 
|---|
| 227 | } | 
|---|
| 228 |  | 
|---|