| 1 | #pragma once |
| 2 | |
| 3 | #include <Core/Block.h> |
| 4 | #include <Core/ColumnWithTypeAndName.h> |
| 5 | #include <Core/Names.h> |
| 6 | #include <Core/Settings.h> |
| 7 | #include <Interpreters/Context.h> |
| 8 | #include <Common/SipHash.h> |
| 9 | #include "config_core.h" |
| 10 | #include <unordered_map> |
| 11 | #include <unordered_set> |
| 12 | #include <Parsers/ASTTablesInSelectQuery.h> |
| 13 | |
| 14 | |
| 15 | namespace DB |
| 16 | { |
| 17 | |
| 18 | namespace ErrorCodes |
| 19 | { |
| 20 | extern const int LOGICAL_ERROR; |
| 21 | } |
| 22 | |
| 23 | class AnalyzedJoin; |
| 24 | class IJoin; |
| 25 | using JoinPtr = std::shared_ptr<IJoin>; |
| 26 | |
| 27 | class IExecutableFunction; |
| 28 | using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>; |
| 29 | |
| 30 | class IFunctionBase; |
| 31 | using FunctionBasePtr = std::shared_ptr<IFunctionBase>; |
| 32 | |
| 33 | class IFunctionOverloadResolver; |
| 34 | using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>; |
| 35 | |
| 36 | class IDataType; |
| 37 | using DataTypePtr = std::shared_ptr<const IDataType>; |
| 38 | |
| 39 | class ExpressionActions; |
| 40 | |
| 41 | /** Action on the block. |
| 42 | */ |
| 43 | struct ExpressionAction |
| 44 | { |
| 45 | private: |
| 46 | using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; |
| 47 | public: |
| 48 | enum Type |
| 49 | { |
| 50 | ADD_COLUMN, |
| 51 | REMOVE_COLUMN, |
| 52 | COPY_COLUMN, |
| 53 | |
| 54 | APPLY_FUNCTION, |
| 55 | |
| 56 | /** Replaces the specified columns with arrays into columns with elements. |
| 57 | * Duplicates the values in the remaining columns by the number of elements in the arrays. |
| 58 | * Arrays must be parallel (have the same lengths). |
| 59 | */ |
| 60 | ARRAY_JOIN, |
| 61 | |
| 62 | JOIN, |
| 63 | |
| 64 | /// Reorder and rename the columns, delete the extra ones. The same column names are allowed in the result. |
| 65 | PROJECT, |
| 66 | /// Add columns with alias names. This columns are the same as non-aliased. PROJECT columns if you need to modify them. |
| 67 | ADD_ALIASES, |
| 68 | }; |
| 69 | |
| 70 | Type type{}; |
| 71 | |
| 72 | /// For ADD/REMOVE/COPY_COLUMN. |
| 73 | std::string source_name; |
| 74 | std::string result_name; |
| 75 | DataTypePtr result_type; |
| 76 | |
| 77 | /// If COPY_COLUMN can replace the result column. |
| 78 | bool can_replace = false; |
| 79 | |
| 80 | /// For ADD_COLUMN. |
| 81 | ColumnPtr added_column; |
| 82 | |
| 83 | /// For APPLY_FUNCTION and LEFT ARRAY JOIN. |
| 84 | /// OverloadResolver is used before action was added to ExpressionActions (when we don't know types of arguments). |
| 85 | FunctionOverloadResolverPtr function_builder; |
| 86 | |
| 87 | /// For unaligned [LEFT] ARRAY JOIN |
| 88 | FunctionOverloadResolverPtr function_length; |
| 89 | FunctionOverloadResolverPtr function_greatest; |
| 90 | FunctionOverloadResolverPtr function_arrayResize; |
| 91 | |
| 92 | /// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity. |
| 93 | FunctionBasePtr function_base; |
| 94 | /// Prepared function which is used in function execution. |
| 95 | ExecutableFunctionPtr function; |
| 96 | Names argument_names; |
| 97 | bool is_function_compiled = false; |
| 98 | |
| 99 | /// For ARRAY_JOIN |
| 100 | NameSet array_joined_columns; |
| 101 | bool array_join_is_left = false; |
| 102 | bool unaligned_array_join = false; |
| 103 | |
| 104 | /// For JOIN |
| 105 | std::shared_ptr<const AnalyzedJoin> table_join; |
| 106 | JoinPtr join; |
| 107 | |
| 108 | /// For PROJECT. |
| 109 | NamesWithAliases projection; |
| 110 | |
| 111 | /// If result_name_ == "", as name "function_name(arguments separated by commas) is used". |
| 112 | static ExpressionAction applyFunction( |
| 113 | const FunctionOverloadResolverPtr & function_, const std::vector<std::string> & argument_names_, std::string result_name_ = "" ); |
| 114 | |
| 115 | static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_); |
| 116 | static ExpressionAction removeColumn(const std::string & removed_name); |
| 117 | static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name, bool can_replace = false); |
| 118 | static ExpressionAction project(const NamesWithAliases & projected_columns_); |
| 119 | static ExpressionAction project(const Names & projected_columns_); |
| 120 | static ExpressionAction addAliases(const NamesWithAliases & aliased_columns_); |
| 121 | static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context); |
| 122 | static ExpressionAction ordinaryJoin(std::shared_ptr<AnalyzedJoin> table_join, JoinPtr join); |
| 123 | |
| 124 | /// Which columns necessary to perform this action. |
| 125 | Names getNeededColumns() const; |
| 126 | |
| 127 | std::string toString() const; |
| 128 | |
| 129 | bool operator==(const ExpressionAction & other) const; |
| 130 | |
| 131 | struct ActionHash |
| 132 | { |
| 133 | UInt128 operator()(const ExpressionAction & action) const; |
| 134 | }; |
| 135 | |
| 136 | private: |
| 137 | friend class ExpressionActions; |
| 138 | |
| 139 | void prepare(Block & sample_block, const Settings & settings, NameSet & names_not_for_constant_folding); |
| 140 | void execute(Block & block, bool dry_run) const; |
| 141 | void executeOnTotals(Block & block) const; |
| 142 | }; |
| 143 | |
| 144 | |
| 145 | /** Contains a sequence of actions on the block. |
| 146 | */ |
| 147 | class ExpressionActions |
| 148 | { |
| 149 | public: |
| 150 | using Actions = std::vector<ExpressionAction>; |
| 151 | |
| 152 | ExpressionActions(const NamesAndTypesList & input_columns_, const Context & context_) |
| 153 | : input_columns(input_columns_), settings(context_.getSettingsRef()) |
| 154 | { |
| 155 | for (const auto & input_elem : input_columns) |
| 156 | sample_block.insert(ColumnWithTypeAndName(nullptr, input_elem.type, input_elem.name)); |
| 157 | |
| 158 | #if USE_EMBEDDED_COMPILER |
| 159 | compilation_cache = context_.getCompiledExpressionCache(); |
| 160 | #endif |
| 161 | } |
| 162 | |
| 163 | /// For constant columns the columns themselves can be contained in `input_columns_`. |
| 164 | ExpressionActions(const ColumnsWithTypeAndName & input_columns_, const Context & context_) |
| 165 | : settings(context_.getSettingsRef()) |
| 166 | { |
| 167 | for (const auto & input_elem : input_columns_) |
| 168 | { |
| 169 | input_columns.emplace_back(input_elem.name, input_elem.type); |
| 170 | sample_block.insert(input_elem); |
| 171 | } |
| 172 | #if USE_EMBEDDED_COMPILER |
| 173 | compilation_cache = context_.getCompiledExpressionCache(); |
| 174 | #endif |
| 175 | } |
| 176 | |
| 177 | /// Add the input column. |
| 178 | /// The name of the column must not match the names of the intermediate columns that occur when evaluating the expression. |
| 179 | /// The expression must not have any PROJECT actions. |
| 180 | void addInput(const ColumnWithTypeAndName & column); |
| 181 | void addInput(const NameAndTypePair & column); |
| 182 | |
| 183 | void add(const ExpressionAction & action); |
| 184 | |
| 185 | /// Adds new column names to out_new_columns (formed as a result of the added action). |
| 186 | void add(const ExpressionAction & action, Names & out_new_columns); |
| 187 | |
| 188 | /// Adds to the beginning the removal of all extra columns. |
| 189 | void prependProjectInput(); |
| 190 | |
| 191 | /// Add the specified ARRAY JOIN action to the beginning. Change the appropriate input types to arrays. |
| 192 | /// If there are unknown columns in the ARRAY JOIN list, take their types from sample_block, and immediately after ARRAY JOIN remove them. |
| 193 | void prependArrayJoin(const ExpressionAction & action, const Block & sample_block_before); |
| 194 | |
| 195 | /// If the last action is ARRAY JOIN, and it does not affect the columns from required_columns, discard and return it. |
| 196 | /// Change the corresponding output types to arrays. |
| 197 | bool popUnusedArrayJoin(const Names & required_columns, ExpressionAction & out_action); |
| 198 | |
| 199 | /// - Adds actions to delete all but the specified columns. |
| 200 | /// - Removes unused input columns. |
| 201 | /// - Can somehow optimize the expression. |
| 202 | /// - Does not reorder the columns. |
| 203 | /// - Does not remove "unexpected" columns (for example, added by functions). |
| 204 | /// - If output_columns is empty, leaves one arbitrary column (so that the number of rows in the block is not lost). |
| 205 | void finalize(const Names & output_columns); |
| 206 | |
| 207 | const Actions & getActions() const { return actions; } |
| 208 | |
| 209 | /// Get a list of input columns. |
| 210 | Names getRequiredColumns() const |
| 211 | { |
| 212 | Names names; |
| 213 | for (NamesAndTypesList::const_iterator it = input_columns.begin(); it != input_columns.end(); ++it) |
| 214 | names.push_back(it->name); |
| 215 | return names; |
| 216 | } |
| 217 | |
| 218 | const NamesAndTypesList & getRequiredColumnsWithTypes() const { return input_columns; } |
| 219 | |
| 220 | /// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns. |
| 221 | void execute(Block & block, bool dry_run = false) const; |
| 222 | |
| 223 | /// Check if joined subquery has totals. |
| 224 | bool hasTotalsInJoin() const; |
| 225 | |
| 226 | /** Execute the expression on the block of total values. |
| 227 | * Almost the same as `execute`. The difference is only when JOIN is executed. |
| 228 | */ |
| 229 | void executeOnTotals(Block & block) const; |
| 230 | |
| 231 | /// Obtain a sample block that contains the names and types of result columns. |
| 232 | const Block & getSampleBlock() const { return sample_block; } |
| 233 | |
| 234 | std::string dumpActions() const; |
| 235 | |
| 236 | static std::string getSmallestColumn(const NamesAndTypesList & columns); |
| 237 | |
| 238 | JoinPtr getTableJoinAlgo() const; |
| 239 | |
| 240 | const Settings & getSettings() const { return settings; } |
| 241 | |
| 242 | /// Check if result block has no rows. True if it's definite, false if we can't say for sure. |
| 243 | /// Call it only after subqueries for join were executed. |
| 244 | bool resultIsAlwaysEmpty() const; |
| 245 | |
| 246 | /// Check if column is always zero. True if it's definite, false if we can't say for sure. |
| 247 | /// Call it only after subqueries for sets were executed. |
| 248 | bool checkColumnIsAlwaysFalse(const String & column_name) const; |
| 249 | |
| 250 | struct ActionsHash |
| 251 | { |
| 252 | UInt128 operator()(const ExpressionActions::Actions & elems) const |
| 253 | { |
| 254 | SipHash hash; |
| 255 | for (const ExpressionAction & act : elems) |
| 256 | hash.update(ExpressionAction::ActionHash{}(act)); |
| 257 | UInt128 result; |
| 258 | hash.get128(result.low, result.high); |
| 259 | return result; |
| 260 | } |
| 261 | }; |
| 262 | |
| 263 | private: |
| 264 | /// These columns have to be in input blocks (arguments of execute* methods) |
| 265 | NamesAndTypesList input_columns; |
| 266 | /// These actions will be executed on input blocks |
| 267 | Actions actions; |
| 268 | /// The example of result (output) block. |
| 269 | Block sample_block; |
| 270 | /// Columns which can't be used for constant folding. |
| 271 | NameSet names_not_for_constant_folding; |
| 272 | |
| 273 | Settings settings; |
| 274 | #if USE_EMBEDDED_COMPILER |
| 275 | std::shared_ptr<CompiledExpressionCache> compilation_cache; |
| 276 | #endif |
| 277 | |
| 278 | void checkLimits(Block & block) const; |
| 279 | |
| 280 | void addImpl(ExpressionAction action, Names & new_names); |
| 281 | |
| 282 | /// Move all arrayJoin as close as possible to the end. |
| 283 | void optimizeArrayJoin(); |
| 284 | }; |
| 285 | |
| 286 | using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>; |
| 287 | |
| 288 | |
| 289 | /** The sequence of transformations over the block. |
| 290 | * It is assumed that the result of each step is fed to the input of the next step. |
| 291 | * Used to execute parts of the query individually. |
| 292 | * |
| 293 | * For example, you can create a chain of two steps: |
| 294 | * 1) evaluate the expression in the WHERE clause, |
| 295 | * 2) calculate the expression in the SELECT section, |
| 296 | * and between the two steps do the filtering by value in the WHERE clause. |
| 297 | */ |
| 298 | struct ExpressionActionsChain |
| 299 | { |
| 300 | ExpressionActionsChain(const Context & context_) |
| 301 | : context(context_) {} |
| 302 | struct Step |
| 303 | { |
| 304 | ExpressionActionsPtr actions; |
| 305 | /// Columns were added to the block before current step in addition to prev step output. |
| 306 | NameSet additional_input; |
| 307 | /// Columns which are required in the result of current step. |
| 308 | Names required_output; |
| 309 | /// True if column from required_output is needed only for current step and not used in next actions |
| 310 | /// (and can be removed from block). Example: filter column for where actions. |
| 311 | /// If not empty, has the same size with required_output; is filled in finalize(). |
| 312 | std::vector<bool> can_remove_required_output; |
| 313 | |
| 314 | Step(const ExpressionActionsPtr & actions_ = nullptr, const Names & required_output_ = Names()) |
| 315 | : actions(actions_), required_output(required_output_) {} |
| 316 | }; |
| 317 | |
| 318 | using Steps = std::vector<Step>; |
| 319 | |
| 320 | const Context & context; |
| 321 | Steps steps; |
| 322 | |
| 323 | void addStep(); |
| 324 | |
| 325 | void finalize(); |
| 326 | |
| 327 | void clear() |
| 328 | { |
| 329 | steps.clear(); |
| 330 | } |
| 331 | |
| 332 | ExpressionActionsPtr getLastActions() |
| 333 | { |
| 334 | if (steps.empty()) |
| 335 | throw Exception("Empty ExpressionActionsChain" , ErrorCodes::LOGICAL_ERROR); |
| 336 | |
| 337 | return steps.back().actions; |
| 338 | } |
| 339 | |
| 340 | Step & getLastStep() |
| 341 | { |
| 342 | if (steps.empty()) |
| 343 | throw Exception("Empty ExpressionActionsChain" , ErrorCodes::LOGICAL_ERROR); |
| 344 | |
| 345 | return steps.back(); |
| 346 | } |
| 347 | |
| 348 | std::string dumpChain(); |
| 349 | }; |
| 350 | |
| 351 | } |
| 352 | |