1 | //===----------------------------------------------------------------------===// |
2 | // DuckDB |
3 | // |
4 | // duckdb/main/query_profiler.hpp |
5 | // |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #pragma once |
10 | |
11 | #include "duckdb/common/common.hpp" |
12 | #include "duckdb/common/enums/profiler_format.hpp" |
13 | #include "duckdb/common/profiler.hpp" |
14 | #include "duckdb/common/string_util.hpp" |
15 | #include "duckdb/common/types/data_chunk.hpp" |
16 | #include "duckdb/common/unordered_map.hpp" |
17 | #include "duckdb/common/winapi.hpp" |
18 | #include "duckdb/execution/physical_operator.hpp" |
19 | #include "duckdb/execution/expression_executor_state.hpp" |
20 | #include "duckdb/common/reference_map.hpp" |
21 | #include <stack> |
22 | #include "duckdb/common/pair.hpp" |
23 | #include "duckdb/common/deque.hpp" |
24 | |
25 | namespace duckdb { |
26 | class ClientContext; |
27 | class ExpressionExecutor; |
28 | class PhysicalOperator; |
29 | class SQLStatement; |
30 | |
31 | //! The ExpressionInfo keeps information related to an expression |
32 | struct ExpressionInfo { |
33 | explicit ExpressionInfo() : hasfunction(false) { |
34 | } |
35 | // A vector of children |
36 | vector<unique_ptr<ExpressionInfo>> children; |
37 | // Extract ExpressionInformation from a given expression state |
38 | void (unique_ptr<ExpressionState> &state); |
39 | |
40 | //! Whether or not expression has function |
41 | bool hasfunction; |
42 | //! The function Name |
43 | string function_name; |
44 | //! The function time |
45 | uint64_t function_time = 0; |
46 | //! Count the number of ALL tuples |
47 | uint64_t tuples_count = 0; |
48 | //! Count the number of tuples sampled |
49 | uint64_t sample_tuples_count = 0; |
50 | }; |
51 | |
52 | //! The ExpressionRootInfo keeps information related to the root of an expression tree |
53 | struct ExpressionRootInfo { |
54 | ExpressionRootInfo(ExpressionExecutorState &executor, string name); |
55 | |
56 | //! Count the number of time the executor called |
57 | uint64_t total_count = 0; |
58 | //! Count the number of time the executor called since last sampling |
59 | uint64_t current_count = 0; |
60 | //! Count the number of samples |
61 | uint64_t sample_count = 0; |
62 | //! Count the number of tuples in all samples |
63 | uint64_t sample_tuples_count = 0; |
64 | //! Count the number of tuples processed by this executor |
65 | uint64_t tuples_count = 0; |
66 | //! A vector which contain the pointer to root of each expression tree |
67 | unique_ptr<ExpressionInfo> root; |
68 | //! Name |
69 | string name; |
70 | //! Elapsed time |
71 | double time; |
72 | //! Extra Info |
73 | string ; |
74 | }; |
75 | |
76 | struct ExpressionExecutorInfo { |
77 | explicit ExpressionExecutorInfo() {}; |
78 | explicit ExpressionExecutorInfo(ExpressionExecutor &executor, const string &name, int id); |
79 | |
80 | //! A vector which contain the pointer to all ExpressionRootInfo |
81 | vector<unique_ptr<ExpressionRootInfo>> roots; |
82 | //! Id, it will be used as index for executors_info vector |
83 | int id; |
84 | }; |
85 | |
86 | struct OperatorInformation { |
87 | explicit OperatorInformation(double time_ = 0, idx_t elements_ = 0) : time(time_), elements(elements_) { |
88 | } |
89 | |
90 | double time = 0; |
91 | idx_t elements = 0; |
92 | string name; |
93 | //! A vector of Expression Executor Info |
94 | vector<unique_ptr<ExpressionExecutorInfo>> executors_info; |
95 | }; |
96 | |
97 | //! The OperatorProfiler measures timings of individual operators |
98 | class OperatorProfiler { |
99 | friend class QueryProfiler; |
100 | |
101 | public: |
102 | DUCKDB_API explicit OperatorProfiler(bool enabled); |
103 | |
104 | DUCKDB_API void StartOperator(optional_ptr<const PhysicalOperator> phys_op); |
105 | DUCKDB_API void EndOperator(optional_ptr<DataChunk> chunk); |
106 | DUCKDB_API void Flush(const PhysicalOperator &phys_op, ExpressionExecutor &expression_executor, const string &name, |
107 | int id); |
108 | |
109 | ~OperatorProfiler() { |
110 | } |
111 | |
112 | private: |
113 | void AddTiming(const PhysicalOperator &op, double time, idx_t elements); |
114 | |
115 | //! Whether or not the profiler is enabled |
116 | bool enabled; |
117 | //! The timer used to time the execution time of the individual Physical Operators |
118 | Profiler op; |
119 | //! The stack of Physical Operators that are currently active |
120 | optional_ptr<const PhysicalOperator> active_operator; |
121 | //! A mapping of physical operators to recorded timings |
122 | reference_map_t<const PhysicalOperator, OperatorInformation> timings; |
123 | }; |
124 | |
125 | //! The QueryProfiler can be used to measure timings of queries |
126 | class QueryProfiler { |
127 | public: |
128 | DUCKDB_API QueryProfiler(ClientContext &context); |
129 | |
130 | public: |
131 | struct TreeNode { |
132 | PhysicalOperatorType type; |
133 | string name; |
134 | string ; |
135 | OperatorInformation info; |
136 | vector<unique_ptr<TreeNode>> children; |
137 | idx_t depth = 0; |
138 | }; |
139 | |
140 | // Propagate save_location, enabled, detailed_enabled and automatic_print_format. |
141 | void Propagate(QueryProfiler &qp); |
142 | |
143 | using TreeMap = reference_map_t<const PhysicalOperator, reference<TreeNode>>; |
144 | |
145 | private: |
146 | unique_ptr<TreeNode> CreateTree(const PhysicalOperator &root, idx_t depth = 0); |
147 | void Render(const TreeNode &node, std::ostream &str) const; |
148 | |
149 | public: |
150 | DUCKDB_API bool IsEnabled() const; |
151 | DUCKDB_API bool IsDetailedEnabled() const; |
152 | DUCKDB_API ProfilerPrintFormat GetPrintFormat() const; |
153 | DUCKDB_API bool PrintOptimizerOutput() const; |
154 | DUCKDB_API string GetSaveLocation() const; |
155 | |
156 | DUCKDB_API static QueryProfiler &Get(ClientContext &context); |
157 | |
158 | DUCKDB_API void StartQuery(string query, bool is_explain_analyze = false, bool start_at_optimizer = false); |
159 | DUCKDB_API void EndQuery(); |
160 | |
161 | DUCKDB_API void StartExplainAnalyze(); |
162 | |
163 | //! Adds the timings gathered by an OperatorProfiler to this query profiler |
164 | DUCKDB_API void Flush(OperatorProfiler &profiler); |
165 | |
166 | DUCKDB_API void StartPhase(string phase); |
167 | DUCKDB_API void EndPhase(); |
168 | |
169 | DUCKDB_API void Initialize(const PhysicalOperator &root); |
170 | |
171 | DUCKDB_API string QueryTreeToString() const; |
172 | DUCKDB_API void QueryTreeToStream(std::ostream &str) const; |
173 | DUCKDB_API void Print(); |
174 | |
175 | //! return the printed as a string. Unlike ToString, which is always formatted as a string, |
176 | //! the return value is formatted based on the current print format (see GetPrintFormat()). |
177 | DUCKDB_API string ToString() const; |
178 | |
179 | DUCKDB_API string ToJSON() const; |
180 | DUCKDB_API void WriteToFile(const char *path, string &info) const; |
181 | |
182 | idx_t OperatorSize() { |
183 | return tree_map.size(); |
184 | } |
185 | |
186 | void Finalize(TreeNode &node); |
187 | |
188 | private: |
189 | ClientContext &context; |
190 | |
191 | //! Whether or not the query profiler is running |
192 | bool running; |
193 | //! The lock used for flushing information from a thread into the global query profiler |
194 | mutex flush_lock; |
195 | |
196 | //! Whether or not the query requires profiling |
197 | bool query_requires_profiling; |
198 | |
199 | //! The root of the query tree |
200 | unique_ptr<TreeNode> root; |
201 | //! The query string |
202 | string query; |
203 | //! The timer used to time the execution time of the entire query |
204 | Profiler main_query; |
205 | //! A map of a Physical Operator pointer to a tree node |
206 | TreeMap tree_map; |
207 | //! Whether or not we are running as part of a explain_analyze query |
208 | bool is_explain_analyze; |
209 | |
210 | public: |
211 | const TreeMap &GetTreeMap() const { |
212 | return tree_map; |
213 | } |
214 | |
215 | private: |
216 | //! The timer used to time the individual phases of the planning process |
217 | Profiler phase_profiler; |
218 | //! A mapping of the phase names to the timings |
219 | using PhaseTimingStorage = unordered_map<string, double>; |
220 | PhaseTimingStorage phase_timings; |
221 | using PhaseTimingItem = PhaseTimingStorage::value_type; |
222 | //! The stack of currently active phases |
223 | vector<string> phase_stack; |
224 | |
225 | private: |
226 | vector<PhaseTimingItem> GetOrderedPhaseTimings() const; |
227 | |
228 | //! Check whether or not an operator type requires query profiling. If none of the ops in a query require profiling |
229 | //! no profiling information is output. |
230 | bool OperatorRequiresProfiling(PhysicalOperatorType op_type); |
231 | }; |
232 | |
233 | //! The QueryProfilerHistory can be used to access the profiler of previous queries |
234 | class QueryProfilerHistory { |
235 | private: |
236 | static constexpr uint64_t DEFAULT_SIZE = 20; |
237 | |
238 | //! Previous Query profilers |
239 | deque<pair<transaction_t, shared_ptr<QueryProfiler>>> prev_profilers; |
240 | //! Previous Query profilers size |
241 | uint64_t prev_profilers_size = DEFAULT_SIZE; |
242 | |
243 | public: |
244 | deque<pair<transaction_t, shared_ptr<QueryProfiler>>> &GetPrevProfilers() { |
245 | return prev_profilers; |
246 | } |
247 | QueryProfilerHistory() { |
248 | } |
249 | |
250 | void (uint64_t ) { |
251 | prev_profilers_size = prevProfilersSize; |
252 | } |
253 | uint64_t () const { |
254 | return prev_profilers_size; |
255 | } |
256 | |
257 | public: |
258 | void SetProfilerHistorySize(uint64_t size) { |
259 | this->prev_profilers_size = size; |
260 | } |
261 | void ResetProfilerHistorySize() { |
262 | this->prev_profilers_size = DEFAULT_SIZE; |
263 | } |
264 | }; |
265 | } // namespace duckdb |
266 | |