1#include "QueryProfiler.h"
2
3#include <random>
4#include <common/phdr_cache.h>
5#include <common/config_common.h>
6#include <common/StringRef.h>
7#include <common/logger_useful.h>
8#include <Common/PipeFDs.h>
9#include <Common/StackTrace.h>
10#include <Common/CurrentThread.h>
11#include <Common/Exception.h>
12#include <Common/thread_local_rng.h>
13#include <IO/WriteHelpers.h>
14#include <IO/WriteBufferFromFileDescriptorDiscardOnFailure.h>
15
16
17namespace ProfileEvents
18{
19 extern const Event QueryProfilerSignalOverruns;
20}
21
22namespace DB
23{
24
25extern LazyPipeFDs trace_pipe;
26
27namespace
28{
29 /// Normally query_id is a UUID (string with a fixed length) but user can provide custom query_id.
30 /// Thus upper bound on query_id length should be introduced to avoid buffer overflow in signal handler.
31 constexpr size_t QUERY_ID_MAX_LEN = 1024;
32
33#if defined(OS_LINUX)
34 thread_local size_t write_trace_iteration = 0;
35#endif
36
37 void writeTraceInfo(TimerType timer_type, int /* sig */, siginfo_t * info, void * context)
38 {
39#if defined(OS_LINUX)
40 /// Quickly drop if signal handler is called too frequently.
41 /// Otherwise we may end up infinitelly processing signals instead of doing any useful work.
42 ++write_trace_iteration;
43 if (info && info->si_overrun > 0)
44 {
45 /// But pass with some frequency to avoid drop of all traces.
46 if (write_trace_iteration % info->si_overrun == 0)
47 {
48 ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, info->si_overrun);
49 }
50 else
51 {
52 ProfileEvents::increment(ProfileEvents::QueryProfilerSignalOverruns, info->si_overrun + 1);
53 return;
54 }
55 }
56#else
57 UNUSED(info);
58#endif
59
60 constexpr size_t buf_size = sizeof(char) + // TraceCollector stop flag
61 8 * sizeof(char) + // maximum VarUInt length for string size
62 QUERY_ID_MAX_LEN * sizeof(char) + // maximum query_id length
63 sizeof(UInt8) + // number of stack frames
64 sizeof(StackTrace::Frames) + // collected stack trace, maximum capacity
65 sizeof(TimerType) + // timer type
66 sizeof(UInt32); // thread_number
67 char buffer[buf_size];
68 WriteBufferFromFileDescriptorDiscardOnFailure out(trace_pipe.fds_rw[1], buf_size, buffer);
69
70 StringRef query_id = CurrentThread::getQueryId();
71 query_id.size = std::min(query_id.size, QUERY_ID_MAX_LEN);
72
73 UInt32 thread_number = CurrentThread::get().thread_number;
74
75 const auto signal_context = *reinterpret_cast<ucontext_t *>(context);
76 const StackTrace stack_trace(signal_context);
77
78 writeChar(false, out);
79 writeStringBinary(query_id, out);
80
81 size_t stack_trace_size = stack_trace.getSize();
82 size_t stack_trace_offset = stack_trace.getOffset();
83 writeIntBinary(UInt8(stack_trace_size - stack_trace_offset), out);
84 for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
85 writePODBinary(stack_trace.getFrames()[i], out);
86
87 writePODBinary(timer_type, out);
88 writePODBinary(thread_number, out);
89 out.next();
90 }
91
92 [[maybe_unused]] const UInt32 TIMER_PRECISION = 1e9;
93}
94
95namespace ErrorCodes
96{
97 extern const int CANNOT_MANIPULATE_SIGSET;
98 extern const int CANNOT_SET_SIGNAL_HANDLER;
99 extern const int CANNOT_CREATE_TIMER;
100 extern const int CANNOT_SET_TIMER_PERIOD;
101 extern const int CANNOT_DELETE_TIMER;
102 extern const int NOT_IMPLEMENTED;
103}
104
105template <typename ProfilerImpl>
106QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(const Int32 thread_id, const int clock_type, UInt32 period, const int pause_signal_)
107 : log(&Logger::get("QueryProfiler"))
108 , pause_signal(pause_signal_)
109{
110#if USE_UNWIND
111 /// Sanity check.
112 if (!hasPHDRCache())
113 throw Exception("QueryProfiler cannot be used without PHDR cache, that is not available for TSan build", ErrorCodes::NOT_IMPLEMENTED);
114
115 /// Too high frequency can introduce infinite busy loop of signal handlers. We will limit maximum frequency (with 1000 signals per second).
116 if (period < 1000000)
117 period = 1000000;
118
119 struct sigaction sa{};
120 sa.sa_sigaction = ProfilerImpl::signalHandler;
121 sa.sa_flags = SA_SIGINFO | SA_RESTART;
122
123 if (sigemptyset(&sa.sa_mask))
124 throwFromErrno("Failed to clean signal mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
125
126 if (sigaddset(&sa.sa_mask, pause_signal))
127 throwFromErrno("Failed to add signal to mask for query profiler", ErrorCodes::CANNOT_MANIPULATE_SIGSET);
128
129 if (sigaction(pause_signal, &sa, previous_handler))
130 throwFromErrno("Failed to setup signal handler for query profiler", ErrorCodes::CANNOT_SET_SIGNAL_HANDLER);
131
132 try
133 {
134 struct sigevent sev;
135 sev.sigev_notify = SIGEV_THREAD_ID;
136 sev.sigev_signo = pause_signal;
137
138#if defined(__FreeBSD__)
139 sev._sigev_un._threadid = thread_id;
140#else
141 sev._sigev_un._tid = thread_id;
142#endif
143 if (timer_create(clock_type, &sev, &timer_id))
144 throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER);
145
146 /// Randomize offset as uniform random value from 0 to period - 1.
147 /// It will allow to sample short queries even if timer period is large.
148 /// (For example, with period of 1 second, query with 50 ms duration will be sampled with 1 / 20 probability).
149 /// It also helps to avoid interference (moire).
150 UInt32 period_rand = std::uniform_int_distribution<UInt32>(0, period)(thread_local_rng);
151
152 struct timespec interval{.tv_sec = period / TIMER_PRECISION, .tv_nsec = period % TIMER_PRECISION};
153 struct timespec offset{.tv_sec = period_rand / TIMER_PRECISION, .tv_nsec = period_rand % TIMER_PRECISION};
154
155 struct itimerspec timer_spec = {.it_interval = interval, .it_value = offset};
156 if (timer_settime(timer_id, 0, &timer_spec, nullptr))
157 throwFromErrno("Failed to set thread timer period", ErrorCodes::CANNOT_SET_TIMER_PERIOD);
158 }
159 catch (...)
160 {
161 tryCleanup();
162 throw;
163 }
164#else
165 UNUSED(thread_id);
166 UNUSED(clock_type);
167 UNUSED(period);
168 UNUSED(pause_signal);
169
170 throw Exception("QueryProfiler cannot work with stock libunwind", ErrorCodes::NOT_IMPLEMENTED);
171#endif
172}
173
174template <typename ProfilerImpl>
175QueryProfilerBase<ProfilerImpl>::~QueryProfilerBase()
176{
177 tryCleanup();
178}
179
180template <typename ProfilerImpl>
181void QueryProfilerBase<ProfilerImpl>::tryCleanup()
182{
183#if USE_UNWIND
184 if (timer_id != nullptr && timer_delete(timer_id))
185 LOG_ERROR(log, "Failed to delete query profiler timer " + errnoToString(ErrorCodes::CANNOT_DELETE_TIMER));
186
187 if (previous_handler != nullptr && sigaction(pause_signal, previous_handler, nullptr))
188 LOG_ERROR(log, "Failed to restore signal handler after query profiler " + errnoToString(ErrorCodes::CANNOT_SET_SIGNAL_HANDLER));
189#endif
190}
191
192template class QueryProfilerBase<QueryProfilerReal>;
193template class QueryProfilerBase<QueryProfilerCpu>;
194
195QueryProfilerReal::QueryProfilerReal(const Int32 thread_id, const UInt32 period)
196 : QueryProfilerBase(thread_id, CLOCK_REALTIME, period, SIGUSR1)
197{}
198
199void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
200{
201 writeTraceInfo(TimerType::Real, sig, info, context);
202}
203
204QueryProfilerCpu::QueryProfilerCpu(const Int32 thread_id, const UInt32 period)
205 : QueryProfilerBase(thread_id, CLOCK_THREAD_CPUTIME_ID, period, SIGUSR2)
206{}
207
208void QueryProfilerCpu::signalHandler(int sig, siginfo_t * info, void * context)
209{
210 writeTraceInfo(TimerType::Cpu, sig, info, context);
211}
212
213}
214