1/*
2 Copyright (c) 2017, Facebook, Inc.
3
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; version 2 of the License.
7
8 This program is distributed in the hope that it will be useful,
9 but WITHOUT ANY WARRANTY; without even the implied warranty of
10 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 GNU General Public License for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with this program; if not, write to the Free Software
15 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
16
17/* This C++ file's header */
18#include "./rdb_io_watchdog.h"
19
20/* C++ standard header files */
21#include <string>
22#include <vector>
23
24/* Rdb_io_watchdog doesn't work on Windows [yet] */
25#ifdef HAVE_TIMER_DELETE
26
27namespace myrocks {
28
29void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) {
30 DBUG_ASSERT(timer_data.sival_ptr != nullptr);
31
32 // The treatment of any pending signal generated by the deleted timer is
33 // unspecified. Therefore we still need to handle the rare case where we
34 // finished the I/O operation right before the timer was deleted and callback
35 // was in flight.
36 if (!m_io_in_progress.load()) {
37 return;
38 }
39
40 // At this point we know that I/O has been stuck in `write()` for more than
41 // `m_write_timeout` seconds. We'll log a message and shut down the service.
42 // NO_LINT_DEBUG
43 sql_print_error("MyRocks has detected a combination of I/O requests which "
44 "have cumulatively been blocking for more than %u seconds. "
45 "Shutting the service down.",
46 m_write_timeout);
47
48 abort();
49}
50
51void Rdb_io_watchdog::io_check_callback(union sigval timer_data) {
52 RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
53
54 DBUG_ASSERT(timer_data.sival_ptr != nullptr);
55
56 struct sigevent e;
57
58 e.sigev_notify = SIGEV_THREAD;
59 e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper;
60 e.sigev_value.sival_ptr = this;
61 e.sigev_notify_attributes = nullptr;
62
63 int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer);
64
65 if (unlikely(ret)) {
66 // NO_LINT_DEBUG
67 sql_print_warning("Creating a watchdog I/O timer failed with %d.", errno);
68 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
69 return;
70 }
71
72 struct itimerspec timer_spec;
73 memset(&timer_spec, 0, sizeof(timer_spec));
74
75 // One time execution only for the watchdog. No interval.
76 timer_spec.it_value.tv_sec = m_write_timeout;
77
78 ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr);
79
80 if (unlikely(ret)) {
81 // NO_LINT_DEBUG
82 sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
83 errno);
84 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
85 return;
86 }
87
88 m_io_in_progress.store(true);
89
90 // Verify the write access to all directories we care about.
91 for (const std::string &directory : m_dirs_to_check) {
92 ret = check_write_access(directory);
93
94 // We'll log a warning and attept to continue to see if the problem happens
95 // in other cases as well.
96 if (unlikely(ret != HA_EXIT_SUCCESS)) {
97 // NO_LINT_DEBUG
98 sql_print_warning("Unable to verify write access to %s (error code %d).",
99 directory.c_str(), ret);
100 }
101 }
102
103 m_io_in_progress.store(false);
104
105 // Clean up the watchdog timer.
106 ret = timer_delete(m_io_check_watchdog_timer);
107
108 if (unlikely(ret)) {
109 // NO_LINT_DEBUG
110 sql_print_warning("Deleting the watchdog I/O timer failed with %d.", errno);
111 }
112
113 m_io_check_watchdog_timer = nullptr;
114
115 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
116}
117
118int Rdb_io_watchdog::check_write_access(const std::string &dirname) const {
119 DBUG_ASSERT(!dirname.empty());
120 DBUG_ASSERT(m_buf != nullptr);
121
122 const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME;
123
124 // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's
125 // buffer cache.
126 int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC,
127 S_IRWXU | S_IWUSR);
128
129 if (unlikely(fd == -1)) {
130 return fd;
131 }
132
133 int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE);
134
135 if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) {
136 return ret;
137 }
138
139 ret = close(fd);
140
141 if (unlikely(ret)) {
142 return ret;
143 }
144
145 ret = unlink(fname.c_str());
146
147 if (unlikely(ret)) {
148 return ret;
149 }
150
151 return HA_EXIT_SUCCESS;
152}
153
154int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) {
155 // This function will be called either from a thread initializing MyRocks
156 // engine or handling system variable changes. We need to account for the
157 // possibility of I/O callback executing at the same time. If that happens
158 // then we'll wait for it to finish.
159 RDB_MUTEX_LOCK_CHECK(m_reset_mutex);
160
161 struct sigevent e;
162
163 // In all the cases all the active timers needs to be stopped.
164 int ret = stop_timers();
165
166 if (unlikely(ret)) {
167 // NO_LINT_DEBUG
168 sql_print_warning("Stopping I/O timers failed with %d.", errno);
169 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
170 return ret;
171 }
172
173 m_write_timeout = write_timeout;
174 m_io_in_progress.store(false);
175
176 // Zero means that the I/O timer will be disabled. Therefore there's nothing
177 // for us to do here.
178 if (!write_timeout) {
179 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
180 return HA_EXIT_SUCCESS;
181 }
182
183 free(m_buf);
184
185 ret = posix_memalign(reinterpret_cast<void **>(&m_buf),
186 RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE);
187
188 if (unlikely(ret)) {
189 m_buf = nullptr;
190 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
191 // NB! The value of errno is not set.
192 return ret;
193 }
194
195 DBUG_ASSERT(m_buf != nullptr);
196 memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE);
197
198 // Common case gets handled here - we'll create a timer with a specific
199 // interval to check a set of directories for write access.
200 DBUG_ASSERT(m_dirs_to_check.size() > 0);
201
202 e.sigev_notify = SIGEV_THREAD;
203 e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper;
204 e.sigev_value.sival_ptr = this;
205 e.sigev_notify_attributes = nullptr;
206
207 ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer);
208
209 if (unlikely(ret)) {
210 // NO_LINT_DEBUG
211 sql_print_warning("Creating a I/O timer failed with %d.", errno);
212 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
213 return ret;
214 }
215
216 struct itimerspec timer_spec;
217 memset(&timer_spec, 0, sizeof(timer_spec));
218
219 // I/O timer will need to execute on a certain interval.
220 timer_spec.it_value.tv_sec = m_write_timeout;
221 timer_spec.it_interval.tv_sec = m_write_timeout;
222
223 ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr);
224
225 if (unlikely(ret)) {
226 // NO_LINT_DEBUG
227 sql_print_warning("Setting time for a watchdog I/O timer failed with %d.",
228 errno);
229 }
230
231 RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex);
232
233 return HA_EXIT_SUCCESS;
234}
235
236} // namespace myrocks
237
238#endif
239
240