| 1 | /* |
| 2 | Copyright (c) 2017, Facebook, Inc. |
| 3 | |
| 4 | This program is free software; you can redistribute it and/or modify |
| 5 | it under the terms of the GNU General Public License as published by |
| 6 | the Free Software Foundation; version 2 of the License. |
| 7 | |
| 8 | This program is distributed in the hope that it will be useful, |
| 9 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 10 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 11 | GNU General Public License for more details. |
| 12 | |
| 13 | You should have received a copy of the GNU General Public License |
| 14 | along with this program; if not, write to the Free Software |
| 15 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ |
| 16 | |
| 17 | /* This C++ file's header */ |
| 18 | #include "./rdb_io_watchdog.h" |
| 19 | |
| 20 | /* C++ standard header files */ |
| 21 | #include <string> |
| 22 | #include <vector> |
| 23 | |
| 24 | /* Rdb_io_watchdog doesn't work on Windows [yet] */ |
| 25 | #ifdef HAVE_TIMER_DELETE |
| 26 | |
| 27 | namespace myrocks { |
| 28 | |
| 29 | void Rdb_io_watchdog::expire_io_callback(union sigval timer_data) { |
| 30 | DBUG_ASSERT(timer_data.sival_ptr != nullptr); |
| 31 | |
| 32 | // The treatment of any pending signal generated by the deleted timer is |
| 33 | // unspecified. Therefore we still need to handle the rare case where we |
| 34 | // finished the I/O operation right before the timer was deleted and callback |
| 35 | // was in flight. |
| 36 | if (!m_io_in_progress.load()) { |
| 37 | return; |
| 38 | } |
| 39 | |
| 40 | // At this point we know that I/O has been stuck in `write()` for more than |
| 41 | // `m_write_timeout` seconds. We'll log a message and shut down the service. |
| 42 | // NO_LINT_DEBUG |
| 43 | sql_print_error("MyRocks has detected a combination of I/O requests which " |
| 44 | "have cumulatively been blocking for more than %u seconds. " |
| 45 | "Shutting the service down." , |
| 46 | m_write_timeout); |
| 47 | |
| 48 | abort(); |
| 49 | } |
| 50 | |
| 51 | void Rdb_io_watchdog::io_check_callback(union sigval timer_data) { |
| 52 | RDB_MUTEX_LOCK_CHECK(m_reset_mutex); |
| 53 | |
| 54 | DBUG_ASSERT(timer_data.sival_ptr != nullptr); |
| 55 | |
| 56 | struct sigevent e; |
| 57 | |
| 58 | e.sigev_notify = SIGEV_THREAD; |
| 59 | e.sigev_notify_function = &Rdb_io_watchdog::expire_io_callback_wrapper; |
| 60 | e.sigev_value.sival_ptr = this; |
| 61 | e.sigev_notify_attributes = nullptr; |
| 62 | |
| 63 | int ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_watchdog_timer); |
| 64 | |
| 65 | if (unlikely(ret)) { |
| 66 | // NO_LINT_DEBUG |
| 67 | sql_print_warning("Creating a watchdog I/O timer failed with %d." , errno); |
| 68 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 69 | return; |
| 70 | } |
| 71 | |
| 72 | struct itimerspec timer_spec; |
| 73 | memset(&timer_spec, 0, sizeof(timer_spec)); |
| 74 | |
| 75 | // One time execution only for the watchdog. No interval. |
| 76 | timer_spec.it_value.tv_sec = m_write_timeout; |
| 77 | |
| 78 | ret = timer_settime(m_io_check_watchdog_timer, 0, &timer_spec, nullptr); |
| 79 | |
| 80 | if (unlikely(ret)) { |
| 81 | // NO_LINT_DEBUG |
| 82 | sql_print_warning("Setting time for a watchdog I/O timer failed with %d." , |
| 83 | errno); |
| 84 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 85 | return; |
| 86 | } |
| 87 | |
| 88 | m_io_in_progress.store(true); |
| 89 | |
| 90 | // Verify the write access to all directories we care about. |
| 91 | for (const std::string &directory : m_dirs_to_check) { |
| 92 | ret = check_write_access(directory); |
| 93 | |
| 94 | // We'll log a warning and attept to continue to see if the problem happens |
| 95 | // in other cases as well. |
| 96 | if (unlikely(ret != HA_EXIT_SUCCESS)) { |
| 97 | // NO_LINT_DEBUG |
| 98 | sql_print_warning("Unable to verify write access to %s (error code %d)." , |
| 99 | directory.c_str(), ret); |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | m_io_in_progress.store(false); |
| 104 | |
| 105 | // Clean up the watchdog timer. |
| 106 | ret = timer_delete(m_io_check_watchdog_timer); |
| 107 | |
| 108 | if (unlikely(ret)) { |
| 109 | // NO_LINT_DEBUG |
| 110 | sql_print_warning("Deleting the watchdog I/O timer failed with %d." , errno); |
| 111 | } |
| 112 | |
| 113 | m_io_check_watchdog_timer = nullptr; |
| 114 | |
| 115 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 116 | } |
| 117 | |
| 118 | int Rdb_io_watchdog::check_write_access(const std::string &dirname) const { |
| 119 | DBUG_ASSERT(!dirname.empty()); |
| 120 | DBUG_ASSERT(m_buf != nullptr); |
| 121 | |
| 122 | const std::string fname = dirname + FN_DIRSEP + RDB_IO_DUMMY_FILE_NAME; |
| 123 | |
| 124 | // O_DIRECT is a key flag here to make sure that we'll bypass the kernel's |
| 125 | // buffer cache. |
| 126 | int fd = open(fname.c_str(), O_WRONLY | O_DIRECT | O_CREAT | O_SYNC, |
| 127 | S_IRWXU | S_IWUSR); |
| 128 | |
| 129 | if (unlikely(fd == -1)) { |
| 130 | return fd; |
| 131 | } |
| 132 | |
| 133 | int ret = write(fd, m_buf, RDB_IO_WRITE_BUFFER_SIZE); |
| 134 | |
| 135 | if (unlikely(ret != RDB_IO_WRITE_BUFFER_SIZE)) { |
| 136 | return ret; |
| 137 | } |
| 138 | |
| 139 | ret = close(fd); |
| 140 | |
| 141 | if (unlikely(ret)) { |
| 142 | return ret; |
| 143 | } |
| 144 | |
| 145 | ret = unlink(fname.c_str()); |
| 146 | |
| 147 | if (unlikely(ret)) { |
| 148 | return ret; |
| 149 | } |
| 150 | |
| 151 | return HA_EXIT_SUCCESS; |
| 152 | } |
| 153 | |
| 154 | int Rdb_io_watchdog::reset_timeout(const uint32_t &write_timeout) { |
| 155 | // This function will be called either from a thread initializing MyRocks |
| 156 | // engine or handling system variable changes. We need to account for the |
| 157 | // possibility of I/O callback executing at the same time. If that happens |
| 158 | // then we'll wait for it to finish. |
| 159 | RDB_MUTEX_LOCK_CHECK(m_reset_mutex); |
| 160 | |
| 161 | struct sigevent e; |
| 162 | |
| 163 | // In all the cases all the active timers needs to be stopped. |
| 164 | int ret = stop_timers(); |
| 165 | |
| 166 | if (unlikely(ret)) { |
| 167 | // NO_LINT_DEBUG |
| 168 | sql_print_warning("Stopping I/O timers failed with %d." , errno); |
| 169 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 170 | return ret; |
| 171 | } |
| 172 | |
| 173 | m_write_timeout = write_timeout; |
| 174 | m_io_in_progress.store(false); |
| 175 | |
| 176 | // Zero means that the I/O timer will be disabled. Therefore there's nothing |
| 177 | // for us to do here. |
| 178 | if (!write_timeout) { |
| 179 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 180 | return HA_EXIT_SUCCESS; |
| 181 | } |
| 182 | |
| 183 | free(m_buf); |
| 184 | |
| 185 | ret = posix_memalign(reinterpret_cast<void **>(&m_buf), |
| 186 | RDB_IO_WRITE_BUFFER_SIZE, RDB_IO_WRITE_BUFFER_SIZE); |
| 187 | |
| 188 | if (unlikely(ret)) { |
| 189 | m_buf = nullptr; |
| 190 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 191 | // NB! The value of errno is not set. |
| 192 | return ret; |
| 193 | } |
| 194 | |
| 195 | DBUG_ASSERT(m_buf != nullptr); |
| 196 | memset(m_buf, 0, RDB_IO_WRITE_BUFFER_SIZE); |
| 197 | |
| 198 | // Common case gets handled here - we'll create a timer with a specific |
| 199 | // interval to check a set of directories for write access. |
| 200 | DBUG_ASSERT(m_dirs_to_check.size() > 0); |
| 201 | |
| 202 | e.sigev_notify = SIGEV_THREAD; |
| 203 | e.sigev_notify_function = &Rdb_io_watchdog::io_check_callback_wrapper; |
| 204 | e.sigev_value.sival_ptr = this; |
| 205 | e.sigev_notify_attributes = nullptr; |
| 206 | |
| 207 | ret = timer_create(CLOCK_MONOTONIC, &e, &m_io_check_timer); |
| 208 | |
| 209 | if (unlikely(ret)) { |
| 210 | // NO_LINT_DEBUG |
| 211 | sql_print_warning("Creating a I/O timer failed with %d." , errno); |
| 212 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 213 | return ret; |
| 214 | } |
| 215 | |
| 216 | struct itimerspec timer_spec; |
| 217 | memset(&timer_spec, 0, sizeof(timer_spec)); |
| 218 | |
| 219 | // I/O timer will need to execute on a certain interval. |
| 220 | timer_spec.it_value.tv_sec = m_write_timeout; |
| 221 | timer_spec.it_interval.tv_sec = m_write_timeout; |
| 222 | |
| 223 | ret = timer_settime(m_io_check_timer, 0, &timer_spec, nullptr); |
| 224 | |
| 225 | if (unlikely(ret)) { |
| 226 | // NO_LINT_DEBUG |
| 227 | sql_print_warning("Setting time for a watchdog I/O timer failed with %d." , |
| 228 | errno); |
| 229 | } |
| 230 | |
| 231 | RDB_MUTEX_UNLOCK_CHECK(m_reset_mutex); |
| 232 | |
| 233 | return HA_EXIT_SUCCESS; |
| 234 | } |
| 235 | |
| 236 | } // namespace myrocks |
| 237 | |
| 238 | #endif |
| 239 | |
| 240 | |