| 1 | /* -*- mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ |
| 2 | // vim: ft=cpp:expandtab:ts=8:sw=4:softtabstop=4: |
| 3 | #ident "$Id$" |
| 4 | /*====== |
| 5 | This file is part of PerconaFT. |
| 6 | |
| 7 | |
| 8 | Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved. |
| 9 | |
| 10 | PerconaFT is free software: you can redistribute it and/or modify |
| 11 | it under the terms of the GNU General Public License, version 2, |
| 12 | as published by the Free Software Foundation. |
| 13 | |
| 14 | PerconaFT is distributed in the hope that it will be useful, |
| 15 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 16 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 17 | GNU General Public License for more details. |
| 18 | |
| 19 | You should have received a copy of the GNU General Public License |
| 20 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 21 | |
| 22 | ---------------------------------------- |
| 23 | |
| 24 | PerconaFT is free software: you can redistribute it and/or modify |
| 25 | it under the terms of the GNU Affero General Public License, version 3, |
| 26 | as published by the Free Software Foundation. |
| 27 | |
| 28 | PerconaFT is distributed in the hope that it will be useful, |
| 29 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 30 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 31 | GNU Affero General Public License for more details. |
| 32 | |
| 33 | You should have received a copy of the GNU Affero General Public License |
| 34 | along with PerconaFT. If not, see <http://www.gnu.org/licenses/>. |
| 35 | ======= */ |
| 36 | |
| 37 | #ident "Copyright (c) 2006, 2015, Percona and/or its affiliates. All rights reserved." |
| 38 | |
| 39 | #include <toku_portability.h> |
| 40 | #include <unistd.h> |
| 41 | #include <errno.h> |
| 42 | #include <toku_assert.h> |
| 43 | #include <stdio.h> |
| 44 | #include <string.h> |
| 45 | #include <dirent.h> |
| 46 | #include <sys/types.h> |
| 47 | #include <sys/stat.h> |
| 48 | #include <fcntl.h> |
| 49 | |
| 50 | #include "memory.h" |
| 51 | #include "toku_time.h" |
| 52 | #include "toku_path.h" |
| 53 | #include <portability/toku_atomic.h> |
| 54 | |
| 55 | toku_instr_key *tokudb_file_data_key; |
| 56 | |
| 57 | static int toku_assert_on_write_enospc = 0; |
| 58 | static const int toku_write_enospc_sleep = 1; |
| 59 | static uint64_t toku_write_enospc_last_report; // timestamp of most recent |
| 60 | // report to error log |
| 61 | static time_t toku_write_enospc_last_time; // timestamp of most recent ENOSPC |
| 62 | static uint32_t toku_write_enospc_current; // number of threads currently blocked on ENOSPC |
| 63 | static uint64_t toku_write_enospc_total; // total number of times ENOSPC was returned from an attempt to write |
| 64 | |
| 65 | void toku_set_assert_on_write_enospc(int do_assert) { |
| 66 | toku_assert_on_write_enospc = do_assert; |
| 67 | } |
| 68 | |
| 69 | void toku_fs_get_write_info(time_t *enospc_last_time, uint64_t *enospc_current, uint64_t *enospc_total) { |
| 70 | *enospc_last_time = toku_write_enospc_last_time; |
| 71 | *enospc_current = toku_write_enospc_current; |
| 72 | *enospc_total = toku_write_enospc_total; |
| 73 | } |
| 74 | |
| 75 | //Print any necessary errors |
| 76 | //Return whether we should try the write again. |
| 77 | static void |
| 78 | try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) { |
| 79 | int try_again = 0; |
| 80 | |
| 81 | assert(r_write < 0); |
| 82 | int errno_write = get_error_errno(); |
| 83 | switch (errno_write) { |
| 84 | case EINTR: { //The call was interrupted by a signal before any data was written; see signal(7). |
| 85 | char err_msg[sizeof("Write of [] bytes to fd=[] interrupted. Retrying." ) + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars |
| 86 | snprintf(err_msg, sizeof(err_msg), "Write of [%" PRIu64 "] bytes to fd=[%d] interrupted. Retrying." , (uint64_t)len, fd); |
| 87 | perror(err_msg); |
| 88 | fflush(stderr); |
| 89 | try_again = 1; |
| 90 | break; |
| 91 | } |
| 92 | case ENOSPC: { |
| 93 | if (toku_assert_on_write_enospc) { |
| 94 | char err_msg[sizeof("Failed write of [] bytes to fd=[]." ) + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars |
| 95 | snprintf(err_msg, sizeof(err_msg), "Failed write of [%" PRIu64 "] bytes to fd=[%d]." , (uint64_t)len, fd); |
| 96 | perror(err_msg); |
| 97 | fflush(stderr); |
| 98 | int out_of_disk_space = 1; |
| 99 | assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives. |
| 100 | } else { |
| 101 | toku_sync_fetch_and_add(&toku_write_enospc_total, 1); |
| 102 | toku_sync_fetch_and_add(&toku_write_enospc_current, 1); |
| 103 | |
| 104 | time_t tnow = time(0); |
| 105 | toku_write_enospc_last_time = tnow; |
| 106 | if (toku_write_enospc_last_report == 0 || tnow - toku_write_enospc_last_report >= 60) { |
| 107 | toku_write_enospc_last_report = tnow; |
| 108 | |
| 109 | const int tstr_length = 26; |
| 110 | char tstr[tstr_length]; |
| 111 | time_t t = time(0); |
| 112 | ctime_r(&t, tstr); |
| 113 | |
| 114 | const int MY_MAX_PATH = 256; |
| 115 | char fname[MY_MAX_PATH], symname[MY_MAX_PATH+1]; |
| 116 | sprintf(fname, "/proc/%d/fd/%d" , getpid(), fd); |
| 117 | ssize_t n = readlink(fname, symname, MY_MAX_PATH); |
| 118 | |
| 119 | if ((int)n == -1) |
| 120 | fprintf(stderr, "%.24s PerconaFT No space when writing %" PRIu64 " bytes to fd=%d " , tstr, (uint64_t) len, fd); |
| 121 | else { |
| 122 | tstr[n] = 0; // readlink doesn't append a NUL to the end of the buffer. |
| 123 | fprintf(stderr, "%.24s PerconaFT No space when writing %" PRIu64 " bytes to %*s " , tstr, (uint64_t) len, (int) n, symname); |
| 124 | } |
| 125 | fprintf(stderr, "retry in %d second%s\n" , toku_write_enospc_sleep, toku_write_enospc_sleep > 1 ? "s" : "" ); |
| 126 | fflush(stderr); |
| 127 | } |
| 128 | sleep(toku_write_enospc_sleep); |
| 129 | try_again = 1; |
| 130 | toku_sync_fetch_and_sub(&toku_write_enospc_current, 1); |
| 131 | break; |
| 132 | } |
| 133 | } |
| 134 | default: |
| 135 | break; |
| 136 | } |
| 137 | assert(try_again); |
| 138 | errno = errno_write; |
| 139 | } |
| 140 | |
// Optional replacements for the libc I/O entry points.  Each wrapper in this
// file calls its hook when the pointer is non-null and the corresponding libc
// function otherwise.  The hooks are installed with the toku_set_func_*
// setters.
static ssize_t (*t_write)(int, const void *, size_t) = nullptr;
static ssize_t (*t_full_write)(int, const void *, size_t) = nullptr;
static ssize_t (*t_pwrite)(int, const void *, size_t, off_t) = nullptr;
static ssize_t (*t_full_pwrite)(int, const void *, size_t, off_t) = nullptr;
static FILE *(*t_fdopen)(int, const char *) = nullptr;
static FILE *(*t_fopen)(const char *, const char *) = nullptr;
static int (*t_open)(const char *, int, int) = nullptr;
static int (*t_fclose)(FILE *) = nullptr;
static ssize_t (*t_read)(int, void *, size_t) = nullptr;
static ssize_t (*t_pread)(int, void *, size_t, off_t) = nullptr;
static size_t (*os_fwrite_fun)(const void *, size_t, size_t, FILE *) = nullptr;
| 152 | |
| 153 | void toku_set_func_fwrite( |
| 154 | size_t (*fwrite_fun)(const void *, size_t, size_t, FILE *)) { |
| 155 | os_fwrite_fun = fwrite_fun; |
| 156 | } |
| 157 | |
| 158 | void toku_set_func_write(ssize_t (*write_fun)(int, const void *, size_t)) { |
| 159 | t_write = write_fun; |
| 160 | } |
| 161 | |
| 162 | void toku_set_func_full_write (ssize_t (*write_fun)(int, const void *, size_t)) { |
| 163 | t_full_write = write_fun; |
| 164 | } |
| 165 | |
| 166 | void toku_set_func_pwrite (ssize_t (*pwrite_fun)(int, const void *, size_t, off_t)) { |
| 167 | t_pwrite = pwrite_fun; |
| 168 | } |
| 169 | |
| 170 | void toku_set_func_full_pwrite (ssize_t (*pwrite_fun)(int, const void *, size_t, off_t)) { |
| 171 | t_full_pwrite = pwrite_fun; |
| 172 | } |
| 173 | |
| 174 | void toku_set_func_fdopen(FILE * (*fdopen_fun)(int, const char *)) { |
| 175 | t_fdopen = fdopen_fun; |
| 176 | } |
| 177 | |
| 178 | void toku_set_func_fopen(FILE * (*fopen_fun)(const char *, const char *)) { |
| 179 | t_fopen = fopen_fun; |
| 180 | } |
| 181 | |
| 182 | void toku_set_func_open(int (*open_fun)(const char *, int, int)) { |
| 183 | t_open = open_fun; |
| 184 | } |
| 185 | |
| 186 | void toku_set_func_fclose(int (*fclose_fun)(FILE*)) { |
| 187 | t_fclose = fclose_fun; |
| 188 | } |
| 189 | |
| 190 | void toku_set_func_read (ssize_t (*read_fun)(int, void *, size_t)) { |
| 191 | t_read = read_fun; |
| 192 | } |
| 193 | |
| 194 | void toku_set_func_pread (ssize_t (*pread_fun)(int, void *, size_t, off_t)) { |
| 195 | t_pread = pread_fun; |
| 196 | } |
| 197 | |
| 198 | int toku_os_delete_with_source_location(const char *name, |
| 199 | const char *src_file, |
| 200 | uint src_line) { |
| 201 | |
| 202 | toku_io_instrumentation io_annotation; |
| 203 | toku_instr_file_name_close_begin(io_annotation, |
| 204 | *tokudb_file_data_key, |
| 205 | toku_instr_file_op::file_delete, |
| 206 | name, |
| 207 | src_file, |
| 208 | src_line); |
| 209 | const int result = unlink(name); |
| 210 | |
| 211 | /* Register the result value with the instrumentation system */ |
| 212 | toku_instr_file_close_end(io_annotation, result); |
| 213 | |
| 214 | return result; |
| 215 | } |
| 216 | |
| 217 | int toku_os_rename_with_source_location(const char *old_name, |
| 218 | const char *new_name, |
| 219 | const char *src_file, |
| 220 | uint src_line) { |
| 221 | int result; |
| 222 | |
| 223 | toku_io_instrumentation io_annotation; |
| 224 | toku_instr_file_name_io_begin(io_annotation, |
| 225 | *tokudb_file_data_key, |
| 226 | toku_instr_file_op::file_rename, |
| 227 | new_name, |
| 228 | 0, |
| 229 | src_file, |
| 230 | src_line); |
| 231 | |
| 232 | result = rename(old_name, new_name); |
| 233 | /* Regsiter the result value with the instrumentation system */ |
| 234 | toku_instr_file_io_end(io_annotation, 0); |
| 235 | |
| 236 | return result; |
| 237 | } |
| 238 | |
| 239 | void toku_os_full_write_with_source_location(int fd, |
| 240 | const void *buf, |
| 241 | size_t len, |
| 242 | const char *src_file, |
| 243 | uint src_line) { |
| 244 | const char *bp = (const char *)buf; |
| 245 | size_t bytes_written = len; |
| 246 | |
| 247 | toku_io_instrumentation io_annotation; |
| 248 | toku_instr_file_io_begin(io_annotation, |
| 249 | toku_instr_file_op::file_write, |
| 250 | fd, |
| 251 | len, |
| 252 | src_file, |
| 253 | src_line); |
| 254 | |
| 255 | while (len > 0) { |
| 256 | ssize_t r; |
| 257 | if (t_full_write) { |
| 258 | r = t_full_write(fd, bp, len); |
| 259 | } else { |
| 260 | r = write(fd, bp, len); |
| 261 | } |
| 262 | if (r > 0) { |
| 263 | len -= r; |
| 264 | bp += r; |
| 265 | } |
| 266 | else { |
| 267 | try_again_after_handling_write_error(fd, len, r); |
| 268 | } |
| 269 | } |
| 270 | assert(len == 0); |
| 271 | |
| 272 | /* Register the result value with the instrumentaion system */ |
| 273 | toku_instr_file_io_end(io_annotation, bytes_written); |
| 274 | } |
| 275 | |
| 276 | int toku_os_write_with_source_location(int fd, |
| 277 | const void *buf, |
| 278 | size_t len, |
| 279 | const char *src_file, |
| 280 | uint src_line) { |
| 281 | const char *bp = (const char *)buf; |
| 282 | int result = 0; |
| 283 | ssize_t r; |
| 284 | |
| 285 | size_t bytes_written = len; |
| 286 | toku_io_instrumentation io_annotation; |
| 287 | toku_instr_file_io_begin(io_annotation, |
| 288 | toku_instr_file_op::file_write, |
| 289 | fd, |
| 290 | len, |
| 291 | src_file, |
| 292 | src_line); |
| 293 | |
| 294 | while (len > 0) { |
| 295 | if (t_write) { |
| 296 | r = t_write(fd, bp, len); |
| 297 | } else { |
| 298 | r = write(fd, bp, len); |
| 299 | } |
| 300 | if (r < 0) { |
| 301 | result = errno; |
| 302 | break; |
| 303 | } |
| 304 | len -= r; |
| 305 | bp += r; |
| 306 | } |
| 307 | /* Register the result value with the instrumentation system */ |
| 308 | toku_instr_file_io_end(io_annotation, bytes_written - len); |
| 309 | |
| 310 | return result; |
| 311 | } |
| 312 | |
| 313 | void toku_os_full_pwrite_with_source_location(int fd, |
| 314 | const void *buf, |
| 315 | size_t len, |
| 316 | toku_off_t off, |
| 317 | const char *src_file, |
| 318 | uint src_line) { |
| 319 | assert(0 == ((long long)buf) % 512); |
| 320 | assert((len % 512 == 0) && (off % 512) == 0); // to make pwrite work. |
| 321 | const char *bp = (const char *)buf; |
| 322 | |
| 323 | size_t bytes_written = len; |
| 324 | toku_io_instrumentation io_annotation; |
| 325 | toku_instr_file_io_begin(io_annotation, |
| 326 | toku_instr_file_op::file_write, |
| 327 | fd, |
| 328 | len, |
| 329 | src_file, |
| 330 | src_line); |
| 331 | while (len > 0) { |
| 332 | ssize_t r; |
| 333 | if (t_full_pwrite) { |
| 334 | r = t_full_pwrite(fd, bp, len, off); |
| 335 | } else { |
| 336 | r = pwrite(fd, bp, len, off); |
| 337 | } |
| 338 | if (r > 0) { |
| 339 | len -= r; |
| 340 | bp += r; |
| 341 | off += r; |
| 342 | } |
| 343 | else { |
| 344 | try_again_after_handling_write_error(fd, len, r); |
| 345 | } |
| 346 | } |
| 347 | assert(len == 0); |
| 348 | |
| 349 | /* Register the result value with the instrumentation system */ |
| 350 | toku_instr_file_io_end(io_annotation, bytes_written); |
| 351 | } |
| 352 | |
| 353 | ssize_t toku_os_pwrite_with_source_location(int fd, |
| 354 | const void *buf, |
| 355 | size_t len, |
| 356 | toku_off_t off, |
| 357 | const char *src_file, |
| 358 | uint src_line) { |
| 359 | assert(0 == |
| 360 | ((long long)buf) % |
| 361 | 512); // these asserts are to ensure that direct I/O will work. |
| 362 | assert(0 == len % 512); |
| 363 | assert(0 == off % 512); |
| 364 | const char *bp = (const char *)buf; |
| 365 | ssize_t result = 0; |
| 366 | ssize_t r; |
| 367 | |
| 368 | size_t bytes_written = len; |
| 369 | toku_io_instrumentation io_annotation; |
| 370 | toku_instr_file_io_begin(io_annotation, |
| 371 | toku_instr_file_op::file_write, |
| 372 | fd, |
| 373 | len, |
| 374 | src_file, |
| 375 | src_line); |
| 376 | while (len > 0) { |
| 377 | r = (t_pwrite) ? t_pwrite(fd, bp, len, off) : pwrite(fd, bp, len, off); |
| 378 | |
| 379 | if (r < 0) { |
| 380 | result = errno; |
| 381 | break; |
| 382 | } |
| 383 | len -= r; |
| 384 | bp += r; |
| 385 | off += r; |
| 386 | } |
| 387 | /* Register the result value with the instrumentation system */ |
| 388 | toku_instr_file_io_end(io_annotation, bytes_written - len); |
| 389 | |
| 390 | return result; |
| 391 | } |
| 392 | |
| 393 | int toku_os_fwrite_with_source_location(const void *ptr, |
| 394 | size_t size, |
| 395 | size_t nmemb, |
| 396 | TOKU_FILE *stream, |
| 397 | const char *src_file, |
| 398 | uint src_line) { |
| 399 | int result = 0; |
| 400 | size_t bytes_written; |
| 401 | |
| 402 | toku_io_instrumentation io_annotation; |
| 403 | toku_instr_file_stream_io_begin(io_annotation, |
| 404 | toku_instr_file_op::file_write, |
| 405 | *stream, |
| 406 | nmemb, |
| 407 | src_file, |
| 408 | src_line); |
| 409 | |
| 410 | if (os_fwrite_fun) { |
| 411 | bytes_written = os_fwrite_fun(ptr, size, nmemb, stream->file); |
| 412 | } else { |
| 413 | bytes_written = fwrite(ptr, size, nmemb, stream->file); |
| 414 | } |
| 415 | |
| 416 | if (bytes_written != nmemb) { |
| 417 | if (os_fwrite_fun) // if using hook to induce artificial errors (for |
| 418 | // testing) ... |
| 419 | result = get_maybe_error_errno(); // ... then there is no error in |
| 420 | // the stream, but there is one |
| 421 | // in errno |
| 422 | else |
| 423 | result = ferror(stream->file); |
| 424 | invariant(result != 0); // Should we assert here? |
| 425 | } |
| 426 | /* Register the result value with the instrumentation system */ |
| 427 | toku_instr_file_io_end(io_annotation, bytes_written); |
| 428 | |
| 429 | return result; |
| 430 | } |
| 431 | |
| 432 | int toku_os_fread_with_source_location(void *ptr, |
| 433 | size_t size, |
| 434 | size_t nmemb, |
| 435 | TOKU_FILE *stream, |
| 436 | const char *src_file, |
| 437 | uint src_line) { |
| 438 | int result = 0; |
| 439 | size_t bytes_read; |
| 440 | |
| 441 | toku_io_instrumentation io_annotation; |
| 442 | toku_instr_file_stream_io_begin(io_annotation, |
| 443 | toku_instr_file_op::file_read, |
| 444 | *stream, |
| 445 | nmemb, |
| 446 | src_file, |
| 447 | src_line); |
| 448 | |
| 449 | if ((bytes_read = fread(ptr, size, nmemb, stream->file)) != nmemb) { |
| 450 | if ((feof(stream->file))) |
| 451 | result = EOF; |
| 452 | else |
| 453 | result = ferror(stream->file); |
| 454 | invariant(result != 0); // Should we assert here? |
| 455 | } |
| 456 | /* Register the result value with the instrumentation system */ |
| 457 | toku_instr_file_io_end(io_annotation, bytes_read); |
| 458 | |
| 459 | return result; |
| 460 | } |
| 461 | |
| 462 | TOKU_FILE *toku_os_fdopen_with_source_location(int fildes, |
| 463 | const char *mode, |
| 464 | const char *filename, |
| 465 | const toku_instr_key &instr_key, |
| 466 | const char *src_file, |
| 467 | uint src_line) { |
| 468 | TOKU_FILE *XMALLOC(rval); |
| 469 | if (FT_LIKELY(rval != nullptr)) { |
| 470 | toku_io_instrumentation io_annotation; |
| 471 | toku_instr_file_open_begin(io_annotation, |
| 472 | instr_key, |
| 473 | toku_instr_file_op::file_stream_open, |
| 474 | filename, |
| 475 | src_file, |
| 476 | src_line); |
| 477 | |
| 478 | rval->file = (t_fdopen) ? t_fdopen(fildes, mode) : fdopen(fildes, mode); |
| 479 | toku_instr_file_stream_open_end(io_annotation, *rval); |
| 480 | |
| 481 | if (FT_UNLIKELY(rval->file == nullptr)) { |
| 482 | toku_free(rval); |
| 483 | rval = nullptr; |
| 484 | } |
| 485 | } |
| 486 | return rval; |
| 487 | } |
| 488 | |
| 489 | TOKU_FILE *toku_os_fopen_with_source_location(const char *filename, |
| 490 | const char *mode, |
| 491 | const toku_instr_key &instr_key, |
| 492 | const char *src_file, |
| 493 | uint src_line) { |
| 494 | TOKU_FILE *XMALLOC(rval); |
| 495 | if (FT_UNLIKELY(rval == nullptr)) |
| 496 | return nullptr; |
| 497 | |
| 498 | toku_io_instrumentation io_annotation; |
| 499 | toku_instr_file_open_begin(io_annotation, |
| 500 | instr_key, |
| 501 | toku_instr_file_op::file_stream_open, |
| 502 | filename, |
| 503 | src_file, |
| 504 | src_line); |
| 505 | rval->file = t_fopen ? t_fopen(filename, mode) : fopen(filename, mode); |
| 506 | /* Register the returning "file" value with the system */ |
| 507 | toku_instr_file_stream_open_end(io_annotation, *rval); |
| 508 | |
| 509 | if (FT_UNLIKELY(rval->file == nullptr)) { |
| 510 | toku_free(rval); |
| 511 | rval = nullptr; |
| 512 | } |
| 513 | return rval; |
| 514 | } |
| 515 | |
| 516 | int toku_os_open_with_source_location(const char *path, |
| 517 | int oflag, |
| 518 | int mode, |
| 519 | const toku_instr_key &instr_key, |
| 520 | const char *src_file, |
| 521 | uint src_line) { |
| 522 | int fd; |
| 523 | toku_io_instrumentation io_annotation; |
| 524 | /* register a file open or creation depending on "oflag" */ |
| 525 | toku_instr_file_open_begin( |
| 526 | io_annotation, |
| 527 | instr_key, |
| 528 | ((oflag & O_CREAT) ? toku_instr_file_op::file_create |
| 529 | : toku_instr_file_op::file_open), |
| 530 | path, |
| 531 | src_file, |
| 532 | src_line); |
| 533 | if (t_open) |
| 534 | fd = t_open(path, oflag, mode); |
| 535 | else |
| 536 | fd = open(path, oflag, mode); |
| 537 | |
| 538 | toku_instr_file_open_end(io_annotation, fd); |
| 539 | return fd; |
| 540 | } |
| 541 | |
| 542 | int toku_os_open_direct(const char *path, |
| 543 | int oflag, |
| 544 | int mode, |
| 545 | const toku_instr_key &instr_key) { |
| 546 | int rval; |
| 547 | #if defined(HAVE_O_DIRECT) |
| 548 | rval = toku_os_open(path, oflag | O_DIRECT, mode, instr_key); |
| 549 | #elif defined(HAVE_F_NOCACHE) |
| 550 | rval = toku_os_open(path, oflag, mode, instr_key); |
| 551 | if (rval >= 0) { |
| 552 | int r = fcntl(rval, F_NOCACHE, 1); |
| 553 | if (r == -1) { |
| 554 | perror("setting F_NOCACHE" ); |
| 555 | } |
| 556 | } |
| 557 | #else |
| 558 | # error "No direct I/O implementation found." |
| 559 | #endif |
| 560 | return rval; |
| 561 | } |
| 562 | |
| 563 | int toku_os_fclose_with_source_location(TOKU_FILE *stream, |
| 564 | const char *src_file, |
| 565 | uint src_line) { |
| 566 | int rval = -1; |
| 567 | if (FT_LIKELY(stream != nullptr)) { |
| 568 | /* register a file stream close " */ |
| 569 | toku_io_instrumentation io_annotation; |
| 570 | toku_instr_file_stream_close_begin( |
| 571 | io_annotation, |
| 572 | toku_instr_file_op::file_stream_close, |
| 573 | *stream, |
| 574 | src_file, |
| 575 | src_line); |
| 576 | |
| 577 | if (t_fclose) |
| 578 | rval = t_fclose(stream->file); |
| 579 | else { // if EINTR, retry until success |
| 580 | while (rval != 0) { |
| 581 | rval = fclose(stream->file); |
| 582 | if (rval && (errno != EINTR)) |
| 583 | break; |
| 584 | } |
| 585 | } |
| 586 | /* Register the returning "rval" value with the system */ |
| 587 | toku_instr_file_close_end(io_annotation, rval); |
| 588 | toku_free(stream); |
| 589 | stream = nullptr; |
| 590 | } |
| 591 | return rval; |
| 592 | } |
| 593 | |
| 594 | int toku_os_close_with_source_location( |
| 595 | int fd, |
| 596 | const char *src_file, |
| 597 | uint src_line) { // if EINTR, retry until success |
| 598 | /* register the file close */ |
| 599 | int r = -1; |
| 600 | |
| 601 | /* register a file descriptor close " */ |
| 602 | toku_io_instrumentation io_annotation; |
| 603 | toku_instr_file_fd_close_begin( |
| 604 | io_annotation, toku_instr_file_op::file_close, fd, src_file, src_line); |
| 605 | while (r != 0) { |
| 606 | r = close(fd); |
| 607 | if (r) { |
| 608 | int rr = errno; |
| 609 | if (rr != EINTR) |
| 610 | printf("rr=%d (%s)\n" , rr, strerror(rr)); |
| 611 | assert(rr == EINTR); |
| 612 | } |
| 613 | } |
| 614 | |
| 615 | /* Regsiter the returning value with the system */ |
| 616 | toku_instr_file_close_end(io_annotation, r); |
| 617 | |
| 618 | return r; |
| 619 | } |
| 620 | |
| 621 | ssize_t toku_os_read_with_source_location(int fd, |
| 622 | void *buf, |
| 623 | size_t count, |
| 624 | const char *src_file, |
| 625 | uint src_line) { |
| 626 | ssize_t bytes_read; |
| 627 | |
| 628 | toku_io_instrumentation io_annotation; |
| 629 | toku_instr_file_io_begin(io_annotation, |
| 630 | toku_instr_file_op::file_read, |
| 631 | fd, |
| 632 | count, |
| 633 | src_file, |
| 634 | src_line); |
| 635 | |
| 636 | bytes_read = (t_read) ? t_read(fd, buf, count) : read(fd, buf, count); |
| 637 | |
| 638 | toku_instr_file_io_end(io_annotation, bytes_read); |
| 639 | |
| 640 | return bytes_read; |
| 641 | } |
| 642 | |
| 643 | ssize_t inline_toku_os_pread_with_source_location(int fd, |
| 644 | void *buf, |
| 645 | size_t count, |
| 646 | off_t offset, |
| 647 | const char *src_file, |
| 648 | uint src_line) { |
| 649 | assert(0 == ((long long)buf) % 512); |
| 650 | assert(0 == count % 512); |
| 651 | assert(0 == offset % 512); |
| 652 | ssize_t bytes_read; |
| 653 | |
| 654 | toku_io_instrumentation io_annotation; |
| 655 | toku_instr_file_io_begin(io_annotation, |
| 656 | toku_instr_file_op::file_read, |
| 657 | fd, |
| 658 | count, |
| 659 | src_file, |
| 660 | src_line); |
| 661 | if (t_pread) { |
| 662 | bytes_read = t_pread(fd, buf, count, offset); |
| 663 | } else { |
| 664 | bytes_read = pread(fd, buf, count, offset); |
| 665 | } |
| 666 | toku_instr_file_io_end(io_annotation, bytes_read); |
| 667 | |
| 668 | return bytes_read; |
| 669 | } |
| 670 | |
| 671 | void toku_os_recursive_delete(const char *path) { |
| 672 | char buf[TOKU_PATH_MAX + sizeof("rm -rf " )]; |
| 673 | strcpy(buf, "rm -rf " ); |
| 674 | strncat(buf, path, TOKU_PATH_MAX); |
| 675 | int r = system(buf); |
| 676 | assert_zero(r); |
| 677 | } |
| 678 | |
// fsync logic:

// Replacement for fsync(); t_fsync exists for testing purposes only.
static int (*t_fsync)(int) = 0;

// Number of completed fsyncs and the total time spent in them.
static uint64_t toku_fsync_count;
static uint64_t toku_fsync_time;

// An fsync whose duration reaches this threshold counts as "long".  Durations
// are differences of toku_current_time_microsec() values.
static uint64_t toku_long_fsync_threshold = 1000000;

// Count, total duration and total EINTR retries of the long fsyncs.
static uint64_t toku_long_fsync_count;
static uint64_t toku_long_fsync_time;
static uint64_t toku_long_fsync_eintr_count;

// Nonzero: print a line to stderr for every long fsync.
static int toku_fsync_debug = 0;
| 690 | |
| 691 | void toku_set_func_fsync(int (*fsync_function)(int)) { |
| 692 | t_fsync = fsync_function; |
| 693 | } |
| 694 | |
| 695 | // keep trying if fsync fails because of EINTR |
| 696 | void file_fsync_internal_with_source_location(int fd, |
| 697 | const char *src_file, |
| 698 | uint src_line) { |
| 699 | uint64_t tstart = toku_current_time_microsec(); |
| 700 | int r = -1; |
| 701 | uint64_t eintr_count = 0; |
| 702 | |
| 703 | toku_io_instrumentation io_annotation; |
| 704 | toku_instr_file_io_begin(io_annotation, |
| 705 | toku_instr_file_op::file_sync, |
| 706 | fd, |
| 707 | 0, |
| 708 | src_file, |
| 709 | src_line); |
| 710 | |
| 711 | while (r != 0) { |
| 712 | if (t_fsync) { |
| 713 | r = t_fsync(fd); |
| 714 | } else { |
| 715 | r = fsync(fd); |
| 716 | } |
| 717 | if (r) { |
| 718 | assert(get_error_errno() == EINTR); |
| 719 | eintr_count++; |
| 720 | } |
| 721 | } |
| 722 | toku_sync_fetch_and_add(&toku_fsync_count, 1); |
| 723 | uint64_t duration = toku_current_time_microsec() - tstart; |
| 724 | toku_sync_fetch_and_add(&toku_fsync_time, duration); |
| 725 | |
| 726 | toku_instr_file_io_end(io_annotation, 0); |
| 727 | |
| 728 | if (duration >= toku_long_fsync_threshold) { |
| 729 | toku_sync_fetch_and_add(&toku_long_fsync_count, 1); |
| 730 | toku_sync_fetch_and_add(&toku_long_fsync_time, duration); |
| 731 | toku_sync_fetch_and_add(&toku_long_fsync_eintr_count, eintr_count); |
| 732 | if (toku_fsync_debug) { |
| 733 | const int tstr_length = 26; |
| 734 | char tstr[tstr_length]; |
| 735 | time_t t = time(0); |
| 736 | #if __linux__ |
| 737 | char fdname[256]; |
| 738 | snprintf(fdname, sizeof fdname, "/proc/%d/fd/%d" , getpid(), fd); |
| 739 | char lname[256]; |
| 740 | ssize_t s = readlink(fdname, lname, sizeof lname); |
| 741 | if (0 < s && s < (ssize_t) sizeof lname) |
| 742 | lname[s] = 0; |
| 743 | fprintf(stderr, "%.24s toku_file_fsync %s fd=%d %s duration=%" PRIu64 " usec eintr=%" PRIu64 "\n" , |
| 744 | ctime_r(&t, tstr), __FUNCTION__, fd, s > 0 ? lname : "?" , duration, eintr_count); |
| 745 | #else |
| 746 | fprintf(stderr, "%.24s toku_file_fsync %s fd=%d duration=%" PRIu64 " usec eintr=%" PRIu64 "\n" , |
| 747 | ctime_r(&t, tstr), __FUNCTION__, fd, duration, eintr_count); |
| 748 | #endif |
| 749 | fflush(stderr); |
| 750 | } |
| 751 | } |
| 752 | } |
| 753 | |
| 754 | void toku_file_fsync_without_accounting(int fd) { |
| 755 | file_fsync_internal(fd); |
| 756 | } |
| 757 | |
| 758 | void toku_fsync_dirfd_without_accounting(DIR *dir) { |
| 759 | int fd = dirfd(dir); |
| 760 | toku_file_fsync_without_accounting(fd); |
| 761 | } |
| 762 | |
| 763 | int toku_fsync_dir_by_name_without_accounting(const char *dir_name) { |
| 764 | int r = 0; |
| 765 | DIR * dir = opendir(dir_name); |
| 766 | if (!dir) { |
| 767 | r = get_error_errno(); |
| 768 | } else { |
| 769 | toku_fsync_dirfd_without_accounting(dir); |
| 770 | r = closedir(dir); |
| 771 | if (r != 0) { |
| 772 | r = get_error_errno(); |
| 773 | } |
| 774 | } |
| 775 | return r; |
| 776 | } |
| 777 | |
| 778 | // include fsync in scheduling accounting |
| 779 | void toku_file_fsync(int fd) { |
| 780 | file_fsync_internal (fd); |
| 781 | } |
| 782 | |
| 783 | // for real accounting |
| 784 | void toku_get_fsync_times(uint64_t *fsync_count, uint64_t *fsync_time, uint64_t *long_fsync_threshold, uint64_t *long_fsync_count, uint64_t *long_fsync_time) { |
| 785 | *fsync_count = toku_fsync_count; |
| 786 | *fsync_time = toku_fsync_time; |
| 787 | *long_fsync_threshold = toku_long_fsync_threshold; |
| 788 | *long_fsync_count = toku_long_fsync_count; |
| 789 | *long_fsync_time = toku_long_fsync_time; |
| 790 | } |
| 791 | |
| 792 | int toku_fsync_directory(const char *fname) { |
| 793 | int result = 0; |
| 794 | |
| 795 | // extract dirname from fname |
| 796 | const char *sp = strrchr(fname, '/'); |
| 797 | size_t len; |
| 798 | char *dirname = NULL; |
| 799 | if (sp) { |
| 800 | resource_assert(sp >= fname); |
| 801 | len = sp - fname + 1; |
| 802 | MALLOC_N(len+1, dirname); |
| 803 | if (dirname == NULL) { |
| 804 | result = get_error_errno(); |
| 805 | } else { |
| 806 | strncpy(dirname, fname, len); |
| 807 | dirname[len] = 0; |
| 808 | } |
| 809 | } else { |
| 810 | dirname = toku_strdup("." ); |
| 811 | if (dirname == NULL) { |
| 812 | result = get_error_errno(); |
| 813 | } |
| 814 | } |
| 815 | |
| 816 | if (result == 0) { |
| 817 | result = toku_fsync_dir_by_name_without_accounting(dirname); |
| 818 | } |
| 819 | toku_free(dirname); |
| 820 | return result; |
| 821 | } |
| 822 | |