1 | /* |
2 | * pg_test_fsync.c |
3 | * tests all supported fsync() methods |
4 | */ |
5 | |
#include "postgres_fe.h"

#include <sys/stat.h>
#include <sys/time.h>
#include <fcntl.h>
#include <limits.h>
#include <time.h>
#include <unistd.h>
#include <signal.h>

#include "getopt_long.h"
#include "access/xlogdefs.h"
#include "common/logging.h"
18 | |
19 | |
/*
 * put the temp files in the local directory
 * unless the user specifies otherwise
 */
#define FSYNC_FILENAME "./pg_test_fsync.out"

/* XLOG_BLCKSZ expressed in kilobytes, used in the report headings */
#define XLOG_BLCKSZ_K (XLOG_BLCKSZ / 1024)

/* printf format for the label that starts each result line */
#define LABEL_FORMAT " %-30s"
/* printf format for the "n/a" text printed when a method is not measured */
#define NA_FORMAT "%21s\n"
/* translator: maintain alignment with NA_FORMAT */
#define OPS_FORMAT gettext_noop("%13.3f ops/sec %6.0f usecs/op\n")
/* microseconds per second, used to convert seconds/op into usecs/op */
#define USECS_SEC 1000000
33 | |
/*
 * START_TIMER / STOP_TIMER bracket every timed test loop.
 *
 * These are macros to avoid timing the function call overhead.  They rely
 * on names in the expanding scope: START_TIMER uses the file-level
 * alarm_triggered, secs_per_test and start_t; STOP_TIMER uses start_t,
 * stop_t and a local variable that must be called "ops".
 *
 * START_TIMER clears alarm_triggered, arranges for it to be set again after
 * secs_per_test seconds, and records the start time.
 */
#ifndef WIN32
#define START_TIMER	\
do { \
	alarm_triggered = false; \
	alarm(secs_per_test); \
	gettimeofday(&start_t, NULL); \
} while (0)
#else
/*
 * WIN32 doesn't support alarm, so we create a thread and sleep there.
 *
 * CreateThread() reports failure by returning NULL, not
 * INVALID_HANDLE_VALUE, so that is what we must test for.  The handle is
 * closed right away because nothing waits on the thread; closing the handle
 * does not stop the thread, it only avoids leaking one handle per test.
 */
#define START_TIMER	\
do { \
	HANDLE		alarm_thread; \
	alarm_triggered = false; \
	alarm_thread = CreateThread(NULL, 0, process_alarm, NULL, 0, NULL); \
	if (alarm_thread == NULL) \
	{ \
		pg_log_error("could not create thread for alarm"); \
		exit(1); \
	} \
	CloseHandle(alarm_thread); \
	gettimeofday(&start_t, NULL); \
} while (0)
#endif

/* Record the stop time and print the throughput for "ops" operations. */
#define STOP_TIMER	\
do { \
	gettimeofday(&stop_t, NULL); \
	print_elapse(start_t, stop_t, ops); \
} while (0)
62 | |
63 | |
64 | static const char *progname; |
65 | |
66 | static int secs_per_test = 5; |
67 | static int needs_unlink = 0; |
68 | static char full_buf[DEFAULT_XLOG_SEG_SIZE], |
69 | *buf, |
70 | *filename = FSYNC_FILENAME; |
71 | static struct timeval start_t, |
72 | stop_t; |
73 | static bool alarm_triggered = false; |
74 | |
75 | |
/* command-line processing and buffer setup */
static void handle_args(int argc, char *argv[]);
static void prepare_buf(void);
/* the individual test groups, run in sequence from main() */
static void test_open(void);
static void test_non_sync(void);
static void test_sync(int writes_per_op);
static void test_open_syncs(void);
static void test_open_sync(const char *msg, int writes_size);
static void test_file_descriptor_sync(void);

/* timer expiry: a signal handler on Unix, a thread routine on WIN32 */
#ifndef WIN32
static void process_alarm(int sig);
#else
static DWORD WINAPI process_alarm(LPVOID param);
#endif
/* handler that removes the test file when the program is interrupted */
static void signal_cleanup(int sig);

#ifdef HAVE_FSYNC_WRITETHROUGH
static int pg_fsync_writethrough(int fd);
#endif
static void print_elapse(struct timeval start_t, struct timeval stop_t, int ops);

/* Log the translated message followed by the %m error text, then exit(1). */
#define die(msg) do { pg_log_error("%s: %m", _(msg)); exit(1); } while(0)
98 | |
99 | |
100 | int |
101 | main(int argc, char *argv[]) |
102 | { |
103 | pg_logging_init(argv[0]); |
104 | set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("pg_test_fsync" )); |
105 | progname = get_progname(argv[0]); |
106 | |
107 | handle_args(argc, argv); |
108 | |
109 | /* Prevent leaving behind the test file */ |
110 | pqsignal(SIGINT, signal_cleanup); |
111 | pqsignal(SIGTERM, signal_cleanup); |
112 | #ifndef WIN32 |
113 | pqsignal(SIGALRM, process_alarm); |
114 | #endif |
115 | #ifdef SIGHUP |
116 | /* Not defined on win32 */ |
117 | pqsignal(SIGHUP, signal_cleanup); |
118 | #endif |
119 | |
120 | prepare_buf(); |
121 | |
122 | test_open(); |
123 | |
124 | /* Test using 1 XLOG_BLCKSZ write */ |
125 | test_sync(1); |
126 | |
127 | /* Test using 2 XLOG_BLCKSZ writes */ |
128 | test_sync(2); |
129 | |
130 | test_open_syncs(); |
131 | |
132 | test_file_descriptor_sync(); |
133 | |
134 | test_non_sync(); |
135 | |
136 | unlink(filename); |
137 | |
138 | return 0; |
139 | } |
140 | |
141 | static void |
142 | handle_args(int argc, char *argv[]) |
143 | { |
144 | static struct option long_options[] = { |
145 | {"filename" , required_argument, NULL, 'f'}, |
146 | {"secs-per-test" , required_argument, NULL, 's'}, |
147 | {NULL, 0, NULL, 0} |
148 | }; |
149 | |
150 | int option; /* Command line option */ |
151 | int optindex = 0; /* used by getopt_long */ |
152 | |
153 | if (argc > 1) |
154 | { |
155 | if (strcmp(argv[1], "--help" ) == 0 || strcmp(argv[1], "-?" ) == 0) |
156 | { |
157 | printf(_("Usage: %s [-f FILENAME] [-s SECS-PER-TEST]\n" ), progname); |
158 | exit(0); |
159 | } |
160 | if (strcmp(argv[1], "--version" ) == 0 || strcmp(argv[1], "-V" ) == 0) |
161 | { |
162 | puts("pg_test_fsync (PostgreSQL) " PG_VERSION); |
163 | exit(0); |
164 | } |
165 | } |
166 | |
167 | while ((option = getopt_long(argc, argv, "f:s:" , |
168 | long_options, &optindex)) != -1) |
169 | { |
170 | switch (option) |
171 | { |
172 | case 'f': |
173 | filename = pg_strdup(optarg); |
174 | break; |
175 | |
176 | case 's': |
177 | secs_per_test = atoi(optarg); |
178 | break; |
179 | |
180 | default: |
181 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), |
182 | progname); |
183 | exit(1); |
184 | break; |
185 | } |
186 | } |
187 | |
188 | if (argc > optind) |
189 | { |
190 | pg_log_error("too many command-line arguments (first is \"%s\")" , |
191 | argv[optind]); |
192 | fprintf(stderr, _("Try \"%s --help\" for more information.\n" ), |
193 | progname); |
194 | exit(1); |
195 | } |
196 | |
197 | printf(ngettext("%d second per test\n" , |
198 | "%d seconds per test\n" , |
199 | secs_per_test), |
200 | secs_per_test); |
201 | #if PG_O_DIRECT != 0 |
202 | printf(_("O_DIRECT supported on this platform for open_datasync and open_sync.\n" )); |
203 | #else |
204 | printf(_("Direct I/O is not supported on this platform.\n" )); |
205 | #endif |
206 | } |
207 | |
208 | static void |
209 | prepare_buf(void) |
210 | { |
211 | int ops; |
212 | |
213 | /* write random data into buffer */ |
214 | for (ops = 0; ops < DEFAULT_XLOG_SEG_SIZE; ops++) |
215 | full_buf[ops] = random(); |
216 | |
217 | buf = (char *) TYPEALIGN(XLOG_BLCKSZ, full_buf); |
218 | } |
219 | |
220 | static void |
221 | test_open(void) |
222 | { |
223 | int tmpfile; |
224 | |
225 | /* |
226 | * test if we can open the target file |
227 | */ |
228 | if ((tmpfile = open(filename, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR)) == -1) |
229 | die("could not open output file" ); |
230 | needs_unlink = 1; |
231 | if (write(tmpfile, full_buf, DEFAULT_XLOG_SEG_SIZE) != |
232 | DEFAULT_XLOG_SEG_SIZE) |
233 | die("write failed" ); |
234 | |
235 | /* fsync now so that dirty buffers don't skew later tests */ |
236 | if (fsync(tmpfile) != 0) |
237 | die("fsync failed" ); |
238 | |
239 | close(tmpfile); |
240 | } |
241 | |
242 | static void |
243 | test_sync(int writes_per_op) |
244 | { |
245 | int tmpfile, |
246 | ops, |
247 | writes; |
248 | bool fs_warning = false; |
249 | |
250 | if (writes_per_op == 1) |
251 | printf(_("\nCompare file sync methods using one %dkB write:\n" ), XLOG_BLCKSZ_K); |
252 | else |
253 | printf(_("\nCompare file sync methods using two %dkB writes:\n" ), XLOG_BLCKSZ_K); |
254 | printf(_("(in wal_sync_method preference order, except fdatasync is Linux's default)\n" )); |
255 | |
256 | /* |
257 | * Test open_datasync if available |
258 | */ |
259 | printf(LABEL_FORMAT, "open_datasync" ); |
260 | fflush(stdout); |
261 | |
262 | #ifdef OPEN_DATASYNC_FLAG |
263 | if ((tmpfile = open(filename, O_RDWR | O_DSYNC | PG_O_DIRECT, 0)) == -1) |
264 | { |
265 | printf(NA_FORMAT, _("n/a*" )); |
266 | fs_warning = true; |
267 | } |
268 | else |
269 | { |
270 | START_TIMER; |
271 | for (ops = 0; alarm_triggered == false; ops++) |
272 | { |
273 | for (writes = 0; writes < writes_per_op; writes++) |
274 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
275 | die("write failed" ); |
276 | if (lseek(tmpfile, 0, SEEK_SET) == -1) |
277 | die("seek failed" ); |
278 | } |
279 | STOP_TIMER; |
280 | close(tmpfile); |
281 | } |
282 | #else |
283 | printf(NA_FORMAT, _("n/a" )); |
284 | #endif |
285 | |
286 | /* |
287 | * Test fdatasync if available |
288 | */ |
289 | printf(LABEL_FORMAT, "fdatasync" ); |
290 | fflush(stdout); |
291 | |
292 | #ifdef HAVE_FDATASYNC |
293 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
294 | die("could not open output file" ); |
295 | START_TIMER; |
296 | for (ops = 0; alarm_triggered == false; ops++) |
297 | { |
298 | for (writes = 0; writes < writes_per_op; writes++) |
299 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
300 | die("write failed" ); |
301 | fdatasync(tmpfile); |
302 | if (lseek(tmpfile, 0, SEEK_SET) == -1) |
303 | die("seek failed" ); |
304 | } |
305 | STOP_TIMER; |
306 | close(tmpfile); |
307 | #else |
308 | printf(NA_FORMAT, _("n/a" )); |
309 | #endif |
310 | |
311 | /* |
312 | * Test fsync |
313 | */ |
314 | printf(LABEL_FORMAT, "fsync" ); |
315 | fflush(stdout); |
316 | |
317 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
318 | die("could not open output file" ); |
319 | START_TIMER; |
320 | for (ops = 0; alarm_triggered == false; ops++) |
321 | { |
322 | for (writes = 0; writes < writes_per_op; writes++) |
323 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
324 | die("write failed" ); |
325 | if (fsync(tmpfile) != 0) |
326 | die("fsync failed" ); |
327 | if (lseek(tmpfile, 0, SEEK_SET) == -1) |
328 | die("seek failed" ); |
329 | } |
330 | STOP_TIMER; |
331 | close(tmpfile); |
332 | |
333 | /* |
334 | * If fsync_writethrough is available, test as well |
335 | */ |
336 | printf(LABEL_FORMAT, "fsync_writethrough" ); |
337 | fflush(stdout); |
338 | |
339 | #ifdef HAVE_FSYNC_WRITETHROUGH |
340 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
341 | die("could not open output file" ); |
342 | START_TIMER; |
343 | for (ops = 0; alarm_triggered == false; ops++) |
344 | { |
345 | for (writes = 0; writes < writes_per_op; writes++) |
346 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
347 | die("write failed" ); |
348 | if (pg_fsync_writethrough(tmpfile) != 0) |
349 | die("fsync failed" ); |
350 | if (lseek(tmpfile, 0, SEEK_SET) == -1) |
351 | die("seek failed" ); |
352 | } |
353 | STOP_TIMER; |
354 | close(tmpfile); |
355 | #else |
356 | printf(NA_FORMAT, _("n/a" )); |
357 | #endif |
358 | |
359 | /* |
360 | * Test open_sync if available |
361 | */ |
362 | printf(LABEL_FORMAT, "open_sync" ); |
363 | fflush(stdout); |
364 | |
365 | #ifdef OPEN_SYNC_FLAG |
366 | if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) |
367 | { |
368 | printf(NA_FORMAT, _("n/a*" )); |
369 | fs_warning = true; |
370 | } |
371 | else |
372 | { |
373 | START_TIMER; |
374 | for (ops = 0; alarm_triggered == false; ops++) |
375 | { |
376 | for (writes = 0; writes < writes_per_op; writes++) |
377 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
378 | |
379 | /* |
380 | * This can generate write failures if the filesystem has |
381 | * a large block size, e.g. 4k, and there is no support |
382 | * for O_DIRECT writes smaller than the file system block |
383 | * size, e.g. XFS. |
384 | */ |
385 | die("write failed" ); |
386 | if (lseek(tmpfile, 0, SEEK_SET) == -1) |
387 | die("seek failed" ); |
388 | } |
389 | STOP_TIMER; |
390 | close(tmpfile); |
391 | } |
392 | #else |
393 | printf(NA_FORMAT, _("n/a" )); |
394 | #endif |
395 | |
396 | if (fs_warning) |
397 | { |
398 | printf(_("* This file system and its mount options do not support direct\n" |
399 | " I/O, e.g. ext4 in journaled mode.\n" )); |
400 | } |
401 | } |
402 | |
403 | static void |
404 | test_open_syncs(void) |
405 | { |
406 | printf(_("\nCompare open_sync with different write sizes:\n" )); |
407 | printf(_("(This is designed to compare the cost of writing 16kB in different write\n" |
408 | "open_sync sizes.)\n" )); |
409 | |
410 | test_open_sync(_(" 1 * 16kB open_sync write" ), 16); |
411 | test_open_sync(_(" 2 * 8kB open_sync writes" ), 8); |
412 | test_open_sync(_(" 4 * 4kB open_sync writes" ), 4); |
413 | test_open_sync(_(" 8 * 2kB open_sync writes" ), 2); |
414 | test_open_sync(_("16 * 1kB open_sync writes" ), 1); |
415 | } |
416 | |
417 | /* |
418 | * Test open_sync with different size files |
419 | */ |
420 | static void |
421 | test_open_sync(const char *msg, int writes_size) |
422 | { |
423 | #ifdef OPEN_SYNC_FLAG |
424 | int tmpfile, |
425 | ops, |
426 | writes; |
427 | #endif |
428 | |
429 | printf(LABEL_FORMAT, msg); |
430 | fflush(stdout); |
431 | |
432 | #ifdef OPEN_SYNC_FLAG |
433 | if ((tmpfile = open(filename, O_RDWR | OPEN_SYNC_FLAG | PG_O_DIRECT, 0)) == -1) |
434 | printf(NA_FORMAT, _("n/a*" )); |
435 | else |
436 | { |
437 | START_TIMER; |
438 | for (ops = 0; alarm_triggered == false; ops++) |
439 | { |
440 | for (writes = 0; writes < 16 / writes_size; writes++) |
441 | if (write(tmpfile, buf, writes_size * 1024) != |
442 | writes_size * 1024) |
443 | die("write failed" ); |
444 | if (lseek(tmpfile, 0, SEEK_SET) == -1) |
445 | die("seek failed" ); |
446 | } |
447 | STOP_TIMER; |
448 | close(tmpfile); |
449 | } |
450 | #else |
451 | printf(NA_FORMAT, _("n/a" )); |
452 | #endif |
453 | } |
454 | |
455 | static void |
456 | test_file_descriptor_sync(void) |
457 | { |
458 | int tmpfile, |
459 | ops; |
460 | |
461 | /* |
462 | * Test whether fsync can sync data written on a different descriptor for |
463 | * the same file. This checks the efficiency of multi-process fsyncs |
464 | * against the same file. Possibly this should be done with writethrough |
465 | * on platforms which support it. |
466 | */ |
467 | printf(_("\nTest if fsync on non-write file descriptor is honored:\n" )); |
468 | printf(_("(If the times are similar, fsync() can sync data written on a different\n" |
469 | "descriptor.)\n" )); |
470 | |
471 | /* |
472 | * first write, fsync and close, which is the normal behavior without |
473 | * multiple descriptors |
474 | */ |
475 | printf(LABEL_FORMAT, "write, fsync, close" ); |
476 | fflush(stdout); |
477 | |
478 | START_TIMER; |
479 | for (ops = 0; alarm_triggered == false; ops++) |
480 | { |
481 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
482 | die("could not open output file" ); |
483 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
484 | die("write failed" ); |
485 | if (fsync(tmpfile) != 0) |
486 | die("fsync failed" ); |
487 | close(tmpfile); |
488 | |
489 | /* |
490 | * open and close the file again to be consistent with the following |
491 | * test |
492 | */ |
493 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
494 | die("could not open output file" ); |
495 | close(tmpfile); |
496 | } |
497 | STOP_TIMER; |
498 | |
499 | /* |
500 | * Now open, write, close, open again and fsync This simulates processes |
501 | * fsyncing each other's writes. |
502 | */ |
503 | printf(LABEL_FORMAT, "write, close, fsync" ); |
504 | fflush(stdout); |
505 | |
506 | START_TIMER; |
507 | for (ops = 0; alarm_triggered == false; ops++) |
508 | { |
509 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
510 | die("could not open output file" ); |
511 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
512 | die("write failed" ); |
513 | close(tmpfile); |
514 | /* reopen file */ |
515 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
516 | die("could not open output file" ); |
517 | if (fsync(tmpfile) != 0) |
518 | die("fsync failed" ); |
519 | close(tmpfile); |
520 | } |
521 | STOP_TIMER; |
522 | } |
523 | |
524 | static void |
525 | test_non_sync(void) |
526 | { |
527 | int tmpfile, |
528 | ops; |
529 | |
530 | /* |
531 | * Test a simple write without fsync |
532 | */ |
533 | printf(_("\nNon-sync'ed %dkB writes:\n" ), XLOG_BLCKSZ_K); |
534 | printf(LABEL_FORMAT, "write" ); |
535 | fflush(stdout); |
536 | |
537 | START_TIMER; |
538 | for (ops = 0; alarm_triggered == false; ops++) |
539 | { |
540 | if ((tmpfile = open(filename, O_RDWR, 0)) == -1) |
541 | die("could not open output file" ); |
542 | if (write(tmpfile, buf, XLOG_BLCKSZ) != XLOG_BLCKSZ) |
543 | die("write failed" ); |
544 | close(tmpfile); |
545 | } |
546 | STOP_TIMER; |
547 | } |
548 | |
549 | static void |
550 | signal_cleanup(int signum) |
551 | { |
552 | /* Delete the file if it exists. Ignore errors */ |
553 | if (needs_unlink) |
554 | unlink(filename); |
555 | /* Finish incomplete line on stdout */ |
556 | puts("" ); |
557 | exit(signum); |
558 | } |
559 | |
#ifdef HAVE_FSYNC_WRITETHROUGH

/*
 * Sync fd using the platform's write-through facility: _commit() on WIN32,
 * fcntl(F_FULLFSYNC) where that is defined.  Returns 0 on success and -1 on
 * failure; where neither facility exists, fails with errno set to ENOSYS.
 */
static int
pg_fsync_writethrough(int fd)
{
#if defined(WIN32)
	return _commit(fd);
#elif defined(F_FULLFSYNC)
	if (fcntl(fd, F_FULLFSYNC, 0) == -1)
		return -1;
	return 0;
#else
	errno = ENOSYS;
	return -1;
#endif
}
#endif
575 | |
576 | /* |
577 | * print out the writes per second for tests |
578 | */ |
579 | static void |
580 | print_elapse(struct timeval start_t, struct timeval stop_t, int ops) |
581 | { |
582 | double total_time = (stop_t.tv_sec - start_t.tv_sec) + |
583 | (stop_t.tv_usec - start_t.tv_usec) * 0.000001; |
584 | double per_second = ops / total_time; |
585 | double avg_op_time_us = (total_time / ops) * USECS_SEC; |
586 | |
587 | printf(_(OPS_FORMAT), per_second, avg_op_time_us); |
588 | } |
589 | |
590 | #ifndef WIN32 |
591 | static void |
592 | process_alarm(int sig) |
593 | { |
594 | alarm_triggered = true; |
595 | } |
596 | #else |
597 | static DWORD WINAPI |
598 | process_alarm(LPVOID param) |
599 | { |
600 | /* WIN32 doesn't support alarm, so we create a thread and sleep here */ |
601 | Sleep(secs_per_test * 1000); |
602 | alarm_triggered = true; |
603 | ExitThread(0); |
604 | } |
605 | #endif |
606 | |