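/* benchmark.h
 * Timing macros (RDTSC_START / RDTSC_FINAL) and the BEST_TIME reporting
 * macros built on top of them.
 */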
3
4#ifndef BENCHMARKS_INCLUDE_BENCHMARK_H_
5#define BENCHMARKS_INCLUDE_BENCHMARK_H_
6#include <roaring/portability.h>
7#include <time.h>
8
/*
 * CLOBBER_MEMORY: statement-position macro used inside the timing loops.
 * When ROARING_INLINE_ASM is defined it expands to an empty asm statement
 * that names "memory" in its clobber list (it pretends to clobber memory);
 * otherwise it expands to nothing.
 */
#if defined(ROARING_INLINE_ASM)
#define CLOBBER_MEMORY \
    __asm volatile("" : /* no outputs */ : /* no inputs */ : "memory")
#else
#define CLOBBER_MEMORY
#endif
14
15#if defined(IS_X64) && defined(ROARING_INLINE_ASM)
/*
 * RDTSC_START(cycles): read the time-stamp counter at the beginning of a
 * timed region and store it in `cycles` (an assignable 64-bit lvalue).
 *
 * cpuid is issued ahead of rdtsc (Intel documents cpuid as a serializing
 * instruction); the EDX:EAX result of rdtsc is then copied out and combined
 * as (high << 32) | low.
 *
 * The clobber list names the registers written by cpuid/rdtsc, plus
 * "memory" so the compiler cannot move loads or stores of the timed code
 * across this statement. The temporaries are plain `unsigned`: the
 * `register` storage class is only a hint in C and is rejected by C++17.
 */
#define RDTSC_START(cycles)                                  \
    do {                                                     \
        unsigned cyc_high, cyc_low;                          \
        __asm volatile(                                      \
            "cpuid\n\t"                                      \
            "rdtsc\n\t"                                      \
            "mov %%edx, %0\n\t"                              \
            "mov %%eax, %1\n\t"                              \
            : "=r"(cyc_high), "=r"(cyc_low)                  \
            :                                                \
            : "%rax", "%rbx", "%rcx", "%rdx", "memory");     \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;     \
    } while (0)
27
/*
 * RDTSC_FINAL(cycles): read the time-stamp counter at the end of a timed
 * region and store it in `cycles` (an assignable 64-bit lvalue).
 *
 * rdtscp is executed first and its EDX:EAX result is copied out; cpuid is
 * issued afterwards (Intel documents cpuid as a serializing instruction).
 * The value is combined as (high << 32) | low.
 *
 * The clobber list names the registers written by rdtscp/cpuid, plus
 * "memory" so the compiler cannot move loads or stores of the timed code
 * across this statement. The temporaries are plain `unsigned`: the
 * `register` storage class is only a hint in C and is rejected by C++17.
 */
#define RDTSC_FINAL(cycles)                                  \
    do {                                                     \
        unsigned cyc_high, cyc_low;                          \
        __asm volatile(                                      \
            "rdtscp\n\t"                                     \
            "mov %%edx, %0\n\t"                              \
            "mov %%eax, %1\n\t"                              \
            "cpuid\n\t"                                      \
            : "=r"(cyc_high), "=r"(cyc_low)                  \
            :                                                \
            : "%rax", "%rbx", "%rcx", "%rdx", "memory");     \
        (cycles) = ((uint64_t)cyc_high << 32) | cyc_low;     \
    } while (0)
39
40#elif defined(__linux__) && defined(__GLIBC__)
41
42#include <time.h>
43#ifdef CLOCK_THREAD_CPUTIME_ID
/*
 * RDTSC_START(cycles): Linux/glibc substitute for the TSC read.
 * Queries CLOCK_THREAD_CPUTIME_ID with clock_gettime() and stores the
 * reading in `cycles` as nanoseconds (tv_sec * 1e9 + tv_nsec).
 */
#define RDTSC_START(cycles)                                        \
    do {                                                           \
        struct timespec now;                                       \
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);              \
        cycles = now.tv_nsec + UINT64_C(1000000000) * now.tv_sec;  \
    } while (0)
50
/*
 * RDTSC_FINAL(cycles): Linux/glibc substitute for the end-of-region TSC
 * read. Stores the reading in `cycles` as nanoseconds
 * (tv_sec * 1e9 + tv_nsec).
 *
 * It must query CLOCK_THREAD_CPUTIME_ID, the clock RDTSC_START reads in
 * this branch: callers compute (final - start), which is only meaningful
 * when both readings come from the same clock. Reading CLOCK_REALTIME here
 * would subtract thread CPU time from wall-clock time since the epoch.
 */
#define RDTSC_FINAL(cycles)                                               \
    do {                                                                  \
        struct timespec ts;                                               \
        clock_gettime(CLOCK_THREAD_CPUTIME_ID, &ts);                      \
        (cycles) = (uint64_t)ts.tv_sec * UINT64_C(1000000000) +           \
                   (uint64_t)ts.tv_nsec;                                  \
    } while (0)
57
58#elif defined(CLOCK_REALTIME) // #ifdef CLOCK_THREAD_CPUTIME_ID
/*
 * RDTSC_START(cycles): variant used when CLOCK_THREAD_CPUTIME_ID is not
 * defined but CLOCK_REALTIME is. Queries CLOCK_REALTIME with
 * clock_gettime() and stores the reading in `cycles` as nanoseconds
 * (tv_sec * 1e9 + tv_nsec).
 */
#define RDTSC_START(cycles)                                        \
    do {                                                           \
        struct timespec now;                                       \
        clock_gettime(CLOCK_REALTIME, &now);                       \
        cycles = now.tv_nsec + UINT64_C(1000000000) * now.tv_sec;  \
    } while (0)
65
/*
 * RDTSC_FINAL(cycles): end-of-region reading for the CLOCK_REALTIME
 * variant. Queries CLOCK_REALTIME with clock_gettime() and stores the
 * reading in `cycles` as nanoseconds (tv_sec * 1e9 + tv_nsec).
 */
#define RDTSC_FINAL(cycles)                                        \
    do {                                                           \
        struct timespec now;                                       \
        clock_gettime(CLOCK_REALTIME, &now);                       \
        cycles = now.tv_nsec + UINT64_C(1000000000) * now.tv_sec;  \
    } while (0)
72
73#else
/*
 * RDTSC_START(cycles): variant used on Linux/glibc when neither
 * CLOCK_THREAD_CPUTIME_ID nor CLOCK_REALTIME is defined.
 * Assigns the value returned by clock() to `cycles`.
 */
#define RDTSC_START(cycles)             \
    do {                                \
        const clock_t ticks = clock();  \
        cycles = ticks;                 \
    } while (0)
78
/*
 * RDTSC_FINAL(cycles): end-of-region reading for the clock() variant.
 * Assigns the value returned by clock() to `cycles`.
 */
#define RDTSC_FINAL(cycles)             \
    do {                                \
        const clock_t ticks = clock();  \
        cycles = ticks;                 \
    } while (0)
83
84#endif // #ifdef CLOCK_THREAD_CPUTIME_ID
85
86#else
87
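/*
 * Fallback for builds covered by neither branch above (no x86-64 inline
 * asm, and not Linux with glibc): rdtsc is not used here; the standard
 * clock() function provides the readings instead.
 */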
91#include <time.h>
92
/*
 * RDTSC_START(cycles): generic variant.
 * Assigns the value returned by clock() to `cycles`.
 */
#define RDTSC_START(cycles)             \
    do {                                \
        const clock_t ticks = clock();  \
        cycles = ticks;                 \
    } while (0)
97
/*
 * RDTSC_FINAL(cycles): generic end-of-region variant.
 * Assigns the value returned by clock() to `cycles`.
 */
#define RDTSC_FINAL(cycles)             \
    do {                                \
        const clock_t ticks = clock();  \
        cycles = ticks;                 \
    } while (0)
102
103#endif
104
/*
 * BEST_TIME(test, answer, repeat, size)
 *
 * Evaluates the expression `test` `repeat` times, timing each evaluation
 * with RDTSC_START / RDTSC_FINAL, and prints the smallest elapsed count
 * divided by `size`, labelled "cycles per operation".
 *
 *   test    expression to time; its source text is printed as the label
 *   answer  value `test` is expected to produce; if any evaluation differs,
 *           " [ERROR]" is appended to the output line
 *   repeat  number of timed evaluations
 *   size    number of operations represented by one evaluation of `test`
 *
 * Every argument is parenthesized where it is used, so an argument such as
 * `x & mask` is compared as a whole instead of re-associating with `!=`.
 * The macro's own locals carry a bt_ prefix so that an argument mentioning
 * a common name (e.g. `i`) refers to the caller's variable, not to the
 * loop counter. The ratio is computed in double because float cannot
 * represent counts above 2^24 exactly.
 */
#define BEST_TIME(test, answer, repeat, size)                             \
    do {                                                                  \
        printf("%s: ", #test);                                            \
        fflush(NULL);                                                     \
        uint64_t bt_start, bt_final, bt_diff;                             \
        uint64_t bt_min_diff = (uint64_t)-1;                              \
        int bt_wrong_answer = 0;                                          \
        for (int bt_i = 0; bt_i < (repeat); bt_i++) {                     \
            CLOBBER_MEMORY;                                               \
            RDTSC_START(bt_start);                                        \
            if ((test) != (answer)) {                                     \
                bt_wrong_answer = 1;                                      \
            }                                                             \
            RDTSC_FINAL(bt_final);                                        \
            bt_diff = (bt_final - bt_start);                              \
            if (bt_diff < bt_min_diff) {                                  \
                bt_min_diff = bt_diff;                                    \
            }                                                             \
        }                                                                 \
        uint64_t bt_size = (uint64_t)(size);                              \
        double bt_cycle_per_op = (double)bt_min_diff / (double)bt_size;   \
        printf(" %.2f cycles per operation", bt_cycle_per_op);            \
        if (bt_wrong_answer) {                                            \
            printf(" [ERROR]");                                           \
        }                                                                 \
        printf("\n");                                                     \
        fflush(NULL);                                                     \
    } while (0)
133
/*
 * BEST_TIME_PRE_ARRAY(base, test, pre, testvalues, nbrtestvalues)
 *
 * For each of the `nbrtestvalues` entries of `testvalues`, calls
 * pre(base) (untimed) and then times one call test(base, testvalues[j])
 * with RDTSC_START / RDTSC_FINAL. Unlike BEST_TIME, which reports the
 * minimum, this prints the sum of all elapsed counts divided by
 * `nbrtestvalues`, labelled "cycles per operation".
 *
 *   base           first argument passed to both `pre` and `test`
 *   test           function (or function-like macro) being timed
 *   pre            function called on `base` before every timed call
 *   testvalues     array of second arguments for `test`
 *   nbrtestvalues  number of entries of `testvalues` to use
 *
 * The running total is a uint64_t: the per-call differences are 64-bit,
 * and an `int` accumulator truncates or overflows once the total passes
 * INT_MAX. Locals carry a bpa_ prefix so arguments that mention common
 * names (e.g. `j`, `sum`) refer to the caller's variables. The ratio is
 * computed in double because float cannot represent counts above 2^24
 * exactly.
 */
#define BEST_TIME_PRE_ARRAY(base, test, pre, testvalues, nbrtestvalues)   \
    do {                                                                  \
        printf("%s %s: ", #test, #pre);                                   \
        fflush(NULL);                                                     \
        uint64_t bpa_start, bpa_final, bpa_diff;                          \
        uint64_t bpa_sum = 0;                                             \
        for (size_t bpa_j = 0; bpa_j < (nbrtestvalues); bpa_j++) {        \
            pre(base);                                                    \
            CLOBBER_MEMORY;                                               \
            RDTSC_START(bpa_start);                                       \
            test(base, (testvalues)[bpa_j]);                              \
            RDTSC_FINAL(bpa_final);                                       \
            bpa_diff = (bpa_final - bpa_start);                           \
            bpa_sum += bpa_diff;                                          \
        }                                                                 \
        uint64_t bpa_count = (uint64_t)(nbrtestvalues);                   \
        double bpa_cycle_per_op = (double)bpa_sum / (double)bpa_count;    \
        printf(" %.2f cycles per operation", bpa_cycle_per_op);           \
        printf("\n");                                                     \
        fflush(NULL);                                                     \
    } while (0)
160
161#endif /* BENCHMARKS_INCLUDE_BENCHMARK_H_ */
162