1 | #include <roaring/portability.h> |
2 | #include <roaring/containers/run.h> |
3 | #include <roaring/misc/configreport.h> |
4 | #include "benchmark.h" |
5 | #include "random.h" |
6 | |
7 | enum { TESTSIZE = 2048 }; |
8 | |
9 | #if defined(IS_X64) && !(defined(_MSC_VER) && !defined(__clang__)) |
10 | // flushes the array from cache |
11 | void run_cache_flush(run_container_t* B) { |
12 | const int32_t CACHELINESIZE = |
13 | computecacheline(); // 64 bytes per cache line |
14 | for (int32_t k = 0; k < B->n_runs * 2; |
15 | k += CACHELINESIZE / (int32_t)sizeof(uint16_t)) { |
16 | __builtin_ia32_clflush(B->runs + k); |
17 | } |
18 | } |
19 | #else |
20 | void run_cache_flush(run_container_t* B) { (void)B; } |
21 | #endif |
22 | |
23 | // tries to put array in cache |
24 | void run_cache_prefetch(run_container_t* B) { |
25 | #ifdef IS_X64 |
26 | const int32_t CACHELINESIZE = |
27 | computecacheline(); // 64 bytes per cache line |
28 | #else |
29 | const int32_t CACHELINESIZE = 64; |
30 | #endif |
31 | #if !(defined(_MSC_VER) && !defined(__clang__)) |
32 | for (int32_t k = 0; k < B->n_runs * 2; |
33 | k += CACHELINESIZE / (int32_t)sizeof(uint16_t)) { |
34 | __builtin_prefetch(B->runs + k); |
35 | } |
36 | #endif |
37 | } |
38 | |
39 | int add_test(run_container_t* B) { |
40 | int x; |
41 | for (x = 0; x < (1 << 16); x += 3) { |
42 | run_container_add(B, (uint16_t)x); |
43 | } |
44 | return 0; |
45 | } |
46 | |
47 | int remove_test(run_container_t* B) { |
48 | int x; |
49 | for (x = 0; x < (1 << 16); x += 3) { |
50 | run_container_remove(B, (uint16_t)x); |
51 | } |
52 | return 0; |
53 | } |
54 | |
55 | int contains_test(run_container_t* B) { |
56 | int card = 0; |
57 | int x; |
58 | for (x = 0; x < (1 << 16); x++) { |
59 | card += run_container_contains(B, (uint16_t)x); |
60 | } |
61 | return card; |
62 | } |
63 | |
64 | int union_test(run_container_t* B1, run_container_t* B2, run_container_t* BO) { |
65 | run_container_union(B1, B2, BO); |
66 | return run_container_cardinality(BO); |
67 | } |
68 | |
69 | int intersection_test(run_container_t* B1, run_container_t* B2, |
70 | run_container_t* BO) { |
71 | run_container_intersection(B1, B2, BO); |
72 | return run_container_cardinality(BO); |
73 | } |
74 | |
75 | int main() { |
76 | int repeat = 500; |
77 | int size = TESTSIZE; |
78 | tellmeall(); |
79 | printf("run container benchmarks\n" ); |
80 | run_container_t* B = run_container_create(); |
81 | BEST_TIME(add_test(B), 0, repeat, size); |
82 | int answer = contains_test(B); |
83 | size = 1 << 16; |
84 | BEST_TIME(contains_test(B), answer, repeat, size); |
85 | size = (1 << 16) / 3; |
86 | BEST_TIME(remove_test(B), 0, repeat, size); |
87 | run_container_free(B); |
88 | |
89 | for (int howmany = 32; howmany <= (1 << 16); howmany *= 8) { |
90 | run_container_t* Bt = run_container_create(); |
91 | for (int j = 0; j < howmany; ++j) { |
92 | run_container_add(Bt, (uint16_t)pcg32_random()); |
93 | } |
94 | size_t nbrtestvalues = 1024; |
95 | uint16_t* testvalues = malloc(nbrtestvalues * sizeof(uint16_t)); |
96 | printf("\n number of values in container = %d\n" , |
97 | run_container_cardinality(Bt)); |
98 | int card = run_container_cardinality(Bt); |
99 | uint32_t* out = malloc(sizeof(uint32_t) * (unsigned long)card); |
100 | BEST_TIME(run_container_to_uint32_array(out, Bt, 1234), card, repeat, |
101 | card); |
102 | free(out); |
103 | BEST_TIME_PRE_ARRAY(Bt, run_container_contains, run_cache_prefetch, |
104 | testvalues, nbrtestvalues); |
105 | BEST_TIME_PRE_ARRAY(Bt, run_container_contains, run_cache_flush, |
106 | testvalues, nbrtestvalues); |
107 | free(testvalues); |
108 | run_container_free(Bt); |
109 | } |
110 | printf("\n" ); |
111 | |
112 | run_container_t* B1 = run_container_create(); |
113 | for (int x = 0; x < 1 << 16; x += 3) { |
114 | run_container_add(B1, (uint16_t)x); |
115 | } |
116 | run_container_t* B2 = run_container_create(); |
117 | for (int x = 0; x < 1 << 16; x += 5) { |
118 | run_container_add(B2, (uint16_t)x); |
119 | } |
120 | int32_t inputsize; |
121 | inputsize = B1->n_runs + B2->n_runs; |
122 | run_container_t* BO = run_container_create(); |
123 | printf("\nUnion and intersections...\n" ); |
124 | printf("\nNote:\n" ); |
125 | printf( |
126 | "union times are expressed in cycles per number of input elements " |
127 | "(both runs)\n" ); |
128 | printf( |
129 | "intersection times are expressed in cycles per number of output " |
130 | "elements\n\n" ); |
131 | printf("==intersection and union test 1 \n" ); |
132 | printf("input 1 cardinality = %d, input 2 cardinality = %d \n" , |
133 | run_container_cardinality(B1), run_container_cardinality(B2)); |
134 | answer = union_test(B1, B2, BO); |
135 | printf("union cardinality = %d \n" , answer); |
136 | printf("B1 card = %d B2 card = %d \n" , run_container_cardinality(B1), |
137 | run_container_cardinality(B2)); |
138 | inputsize = B1->n_runs + B2->n_runs; |
139 | BEST_TIME(union_test(B1, B2, BO), answer, repeat, inputsize); |
140 | answer = intersection_test(B1, B2, BO); |
141 | printf("intersection cardinality = %d \n" , answer); |
142 | BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); |
143 | printf("==intersection and union test 2 \n" ); |
144 | run_container_clear(B1); |
145 | run_container_clear(B2); |
146 | for (int x = 0; x < (1 << 16); x += 64) { |
147 | int length = x % 11; |
148 | for (int y = 0; y < length; ++y) |
149 | run_container_add(B1, (uint16_t)(x + y)); |
150 | } |
151 | for (int x = 1; x < 1 << 16; x += x) { |
152 | run_container_add(B2, (uint16_t)x); |
153 | } |
154 | printf("input 1 cardinality = %d, input 2 cardinality = %d \n" , |
155 | run_container_cardinality(B1), run_container_cardinality(B2)); |
156 | answer = union_test(B1, B2, BO); |
157 | printf("union cardinality = %d \n" , answer); |
158 | printf("B1 card = %d B2 card = %d \n" , run_container_cardinality(B1), |
159 | run_container_cardinality(B2)); |
160 | inputsize = B1->n_runs + B2->n_runs; |
161 | |
162 | BEST_TIME(union_test(B1, B2, BO), answer, repeat, inputsize); |
163 | answer = intersection_test(B1, B2, BO); |
164 | printf("intersection cardinality = %d \n" , answer); |
165 | BEST_TIME(intersection_test(B1, B2, BO), answer, repeat, answer); |
166 | |
167 | run_container_free(B1); |
168 | run_container_free(B2); |
169 | run_container_free(BO); |
170 | return 0; |
171 | } |
172 | |