1 | #include <roaring/portability.h> |
2 | #include <roaring/containers/bitset.h> |
3 | #include <roaring/containers/convert.h> |
4 | #include <roaring/misc/configreport.h> |
5 | |
6 | #include "benchmark.h" |
7 | #include "random.h" |
8 | |
9 | #define DIV_CEIL_64K(denom) (((1 << 16) + ((denom) - 1)) / (denom)) |
10 | |
11 | const int repeat = 500; |
12 | |
13 | |
14 | #if defined(IS_X64) && !(defined(_MSC_VER) && !defined(__clang__)) |
15 | // flushes the array of words from cache |
16 | void bitset_cache_flush(bitset_container_t* B) { |
17 | const int32_t CACHELINESIZE = |
18 | computecacheline(); // 64 bytes per cache line |
19 | for (int32_t k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS; |
20 | k += CACHELINESIZE / (int32_t)sizeof(uint64_t)) { |
21 | __builtin_ia32_clflush(B->array + k); |
22 | } |
23 | } |
24 | #else |
25 | void bitset_cache_flush(bitset_container_t* B) { (void)B; } |
26 | |
27 | #endif |
28 | |
29 | // tries to put array of words in cache |
30 | void bitset_cache_prefetch(bitset_container_t* B) { |
31 | #ifdef IS_X64 |
32 | const int32_t CACHELINESIZE = |
33 | computecacheline(); // 64 bytes per cache line |
34 | #else |
35 | const int32_t CACHELINESIZE = 64; |
36 | #endif |
37 | #if !(defined(_MSC_VER) && !defined(__clang__)) |
38 | for (int32_t k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS; |
39 | k += CACHELINESIZE / (int32_t)sizeof(uint64_t)) { |
40 | __builtin_prefetch(B->array + k); |
41 | } |
42 | #endif |
43 | } |
44 | |
45 | // used to benchmark array_container_from_bitset |
46 | int get_cardinality_through_conversion_to_array(bitset_container_t* B) { |
47 | array_container_t* conv = array_container_from_bitset(B); |
48 | int card = conv->cardinality; |
49 | array_container_free(conv); |
50 | return card; |
51 | } |
52 | |
53 | int (bitset_container_t* B) { |
54 | int card = bitset_container_cardinality(B); |
55 | uint32_t* out = malloc(sizeof(uint32_t) * (unsigned)card); |
56 | bitset_container_to_uint32_array(out, B, 1234); |
57 | free(out); |
58 | return card; |
59 | } |
60 | |
61 | int set_test(bitset_container_t* B) { |
62 | int x; |
63 | for (x = 0; x < 1 << 16; x += 3) { |
64 | bitset_container_set(B, (uint16_t)x); |
65 | } |
66 | return 0; |
67 | } |
68 | |
69 | int unset_test(bitset_container_t* B) { |
70 | int x; |
71 | for (x = 0; x < 1 << 16; x += 3) { |
72 | bitset_container_unset(B, (uint16_t)x); |
73 | } |
74 | return 0; |
75 | } |
76 | |
77 | int get_test(bitset_container_t* B) { |
78 | int card = 0; |
79 | int x; |
80 | for (x = 0; x < 1 << 16; x++) { |
81 | card += bitset_container_get(B, (uint16_t)x); |
82 | } |
83 | return card; |
84 | } |
85 | |
86 | void benchmark_logical_operations() { |
87 | printf("\nLogical operations (time units per single operation):\n" ); |
88 | bitset_container_t* B1 = bitset_container_create(); |
89 | for (int x = 0; x < 1 << 16; x += 3) { |
90 | bitset_container_set(B1, (uint16_t)x); |
91 | } |
92 | bitset_container_t* B2 = bitset_container_create(); |
93 | for (int x = 0; x < 1 << 16; x += 5) { |
94 | bitset_container_set(B2, (uint16_t)x); |
95 | } |
96 | |
97 | bitset_container_t* BO = bitset_container_create(); |
98 | |
99 | const int and_cardinality = DIV_CEIL_64K(3*5); |
100 | BEST_TIME(bitset_container_and_nocard(B1, B2, BO), BITSET_UNKNOWN_CARDINALITY, repeat, 1); |
101 | BEST_TIME(bitset_container_and(B1, B2, BO), and_cardinality, repeat, 1); |
102 | BEST_TIME(bitset_container_and_justcard(B1, B2), and_cardinality, repeat, 1); |
103 | BEST_TIME(bitset_container_compute_cardinality(BO), and_cardinality, repeat, 1); |
104 | |
105 | const int or_cardinality = DIV_CEIL_64K(3) + DIV_CEIL_64K(5) - DIV_CEIL_64K(3*5); |
106 | BEST_TIME(bitset_container_or_nocard(B1, B2, BO), BITSET_UNKNOWN_CARDINALITY, repeat, 1); |
107 | BEST_TIME(bitset_container_or(B1, B2, BO), or_cardinality, repeat, 1); |
108 | BEST_TIME(bitset_container_or_justcard(B1, B2), or_cardinality, repeat, 1); |
109 | BEST_TIME(bitset_container_compute_cardinality(BO), or_cardinality, repeat, 1); |
110 | |
111 | bitset_container_free(BO); |
112 | bitset_container_free(B1); |
113 | bitset_container_free(B2); |
114 | printf("\n" ); |
115 | } |
116 | |
117 | |
118 | int main() { |
119 | int size = (1 << 16) / 3; |
120 | tellmeall(); |
121 | printf("bitset container benchmarks\n" ); |
122 | bitset_container_t* B = bitset_container_create(); |
123 | BEST_TIME(set_test(B), 0, repeat, size); |
124 | int answer = get_test(B); |
125 | size = 1 << 16; |
126 | BEST_TIME(get_test(B), answer, repeat, size); |
127 | BEST_TIME(bitset_container_cardinality(B), answer, repeat, 1); |
128 | BEST_TIME(bitset_container_compute_cardinality(B), answer, repeat, |
129 | BITSET_CONTAINER_SIZE_IN_WORDS); |
130 | |
131 | size = (1 << 16) / 3; |
132 | BEST_TIME(unset_test(B), 0, repeat, size); |
133 | bitset_container_free(B); |
134 | |
135 | for (int howmany = 4096; howmany <= (1 << 16); howmany *= 2) { |
136 | bitset_container_t* Bt = bitset_container_create(); |
137 | while (bitset_container_cardinality(Bt) < howmany) { |
138 | bitset_container_set(Bt, (uint16_t)pcg32_random()); |
139 | } |
140 | size_t nbrtestvalues = 1024; |
141 | uint16_t* testvalues = malloc(nbrtestvalues * sizeof(uint16_t)); |
142 | printf("\n number of values in container = %d\n" , |
143 | bitset_container_cardinality(Bt)); |
144 | int card = bitset_container_cardinality(Bt); |
145 | uint32_t* out = malloc(sizeof(uint32_t) * (unsigned)card + 32); |
146 | BEST_TIME(bitset_container_to_uint32_array(out, Bt, 1234), card, repeat, |
147 | card); |
148 | free(out); |
149 | BEST_TIME_PRE_ARRAY(Bt, bitset_container_get, bitset_cache_prefetch, |
150 | testvalues, nbrtestvalues); |
151 | BEST_TIME_PRE_ARRAY(Bt, bitset_container_get, bitset_cache_flush, |
152 | testvalues, nbrtestvalues); |
153 | free(testvalues); |
154 | bitset_container_free(Bt); |
155 | } |
156 | printf("\n" ); |
157 | |
158 | benchmark_logical_operations(); |
159 | |
160 | // next we are going to benchmark conversion from bitset to array (an |
161 | // important step) |
162 | bitset_container_t* B1 = bitset_container_create(); |
163 | for (int k = 0; k < 4096; ++k) { |
164 | bitset_container_set(B1, (uint16_t)ranged_random(1 << 16)); |
165 | } |
166 | answer = get_cardinality_through_conversion_to_array(B1); |
167 | BEST_TIME(get_cardinality_through_conversion_to_array(B1), answer, repeat, |
168 | BITSET_CONTAINER_SIZE_IN_WORDS); |
169 | |
170 | bitset_container_free(B1); |
171 | return 0; |
172 | } |
173 | |