1#include <roaring/portability.h>
2#include <roaring/containers/bitset.h>
3#include <roaring/containers/convert.h>
4#include <roaring/misc/configreport.h>
5
6#include "benchmark.h"
7#include "random.h"
8
9#define DIV_CEIL_64K(denom) (((1 << 16) + ((denom) - 1)) / (denom))
10
11const int repeat = 500;
12
13
14#if defined(IS_X64) && !(defined(_MSC_VER) && !defined(__clang__))
15// flushes the array of words from cache
16void bitset_cache_flush(bitset_container_t* B) {
17 const int32_t CACHELINESIZE =
18 computecacheline(); // 64 bytes per cache line
19 for (int32_t k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS;
20 k += CACHELINESIZE / (int32_t)sizeof(uint64_t)) {
21 __builtin_ia32_clflush(B->array + k);
22 }
23}
24#else
25void bitset_cache_flush(bitset_container_t* B) { (void)B; }
26
27#endif
28
29// tries to put array of words in cache
30void bitset_cache_prefetch(bitset_container_t* B) {
31#ifdef IS_X64
32 const int32_t CACHELINESIZE =
33 computecacheline(); // 64 bytes per cache line
34#else
35 const int32_t CACHELINESIZE = 64;
36#endif
37#if !(defined(_MSC_VER) && !defined(__clang__))
38 for (int32_t k = 0; k < BITSET_CONTAINER_SIZE_IN_WORDS;
39 k += CACHELINESIZE / (int32_t)sizeof(uint64_t)) {
40 __builtin_prefetch(B->array + k);
41 }
42#endif
43}
44
45// used to benchmark array_container_from_bitset
46int get_cardinality_through_conversion_to_array(bitset_container_t* B) {
47 array_container_t* conv = array_container_from_bitset(B);
48 int card = conv->cardinality;
49 array_container_free(conv);
50 return card;
51}
52
53int extract_test(bitset_container_t* B) {
54 int card = bitset_container_cardinality(B);
55 uint32_t* out = malloc(sizeof(uint32_t) * (unsigned)card);
56 bitset_container_to_uint32_array(out, B, 1234);
57 free(out);
58 return card;
59}
60
61int set_test(bitset_container_t* B) {
62 int x;
63 for (x = 0; x < 1 << 16; x += 3) {
64 bitset_container_set(B, (uint16_t)x);
65 }
66 return 0;
67}
68
69int unset_test(bitset_container_t* B) {
70 int x;
71 for (x = 0; x < 1 << 16; x += 3) {
72 bitset_container_unset(B, (uint16_t)x);
73 }
74 return 0;
75}
76
77int get_test(bitset_container_t* B) {
78 int card = 0;
79 int x;
80 for (x = 0; x < 1 << 16; x++) {
81 card += bitset_container_get(B, (uint16_t)x);
82 }
83 return card;
84}
85
86void benchmark_logical_operations() {
87 printf("\nLogical operations (time units per single operation):\n");
88 bitset_container_t* B1 = bitset_container_create();
89 for (int x = 0; x < 1 << 16; x += 3) {
90 bitset_container_set(B1, (uint16_t)x);
91 }
92 bitset_container_t* B2 = bitset_container_create();
93 for (int x = 0; x < 1 << 16; x += 5) {
94 bitset_container_set(B2, (uint16_t)x);
95 }
96
97 bitset_container_t* BO = bitset_container_create();
98
99 const int and_cardinality = DIV_CEIL_64K(3*5);
100 BEST_TIME(bitset_container_and_nocard(B1, B2, BO), BITSET_UNKNOWN_CARDINALITY, repeat, 1);
101 BEST_TIME(bitset_container_and(B1, B2, BO), and_cardinality, repeat, 1);
102 BEST_TIME(bitset_container_and_justcard(B1, B2), and_cardinality, repeat, 1);
103 BEST_TIME(bitset_container_compute_cardinality(BO), and_cardinality, repeat, 1);
104
105 const int or_cardinality = DIV_CEIL_64K(3) + DIV_CEIL_64K(5) - DIV_CEIL_64K(3*5);
106 BEST_TIME(bitset_container_or_nocard(B1, B2, BO), BITSET_UNKNOWN_CARDINALITY, repeat, 1);
107 BEST_TIME(bitset_container_or(B1, B2, BO), or_cardinality, repeat, 1);
108 BEST_TIME(bitset_container_or_justcard(B1, B2), or_cardinality, repeat, 1);
109 BEST_TIME(bitset_container_compute_cardinality(BO), or_cardinality, repeat, 1);
110
111 bitset_container_free(BO);
112 bitset_container_free(B1);
113 bitset_container_free(B2);
114 printf("\n");
115}
116
117
118int main() {
119 int size = (1 << 16) / 3;
120 tellmeall();
121 printf("bitset container benchmarks\n");
122 bitset_container_t* B = bitset_container_create();
123 BEST_TIME(set_test(B), 0, repeat, size);
124 int answer = get_test(B);
125 size = 1 << 16;
126 BEST_TIME(get_test(B), answer, repeat, size);
127 BEST_TIME(bitset_container_cardinality(B), answer, repeat, 1);
128 BEST_TIME(bitset_container_compute_cardinality(B), answer, repeat,
129 BITSET_CONTAINER_SIZE_IN_WORDS);
130
131 size = (1 << 16) / 3;
132 BEST_TIME(unset_test(B), 0, repeat, size);
133 bitset_container_free(B);
134
135 for (int howmany = 4096; howmany <= (1 << 16); howmany *= 2) {
136 bitset_container_t* Bt = bitset_container_create();
137 while (bitset_container_cardinality(Bt) < howmany) {
138 bitset_container_set(Bt, (uint16_t)pcg32_random());
139 }
140 size_t nbrtestvalues = 1024;
141 uint16_t* testvalues = malloc(nbrtestvalues * sizeof(uint16_t));
142 printf("\n number of values in container = %d\n",
143 bitset_container_cardinality(Bt));
144 int card = bitset_container_cardinality(Bt);
145 uint32_t* out = malloc(sizeof(uint32_t) * (unsigned)card + 32);
146 BEST_TIME(bitset_container_to_uint32_array(out, Bt, 1234), card, repeat,
147 card);
148 free(out);
149 BEST_TIME_PRE_ARRAY(Bt, bitset_container_get, bitset_cache_prefetch,
150 testvalues, nbrtestvalues);
151 BEST_TIME_PRE_ARRAY(Bt, bitset_container_get, bitset_cache_flush,
152 testvalues, nbrtestvalues);
153 free(testvalues);
154 bitset_container_free(Bt);
155 }
156 printf("\n");
157
158 benchmark_logical_operations();
159
160 // next we are going to benchmark conversion from bitset to array (an
161 // important step)
162 bitset_container_t* B1 = bitset_container_create();
163 for (int k = 0; k < 4096; ++k) {
164 bitset_container_set(B1, (uint16_t)ranged_random(1 << 16));
165 }
166 answer = get_cardinality_through_conversion_to_array(B1);
167 BEST_TIME(get_cardinality_through_conversion_to_array(B1), answer, repeat,
168 BITSET_CONTAINER_SIZE_IN_WORDS);
169
170 bitset_container_free(B1);
171 return 0;
172}
173