1/* auto-generated on Tue Dec 18 09:42:59 CST 2018. Do not edit! */
2#include "roaring/roaring.h"
3
4/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
5#ifdef DMALLOC
6#include "dmalloc.h"
7#endif
8
9/* begin file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */
10#include <assert.h>
11#include <stdbool.h>
12#include <stdint.h>
13#include <stdio.h>
14#include <stdlib.h>
15#include <string.h>
16
/*
 * Non-defining `extern inline` declaration of binarySearch.
 * Under the C99/C11 inline rules, pairing this with an inline definition in
 * the same translation unit makes this unit emit the external (linkable) copy
 * of the function.  NOTE(review): the inline definition is not visible in this
 * chunk; presumably it comes from roaring/roaring.h -- confirm.
 */
extern inline int32_t binarySearch(const uint16_t *array,
                                   int32_t lenarray,
                                   uint16_t ikey);
19
20#ifdef USESSE4
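// Byte-shuffle masks for _mm_shuffle_epi8, used by intersect_vector16 and
// difference_vector16. The table holds 256 entries of 16 bytes; entry r moves
// the 16-bit lanes whose bits are set in r to the front of the vector, in
// order, and fills the remaining bytes with 0xFF (which pshufb turns into 0).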
22ALIGNED(0x1000)
23static const uint8_t shuffle_mask16[] = {
24 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
25 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
26 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF,
27 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
28 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
29 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
30 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
31 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
32 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
33 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF,
34 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF,
35 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
36 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
37 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
38 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
39 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
40 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
41 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF,
42 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
43 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
44 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
45 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
46 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
47 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
48 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
49 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF,
50 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
51 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
52 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
53 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF,
54 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
55 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
56 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
57 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF,
58 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
59 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
60 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
61 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF,
62 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
63 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
64 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
65 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
66 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF,
67 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
68 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
69 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
70 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
71 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
72 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
73 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF,
74 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
75 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
76 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
77 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
78 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
79 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
80 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
81 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
82 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
83 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
84 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
85 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
86 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
87 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
88 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
89 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF,
90 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
91 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
92 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
93 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF,
94 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
95 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
96 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
97 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
98 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
99 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
100 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
101 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
102 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
103 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
104 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
105 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
106 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
107 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
108 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
109 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
110 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13,
111 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
112 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
113 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF,
114 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13,
115 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
116 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
117 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF,
118 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
119 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
120 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
121 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF,
122 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
123 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
124 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
125 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF,
126 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
127 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
128 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
129 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
130 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13,
131 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
132 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
133 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF,
134 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
135 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
136 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
137 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13,
138 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
139 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
140 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF,
141 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
142 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
143 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
144 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
145 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
146 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
147 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
148 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
149 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
150 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
151 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
152 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
153 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF,
154 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11,
155 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
156 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
157 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF,
158 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
159 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
160 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
161 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11,
162 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11,
163 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
164 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
165 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13,
166 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
167 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
168 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
169 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11,
170 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
171 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
172 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13,
173 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
174 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
175 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
176 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
177 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11,
178 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
179 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
180 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
181 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11,
182 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
183 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
184 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
185 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11,
186 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
187 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
188 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13,
189 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11,
190 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
191 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
192 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
193 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
194 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF,
195 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
196 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
197 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
198 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
199 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
200 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
201 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF,
202 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
203 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
204 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
205 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
206 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
207 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
208 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
209 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15,
210 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
211 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
212 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
213 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15,
214 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
215 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
216 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
217 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF,
218 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
219 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
220 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
221 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF,
222 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
223 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
224 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
225 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
226 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
227 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
228 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
229 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15,
230 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
231 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
232 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
233 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
234 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
235 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
236 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15,
237 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
238 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11,
239 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
240 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
241 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15,
242 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11,
243 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
244 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
245 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15,
246 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
247 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
248 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
249 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15,
250 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
251 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
252 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
253 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15,
254 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
255 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
256 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
257 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
258 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11,
259 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
260 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
261 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15,
262 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
263 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
264 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
265 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11,
266 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
267 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
268 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15,
269 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15,
270 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
271 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
272 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
273 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
274 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
275 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
276 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15,
277 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
278 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
279 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
280 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
281 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF,
282 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13,
283 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
284 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
285 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF,
286 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
287 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
288 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
289 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13,
290 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13,
291 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
292 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
293 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15,
294 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
295 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
296 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
297 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13,
298 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
299 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
300 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15,
301 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
302 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
303 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
304 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
305 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13,
306 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
307 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
308 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
309 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13,
310 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
311 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
312 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
313 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13,
314 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
315 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
316 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15,
317 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13,
318 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
319 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
320 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15,
321 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
322 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13,
323 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
324 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
325 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15,
326 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
327 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
328 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
329 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13,
330 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
331 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
332 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15,
333 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15,
334 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
335 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
336 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
337 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
338 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
339 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
340 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15,
341 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
342 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
343 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
344 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
345 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13,
346 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
347 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
348 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15,
349 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13,
350 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
351 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
352 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15,
353 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
354 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9,
355 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
356 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
357 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
358 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
359 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
360 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
361 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
362 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5,
363 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
364 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
365 12, 13, 14, 15};
366
367/**
368 * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions
369 * Optimized by D. Lemire on May 3rd 2013
370 */
371int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
372 const uint16_t *__restrict__ B, size_t s_b,
373 uint16_t *C) {
374 size_t count = 0;
375 size_t i_a = 0, i_b = 0;
376 const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
377 const size_t st_a = (s_a / vectorlength) * vectorlength;
378 const size_t st_b = (s_b / vectorlength) * vectorlength;
379 __m128i v_a, v_b;
380 if ((i_a < st_a) && (i_b < st_b)) {
381 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
382 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
383 while ((A[i_a] == 0) || (B[i_b] == 0)) {
384 const __m128i res_v = _mm_cmpestrm(
385 v_b, vectorlength, v_a, vectorlength,
386 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
387 const int r = _mm_extract_epi32(res_v, 0);
388 __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r);
389 __m128i p = _mm_shuffle_epi8(v_a, sm16);
390 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
391 count += _mm_popcnt_u32(r);
392 const uint16_t a_max = A[i_a + vectorlength - 1];
393 const uint16_t b_max = B[i_b + vectorlength - 1];
394 if (a_max <= b_max) {
395 i_a += vectorlength;
396 if (i_a == st_a) break;
397 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
398 }
399 if (b_max <= a_max) {
400 i_b += vectorlength;
401 if (i_b == st_b) break;
402 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
403 }
404 }
405 if ((i_a < st_a) && (i_b < st_b))
406 while (true) {
407 const __m128i res_v = _mm_cmpistrm(
408 v_b, v_a,
409 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
410 const int r = _mm_extract_epi32(res_v, 0);
411 __m128i sm16 =
412 _mm_load_si128((const __m128i *)shuffle_mask16 + r);
413 __m128i p = _mm_shuffle_epi8(v_a, sm16);
414 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
415 count += _mm_popcnt_u32(r);
416 const uint16_t a_max = A[i_a + vectorlength - 1];
417 const uint16_t b_max = B[i_b + vectorlength - 1];
418 if (a_max <= b_max) {
419 i_a += vectorlength;
420 if (i_a == st_a) break;
421 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
422 }
423 if (b_max <= a_max) {
424 i_b += vectorlength;
425 if (i_b == st_b) break;
426 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
427 }
428 }
429 }
430 // intersect the tail using scalar intersection
431 while (i_a < s_a && i_b < s_b) {
432 uint16_t a = A[i_a];
433 uint16_t b = B[i_b];
434 if (a < b) {
435 i_a++;
436 } else if (b < a) {
437 i_b++;
438 } else {
439 C[count] = a; //==b;
440 count++;
441 i_a++;
442 i_b++;
443 }
444 }
445 return (int32_t)count;
446}
447
448int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
449 size_t s_a,
450 const uint16_t *__restrict__ B,
451 size_t s_b) {
452 size_t count = 0;
453 size_t i_a = 0, i_b = 0;
454 const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
455 const size_t st_a = (s_a / vectorlength) * vectorlength;
456 const size_t st_b = (s_b / vectorlength) * vectorlength;
457 __m128i v_a, v_b;
458 if ((i_a < st_a) && (i_b < st_b)) {
459 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
460 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
461 while ((A[i_a] == 0) || (B[i_b] == 0)) {
462 const __m128i res_v = _mm_cmpestrm(
463 v_b, vectorlength, v_a, vectorlength,
464 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
465 const int r = _mm_extract_epi32(res_v, 0);
466 count += _mm_popcnt_u32(r);
467 const uint16_t a_max = A[i_a + vectorlength - 1];
468 const uint16_t b_max = B[i_b + vectorlength - 1];
469 if (a_max <= b_max) {
470 i_a += vectorlength;
471 if (i_a == st_a) break;
472 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
473 }
474 if (b_max <= a_max) {
475 i_b += vectorlength;
476 if (i_b == st_b) break;
477 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
478 }
479 }
480 if ((i_a < st_a) && (i_b < st_b))
481 while (true) {
482 const __m128i res_v = _mm_cmpistrm(
483 v_b, v_a,
484 _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
485 const int r = _mm_extract_epi32(res_v, 0);
486 count += _mm_popcnt_u32(r);
487 const uint16_t a_max = A[i_a + vectorlength - 1];
488 const uint16_t b_max = B[i_b + vectorlength - 1];
489 if (a_max <= b_max) {
490 i_a += vectorlength;
491 if (i_a == st_a) break;
492 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
493 }
494 if (b_max <= a_max) {
495 i_b += vectorlength;
496 if (i_b == st_b) break;
497 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
498 }
499 }
500 }
501 // intersect the tail using scalar intersection
502 while (i_a < s_a && i_b < s_b) {
503 uint16_t a = A[i_a];
504 uint16_t b = B[i_b];
505 if (a < b) {
506 i_a++;
507 } else if (b < a) {
508 i_b++;
509 } else {
510 count++;
511 i_a++;
512 i_b++;
513 }
514 }
515 return (int32_t)count;
516}
517
518int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
519 const uint16_t *__restrict__ B, size_t s_b,
520 uint16_t *C) {
521 // we handle the degenerate case
522 if (s_a == 0) return 0;
523 if (s_b == 0) {
524 if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a);
525 return (int32_t)s_a;
526 }
527 // handle the leading zeroes, it is messy but it allows us to use the fast
528 // _mm_cmpistrm instrinsic safely
529 int32_t count = 0;
530 if ((A[0] == 0) || (B[0] == 0)) {
531 if ((A[0] == 0) && (B[0] == 0)) {
532 A++;
533 s_a--;
534 B++;
535 s_b--;
536 } else if (A[0] == 0) {
537 C[count++] = 0;
538 A++;
539 s_a--;
540 } else {
541 B++;
542 s_b--;
543 }
544 }
545 // at this point, we have two non-empty arrays, made of non-zero
546 // increasing values.
547 size_t i_a = 0, i_b = 0;
548 const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t);
549 const size_t st_a = (s_a / vectorlength) * vectorlength;
550 const size_t st_b = (s_b / vectorlength) * vectorlength;
551 if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path
552 __m128i v_a, v_b; //, v_bmax;
553 // we load a vector from A and a vector from B
554 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
555 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
556 // we have a runningmask which indicates which values from A have been
557 // spotted in B, these don't get written out.
558 __m128i runningmask_a_found_in_b = _mm_setzero_si128();
559 /****
560 * start of the main vectorized loop
561 *****/
562 while (true) {
563 // afoundinb will contain a mask indicate for each entry in A
564 // whether it is seen
565 // in B
566 const __m128i a_found_in_b =
567 _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
568 _SIDD_BIT_MASK);
569 runningmask_a_found_in_b =
570 _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
571 // we always compare the last values of A and B
572 const uint16_t a_max = A[i_a + vectorlength - 1];
573 const uint16_t b_max = B[i_b + vectorlength - 1];
574 if (a_max <= b_max) {
575 // Ok. In this code path, we are ready to write our v_a
576 // because there is no need to read more from B, they will
577 // all be large values.
578 const int bitmask_belongs_to_difference =
579 _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
580 /*** next few lines are probably expensive *****/
581 __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
582 bitmask_belongs_to_difference);
583 __m128i p = _mm_shuffle_epi8(v_a, sm16);
584 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
585 count += _mm_popcnt_u32(bitmask_belongs_to_difference);
586 // we advance a
587 i_a += vectorlength;
588 if (i_a == st_a) // no more
589 break;
590 runningmask_a_found_in_b = _mm_setzero_si128();
591 v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
592 }
593 if (b_max <= a_max) {
594 // in this code path, the current v_b has become useless
595 i_b += vectorlength;
596 if (i_b == st_b) break;
597 v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
598 }
599 }
600 // at this point, either we have i_a == st_a, which is the end of the
601 // vectorized processing,
602 // or we have i_b == st_b, and we are not done processing the vector...
603 // so we need to finish it off.
604 if (i_a < st_a) { // we have unfinished business...
605 uint16_t buffer[8]; // buffer to do a masked load
606 memset(buffer, 0, 8 * sizeof(uint16_t));
607 memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t));
608 v_b = _mm_lddqu_si128((__m128i *)buffer);
609 const __m128i a_found_in_b =
610 _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
611 _SIDD_BIT_MASK);
612 runningmask_a_found_in_b =
613 _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
614 const int bitmask_belongs_to_difference =
615 _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
616 __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 +
617 bitmask_belongs_to_difference);
618 __m128i p = _mm_shuffle_epi8(v_a, sm16);
619 _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
620 count += _mm_popcnt_u32(bitmask_belongs_to_difference);
621 i_a += vectorlength;
622 }
623 // at this point we should have i_a == st_a and i_b == st_b
624 }
625 // do the tail using scalar code
626 while (i_a < s_a && i_b < s_b) {
627 uint16_t a = A[i_a];
628 uint16_t b = B[i_b];
629 if (b < a) {
630 i_b++;
631 } else if (a < b) {
632 C[count] = a;
633 count++;
634 i_a++;
635 } else { //==
636 i_a++;
637 i_b++;
638 }
639 }
640 if (i_a < s_a) {
641 memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a));
642 count += (int32_t)(s_a - i_a);
643 }
644 return count;
645}
646
647#endif // USESSE4
648
649
650
651#ifdef USE_OLD_SKEW_INTERSECT
652// TODO: given enough experience with the new skew intersect, drop the old one from the code base.
653
654
/* Computes the intersection between one small and one large set of uint16_t.
 * Stores the result into buffer and returns the number of elements.
 *
 * small / size_s : the smaller input and its length
 * large / size_l : the larger input and its length
 * buffer         : receives the common values
 *
 * If either input is empty the result is empty and neither array is read.
 * NOTE(review): advanceUntil is defined outside this chunk; the loop relies on
 * it returning size_l when no remaining value of large reaches val_s --
 * confirm against its definition. */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    /* Guard both inputs: the loop below starts by reading small[0] and
     * large[0], which would be out of bounds for an empty array. */
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            /* large is behind: skip ahead in it */
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            /* small is behind: step it by one */
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            /* match: record it, then move both sides forward */
            buffer[pos++] = val_s;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
690#else // USE_OLD_SKEW_INTERSECT
691
692
/**
 * Branchless binary search going after 4 values at once.
 * Assumes that array is sorted.
 * On return, array[*index1] >= target1, array[*index2] >= target2, ...
 * except when *index1 == n, in which case all values in array are smaller
 * than target1, and so forth.
 * An empty range (n <= 0) is never dereferenced and yields 0 for every index.
 * It has logarithmic complexity.
 *
 * @param array            sorted values to search
 * @param n                number of values in array
 * @param target1..target4 the four keys
 * @param index1..index4   outputs: position of the first value >= the
 *                         matching target, or n when there is none
 */
static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, uint16_t target3, uint16_t target4,
                          int32_t *index1, int32_t *index2, int32_t *index3,
                          int32_t *index4) {
    if (n <= 0) {
        /* Nothing to search: report "not found" (index == n == 0) rather than
         * leaving the outputs unwritten. */
        *index1 = 0;
        *index2 = 0;
        *index3 = 0;
        *index4 = 0;
        return;
    }
    const uint16_t *base1 = array;
    const uint16_t *base2 = array;
    const uint16_t *base3 = array;
    const uint16_t *base4 = array;
    /* The four searches shrink the same window size in lockstep; each keeps
     * its own base pointer, moved with a conditional select. */
    while (n > 1) {
        const int32_t half = n >> 1;
        base1 = (base1[half] < target1) ? &base1[half] : base1;
        base2 = (base2[half] < target2) ? &base2[half] : base2;
        base3 = (base3[half] < target3) ? &base3[half] : base3;
        base4 = (base4[half] < target4) ? &base4[half] : base4;
        n -= half;
    }
    /* One candidate is left per search: step past it if it is too small. */
    *index1 = (int32_t)((*base1 < target1) + base1 - array);
    *index2 = (int32_t)((*base2 < target2) + base2 - array);
    *index3 = (int32_t)((*base3 < target3) + base3 - array);
    *index4 = (int32_t)((*base4 < target4) + base4 - array);
}
724
/**
* Branchless binary search going after 2 values at once.
* Assumes that array is sorted.
*
* On return you have array[*index1] >= target1 and array[*index2] >= target2,
* except when *index1 == n, in which case all values in array are smaller
* than target1 (and likewise for *index2 / target2).
*
* For an empty array (n <= 0) both indexes are set to 0, which is the
* "all values are smaller" answer for that length; no element is read.
*
* The search range is halved on every iteration, so the cost is logarithmic
* in n.
*/
static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, int32_t *index1, int32_t *index2) {
    const uint16_t *base1 = array;
    const uint16_t *base2 = array;
    if (n <= 0) {
        // Never leave the outputs unwritten: report "not found" positions.
        *index1 = 0;
        *index2 = 0;
        return;
    }
    // Both searches share the same shrinking length and advance in lockstep.
    while (n > 1) {
        const int32_t half = n >> 1;
        base1 = (base1[half] < target1) ? &base1[half] : base1;
        base2 = (base2[half] < target2) ? &base2[half] : base2;
        n -= half;
    }
    // Step one past the last candidate when it is still below the target.
    *index1 = (int32_t)((*base1 < target1) + base1 - array);
    *index2 = (int32_t)((*base2 < target2) + base2 - array);
}
748
/* Intersection of one small and one large set of uint16_t.
 *
 * Every value of small that is also found in large is appended to buffer;
 * the return value is the number of values written.
 *
 * The small set is consumed in groups of four (binarySearch4), then at most
 * one group of two (binarySearch2), then at most one single value
 * (binarySearch). After each group the searched window of large is moved
 * forward to the position found for the group's first value, so later
 * searches never look at entries before it. This approach can be slightly
 * superior to a conventional galloping search in some instances.
 */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    if (size_s == 0) {
        return 0;
    }

    size_t written = 0;  // number of values stored in buffer
    size_t from = 0;     // start of the still-unsearched part of large
    size_t next = 0;     // next unread position in small
    int32_t hit1 = 0, hit2 = 0, hit3 = 0, hit4 = 0;

    // Groups of four values from small.
    for (; (next + 4 <= size_s) && (from < size_l); next += 4) {
        const uint16_t want1 = small[next];
        const uint16_t want2 = small[next + 1];
        const uint16_t want3 = small[next + 2];
        const uint16_t want4 = small[next + 3];
        binarySearch4(large + from, (int32_t)(size_l - from), want1, want2,
                      want3, want4, &hit1, &hit2, &hit3, &hit4);
        // A hit offset equal to the window length means "not present", so
        // the bound is checked before the element is read.
        if ((hit1 + from < size_l) && (large[from + hit1] == want1)) {
            buffer[written++] = want1;
        }
        if ((hit2 + from < size_l) && (large[from + hit2] == want2)) {
            buffer[written++] = want2;
        }
        if ((hit3 + from < size_l) && (large[from + hit3] == want3)) {
            buffer[written++] = want3;
        }
        if ((hit4 + from < size_l) && (large[from + hit4] == want4)) {
            buffer[written++] = want4;
        }
        from += hit1;
    }

    // At most one remaining pair.
    if ((next + 2 <= size_s) && (from < size_l)) {
        const uint16_t want1 = small[next];
        const uint16_t want2 = small[next + 1];
        binarySearch2(large + from, (int32_t)(size_l - from), want1, want2,
                      &hit1, &hit2);
        if ((hit1 + from < size_l) && (large[from + hit1] == want1)) {
            buffer[written++] = want1;
        }
        if ((hit2 + from < size_l) && (large[from + hit2] == want2)) {
            buffer[written++] = want2;
        }
        next += 2;
        from += hit1;
    }

    // At most one remaining single value; a non-negative result from
    // binarySearch is treated as "found".
    if ((next < size_s) && (from < size_l)) {
        const uint16_t last = small[next];
        const int32_t found =
            binarySearch(large + from, (int32_t)(size_l - from), last);
        if (found >= 0) {
            buffer[written++] = last;
        }
    }

    return (int32_t)written;
}
808
809
810#endif //USE_OLD_SKEW_INTERSECT
811
812
/* Counts the values shared by one small and one large set of uint16_t
 * without storing them.
 *
 *   small, size_s  the shorter input and its number of elements
 *   large, size_l  the longer input and its number of elements
 *
 * Returns the number of common values; 0 when either input is empty (in
 * which case neither array is read). The walk only moves forward in both
 * arrays, so it relies on both inputs being sorted in increasing order.
 */
// TODO: this could be accelerated, possibly, by using binarySearch4 as above.
int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
                                            size_t size_s,
                                            const uint16_t *large,
                                            size_t size_l) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    // small[0] and large[0] are both read right below, so an empty input on
    // either side has to be rejected here to avoid an out-of-bounds read.
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // NOTE(review): advanceUntil is defined outside this block; it is
            // used here as "next index in large whose value is >= val_s, or
            // size_l when there is none" -- confirm against its definition.
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // Common value: count it, then move both cursors forward.
            pos++;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
848
/* Tells whether one small and one large set of uint16_t share at least one
 * value.
 *
 *   small, size_s  the shorter input and its number of elements
 *   large, size_l  the longer input and its number of elements
 *
 * Returns true as soon as a common value is met, false when one of the
 * inputs is exhausted first or when either input is empty (in which case
 * neither array is read). The walk only moves forward in both arrays, so it
 * relies on both inputs being sorted in increasing order.
 */
bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
                                      const uint16_t *large, size_t size_l) {
    size_t idx_l = 0, idx_s = 0;

    // small[0] and large[0] are both read right below, so an empty input on
    // either side has to be rejected here to avoid an out-of-bounds read.
    if (0 == size_s || 0 == size_l) {
        return false;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // NOTE(review): advanceUntil is defined outside this block; it is
            // used here as "next index in large whose value is >= val_s, or
            // size_l when there is none" -- confirm against its definition.
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            return true;
        }
    }

    return false;
}
875
/**
 * Generic intersection function.
 *
 * Walks A (lenA values) and B (lenB values) in parallel, always stepping the
 * side whose current value is smaller, and copies every value met on both
 * sides to out. Returns the number of values written to out; 0 when either
 * input is empty. The single forward pass relies on both inputs being sorted
 * in increasing order.
 */
int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
                         const uint16_t *B, const size_t lenB, uint16_t *out) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;
    uint16_t *dst = out;

    while (A != stopA && B != stopB) {
        if (*A < *B) {
            ++A;
        } else if (*B < *A) {
            ++B;
        } else {
            // Same value on both sides: keep it and step both cursors.
            *dst++ = *A;
            ++A;
            ++B;
        }
    }
    return (int32_t)(dst - out);
}
903
/**
 * Counts the values that appear in both A (lenA values) and B (lenB values)
 * without storing them. Returns 0 when either input is empty. The single
 * forward pass relies on both inputs being sorted in increasing order.
 */
int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
                                     const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;
    int32_t matches = 0;

    while (A != stopA && B != stopB) {
        if (*A < *B) {
            ++A;
        } else if (*B < *A) {
            ++B;
        } else {
            // Same value on both sides: count it and step both cursors.
            ++matches;
            ++A;
            ++B;
        }
    }
    return matches;
}
928
929
/**
 * Tells whether A (lenA values) and B (lenB values) have at least one value
 * in common. Stops at the first common value. Returns false when either
 * input is empty. The single forward pass relies on both inputs being sorted
 * in increasing order.
 */
bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
                               const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) {
        return false;
    }
    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;

    while (A != stopA && B != stopB) {
        if (*A < *B) {
            ++A;
        } else if (*B < *A) {
            ++B;
        } else {
            return true;
        }
    }
    return false;
}
952
953
954
/**
 * Generic intersection function for 32-bit values.
 *
 * Walks A (lenA values) and B (lenB values) in parallel, always stepping the
 * side whose current value is smaller, and copies every value met on both
 * sides to out. Returns the number of values written to out; 0 when either
 * input is empty. The single forward pass relies on both inputs being sorted
 * in increasing order.
 */
size_t intersection_uint32(const uint32_t *A, const size_t lenA,
                           const uint32_t *B, const size_t lenB,
                           uint32_t *out) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    const uint32_t *const stopA = A + lenA;
    const uint32_t *const stopB = B + lenB;
    uint32_t *dst = out;

    while (A != stopA && B != stopB) {
        if (*A < *B) {
            ++A;
        } else if (*B < *A) {
            ++B;
        } else {
            // Same value on both sides: keep it and step both cursors.
            *dst++ = *A;
            ++A;
            ++B;
        }
    }
    return (dst - out);
}
983
/**
 * Counts the values that appear in both A (lenA values) and B (lenB values)
 * without storing them. Returns 0 when either input is empty. The single
 * forward pass relies on both inputs being sorted in increasing order.
 */
size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
                                const uint32_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    const uint32_t *const stopA = A + lenA;
    const uint32_t *const stopB = B + lenB;
    size_t matches = 0;

    while (A != stopA && B != stopB) {
        if (*A < *B) {
            ++A;
        } else if (*B < *A) {
            ++B;
        } else {
            // Same value on both sides: count it and step both cursors.
            ++matches;
            ++A;
            ++B;
        }
    }
    return matches;
}
1008
1009// can one vectorize the computation of the union? (Update: Yes! See
1010// union_vector16).
1011
/* Merges set_1 (size_1 values) and set_2 (size_2 values) into buffer,
 * writing a value found in both inputs only once. Returns the number of
 * values written.
 *
 * When one input is empty the other is copied to buffer unchanged. All bulk
 * copies go through memmove, so they stay well defined if buffer overlaps
 * the copied input. The merge relies on both inputs being sorted in
 * increasing order.
 */
size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                    size_t size_2, uint16_t *buffer) {
    if (size_2 == 0) {
        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
        return size_1;
    }
    if (size_1 == 0) {
        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
        return size_2;
    }

    size_t i = 0, j = 0, written = 0;
    // Current front value of each input, refreshed only when that input's
    // cursor moves.
    uint16_t head_1 = set_1[0];
    uint16_t head_2 = set_2[0];

    for (;;) {
        if (head_1 == head_2) {
            // Shared value: emit once, step both inputs.
            buffer[written++] = head_1;
            ++i;
            ++j;
            if (i >= size_1 || j >= size_2) break;
            head_1 = set_1[i];
            head_2 = set_2[j];
        } else if (head_1 < head_2) {
            buffer[written++] = head_1;
            ++i;
            if (i >= size_1) break;
            head_1 = set_1[i];
        } else {
            buffer[written++] = head_2;
            ++j;
            if (j >= size_2) break;
            head_2 = set_2[j];
        }
    }

    // At most one input still has values left; append them in one go.
    if (i < size_1) {
        const size_t rest = size_1 - i;
        memmove(buffer + written, set_1 + i, rest * sizeof(uint16_t));
        written += rest;
    } else if (j < size_2) {
        const size_t rest = size_2 - j;
        memmove(buffer + written, set_2 + j, rest * sizeof(uint16_t));
        written += rest;
    }

    return written;
}
1060
/* Writes to a_out the values of a1 (length1 values) that do not occur in a2
 * (length2 values) and returns how many were written.
 *
 * When a2 is empty, a1 is copied to a_out as is (the copy is skipped when
 * a_out and a1 are the same pointer). Once a2 is exhausted during the walk,
 * the unread remainder of a1 is appended with memmove. The walk relies on
 * both inputs being sorted in increasing order.
 */
int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
                      int length2, uint16_t *a_out) {
    if (length1 == 0) {
        return 0;
    }
    if (length2 == 0) {
        if (a1 != a_out) {
            memcpy(a_out, a1, sizeof(uint16_t) * length1);
        }
        return length1;
    }

    int i = 0, j = 0;  // cursors into a1 and a2
    int kept = 0;      // number of values written to a_out
    uint16_t lhs = a1[0];
    uint16_t rhs = a2[0];

    for (;;) {
        if (lhs > rhs) {
            // a2 is behind: step it, and flush the rest of a1 if it ran out.
            ++j;
            if (j >= length2) {
                memmove(a_out + kept, a1 + i,
                        sizeof(uint16_t) * (length1 - i));
                return kept + length1 - i;
            }
            rhs = a2[j];
        } else if (lhs < rhs) {
            // Value only in a1: keep it.
            a_out[kept++] = lhs;
            ++i;
            if (i >= length1) {
                return kept;
            }
            lhs = a1[i];
        } else {
            // Value in both: drop it and step both cursors.
            ++i;
            ++j;
            if (i >= length1) {
                return kept;
            }
            if (j >= length2) {
                memmove(a_out + kept, a1 + i,
                        sizeof(uint16_t) * (length1 - i));
                return kept + length1 - i;
            }
            lhs = a1[i];
            rhs = a2[j];
        }
    }
}
1105
/* Symmetric difference of array_1 (card_1 values) and array_2 (card_2
 * values): every value that occurs in exactly one of the inputs is written
 * to out. Returns the number of values written.
 *
 * Once one input is exhausted the remainder of the other is appended with
 * memcpy. The merge relies on both inputs being sorted in increasing order.
 */
int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
                   const uint16_t *array_2, int32_t card_2, uint16_t *out) {
    int32_t i = 0, j = 0;
    int32_t written = 0;

    while (i < card_1 && j < card_2) {
        const uint16_t left = array_1[i];
        const uint16_t right = array_2[j];
        if (left < right) {
            out[written++] = left;
            ++i;
        } else if (left == right) {
            // Present on both sides: dropped from the result.
            ++i;
            ++j;
        } else {
            out[written++] = right;
            ++j;
        }
    }

    // At most one input still has values left.
    if (i < card_1) {
        const size_t rest = card_1 - i;
        memcpy(out + written, array_1 + i, rest * sizeof(uint16_t));
        written += (int32_t)rest;
    } else if (j < card_2) {
        const size_t rest = card_2 - j;
        memcpy(out + written, array_2 + j, rest * sizeof(uint16_t));
        written += (int32_t)rest;
    }
    return written;
}
1136
1137#ifdef USESSE4
1138
1139/***
1140 * start of the SIMD 16-bit union code
1141 *
1142 */
1143
// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going
// from vecMin all the way to vecMax.
// Developed originally for merge sort using SIMD instructions.
// Standard merge. See, e.g., Inoue and Taura, SIMD- and Cache-Friendly
// Algorithm for Sorting an Array of Structures.
//
// The work is one lane-wise min/max of the two inputs followed by seven
// identical rounds: lane-wise min/max of the rotated low vector against the
// running high vector, then a rotation of the new low vector by one 16-bit
// lane (_mm_alignr_epi8 of a register with itself, 2 bytes).
// Both inputs are read before either output is written.
static inline void sse_merge(const __m128i *vInput1,
                             const __m128i *vInput2,  // input 1 & 2
                             __m128i *vecMin, __m128i *vecMax) {  // output
    const __m128i first = *vInput1;
    const __m128i second = *vInput2;

    __m128i rotated = _mm_min_epu16(first, second);
    __m128i upper = _mm_max_epu16(first, second);
    rotated = _mm_alignr_epi8(rotated, rotated, 2);

    for (int round = 0; round < 7; round++) {
        const __m128i lower = _mm_min_epu16(rotated, upper);
        upper = _mm_max_epu16(rotated, upper);
        rotated = _mm_alignr_epi8(lower, lower, 2);
    }

    *vecMax = upper;
    *vecMin = rotated;
}
1178
1179// used by store_unique, generated by simdunion.py
1180static uint8_t uniqshuf[] = {
1181 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1182 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1183 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1184 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1185 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1186 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
1187 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1188 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1189 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1190 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1191 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1192 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1193 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1194 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
1195 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
1196 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1197 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1198 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1199 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
1200 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1201 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1202 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1203 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1204 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1205 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1206 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
1207 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1208 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1209 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1210 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
1211 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
1212 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1213 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
1214 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
1215 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1216 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1217 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1218 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd,
1219 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
1220 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1221 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1222 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1223 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1224 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1225 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1226 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1227 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1228 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1229 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1230 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
1231 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1232 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1233 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1234 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
1235 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1236 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1237 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1238 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1239 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1240 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1241 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1242 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
1243 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd,
1244 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1245 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf,
1246 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
1247 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1248 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1249 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1250 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd,
1251 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1252 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1253 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1254 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1255 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1256 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1257 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1258 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf,
1259 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd,
1260 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1261 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1262 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
1263 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd,
1264 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1265 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1266 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1267 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1268 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1269 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
1270 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1271 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1272 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1273 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1274 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1275 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
1276 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1277 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
1278 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
1279 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1280 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1281 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1282 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb,
1283 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
1284 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1285 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1286 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1287 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1288 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1289 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1290 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
1291 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1292 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1293 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1294 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf,
1295 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1296 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1297 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1298 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb,
1299 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1300 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1301 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1302 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1303 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1304 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1305 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1306 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
1307 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf,
1308 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1309 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf,
1310 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1311 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1312 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1313 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1314 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
1315 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1316 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1317 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1318 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
1319 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1320 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1321 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1322 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf,
1323 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
1324 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1325 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1326 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
1327 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
1328 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1329 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1330 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1331 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1332 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1333 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1334 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
1335 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1336 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1337 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1338 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
1339 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf,
1340 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1341 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
1342 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF,
1343 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1344 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1345 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1346 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF,
1347 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf,
1348 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1349 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1350 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1351 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1352 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1353 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1354 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1355 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1356 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1357 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1358 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1359 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1360 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1361 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1362 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
1363 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1364 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1365 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1366 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1367 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1368 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1369 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1370 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
1371 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb,
1372 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1373 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
1374 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
1375 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1376 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1377 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1378 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb,
1379 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1380 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1381 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1382 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1383 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1384 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1385 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1386 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
1387 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb,
1388 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1389 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1390 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
1391 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb,
1392 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1393 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1394 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1395 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1396 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1397 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
1398 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
1399 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1400 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1401 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1402 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
1403 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
1404 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1405 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
1406 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd,
1407 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1408 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1409 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1410 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd,
1411 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
1412 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1413 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1414 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1415 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1416 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1417 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1418 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
1419 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1420 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1421 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1422 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF,
1423 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1424 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1425 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1426 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd,
1427 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1428 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1429 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1430 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1431 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1432 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1433 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1434 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
1435 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF,
1436 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1437 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
1438 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1439 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1440 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1441 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1442 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
1443 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1444 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1445 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1446 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
1447 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1448 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1449 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1450 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
1451 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
1452 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1453 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1454 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
1455 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
1456 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1457 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1458 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
1459 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1460 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1461 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1462 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
1463 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1464 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1465 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1466 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
1467 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
1468 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1469 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
1470 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF,
1471 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1472 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1473 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1474 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF,
1475 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
1476 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1477 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1478 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1479 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1480 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1481 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1482 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
1483 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
1484 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1485 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1486 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF,
1487 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
1488 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1489 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1490 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
1491 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
1492 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1493 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1494 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1495 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1496 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1497 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1498 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
1499 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF,
1500 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1501 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF,
1502 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF,
1503 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
1504 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1505 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1506 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF,
1507 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
1508 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1509 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1510 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1511 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
1512 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1513 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1514 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF,
1515 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF,
1516 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1517 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1518 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1519 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF,
1520 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1521 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
1522 0xFF, 0xFF, 0xFF, 0xFF};
1523
1524// write vector new, while omitting repeated values assuming that previously
1525// written vector was "old"
1526static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) {
1527 __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2);
1528 // lots of high latency instructions follow (optimize?)
1529 int M = _mm_movemask_epi8(
1530 _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128()));
1531 int numberofnewvalues = 8 - _mm_popcnt_u32(M);
1532 __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
1533 __m128i val = _mm_shuffle_epi8(newval, key);
1534 _mm_storeu_si128((__m128i *)output, val);
1535 return numberofnewvalues;
1536}
1537
// Collapses runs of equal consecutive values in out[0..len) in place, keeping
// the first value of each run. Returns the number of values kept, which is 0
// for an empty input.
// (The in-place compaction could be avoided by a caller that merges directly.)
static inline uint32_t unique(uint16_t *out, uint32_t len) {
    if (len == 0) {
        // without this guard the loop below would report one surviving value
        return 0;
    }
    uint32_t pos = 1;  // out[0] always survives
    for (uint32_t i = 1; i < len; ++i) {
        // out[i - 1] is still the original value: writes never go past i
        if (out[i] != out[i - 1]) {
            out[pos++] = out[i];
        }
    }
    return pos;
}
1549
// qsort comparator for uint16_t values (qsort itself could be avoided).
// Returns the difference of the two values after integer promotion, so the
// sign gives the ordering.
static int uint16_compare(const void *a, const void *b) {
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;
    return lhs - rhs;
}
1554
1555// a one-pass SSE union algorithm
1556uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
1557 const uint16_t *__restrict__ array2, uint32_t length2,
1558 uint16_t *__restrict__ output) {
1559 if ((length1 < 8) || (length2 < 8)) {
1560 return (uint32_t)union_uint16(array1, length1, array2, length2, output);
1561 }
1562 __m128i vA, vB, V, vecMin, vecMax;
1563 __m128i laststore;
1564 uint16_t *initoutput = output;
1565 uint32_t len1 = length1 / 8;
1566 uint32_t len2 = length2 / 8;
1567 uint32_t pos1 = 0;
1568 uint32_t pos2 = 0;
1569 // we start the machine
1570 vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1571 pos1++;
1572 vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1573 pos2++;
1574 sse_merge(&vA, &vB, &vecMin, &vecMax);
1575 laststore = _mm_set1_epi16(-1);
1576 output += store_unique(laststore, vecMin, output);
1577 laststore = vecMin;
1578 if ((pos1 < len1) && (pos2 < len2)) {
1579 uint16_t curA, curB;
1580 curA = array1[8 * pos1];
1581 curB = array2[8 * pos2];
1582 while (true) {
1583 if (curA <= curB) {
1584 V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1585 pos1++;
1586 if (pos1 < len1) {
1587 curA = array1[8 * pos1];
1588 } else {
1589 break;
1590 }
1591 } else {
1592 V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1593 pos2++;
1594 if (pos2 < len2) {
1595 curB = array2[8 * pos2];
1596 } else {
1597 break;
1598 }
1599 }
1600 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1601 output += store_unique(laststore, vecMin, output);
1602 laststore = vecMin;
1603 }
1604 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1605 output += store_unique(laststore, vecMin, output);
1606 laststore = vecMin;
1607 }
1608 // we finish the rest off using a scalar algorithm
1609 // could be improved?
1610 //
1611 // copy the small end on a tmp buffer
1612 uint32_t len = (uint32_t)(output - initoutput);
1613 uint16_t buffer[16];
1614 uint32_t leftoversize = store_unique(laststore, vecMax, buffer);
1615 if (pos1 == len1) {
1616 memcpy(buffer + leftoversize, array1 + 8 * pos1,
1617 (length1 - 8 * len1) * sizeof(uint16_t));
1618 leftoversize += length1 - 8 * len1;
1619 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1620
1621 leftoversize = unique(buffer, leftoversize);
1622 len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2,
1623 length2 - 8 * pos2, output);
1624 } else {
1625 memcpy(buffer + leftoversize, array2 + 8 * pos2,
1626 (length2 - 8 * len2) * sizeof(uint16_t));
1627 leftoversize += length2 - 8 * len2;
1628 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1629 leftoversize = unique(buffer, leftoversize);
1630 len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1,
1631 length1 - 8 * pos1, output);
1632 }
1633 return len;
1634}
1635
1636/**
1637 * End of the SIMD 16-bit union code
1638 *
1639 */
1640
1641/**
1642 * Start of SIMD 16-bit XOR code
1643 */
1644
1645// write vector new, while omitting repeated values assuming that previously
1646// written vector was "old"
1647static inline int store_unique_xor(__m128i old, __m128i newval,
1648 uint16_t *output) {
1649 __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4);
1650 __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2);
1651 __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1);
1652 __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval);
1653 __m128i equalleftoright = _mm_or_si128(equalleft, equalright);
1654 int M = _mm_movemask_epi8(
1655 _mm_packs_epi16(equalleftoright, _mm_setzero_si128()));
1656 int numberofnewvalues = 8 - _mm_popcnt_u32(M);
1657 __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
1658 __m128i val = _mm_shuffle_epi8(vecTmp2, key);
1659 _mm_storeu_si128((__m128i *)output, val);
1660 return numberofnewvalues;
1661}
1662
// XOR-style in-place compaction of out[0..len): every run of equal consecutive
// values is replaced by a single copy when the run has odd length and removed
// entirely when it has even length. Returns the number of values kept.
// An empty input yields 0, and runs longer than two values are handled without
// the write position ever moving below zero.
static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
    uint32_t pos = 0;
    uint32_t i = 0;
    while (i < len) {
        const uint16_t value = out[i];
        uint32_t run = 1;
        while ((i + run < len) && (out[i + run] == value)) {
            ++run;
        }
        if (run & 1u) {
            // pos never exceeds i, so this cannot clobber unread input
            out[pos++] = value;
        }
        i += run;
    }
    return pos;
}
1675
1676// a one-pass SSE xor algorithm
1677uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
1678 const uint16_t *__restrict__ array2, uint32_t length2,
1679 uint16_t *__restrict__ output) {
1680 if ((length1 < 8) || (length2 < 8)) {
1681 return xor_uint16(array1, length1, array2, length2, output);
1682 }
1683 __m128i vA, vB, V, vecMin, vecMax;
1684 __m128i laststore;
1685 uint16_t *initoutput = output;
1686 uint32_t len1 = length1 / 8;
1687 uint32_t len2 = length2 / 8;
1688 uint32_t pos1 = 0;
1689 uint32_t pos2 = 0;
1690 // we start the machine
1691 vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1692 pos1++;
1693 vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1694 pos2++;
1695 sse_merge(&vA, &vB, &vecMin, &vecMax);
1696 laststore = _mm_set1_epi16(-1);
1697 uint16_t buffer[17];
1698 output += store_unique_xor(laststore, vecMin, output);
1699
1700 laststore = vecMin;
1701 if ((pos1 < len1) && (pos2 < len2)) {
1702 uint16_t curA, curB;
1703 curA = array1[8 * pos1];
1704 curB = array2[8 * pos2];
1705 while (true) {
1706 if (curA <= curB) {
1707 V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
1708 pos1++;
1709 if (pos1 < len1) {
1710 curA = array1[8 * pos1];
1711 } else {
1712 break;
1713 }
1714 } else {
1715 V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
1716 pos2++;
1717 if (pos2 < len2) {
1718 curB = array2[8 * pos2];
1719 } else {
1720 break;
1721 }
1722 }
1723 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1724 // conditionally stores the last value of laststore as well as all
1725 // but the
1726 // last value of vecMin
1727 output += store_unique_xor(laststore, vecMin, output);
1728 laststore = vecMin;
1729 }
1730 sse_merge(&V, &vecMax, &vecMin, &vecMax);
1731 // conditionally stores the last value of laststore as well as all but
1732 // the
1733 // last value of vecMin
1734 output += store_unique_xor(laststore, vecMin, output);
1735 laststore = vecMin;
1736 }
1737 uint32_t len = (uint32_t)(output - initoutput);
1738
1739 // we finish the rest off using a scalar algorithm
1740 // could be improved?
1741 // conditionally stores the last value of laststore as well as all but the
1742 // last value of vecMax,
1743 // we store to "buffer"
1744 int leftoversize = store_unique_xor(laststore, vecMax, buffer);
1745 uint16_t vec7 = _mm_extract_epi16(vecMax, 7);
1746 uint16_t vec6 = _mm_extract_epi16(vecMax, 6);
1747 if (vec7 != vec6) buffer[leftoversize++] = vec7;
1748 if (pos1 == len1) {
1749 memcpy(buffer + leftoversize, array1 + 8 * pos1,
1750 (length1 - 8 * len1) * sizeof(uint16_t));
1751 leftoversize += length1 - 8 * len1;
1752 if (leftoversize == 0) { // trivial case
1753 memcpy(output, array2 + 8 * pos2,
1754 (length2 - 8 * pos2) * sizeof(uint16_t));
1755 len += (length2 - 8 * pos2);
1756 } else {
1757 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1758 leftoversize = unique_xor(buffer, leftoversize);
1759 len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2,
1760 length2 - 8 * pos2, output);
1761 }
1762 } else {
1763 memcpy(buffer + leftoversize, array2 + 8 * pos2,
1764 (length2 - 8 * len2) * sizeof(uint16_t));
1765 leftoversize += length2 - 8 * len2;
1766 if (leftoversize == 0) { // trivial case
1767 memcpy(output, array1 + 8 * pos1,
1768 (length1 - 8 * pos1) * sizeof(uint16_t));
1769 len += (length1 - 8 * pos1);
1770 } else {
1771 qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
1772 leftoversize = unique_xor(buffer, leftoversize);
1773 len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1,
1774 length1 - 8 * pos1, output);
1775 }
1776 }
1777 return len;
1778}
1779
1780/**
1781 * End of SIMD 16-bit XOR code
1782 */
1783
1784#endif // USESSE4
1785
// Scalar merge-union of set_1[0..size_1) and set_2[0..size_2) into `buffer`.
// Values that are equal at the two merge heads are written once. Returns the
// number of values written.
// NOTE(review): the merge presumably expects both inputs sorted in increasing
// order - confirm against callers.
size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
                    size_t size_2, uint32_t *buffer) {
    // an empty side turns the union into a plain copy of the other side
    if (size_2 == 0) {
        memmove(buffer, set_1, size_1 * sizeof(uint32_t));
        return size_1;
    }
    if (size_1 == 0) {
        memmove(buffer, set_2, size_2 * sizeof(uint32_t));
        return size_2;
    }

    size_t i = 0;
    size_t j = 0;
    size_t written = 0;
    uint32_t head_1 = set_1[i];
    uint32_t head_2 = set_2[j];

    for (;;) {
        // on a tie both heads are consumed and the value is emitted once
        const bool take_1 = (head_1 <= head_2);
        const bool take_2 = (head_2 <= head_1);
        buffer[written++] = take_1 ? head_1 : head_2;
        if (take_1) {
            ++i;
        }
        if (take_2) {
            ++j;
        }
        if ((take_1 && i >= size_1) || (take_2 && j >= size_2)) {
            break;
        }
        // heads are refreshed only after the write above
        if (take_1) {
            head_1 = set_1[i];
        }
        if (take_2) {
            head_2 = set_2[j];
        }
    }

    // at most one side still has values; append them unchanged
    if (i < size_1) {
        const size_t remaining = size_1 - i;
        memmove(buffer + written, set_1 + i, remaining * sizeof(uint32_t));
        written += remaining;
    } else if (j < size_2) {
        const size_t remaining = size_2 - j;
        memmove(buffer + written, set_2 + j, remaining * sizeof(uint32_t));
        written += remaining;
    }

    return written;
}
1834
// Size of the union of set_1[0..size_1) and set_2[0..size_2), computed with
// the same merge walk as union_uint32 but without writing any output.
// NOTE(review): the merge presumably expects both inputs sorted in increasing
// order - confirm against callers.
size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
                         const uint32_t *set_2, size_t size_2) {
    size_t i = 0;
    size_t j = 0;
    size_t shared = 0;  // values met at both merge heads at the same time

    // when either side is empty the loop is skipped and the other size results
    while ((i < size_1) && (j < size_2)) {
        const uint32_t head_1 = set_1[i];
        const uint32_t head_2 = set_2[j];
        if (head_1 == head_2) {
            ++shared;
        }
        if (head_1 <= head_2) {
            ++i;
        }
        if (head_2 <= head_1) {
            ++j;
        }
    }

    // every value counts once, except shared ones which appear on both sides
    return size_1 + size_2 - shared;
}
1878
1879
1880
// Union of two uint16_t arrays into `buffer`, handing the array with fewer
// values to the underlying routine first (set_2 goes first on a tie).
// Uses union_vector16 when ROARING_VECTOR_OPERATIONS_ENABLED is defined and
// union_uint16 otherwise. Returns whatever the chosen routine returns.
size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                         size_t size_2, uint16_t *buffer) {
    // compute union with smallest array first
    const bool first_is_shorter = (size_1 < size_2);
    const uint16_t *shorter = first_is_shorter ? set_1 : set_2;
    const size_t shorter_size = first_is_shorter ? size_1 : size_2;
    const uint16_t *longer = first_is_shorter ? set_2 : set_1;
    const size_t longer_size = first_is_shorter ? size_2 : size_1;
#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
    return union_vector16(shorter, (uint32_t)shorter_size,
                          longer, (uint32_t)longer_size, buffer);
#else
    return union_uint16(shorter, shorter_size, longer, longer_size, buffer);
#endif
}
1903/* end file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */
1904/* begin file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */
1905#include <assert.h>
1906#include <stdint.h>
1907#include <stdio.h>
1908#include <stdlib.h>
1909#include <string.h>
1910
1911
1912#ifdef IS_X64
// lengthTable[b] is the number of set bits in the byte value b.
// Laid out 16 entries per row: the row is the high nibble of b.
static uint8_t lengthTable[256] = {
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,  // 0x00 - 0x0F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,  // 0x10 - 0x1F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,  // 0x20 - 0x2F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,  // 0x30 - 0x3F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,  // 0x40 - 0x4F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,  // 0x50 - 0x5F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,  // 0x60 - 0x6F
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,  // 0x70 - 0x7F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,  // 0x80 - 0x8F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,  // 0x90 - 0x9F
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,  // 0xA0 - 0xAF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,  // 0xB0 - 0xBF
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,  // 0xC0 - 0xCF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,  // 0xD0 - 0xDF
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,  // 0xE0 - 0xEF
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8   // 0xF0 - 0xFF
};
1925#endif
1926
1927#ifdef USEAVX
1928ALIGNED(32)
// vecDecodeTable[b] lists, in increasing order, the 1-based positions of the
// bits set in the byte value b; unused trailing slots are 0.
// The comment on each row is the byte value b.
static uint32_t vecDecodeTable[256][8] = {
    {0, 0, 0, 0, 0, 0, 0, 0},  // 0x00
    {1, 0, 0, 0, 0, 0, 0, 0},  // 0x01
    {2, 0, 0, 0, 0, 0, 0, 0},  // 0x02
    {1, 2, 0, 0, 0, 0, 0, 0},  // 0x03
    {3, 0, 0, 0, 0, 0, 0, 0},  // 0x04
    {1, 3, 0, 0, 0, 0, 0, 0},  // 0x05
    {2, 3, 0, 0, 0, 0, 0, 0},  // 0x06
    {1, 2, 3, 0, 0, 0, 0, 0},  // 0x07
    {4, 0, 0, 0, 0, 0, 0, 0},  // 0x08
    {1, 4, 0, 0, 0, 0, 0, 0},  // 0x09
    {2, 4, 0, 0, 0, 0, 0, 0},  // 0x0A
    {1, 2, 4, 0, 0, 0, 0, 0},  // 0x0B
    {3, 4, 0, 0, 0, 0, 0, 0},  // 0x0C
    {1, 3, 4, 0, 0, 0, 0, 0},  // 0x0D
    {2, 3, 4, 0, 0, 0, 0, 0},  // 0x0E
    {1, 2, 3, 4, 0, 0, 0, 0},  // 0x0F
    {5, 0, 0, 0, 0, 0, 0, 0},  // 0x10
    {1, 5, 0, 0, 0, 0, 0, 0},  // 0x11
    {2, 5, 0, 0, 0, 0, 0, 0},  // 0x12
    {1, 2, 5, 0, 0, 0, 0, 0},  // 0x13
    {3, 5, 0, 0, 0, 0, 0, 0},  // 0x14
    {1, 3, 5, 0, 0, 0, 0, 0},  // 0x15
    {2, 3, 5, 0, 0, 0, 0, 0},  // 0x16
    {1, 2, 3, 5, 0, 0, 0, 0},  // 0x17
    {4, 5, 0, 0, 0, 0, 0, 0},  // 0x18
    {1, 4, 5, 0, 0, 0, 0, 0},  // 0x19
    {2, 4, 5, 0, 0, 0, 0, 0},  // 0x1A
    {1, 2, 4, 5, 0, 0, 0, 0},  // 0x1B
    {3, 4, 5, 0, 0, 0, 0, 0},  // 0x1C
    {1, 3, 4, 5, 0, 0, 0, 0},  // 0x1D
    {2, 3, 4, 5, 0, 0, 0, 0},  // 0x1E
    {1, 2, 3, 4, 5, 0, 0, 0},  // 0x1F
    {6, 0, 0, 0, 0, 0, 0, 0},  // 0x20
    {1, 6, 0, 0, 0, 0, 0, 0},  // 0x21
    {2, 6, 0, 0, 0, 0, 0, 0},  // 0x22
    {1, 2, 6, 0, 0, 0, 0, 0},  // 0x23
    {3, 6, 0, 0, 0, 0, 0, 0},  // 0x24
    {1, 3, 6, 0, 0, 0, 0, 0},  // 0x25
    {2, 3, 6, 0, 0, 0, 0, 0},  // 0x26
    {1, 2, 3, 6, 0, 0, 0, 0},  // 0x27
    {4, 6, 0, 0, 0, 0, 0, 0},  // 0x28
    {1, 4, 6, 0, 0, 0, 0, 0},  // 0x29
    {2, 4, 6, 0, 0, 0, 0, 0},  // 0x2A
    {1, 2, 4, 6, 0, 0, 0, 0},  // 0x2B
    {3, 4, 6, 0, 0, 0, 0, 0},  // 0x2C
    {1, 3, 4, 6, 0, 0, 0, 0},  // 0x2D
    {2, 3, 4, 6, 0, 0, 0, 0},  // 0x2E
    {1, 2, 3, 4, 6, 0, 0, 0},  // 0x2F
    {5, 6, 0, 0, 0, 0, 0, 0},  // 0x30
    {1, 5, 6, 0, 0, 0, 0, 0},  // 0x31
    {2, 5, 6, 0, 0, 0, 0, 0},  // 0x32
    {1, 2, 5, 6, 0, 0, 0, 0},  // 0x33
    {3, 5, 6, 0, 0, 0, 0, 0},  // 0x34
    {1, 3, 5, 6, 0, 0, 0, 0},  // 0x35
    {2, 3, 5, 6, 0, 0, 0, 0},  // 0x36
    {1, 2, 3, 5, 6, 0, 0, 0},  // 0x37
    {4, 5, 6, 0, 0, 0, 0, 0},  // 0x38
    {1, 4, 5, 6, 0, 0, 0, 0},  // 0x39
    {2, 4, 5, 6, 0, 0, 0, 0},  // 0x3A
    {1, 2, 4, 5, 6, 0, 0, 0},  // 0x3B
    {3, 4, 5, 6, 0, 0, 0, 0},  // 0x3C
    {1, 3, 4, 5, 6, 0, 0, 0},  // 0x3D
    {2, 3, 4, 5, 6, 0, 0, 0},  // 0x3E
    {1, 2, 3, 4, 5, 6, 0, 0},  // 0x3F
    {7, 0, 0, 0, 0, 0, 0, 0},  // 0x40
    {1, 7, 0, 0, 0, 0, 0, 0},  // 0x41
    {2, 7, 0, 0, 0, 0, 0, 0},  // 0x42
    {1, 2, 7, 0, 0, 0, 0, 0},  // 0x43
    {3, 7, 0, 0, 0, 0, 0, 0},  // 0x44
    {1, 3, 7, 0, 0, 0, 0, 0},  // 0x45
    {2, 3, 7, 0, 0, 0, 0, 0},  // 0x46
    {1, 2, 3, 7, 0, 0, 0, 0},  // 0x47
    {4, 7, 0, 0, 0, 0, 0, 0},  // 0x48
    {1, 4, 7, 0, 0, 0, 0, 0},  // 0x49
    {2, 4, 7, 0, 0, 0, 0, 0},  // 0x4A
    {1, 2, 4, 7, 0, 0, 0, 0},  // 0x4B
    {3, 4, 7, 0, 0, 0, 0, 0},  // 0x4C
    {1, 3, 4, 7, 0, 0, 0, 0},  // 0x4D
    {2, 3, 4, 7, 0, 0, 0, 0},  // 0x4E
    {1, 2, 3, 4, 7, 0, 0, 0},  // 0x4F
    {5, 7, 0, 0, 0, 0, 0, 0},  // 0x50
    {1, 5, 7, 0, 0, 0, 0, 0},  // 0x51
    {2, 5, 7, 0, 0, 0, 0, 0},  // 0x52
    {1, 2, 5, 7, 0, 0, 0, 0},  // 0x53
    {3, 5, 7, 0, 0, 0, 0, 0},  // 0x54
    {1, 3, 5, 7, 0, 0, 0, 0},  // 0x55
    {2, 3, 5, 7, 0, 0, 0, 0},  // 0x56
    {1, 2, 3, 5, 7, 0, 0, 0},  // 0x57
    {4, 5, 7, 0, 0, 0, 0, 0},  // 0x58
    {1, 4, 5, 7, 0, 0, 0, 0},  // 0x59
    {2, 4, 5, 7, 0, 0, 0, 0},  // 0x5A
    {1, 2, 4, 5, 7, 0, 0, 0},  // 0x5B
    {3, 4, 5, 7, 0, 0, 0, 0},  // 0x5C
    {1, 3, 4, 5, 7, 0, 0, 0},  // 0x5D
    {2, 3, 4, 5, 7, 0, 0, 0},  // 0x5E
    {1, 2, 3, 4, 5, 7, 0, 0},  // 0x5F
    {6, 7, 0, 0, 0, 0, 0, 0},  // 0x60
    {1, 6, 7, 0, 0, 0, 0, 0},  // 0x61
    {2, 6, 7, 0, 0, 0, 0, 0},  // 0x62
    {1, 2, 6, 7, 0, 0, 0, 0},  // 0x63
    {3, 6, 7, 0, 0, 0, 0, 0},  // 0x64
    {1, 3, 6, 7, 0, 0, 0, 0},  // 0x65
    {2, 3, 6, 7, 0, 0, 0, 0},  // 0x66
    {1, 2, 3, 6, 7, 0, 0, 0},  // 0x67
    {4, 6, 7, 0, 0, 0, 0, 0},  // 0x68
    {1, 4, 6, 7, 0, 0, 0, 0},  // 0x69
    {2, 4, 6, 7, 0, 0, 0, 0},  // 0x6A
    {1, 2, 4, 6, 7, 0, 0, 0},  // 0x6B
    {3, 4, 6, 7, 0, 0, 0, 0},  // 0x6C
    {1, 3, 4, 6, 7, 0, 0, 0},  // 0x6D
    {2, 3, 4, 6, 7, 0, 0, 0},  // 0x6E
    {1, 2, 3, 4, 6, 7, 0, 0},  // 0x6F
    {5, 6, 7, 0, 0, 0, 0, 0},  // 0x70
    {1, 5, 6, 7, 0, 0, 0, 0},  // 0x71
    {2, 5, 6, 7, 0, 0, 0, 0},  // 0x72
    {1, 2, 5, 6, 7, 0, 0, 0},  // 0x73
    {3, 5, 6, 7, 0, 0, 0, 0},  // 0x74
    {1, 3, 5, 6, 7, 0, 0, 0},  // 0x75
    {2, 3, 5, 6, 7, 0, 0, 0},  // 0x76
    {1, 2, 3, 5, 6, 7, 0, 0},  // 0x77
    {4, 5, 6, 7, 0, 0, 0, 0},  // 0x78
    {1, 4, 5, 6, 7, 0, 0, 0},  // 0x79
    {2, 4, 5, 6, 7, 0, 0, 0},  // 0x7A
    {1, 2, 4, 5, 6, 7, 0, 0},  // 0x7B
    {3, 4, 5, 6, 7, 0, 0, 0},  // 0x7C
    {1, 3, 4, 5, 6, 7, 0, 0},  // 0x7D
    {2, 3, 4, 5, 6, 7, 0, 0},  // 0x7E
    {1, 2, 3, 4, 5, 6, 7, 0},  // 0x7F
    {8, 0, 0, 0, 0, 0, 0, 0},  // 0x80
    {1, 8, 0, 0, 0, 0, 0, 0},  // 0x81
    {2, 8, 0, 0, 0, 0, 0, 0},  // 0x82
    {1, 2, 8, 0, 0, 0, 0, 0},  // 0x83
    {3, 8, 0, 0, 0, 0, 0, 0},  // 0x84
    {1, 3, 8, 0, 0, 0, 0, 0},  // 0x85
    {2, 3, 8, 0, 0, 0, 0, 0},  // 0x86
    {1, 2, 3, 8, 0, 0, 0, 0},  // 0x87
    {4, 8, 0, 0, 0, 0, 0, 0},  // 0x88
    {1, 4, 8, 0, 0, 0, 0, 0},  // 0x89
    {2, 4, 8, 0, 0, 0, 0, 0},  // 0x8A
    {1, 2, 4, 8, 0, 0, 0, 0},  // 0x8B
    {3, 4, 8, 0, 0, 0, 0, 0},  // 0x8C
    {1, 3, 4, 8, 0, 0, 0, 0},  // 0x8D
    {2, 3, 4, 8, 0, 0, 0, 0},  // 0x8E
    {1, 2, 3, 4, 8, 0, 0, 0},  // 0x8F
    {5, 8, 0, 0, 0, 0, 0, 0},  // 0x90
    {1, 5, 8, 0, 0, 0, 0, 0},  // 0x91
    {2, 5, 8, 0, 0, 0, 0, 0},  // 0x92
    {1, 2, 5, 8, 0, 0, 0, 0},  // 0x93
    {3, 5, 8, 0, 0, 0, 0, 0},  // 0x94
    {1, 3, 5, 8, 0, 0, 0, 0},  // 0x95
    {2, 3, 5, 8, 0, 0, 0, 0},  // 0x96
    {1, 2, 3, 5, 8, 0, 0, 0},  // 0x97
    {4, 5, 8, 0, 0, 0, 0, 0},  // 0x98
    {1, 4, 5, 8, 0, 0, 0, 0},  // 0x99
    {2, 4, 5, 8, 0, 0, 0, 0},  // 0x9A
    {1, 2, 4, 5, 8, 0, 0, 0},  // 0x9B
    {3, 4, 5, 8, 0, 0, 0, 0},  // 0x9C
    {1, 3, 4, 5, 8, 0, 0, 0},  // 0x9D
    {2, 3, 4, 5, 8, 0, 0, 0},  // 0x9E
    {1, 2, 3, 4, 5, 8, 0, 0},  // 0x9F
    {6, 8, 0, 0, 0, 0, 0, 0},  // 0xA0
    {1, 6, 8, 0, 0, 0, 0, 0},  // 0xA1
    {2, 6, 8, 0, 0, 0, 0, 0},  // 0xA2
    {1, 2, 6, 8, 0, 0, 0, 0},  // 0xA3
    {3, 6, 8, 0, 0, 0, 0, 0},  // 0xA4
    {1, 3, 6, 8, 0, 0, 0, 0},  // 0xA5
    {2, 3, 6, 8, 0, 0, 0, 0},  // 0xA6
    {1, 2, 3, 6, 8, 0, 0, 0},  // 0xA7
    {4, 6, 8, 0, 0, 0, 0, 0},  // 0xA8
    {1, 4, 6, 8, 0, 0, 0, 0},  // 0xA9
    {2, 4, 6, 8, 0, 0, 0, 0},  // 0xAA
    {1, 2, 4, 6, 8, 0, 0, 0},  // 0xAB
    {3, 4, 6, 8, 0, 0, 0, 0},  // 0xAC
    {1, 3, 4, 6, 8, 0, 0, 0},  // 0xAD
    {2, 3, 4, 6, 8, 0, 0, 0},  // 0xAE
    {1, 2, 3, 4, 6, 8, 0, 0},  // 0xAF
    {5, 6, 8, 0, 0, 0, 0, 0},  // 0xB0
    {1, 5, 6, 8, 0, 0, 0, 0},  // 0xB1
    {2, 5, 6, 8, 0, 0, 0, 0},  // 0xB2
    {1, 2, 5, 6, 8, 0, 0, 0},  // 0xB3
    {3, 5, 6, 8, 0, 0, 0, 0},  // 0xB4
    {1, 3, 5, 6, 8, 0, 0, 0},  // 0xB5
    {2, 3, 5, 6, 8, 0, 0, 0},  // 0xB6
    {1, 2, 3, 5, 6, 8, 0, 0},  // 0xB7
    {4, 5, 6, 8, 0, 0, 0, 0},  // 0xB8
    {1, 4, 5, 6, 8, 0, 0, 0},  // 0xB9
    {2, 4, 5, 6, 8, 0, 0, 0},  // 0xBA
    {1, 2, 4, 5, 6, 8, 0, 0},  // 0xBB
    {3, 4, 5, 6, 8, 0, 0, 0},  // 0xBC
    {1, 3, 4, 5, 6, 8, 0, 0},  // 0xBD
    {2, 3, 4, 5, 6, 8, 0, 0},  // 0xBE
    {1, 2, 3, 4, 5, 6, 8, 0},  // 0xBF
    {7, 8, 0, 0, 0, 0, 0, 0},  // 0xC0
    {1, 7, 8, 0, 0, 0, 0, 0},  // 0xC1
    {2, 7, 8, 0, 0, 0, 0, 0},  // 0xC2
    {1, 2, 7, 8, 0, 0, 0, 0},  // 0xC3
    {3, 7, 8, 0, 0, 0, 0, 0},  // 0xC4
    {1, 3, 7, 8, 0, 0, 0, 0},  // 0xC5
    {2, 3, 7, 8, 0, 0, 0, 0},  // 0xC6
    {1, 2, 3, 7, 8, 0, 0, 0},  // 0xC7
    {4, 7, 8, 0, 0, 0, 0, 0},  // 0xC8
    {1, 4, 7, 8, 0, 0, 0, 0},  // 0xC9
    {2, 4, 7, 8, 0, 0, 0, 0},  // 0xCA
    {1, 2, 4, 7, 8, 0, 0, 0},  // 0xCB
    {3, 4, 7, 8, 0, 0, 0, 0},  // 0xCC
    {1, 3, 4, 7, 8, 0, 0, 0},  // 0xCD
    {2, 3, 4, 7, 8, 0, 0, 0},  // 0xCE
    {1, 2, 3, 4, 7, 8, 0, 0},  // 0xCF
    {5, 7, 8, 0, 0, 0, 0, 0},  // 0xD0
    {1, 5, 7, 8, 0, 0, 0, 0},  // 0xD1
    {2, 5, 7, 8, 0, 0, 0, 0},  // 0xD2
    {1, 2, 5, 7, 8, 0, 0, 0},  // 0xD3
    {3, 5, 7, 8, 0, 0, 0, 0},  // 0xD4
    {1, 3, 5, 7, 8, 0, 0, 0},  // 0xD5
    {2, 3, 5, 7, 8, 0, 0, 0},  // 0xD6
    {1, 2, 3, 5, 7, 8, 0, 0},  // 0xD7
    {4, 5, 7, 8, 0, 0, 0, 0},  // 0xD8
    {1, 4, 5, 7, 8, 0, 0, 0},  // 0xD9
    {2, 4, 5, 7, 8, 0, 0, 0},  // 0xDA
    {1, 2, 4, 5, 7, 8, 0, 0},  // 0xDB
    {3, 4, 5, 7, 8, 0, 0, 0},  // 0xDC
    {1, 3, 4, 5, 7, 8, 0, 0},  // 0xDD
    {2, 3, 4, 5, 7, 8, 0, 0},  // 0xDE
    {1, 2, 3, 4, 5, 7, 8, 0},  // 0xDF
    {6, 7, 8, 0, 0, 0, 0, 0},  // 0xE0
    {1, 6, 7, 8, 0, 0, 0, 0},  // 0xE1
    {2, 6, 7, 8, 0, 0, 0, 0},  // 0xE2
    {1, 2, 6, 7, 8, 0, 0, 0},  // 0xE3
    {3, 6, 7, 8, 0, 0, 0, 0},  // 0xE4
    {1, 3, 6, 7, 8, 0, 0, 0},  // 0xE5
    {2, 3, 6, 7, 8, 0, 0, 0},  // 0xE6
    {1, 2, 3, 6, 7, 8, 0, 0},  // 0xE7
    {4, 6, 7, 8, 0, 0, 0, 0},  // 0xE8
    {1, 4, 6, 7, 8, 0, 0, 0},  // 0xE9
    {2, 4, 6, 7, 8, 0, 0, 0},  // 0xEA
    {1, 2, 4, 6, 7, 8, 0, 0},  // 0xEB
    {3, 4, 6, 7, 8, 0, 0, 0},  // 0xEC
    {1, 3, 4, 6, 7, 8, 0, 0},  // 0xED
    {2, 3, 4, 6, 7, 8, 0, 0},  // 0xEE
    {1, 2, 3, 4, 6, 7, 8, 0},  // 0xEF
    {5, 6, 7, 8, 0, 0, 0, 0},  // 0xF0
    {1, 5, 6, 7, 8, 0, 0, 0},  // 0xF1
    {2, 5, 6, 7, 8, 0, 0, 0},  // 0xF2
    {1, 2, 5, 6, 7, 8, 0, 0},  // 0xF3
    {3, 5, 6, 7, 8, 0, 0, 0},  // 0xF4
    {1, 3, 5, 6, 7, 8, 0, 0},  // 0xF5
    {2, 3, 5, 6, 7, 8, 0, 0},  // 0xF6
    {1, 2, 3, 5, 6, 7, 8, 0},  // 0xF7
    {4, 5, 6, 7, 8, 0, 0, 0},  // 0xF8
    {1, 4, 5, 6, 7, 8, 0, 0},  // 0xF9
    {2, 4, 5, 6, 7, 8, 0, 0},  // 0xFA
    {1, 2, 4, 5, 6, 7, 8, 0},  // 0xFB
    {3, 4, 5, 6, 7, 8, 0, 0},  // 0xFC
    {1, 3, 4, 5, 6, 7, 8, 0},  // 0xFD
    {2, 3, 4, 5, 6, 7, 8, 0},  // 0xFE
    {1, 2, 3, 4, 5, 6, 7, 8}   // 0xFF
};
2187
2188#endif // #ifdef USEAVX
2189
2190#ifdef IS_X64
2191// same as vecDecodeTable but in 16 bits
2192ALIGNED(32)
2193static uint16_t vecDecodeTable_uint16[256][8] = {
2194 {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */
2195 {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */
2196 {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */
2197 {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */
2198 {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */
2199 {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */
2200 {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */
2201 {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */
2202 {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */
2203 {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */
2204 {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */
2205 {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */
2206 {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */
2207 {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */
2208 {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */
2209 {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */
2210 {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */
2211 {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */
2212 {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */
2213 {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */
2214 {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */
2215 {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */
2216 {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */
2217 {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */
2218 {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */
2219 {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */
2220 {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */
2221 {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */
2222 {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */
2223 {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */
2224 {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */
2225 {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */
2226 {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */
2227 {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */
2228 {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */
2229 {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */
2230 {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */
2231 {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */
2232 {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */
2233 {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */
2234 {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */
2235 {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */
2236 {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */
2237 {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */
2238 {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */
2239 {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */
2240 {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */
2241 {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */
2242 {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */
2243 {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */
2244 {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */
2245 {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */
2246 {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */
2247 {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */
2248 {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */
2249 {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */
2250 {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */
2251 {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */
2252 {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */
2253 {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */
2254 {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */
2255 {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */
2256 {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */
2257 {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */
2258 {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */
2259 {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */
2260 {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */
2261 {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */
2262 {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */
2263 {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */
2264 {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */
2265 {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */
2266 {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */
2267 {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */
2268 {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */
2269 {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */
2270 {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */
2271 {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */
2272 {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */
2273 {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */
2274 {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */
2275 {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */
2276 {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */
2277 {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */
2278 {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */
2279 {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */
2280 {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */
2281 {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */
2282 {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */
2283 {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */
2284 {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */
2285 {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */
2286 {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */
2287 {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */
2288 {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */
2289 {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */
2290 {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */
2291 {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */
2292 {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */
2293 {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */
2294 {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */
2295 {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */
2296 {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */
2297 {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */
2298 {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */
2299 {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */
2300 {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */
2301 {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */
2302 {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */
2303 {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */
2304 {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */
2305 {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */
2306 {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */
2307 {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */
2308 {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */
2309 {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */
2310 {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */
2311 {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */
2312 {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */
2313 {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */
2314 {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */
2315 {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */
2316 {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */
2317 {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */
2318 {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */
2319 {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */
2320 {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */
2321 {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */
2322 {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */
2323 {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */
2324 {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */
2325 {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */
2326 {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */
2327 {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */
2328 {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */
2329 {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */
2330 {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */
2331 {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */
2332 {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */
2333 {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */
2334 {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */
2335 {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */
2336 {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */
2337 {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */
2338 {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */
2339 {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */
2340 {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */
2341 {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */
2342 {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */
2343 {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */
2344 {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */
2345 {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */
2346 {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */
2347 {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */
2348 {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */
2349 {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */
2350 {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */
2351 {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */
2352 {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */
2353 {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */
2354 {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */
2355 {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */
2356 {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */
2357 {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */
2358 {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */
2359 {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */
2360 {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */
2361 {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */
2362 {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */
2363 {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */
2364 {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */
2365 {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */
2366 {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */
2367 {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */
2368 {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */
2369 {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */
2370 {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */
2371 {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */
2372 {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */
2373 {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */
2374 {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */
2375 {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */
2376 {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */
2377 {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */
2378 {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */
2379 {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */
2380 {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */
2381 {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */
2382 {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */
2383 {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */
2384 {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */
2385 {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */
2386 {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */
2387 {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */
2388 {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */
2389 {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */
2390 {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */
2391 {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */
2392 {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */
2393 {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */
2394 {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */
2395 {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */
2396 {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */
2397 {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */
2398 {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */
2399 {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */
2400 {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */
2401 {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */
2402 {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */
2403 {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */
2404 {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */
2405 {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */
2406 {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */
2407 {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */
2408 {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */
2409 {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */
2410 {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */
2411 {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */
2412 {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */
2413 {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */
2414 {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */
2415 {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */
2416 {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */
2417 {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */
2418 {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */
2419 {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */
2420 {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */
2421 {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */
2422 {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */
2423 {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */
2424 {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */
2425 {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */
2426 {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */
2427 {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */
2428 {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */
2429 {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */
2430 {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */
2431 {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */
2432 {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */
2433 {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */
2434 {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */
2435 {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */
2436 {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */
2437 {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */
2438 {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */
2439 {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */
2440 {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */
2441 {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */
2442 {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */
2443 {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */
2444 {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */
2445 {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */
2446 {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */
2447 {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */
2448 {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */
2449 {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */
2450};
2451
2452#endif
2453
2454#ifdef USEAVX
2455
2456size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout,
2457 size_t outcapacity, uint32_t base) {
2458 uint32_t *out = (uint32_t *)vout;
2459 uint32_t *initout = out;
2460 __m256i baseVec = _mm256_set1_epi32(base - 1);
2461 __m256i incVec = _mm256_set1_epi32(64);
2462 __m256i add8 = _mm256_set1_epi32(8);
2463 uint32_t *safeout = out + outcapacity;
2464 size_t i = 0;
2465 for (; (i < length) && (out + 64 <= safeout); ++i) {
2466 uint64_t w = array[i];
2467 if (w == 0) {
2468 baseVec = _mm256_add_epi32(baseVec, incVec);
2469 } else {
2470 for (int k = 0; k < 4; ++k) {
2471 uint8_t byteA = (uint8_t)w;
2472 uint8_t byteB = (uint8_t)(w >> 8);
2473 w >>= 16;
2474 __m256i vecA =
2475 _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]);
2476 __m256i vecB =
2477 _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]);
2478 uint8_t advanceA = lengthTable[byteA];
2479 uint8_t advanceB = lengthTable[byteB];
2480 vecA = _mm256_add_epi32(baseVec, vecA);
2481 baseVec = _mm256_add_epi32(baseVec, add8);
2482 vecB = _mm256_add_epi32(baseVec, vecB);
2483 baseVec = _mm256_add_epi32(baseVec, add8);
2484 _mm256_storeu_si256((__m256i *)out, vecA);
2485 out += advanceA;
2486 _mm256_storeu_si256((__m256i *)out, vecB);
2487 out += advanceB;
2488 }
2489 }
2490 }
2491 base += i * 64;
2492 for (; (i < length) && (out < safeout); ++i) {
2493 uint64_t w = array[i];
2494 while ((w != 0) && (out < safeout)) {
2495 uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail)
2496 int r = __builtin_ctzll(w); // on x64, should compile to TZCNT
2497 uint32_t val = r + base;
2498 memcpy(out, &val,
2499 sizeof(uint32_t)); // should be compiled as a MOV on x64
2500 out++;
2501 w ^= t;
2502 }
2503 base += 64;
2504 }
2505 return out - initout;
2506}
2507#endif // USEAVX
2508
/*
 * Write the positions of the set bits of "bitset" ("length" 64-bit words) to
 * "vout" as 32-bit integers, offset by "base". Returns the number of values
 * written.
 *
 * No capacity is passed in: the caller must provide room for every set bit.
 */
size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout,
                              uint32_t base) {
    uint32_t *out = (uint32_t *)vout;
    // size_t (not int): the count is returned as size_t and must not overflow
    // a signed int on very large inputs.
    size_t outpos = 0;
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset[i];
        while (w != 0) {
            // index of the lowest set bit, shifted into the output range
            uint32_t val = (uint32_t)__builtin_ctzll(w) + base;
            // memcpy rather than a direct store, so the write does not depend
            // on the alignment of vout
            memcpy(out + outpos, &val, sizeof(val));
            outpos++;
            w &= w - 1;  // clear the lowest set bit
        }
        base += 64;
    }
    return outpos;
}
2528
/*
 * Write to "out", as 16-bit integers offset by "base", the positions of the
 * bits that are set in both "bitset1" and "bitset2" (each "length" 64-bit
 * words). Returns the number of values written.
 *
 * The caller must provide room in "out" for every common bit.
 */
size_t bitset_extract_intersection_setbits_uint16(const uint64_t * __restrict__ bitset1,
                                                  const uint64_t * __restrict__ bitset2,
                                                  size_t length, uint16_t *out,
                                                  uint16_t base) {
    int written = 0;
    size_t word_index = 0;
    while (word_index < length) {
        uint64_t common = bitset1[word_index] & bitset2[word_index];
        // peel off the lowest set bit until the word is exhausted
        for (; common != 0; common &= common - 1) {
            out[written] = (uint16_t)(__builtin_ctzll(common) + base);
            written++;
        }
        base += 64;
        word_index++;
    }
    return written;
}
2546
2547#ifdef IS_X64
2548/*
2549 * Given a bitset containing "length" 64-bit words, write out the position
2550 * of all the set bits to "out" as 16-bit integers, values start at "base" (can
2551 *be set to zero).
2552 *
2553 * The "out" pointer should be sufficient to store the actual number of bits
2554 *set.
2555 *
2556 * Returns how many values were actually decoded.
2557 *
2558 * This function uses SSE decoding.
2559 */
2560size_t bitset_extract_setbits_sse_uint16(const uint64_t *bitset, size_t length,
2561 uint16_t *out, size_t outcapacity,
2562 uint16_t base) {
2563 uint16_t *initout = out;
2564 __m128i baseVec = _mm_set1_epi16(base - 1);
2565 __m128i incVec = _mm_set1_epi16(64);
2566 __m128i add8 = _mm_set1_epi16(8);
2567 uint16_t *safeout = out + outcapacity;
2568 const int numberofbytes = 2; // process two bytes at a time
2569 size_t i = 0;
2570 for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) {
2571 uint64_t w = bitset[i];
2572 if (w == 0) {
2573 baseVec = _mm_add_epi16(baseVec, incVec);
2574 } else {
2575 for (int k = 0; k < 4; ++k) {
2576 uint8_t byteA = (uint8_t)w;
2577 uint8_t byteB = (uint8_t)(w >> 8);
2578 w >>= 16;
2579 __m128i vecA = _mm_load_si128(
2580 (const __m128i *)vecDecodeTable_uint16[byteA]);
2581 __m128i vecB = _mm_load_si128(
2582 (const __m128i *)vecDecodeTable_uint16[byteB]);
2583 uint8_t advanceA = lengthTable[byteA];
2584 uint8_t advanceB = lengthTable[byteB];
2585 vecA = _mm_add_epi16(baseVec, vecA);
2586 baseVec = _mm_add_epi16(baseVec, add8);
2587 vecB = _mm_add_epi16(baseVec, vecB);
2588 baseVec = _mm_add_epi16(baseVec, add8);
2589 _mm_storeu_si128((__m128i *)out, vecA);
2590 out += advanceA;
2591 _mm_storeu_si128((__m128i *)out, vecB);
2592 out += advanceB;
2593 }
2594 }
2595 }
2596 base += (uint16_t)(i * 64);
2597 for (; (i < length) && (out < safeout); ++i) {
2598 uint64_t w = bitset[i];
2599 while ((w != 0) && (out < safeout)) {
2600 uint64_t t = w & (~w + 1);
2601 int r = __builtin_ctzll(w);
2602 *out = r + base;
2603 out++;
2604 w ^= t;
2605 }
2606 base += 64;
2607 }
2608 return out - initout;
2609}
2610#endif
2611
/*
 * Decode a bitset of "length" 64-bit words into the list of its set-bit
 * positions, written to "out" as 16-bit integers. The first bit of the first
 * word maps to "base" (which may be zero).
 *
 * "out" must have room for as many values as there are bits set.
 *
 * Returns the number of values written.
 */
size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length,
                                     uint16_t *out, uint16_t base) {
    int written = 0;
    size_t word_index = 0;
    while (word_index < length) {
        uint64_t remaining = bitset[word_index];
        // peel off the lowest set bit until the word is exhausted
        for (; remaining != 0; remaining &= remaining - 1) {
            out[written] = (uint16_t)(__builtin_ctzll(remaining) + base);
            written++;
        }
        base += 64;
        word_index++;
    }
    return written;
}
2636
2637#if defined(ASMBITMANIPOPTIMIZATION)
2638
/*
 * Set in "bitset" (an array of 64-bit words) every bit whose position is
 * listed in "list" ("length" 16-bit values). "card" is the cardinality before
 * the call; the return value is "card" plus the number of bits that were
 * actually switched from 0 to 1.
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t offset, load, pos;
    uint64_t shift = 6;
    const uint16_t *end = list + length;
    // The asm loop is a do-while: it must not be entered for an empty list.
    if (!length) return card;
    // TODO: could unroll for performance, see bitset_set_list
    // bts is not available as an intrinsic in GCC
    //
    // Per element: pos = *list; offset = pos >> 6; the word at
    // bitset[offset] is loaded, bit (pos mod 64) is set with bts, and the
    // word is stored back. bts leaves the previous value of the bit in the
    // carry flag, so "sbb $-1" adds 1 - CF to card: +1 only for a new bit.
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"
        "shrx %[shift], %[pos], %[offset]\n"
        "mov (%[bitset],%[offset],8), %[load]\n"
        "bts %[pos], %[load]\n"
        "mov %[load], (%[bitset],%[offset],8)\n"
        "sbb $-1, %[card]\n"
        "add $2, %[list]\n"
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(load),
          [pos] "=&r"(pos), [offset] "=&r"(offset)
        : [end] "r"(end), [bitset] "r"(bitset), [shift] "r"(shift)
        :
        /* clobbers: flags are modified, and the bitset words are read and
           written behind the compiler's back */
        "cc", "memory");
    return card;
}
2663
/*
 * Set bit "pos" of the bitset (an array of 64-bit words) with a BTS
 * instruction: the word at index pos >> 6 is loaded, bit (pos mod 64) is set,
 * and the word is stored back.
 */
static inline void bitset_set_list_bts(void *bitset, uint64_t pos) {
    const uint64_t shift = 6;
    uint64_t offset;
    uint64_t load;
    __asm volatile(
        "shrx %[shift], %[pos], %[offset]\n"
        "mov (%[bitset],%[offset],8), %[load]\n"
        "bts %[pos], %[load]\n"
        "mov %[load], (%[bitset],%[offset],8)"
        : [load] "=&r"(load), [offset] "=&r"(offset)
        : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
        :
        /* clobbers: flags are modified, and a bitset word is read and
           written behind the compiler's back */
        "cc", "memory");
}

/*
 * Set in "bitset" every bit whose position is listed in "list" ("length"
 * 16-bit values). The cardinality is not tracked.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t i = 0;
    // Main loop, four positions per iteration. The remaining count is
    // compared instead of computing "list + 3", which would form a pointer
    // beyond the end of a short list.
    for (; length - i >= 4; i += 4) {
        bitset_set_list_bts(bitset, list[i]);
        bitset_set_list_bts(bitset, list[i + 1]);
        bitset_set_list_bts(bitset, list[i + 2]);
        bitset_set_list_bts(bitset, list[i + 3]);
    }
    // Up to three leftover positions.
    for (; i < length; ++i) {
        bitset_set_list_bts(bitset, list[i]);
    }
}
2718
/*
 * Clear in "bitset" (an array of 64-bit words) every bit whose position is
 * listed in "list" ("length" 16-bit values). "card" is the cardinality before
 * the call; the return value is "card" minus the number of bits that were
 * actually switched from 1 to 0.
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    // The asm loop is a do-while: it must not be entered for an empty list.
    if (length == 0) {
        return card;
    }
    const uint16_t *const stop = list + length;
    const uint64_t word_shift = 6;
    uint64_t word_index, word, bit_pos;
    // btr is not available as an intrinsic in GCC
    //
    // Per element: the word at index pos >> 6 is loaded, bit (pos mod 64) is
    // cleared with btr, and the word is stored back. btr leaves the previous
    // value of the bit in the carry flag, so "sbb $0" subtracts it from card.
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"
        "shrx %[shift], %[pos], %[offset]\n"
        "mov (%[bitset],%[offset],8), %[load]\n"
        "btr %[pos], %[load]\n"
        "mov %[load], (%[bitset],%[offset],8)\n"
        "sbb $0, %[card]\n"
        "add $2, %[list]\n"
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r"(card), [list] "+&r"(list), [load] "=&r"(word),
          [pos] "=&r"(bit_pos), [offset] "=&r"(word_index)
        : [end] "r"(stop), [bitset] "r"(bitset), [shift] "r"(word_shift)
        :
        /* clobbers */ "memory");
    return card;
}
2744
2745#else
/*
 * Clear in "bitset" (an array of 64-bit words) every bit whose position is
 * listed in "list" ("length" 16-bit values). "card" is the cardinality before
 * the call; the return value is "card" minus the number of bits that were
 * actually switched from 1 to 0.
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t i = 0; i < length; i++) {
        const uint64_t pos = list[i];
        const uint64_t index = pos % 64;
        uint64_t *slot = &words[pos >> 6];
        const uint64_t before = *slot;
        const uint64_t after = before & ~(UINT64_C(1) << index);
        // before ^ after has at most bit "index" set: 1 if it was cleared
        card -= (before ^ after) >> index;
        *slot = after;
    }
    return card;
}
2762
/*
 * Set in "bitset" (an array of 64-bit words) every bit whose position is
 * listed in "list" ("length" 16-bit values). "card" is the cardinality before
 * the call; the return value is "card" plus the number of bits that were
 * actually switched from 0 to 1.
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t i = 0; i < length; i++) {
        const uint64_t pos = list[i];
        const uint64_t index = pos % 64;
        uint64_t *slot = &words[pos >> 6];
        const uint64_t before = *slot;
        const uint64_t after = before | (UINT64_C(1) << index);
        // before ^ after has at most bit "index" set: 1 if it was newly set
        card += (before ^ after) >> index;
        *slot = after;
    }
    return card;
}
2779
/*
 * Set in "bitset" (an array of 64-bit words) every bit whose position is
 * listed in "list" ("length" 16-bit values). The cardinality is not tracked.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t i = 0; i < length; i++) {
        const uint64_t pos = list[i];
        // word index is pos / 64, bit index is pos % 64
        words[pos >> 6] |= UINT64_C(1) << (pos % 64);
    }
}
2793
2794#endif
2795
2796/* flip specified bits */
2797/* TODO: consider whether worthwhile to make an asm version */
2798
/*
 * Flip in "bitset" (an array of 64-bit words) every bit whose position is
 * listed in "list" ("length" 16-bit values). "card" is the cardinality before
 * the call; each flip adds 1 to it when the bit was clear and subtracts 1
 * when it was set. Returns the updated cardinality.
 */
uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card,
                                   const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t i = 0; i < length; i++) {
        const uint64_t pos = list[i];
        const uint64_t index = pos % 64;
        uint64_t *slot = &words[pos >> 6];
        const uint64_t before = *slot;
        const uint64_t was_set = (before >> index) & UINT64_C(1);
        // branch-free +1 or -1; the -1 case relies on unsigned wrap-around
        card += 1 - 2 * was_set;
        *slot = before ^ (UINT64_C(1) << index);
    }
    return card;
}
2817
/*
 * Flip in "bitset" (an array of 64-bit words) every bit whose position is
 * listed in "list" ("length" 16-bit values). A position listed twice is
 * flipped twice. The cardinality is not tracked.
 */
void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (uint64_t i = 0; i < length; i++) {
        const uint64_t pos = list[i];
        // word index is pos / 64, bit index is pos % 64
        words[pos >> 6] ^= UINT64_C(1) << (pos % 64);
    }
}
2831/* end file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */
2832/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */
2833/*
2834 * array.c
2835 *
2836 */
2837
2838#include <assert.h>
2839#include <stdio.h>
2840#include <stdlib.h>
2841
/*
 * Declarations (no bodies) of array-container helpers whose definitions are
 * not in this part of the file.
 * NOTE(review): the `extern inline` form is presumably used so that this
 * translation unit emits the external definition of functions defined
 * `inline` in the header (C99 inline rules) -- confirm against the header.
 */
extern inline uint16_t array_container_minimum(const array_container_t *arr);
extern inline uint16_t array_container_maximum(const array_container_t *arr);
extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x);

extern inline int array_container_rank(const array_container_t *arr,
                                       uint16_t x);
extern inline bool array_container_contains(const array_container_t *arr,
                                            uint16_t pos);
extern int array_container_cardinality(const array_container_t *array);
extern bool array_container_nonzero_cardinality(const array_container_t *array);
extern void array_container_clear(array_container_t *array);
extern int32_t array_container_serialized_size_in_bytes(int32_t card);
extern bool array_container_empty(const array_container_t *array);
extern bool array_container_full(const array_container_t *array);
2856
2857/* Create a new array with capacity size. Return NULL in case of failure. */
2858array_container_t *array_container_create_given_capacity(int32_t size) {
2859 array_container_t *container;
2860
2861 if ((container = (array_container_t *)malloc(sizeof(array_container_t))) ==
2862 NULL) {
2863 return NULL;
2864 }
2865
2866 if( size <= 0 ) { // we don't want to rely on malloc(0)
2867 container->array = NULL;
2868 } else if ((container->array = (uint16_t *)malloc(sizeof(uint16_t) * size)) ==
2869 NULL) {
2870 free(container);
2871 return NULL;
2872 }
2873
2874 container->capacity = size;
2875 container->cardinality = 0;
2876
2877 return container;
2878}
2879
2880/* Create a new array. Return NULL in case of failure. */
2881array_container_t *array_container_create() {
2882 return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE);
2883}
2884
2885/* Create a new array containing all values in [min,max). */
2886array_container_t * array_container_create_range(uint32_t min, uint32_t max) {
2887 array_container_t * answer = array_container_create_given_capacity(max - min + 1);
2888 if(answer == NULL) return answer;
2889 answer->cardinality = 0;
2890 for(uint32_t k = min; k < max; k++) {
2891 answer->array[answer->cardinality++] = k;
2892 }
2893 return answer;
2894}
2895
2896/* Duplicate container */
2897array_container_t *array_container_clone(const array_container_t *src) {
2898 array_container_t *newcontainer =
2899 array_container_create_given_capacity(src->capacity);
2900 if (newcontainer == NULL) return NULL;
2901
2902 newcontainer->cardinality = src->cardinality;
2903
2904 memcpy(newcontainer->array, src->array,
2905 src->cardinality * sizeof(uint16_t));
2906
2907 return newcontainer;
2908}
2909
2910int array_container_shrink_to_fit(array_container_t *src) {
2911 if (src->cardinality == src->capacity) return 0; // nothing to do
2912 int savings = src->capacity - src->cardinality;
2913 src->capacity = src->cardinality;
2914 if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs
2915 free(src->array);
2916 src->array = NULL;
2917 } else {
2918 uint16_t *oldarray = src->array;
2919 src->array =
2920 (uint16_t *)realloc(oldarray, src->capacity * sizeof(uint16_t));
2921 if (src->array == NULL) free(oldarray); // should never happen?
2922 }
2923 return savings;
2924}
2925
2926/* Free memory. */
2927void array_container_free(array_container_t *arr) {
2928 if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise
2929 free(arr->array);
2930 arr->array = NULL; // pedantic
2931 }
2932 free(arr);
2933}
2934
2935static inline int32_t grow_capacity(int32_t capacity) {
2936 return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE
2937 : capacity < 64 ? capacity * 2
2938 : capacity < 1024 ? capacity * 3 / 2
2939 : capacity * 5 / 4;
2940}
2941
/*
 * Restrict "val" to [min, max]. The lower bound is tested first, so "min"
 * wins when the bounds are inconsistent and val < min.
 */
static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
    if (val < min) {
        return min;
    }
    if (val > max) {
        return max;
    }
    return val;
}
2945
2946void array_container_grow(array_container_t *container, int32_t min,
2947 bool preserve) {
2948
2949 int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536);
2950 int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max);
2951
2952 container->capacity = new_capacity;
2953 uint16_t *array = container->array;
2954
2955 if (preserve) {
2956 container->array =
2957 (uint16_t *)realloc(array, new_capacity * sizeof(uint16_t));
2958 if (container->array == NULL) free(array);
2959 } else {
2960 // Jon Strabala reports that some tools complain otherwise
2961 if (array != NULL) {
2962 free(array);
2963 }
2964 container->array = (uint16_t *)malloc(new_capacity * sizeof(uint16_t));
2965 }
2966
2967 // handle the case where realloc fails
2968 if (container->array == NULL) {
2969 fprintf(stderr, "could not allocate memory\n");
2970 }
2971 assert(container->array != NULL);
2972}
2973
2974/* Copy one container into another. We assume that they are distinct. */
2975void array_container_copy(const array_container_t *src,
2976 array_container_t *dst) {
2977 const int32_t cardinality = src->cardinality;
2978 if (cardinality > dst->capacity) {
2979 array_container_grow(dst, cardinality, false);
2980 }
2981
2982 dst->cardinality = cardinality;
2983 memcpy(dst->array, src->array, cardinality * sizeof(uint16_t));
2984}
2985
2986void array_container_add_from_range(array_container_t *arr, uint32_t min,
2987 uint32_t max, uint16_t step) {
2988 for (uint32_t value = min; value < max; value += step) {
2989 array_container_append(arr, value);
2990 }
2991}
2992
2993/* Computes the union of array1 and array2 and write the result to arrayout.
2994 * It is assumed that arrayout is distinct from both array1 and array2.
2995 */
2996void array_container_union(const array_container_t *array_1,
2997 const array_container_t *array_2,
2998 array_container_t *out) {
2999 const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
3000 const int32_t max_cardinality = card_1 + card_2;
3001
3002 if (out->capacity < max_cardinality) {
3003 array_container_grow(out, max_cardinality, false);
3004 }
3005 out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1,
3006 array_2->array, card_2, out->array);
3007
3008}
3009
3010/* Computes the difference of array1 and array2 and write the result
3011 * to array out.
3012 * Array out does not need to be distinct from array_1
3013 */
3014void array_container_andnot(const array_container_t *array_1,
3015 const array_container_t *array_2,
3016 array_container_t *out) {
3017 if (out->capacity < array_1->cardinality)
3018 array_container_grow(out, array_1->cardinality, false);
3019#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
3020 out->cardinality =
3021 difference_vector16(array_1->array, array_1->cardinality,
3022 array_2->array, array_2->cardinality, out->array);
3023#else
3024 out->cardinality =
3025 difference_uint16(array_1->array, array_1->cardinality, array_2->array,
3026 array_2->cardinality, out->array);
3027#endif
3028}
3029
3030/* Computes the symmetric difference of array1 and array2 and write the
3031 * result
3032 * to arrayout.
3033 * It is assumed that arrayout is distinct from both array1 and array2.
3034 */
3035void array_container_xor(const array_container_t *array_1,
3036 const array_container_t *array_2,
3037 array_container_t *out) {
3038 const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality;
3039 const int32_t max_cardinality = card_1 + card_2;
3040 if (out->capacity < max_cardinality) {
3041 array_container_grow(out, max_cardinality, false);
3042 }
3043
3044#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
3045 out->cardinality =
3046 xor_vector16(array_1->array, array_1->cardinality, array_2->array,
3047 array_2->cardinality, out->array);
3048#else
3049 out->cardinality =
3050 xor_uint16(array_1->array, array_1->cardinality, array_2->array,
3051 array_2->cardinality, out->array);
3052#endif
3053}
3054
/* Smaller of two 32-bit signed integers (b when they are equal). */
static inline int32_t minimum_int32(int32_t a, int32_t b) {
    if (a < b) {
        return a;
    }
    return b;
}
3058
3059/* computes the intersection of array1 and array2 and write the result to
3060 * arrayout.
3061 * It is assumed that arrayout is distinct from both array1 and array2.
3062 * */
3063void array_container_intersection(const array_container_t *array1,
3064 const array_container_t *array2,
3065 array_container_t *out) {
3066 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality,
3067 min_card = minimum_int32(card_1, card_2);
3068 const int threshold = 64; // subject to tuning
3069#ifdef USEAVX
3070 if (out->capacity < min_card) {
3071 array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t),
3072 false);
3073 }
3074#else
3075 if (out->capacity < min_card) {
3076 array_container_grow(out, min_card, false);
3077 }
3078#endif
3079
3080 if (card_1 * threshold < card_2) {
3081 out->cardinality = intersect_skewed_uint16(
3082 array1->array, card_1, array2->array, card_2, out->array);
3083 } else if (card_2 * threshold < card_1) {
3084 out->cardinality = intersect_skewed_uint16(
3085 array2->array, card_2, array1->array, card_1, out->array);
3086 } else {
3087#ifdef USEAVX
3088 out->cardinality = intersect_vector16(
3089 array1->array, card_1, array2->array, card_2, out->array);
3090#else
3091 out->cardinality = intersect_uint16(array1->array, card_1,
3092 array2->array, card_2, out->array);
3093#endif
3094 }
3095}
3096
3097/* computes the size of the intersection of array1 and array2
3098 * */
3099int array_container_intersection_cardinality(const array_container_t *array1,
3100 const array_container_t *array2) {
3101 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
3102 const int threshold = 64; // subject to tuning
3103 if (card_1 * threshold < card_2) {
3104 return intersect_skewed_uint16_cardinality(array1->array, card_1,
3105 array2->array, card_2);
3106 } else if (card_2 * threshold < card_1) {
3107 return intersect_skewed_uint16_cardinality(array2->array, card_2,
3108 array1->array, card_1);
3109 } else {
3110#ifdef USEAVX
3111 return intersect_vector16_cardinality(array1->array, card_1,
3112 array2->array, card_2);
3113#else
3114 return intersect_uint16_cardinality(array1->array, card_1,
3115 array2->array, card_2);
3116#endif
3117 }
3118}
3119
3120bool array_container_intersect(const array_container_t *array1,
3121 const array_container_t *array2) {
3122 int32_t card_1 = array1->cardinality, card_2 = array2->cardinality;
3123 const int threshold = 64; // subject to tuning
3124 if (card_1 * threshold < card_2) {
3125 return intersect_skewed_uint16_nonempty(
3126 array1->array, card_1, array2->array, card_2);
3127 } else if (card_2 * threshold < card_1) {
3128 return intersect_skewed_uint16_nonempty(
3129 array2->array, card_2, array1->array, card_1);
3130 } else {
3131 // we do not bother vectorizing
3132 return intersect_uint16_nonempty(array1->array, card_1,
3133 array2->array, card_2);
3134 }
3135}
3136
3137/* computes the intersection of array1 and array2 and write the result to
3138 * array1.
3139 * */
3140void array_container_intersection_inplace(array_container_t *src_1,
3141 const array_container_t *src_2) {
3142 // todo: can any of this be vectorized?
3143 int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality;
3144 const int threshold = 64; // subject to tuning
3145 if (card_1 * threshold < card_2) {
3146 src_1->cardinality = intersect_skewed_uint16(
3147 src_1->array, card_1, src_2->array, card_2, src_1->array);
3148 } else if (card_2 * threshold < card_1) {
3149 src_1->cardinality = intersect_skewed_uint16(
3150 src_2->array, card_2, src_1->array, card_1, src_1->array);
3151 } else {
3152 src_1->cardinality = intersect_uint16(
3153 src_1->array, card_1, src_2->array, card_2, src_1->array);
3154 }
3155}
3156
3157int array_container_to_uint32_array(void *vout, const array_container_t *cont,
3158 uint32_t base) {
3159 int outpos = 0;
3160 uint32_t *out = (uint32_t *)vout;
3161 for (int i = 0; i < cont->cardinality; ++i) {
3162 const uint32_t val = base + cont->array[i];
3163 memcpy(out + outpos, &val,
3164 sizeof(uint32_t)); // should be compiled as a MOV on x64
3165 outpos++;
3166 }
3167 return outpos;
3168}
3169
3170void array_container_printf(const array_container_t *v) {
3171 if (v->cardinality == 0) {
3172 printf("{}");
3173 return;
3174 }
3175 printf("{");
3176 printf("%d", v->array[0]);
3177 for (int i = 1; i < v->cardinality; ++i) {
3178 printf(",%d", v->array[i]);
3179 }
3180 printf("}");
3181}
3182
3183void array_container_printf_as_uint32_array(const array_container_t *v,
3184 uint32_t base) {
3185 if (v->cardinality == 0) {
3186 return;
3187 }
3188 printf("%u", v->array[0] + base);
3189 for (int i = 1; i < v->cardinality; ++i) {
3190 printf(",%u", v->array[i] + base);
3191 }
3192}
3193
3194/* Compute the number of runs */
3195int32_t array_container_number_of_runs(const array_container_t *a) {
3196 // Can SIMD work here?
3197 int32_t nr_runs = 0;
3198 int32_t prev = -2;
3199 for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) {
3200 if (*p != prev + 1) nr_runs++;
3201 prev = *p;
3202 }
3203 return nr_runs;
3204}
3205
3206int32_t array_container_serialize(const array_container_t *container, char *buf) {
3207 int32_t l, off;
3208 uint16_t cardinality = (uint16_t)container->cardinality;
3209
3210 memcpy(buf, &cardinality, off = sizeof(cardinality));
3211 l = sizeof(uint16_t) * container->cardinality;
3212 if (l) memcpy(&buf[off], container->array, l);
3213
3214 return (off + l);
3215}
3216
3217/**
3218 * Writes the underlying array to buf, outputs how many bytes were written.
3219 * The number of bytes written should be
3220 * array_container_size_in_bytes(container).
3221 *
3222 */
3223int32_t array_container_write(const array_container_t *container, char *buf) {
3224 memcpy(buf, container->array, container->cardinality * sizeof(uint16_t));
3225 return array_container_size_in_bytes(container);
3226}
3227
3228bool array_container_equals(const array_container_t *container1,
3229 const array_container_t *container2) {
3230 if (container1->cardinality != container2->cardinality) {
3231 return false;
3232 }
3233 // could be vectorized:
3234 for (int32_t i = 0; i < container1->cardinality; ++i) {
3235 if (container1->array[i] != container2->array[i]) return false;
3236 }
3237 return true;
3238}
3239
3240bool array_container_is_subset(const array_container_t *container1,
3241 const array_container_t *container2) {
3242 if (container1->cardinality > container2->cardinality) {
3243 return false;
3244 }
3245 int i1 = 0, i2 = 0;
3246 while (i1 < container1->cardinality && i2 < container2->cardinality) {
3247 if (container1->array[i1] == container2->array[i2]) {
3248 i1++;
3249 i2++;
3250 } else if (container1->array[i1] > container2->array[i2]) {
3251 i2++;
3252 } else { // container1->array[i1] < container2->array[i2]
3253 return false;
3254 }
3255 }
3256 if (i1 == container1->cardinality) {
3257 return true;
3258 } else {
3259 return false;
3260 }
3261}
3262
3263int32_t array_container_read(int32_t cardinality, array_container_t *container,
3264 const char *buf) {
3265 if (container->capacity < cardinality) {
3266 array_container_grow(container, cardinality, false);
3267 }
3268 container->cardinality = cardinality;
3269 memcpy(container->array, buf, container->cardinality * sizeof(uint16_t));
3270
3271 return array_container_size_in_bytes(container);
3272}
3273
3274uint32_t array_container_serialization_len(const array_container_t *container) {
3275 return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ +
3276 (sizeof(uint16_t) * container->cardinality));
3277}
3278
3279void *array_container_deserialize(const char *buf, size_t buf_len) {
3280 array_container_t *ptr;
3281
3282 if (buf_len < 2) /* capacity converted to 16 bit */
3283 return (NULL);
3284 else
3285 buf_len -= 2;
3286
3287 if ((ptr = (array_container_t *)malloc(sizeof(array_container_t))) !=
3288 NULL) {
3289 size_t len;
3290 int32_t off;
3291 uint16_t cardinality;
3292
3293 memcpy(&cardinality, buf, off = sizeof(cardinality));
3294
3295 ptr->capacity = ptr->cardinality = (uint32_t)cardinality;
3296 len = sizeof(uint16_t) * ptr->cardinality;
3297
3298 if (len != buf_len) {
3299 free(ptr);
3300 return (NULL);
3301 }
3302
3303 if ((ptr->array = (uint16_t *)malloc(sizeof(uint16_t) *
3304 ptr->capacity)) == NULL) {
3305 free(ptr);
3306 return (NULL);
3307 }
3308
3309 if (len) memcpy(ptr->array, &buf[off], len);
3310
3311 /* Check if returned values are monotonically increasing */
3312 for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) {
3313 if (ptr->array[i] < j) {
3314 free(ptr->array);
3315 free(ptr);
3316 return (NULL);
3317 } else
3318 j = ptr->array[i];
3319 }
3320 }
3321
3322 return (ptr);
3323}
3324
3325bool array_container_iterate(const array_container_t *cont, uint32_t base,
3326 roaring_iterator iterator, void *ptr) {
3327 for (int i = 0; i < cont->cardinality; i++)
3328 if (!iterator(cont->array[i] + base, ptr)) return false;
3329 return true;
3330}
3331
3332bool array_container_iterate64(const array_container_t *cont, uint32_t base,
3333 roaring_iterator64 iterator, uint64_t high_bits,
3334 void *ptr) {
3335 for (int i = 0; i < cont->cardinality; i++)
3336 if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr))
3337 return false;
3338 return true;
3339}
3340/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */
3341/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */
3342/*
3343 * bitset.c
3344 *
3345 */
3346#ifndef _POSIX_C_SOURCE
3347#define _POSIX_C_SOURCE 200809L
3348#endif
3349#include <assert.h>
3350#include <stdio.h>
3351#include <stdlib.h>
3352#include <string.h>
3353
3354
/*
 * External declarations of small bitset-container helpers.
 * NOTE(review): these are presumably defined as inline functions in the
 * roaring header, with the declarations below providing the external
 * definitions for this translation unit (C99 inline model) -- confirm
 * against the header. Only two of them repeat the `inline` keyword here.
 */
3355extern int bitset_container_cardinality(const bitset_container_t *bitset);
3356extern bool bitset_container_nonzero_cardinality(bitset_container_t *bitset);
3357extern void bitset_container_set(bitset_container_t *bitset, uint16_t pos);
3358extern void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
3359extern inline bool bitset_container_get(const bitset_container_t *bitset,
3360 uint16_t pos);
3361extern int32_t bitset_container_serialized_size_in_bytes();
3362extern bool bitset_container_add(bitset_container_t *bitset, uint16_t pos);
3363extern bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos);
3364extern inline bool bitset_container_contains(const bitset_container_t *bitset,
3365 uint16_t pos);
3366
3367void bitset_container_clear(bitset_container_t *bitset) {
3368 memset(bitset->array, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3369 bitset->cardinality = 0;
3370}
3371
3372void bitset_container_set_all(bitset_container_t *bitset) {
3373 memset(bitset->array, INT64_C(-1),
3374 sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3375 bitset->cardinality = (1 << 16);
3376}
3377
3378
3379
3380/* Create a new bitset. Return NULL in case of failure. */
3381bitset_container_t *bitset_container_create(void) {
3382 bitset_container_t *bitset =
3383 (bitset_container_t *)malloc(sizeof(bitset_container_t));
3384
3385 if (!bitset) {
3386 return NULL;
3387 }
3388 // sizeof(__m256i) == 32
3389 bitset->array = (uint64_t *)aligned_malloc(
3390 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3391 if (!bitset->array) {
3392 free(bitset);
3393 return NULL;
3394 }
3395 bitset_container_clear(bitset);
3396 return bitset;
3397}
3398
3399/* Copy one container into another. We assume that they are distinct. */
3400void bitset_container_copy(const bitset_container_t *source,
3401 bitset_container_t *dest) {
3402 dest->cardinality = source->cardinality;
3403 memcpy(dest->array, source->array,
3404 sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3405}
3406
3407void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
3408 uint32_t max, uint16_t step) {
3409 if (step == 0) return; // refuse to crash
3410 if ((64 % step) == 0) { // step divides 64
3411 uint64_t mask = 0; // construct the repeated mask
3412 for (uint32_t value = (min % step); value < 64; value += step) {
3413 mask |= ((uint64_t)1 << value);
3414 }
3415 uint32_t firstword = min / 64;
3416 uint32_t endword = (max - 1) / 64;
3417 bitset->cardinality = (max - min + step - 1) / step;
3418 if (firstword == endword) {
3419 bitset->array[firstword] |=
3420 mask & (((~UINT64_C(0)) << (min % 64)) &
3421 ((~UINT64_C(0)) >> ((~max + 1) % 64)));
3422 return;
3423 }
3424 bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
3425 for (uint32_t i = firstword + 1; i < endword; i++)
3426 bitset->array[i] = mask;
3427 bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
3428 } else {
3429 for (uint32_t value = min; value < max; value += step) {
3430 bitset_container_add(bitset, value);
3431 }
3432 }
3433}
3434
3435/* Free memory. */
3436void bitset_container_free(bitset_container_t *bitset) {
3437 if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise
3438 aligned_free(bitset->array);
3439 bitset->array = NULL; // pedantic
3440 }
3441 free(bitset);
3442}
3443
3444/* duplicate container. */
3445bitset_container_t *bitset_container_clone(const bitset_container_t *src) {
3446 bitset_container_t *bitset =
3447 (bitset_container_t *)malloc(sizeof(bitset_container_t));
3448
3449 if (!bitset) {
3450 return NULL;
3451 }
3452 // sizeof(__m256i) == 32
3453 bitset->array = (uint64_t *)aligned_malloc(
3454 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3455 if (!bitset->array) {
3456 free(bitset);
3457 return NULL;
3458 }
3459 bitset->cardinality = src->cardinality;
3460 memcpy(bitset->array, src->array,
3461 sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3462 return bitset;
3463}
3464
3465void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin,
3466 uint32_t end) {
3467 bitset_set_range(bitset->array, begin, end);
3468 bitset->cardinality =
3469 bitset_container_compute_cardinality(bitset); // could be smarter
3470}
3471
3472
3473bool bitset_container_intersect(const bitset_container_t *src_1,
3474 const bitset_container_t *src_2) {
3475 // could vectorize, but this is probably already quite fast in practice
3476 const uint64_t * __restrict__ array_1 = src_1->array;
3477 const uint64_t * __restrict__ array_2 = src_2->array;
3478 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) {
3479 if((array_1[i] & array_2[i]) != 0) return true;
3480 }
3481 return false;
3482}
3483
3484
3485#ifdef USEAVX
3486#ifndef WORDS_IN_AVX2_REG
3487#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
3488#endif
3489/* Get the number of bits set (force computation) */
3490int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
3491 return (int) avx2_harley_seal_popcount256(
3492 (const __m256i *)bitset->array,
3493 BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));
3494}
3495#else
3496
3497/* Get the number of bits set (force computation) */
3498int bitset_container_compute_cardinality(const bitset_container_t *bitset) {
3499 const uint64_t *array = bitset->array;
3500 int32_t sum = 0;
3501 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) {
3502 sum += hamming(array[i]);
3503 sum += hamming(array[i + 1]);
3504 sum += hamming(array[i + 2]);
3505 sum += hamming(array[i + 3]);
3506 }
3507 return sum;
3508}
3509
3510#endif
3511
3512#ifdef USEAVX
3513
/* BITSET_CONTAINER_FN_REPEAT and LOOP_SIZE are defined here but are not
 * referenced by the BITSET_CONTAINER_FN body that follows. */
3514#define BITSET_CONTAINER_FN_REPEAT 8
3515#ifndef WORDS_IN_AVX2_REG
3516#define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t)
3517#endif
3518#define LOOP_SIZE \
3519 BITSET_CONTAINER_SIZE_IN_WORDS / \
3520 ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT)
3521
3522/* Computes a binary operation (eg union) on bitset1 and bitset2 and write the
3523 result to bitsetout */
/*
 * BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic) -- AVX2 build.
 * Each use expands to three functions:
 *   bitset_container_<opname>_nocard(src_1, src_2, dst)
 *       applies avx_intrinsic to the inputs with unaligned 256-bit loads
 *       and stores, eight vectors (256 bytes) per loop iteration, writes
 *       the result to dst->array, sets dst->cardinality to
 *       BITSET_UNKNOWN_CARDINALITY and returns that value.
 *   bitset_container_<opname>(src_1, src_2, dst)
 *       calls avx2_harley_seal_popcount256andstore_<opname>, stores the
 *       returned count in dst->cardinality and returns it.
 *   bitset_container_<opname>_justcard(src_1, src_2)
 *       returns the result of avx2_harley_seal_popcount256_<opname>;
 *       takes no destination container.
 * opsymbol is unused in this variant. The second operand is always passed
 * first (A2, A1 / array_2, array_1).
 * NOTE(review): presumably this is so that _mm256_andnot_si256, which
 * negates its first argument, yields src_1 & ~src_2 -- confirm against
 * the Intel intrinsics guide before reordering.
 */
3524// clang-format off
3525#define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic) \
3526int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
3527 const bitset_container_t *src_2, \
3528 bitset_container_t *dst) { \
3529 const uint8_t * __restrict__ array_1 = (const uint8_t *)src_1->array; \
3530 const uint8_t * __restrict__ array_2 = (const uint8_t *)src_2->array; \
3531 /* not using the blocking optimization for some reason*/ \
3532 uint8_t *out = (uint8_t*)dst->array; \
3533 const int innerloop = 8; \
3534 for (size_t i = 0; \
3535 i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \
3536 i+=innerloop) {\
3537 __m256i A1, A2, AO; \
3538 A1 = _mm256_lddqu_si256((const __m256i *)(array_1)); \
3539 A2 = _mm256_lddqu_si256((const __m256i *)(array_2)); \
3540 AO = avx_intrinsic(A2, A1); \
3541 _mm256_storeu_si256((__m256i *)out, AO); \
3542 A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 32)); \
3543 A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 32)); \
3544 AO = avx_intrinsic(A2, A1); \
3545 _mm256_storeu_si256((__m256i *)(out+32), AO); \
3546 A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 64)); \
3547 A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 64)); \
3548 AO = avx_intrinsic(A2, A1); \
3549 _mm256_storeu_si256((__m256i *)(out+64), AO); \
3550 A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 96)); \
3551 A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 96)); \
3552 AO = avx_intrinsic(A2, A1); \
3553 _mm256_storeu_si256((__m256i *)(out+96), AO); \
3554 A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 128)); \
3555 A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 128)); \
3556 AO = avx_intrinsic(A2, A1); \
3557 _mm256_storeu_si256((__m256i *)(out+128), AO); \
3558 A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 160)); \
3559 A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 160)); \
3560 AO = avx_intrinsic(A2, A1); \
3561 _mm256_storeu_si256((__m256i *)(out+160), AO); \
3562 A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 192)); \
3563 A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 192)); \
3564 AO = avx_intrinsic(A2, A1); \
3565 _mm256_storeu_si256((__m256i *)(out+192), AO); \
3566 A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 224)); \
3567 A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 224)); \
3568 AO = avx_intrinsic(A2, A1); \
3569 _mm256_storeu_si256((__m256i *)(out+224), AO); \
3570 out+=256; \
3571 array_1 += 256; \
3572 array_2 += 256; \
3573 } \
3574 dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
3575 return dst->cardinality; \
3576} \
3577/* next, a version that updates cardinality*/ \
3578int bitset_container_##opname(const bitset_container_t *src_1, \
3579 const bitset_container_t *src_2, \
3580 bitset_container_t *dst) { \
3581 const __m256i * __restrict__ array_1 = (const __m256i *) src_1->array; \
3582 const __m256i * __restrict__ array_2 = (const __m256i *) src_2->array; \
3583 __m256i *out = (__m256i *) dst->array; \
3584 dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname(array_2,\
3585 array_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\
3586 return dst->cardinality; \
3587} \
3588/* next, a version that just computes the cardinality*/ \
3589int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
3590 const bitset_container_t *src_2) { \
3591 const __m256i * __restrict__ data1 = (const __m256i *) src_1->array; \
3592 const __m256i * __restrict__ data2 = (const __m256i *) src_2->array; \
3593 return (int)avx2_harley_seal_popcount256_##opname(data2, \
3594 data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\
3595}
3596
3597
3598
3599#else /* not USEAVX */
3600
/*
 * BITSET_CONTAINER_FN(opname, opsymbol, avxintrinsic) -- scalar build.
 * Each use expands to the same three functions as the AVX2 build, written
 * with plain 64-bit word loops:
 *   bitset_container_<opname>          stores (w1 opsymbol w2) for every
 *       word, two words per iteration, sums hamming() of the results into
 *       dst->cardinality and returns it.
 *   bitset_container_<opname>_nocard   stores the result words only, sets
 *       dst->cardinality to BITSET_UNKNOWN_CARDINALITY and returns it.
 *   bitset_container_<opname>_justcard returns the hamming() sum of the
 *       result words; takes no destination container.
 * avxintrinsic is unused in this variant.
 */
3601#define BITSET_CONTAINER_FN(opname, opsymbol, avxintrinsic) \
3602int bitset_container_##opname(const bitset_container_t *src_1, \
3603 const bitset_container_t *src_2, \
3604 bitset_container_t *dst) { \
3605 const uint64_t * __restrict__ array_1 = src_1->array; \
3606 const uint64_t * __restrict__ array_2 = src_2->array; \
3607 uint64_t *out = dst->array; \
3608 int32_t sum = 0; \
3609 for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
3610 const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \
3611 word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \
3612 out[i] = word_1; \
3613 out[i + 1] = word_2; \
3614 sum += hamming(word_1); \
3615 sum += hamming(word_2); \
3616 } \
3617 dst->cardinality = sum; \
3618 return dst->cardinality; \
3619} \
3620int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \
3621 const bitset_container_t *src_2, \
3622 bitset_container_t *dst) { \
3623 const uint64_t * __restrict__ array_1 = src_1->array; \
3624 const uint64_t * __restrict__ array_2 = src_2->array; \
3625 uint64_t *out = dst->array; \
3626 for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i++) { \
3627 out[i] = (array_1[i])opsymbol(array_2[i]); \
3628 } \
3629 dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \
3630 return dst->cardinality; \
3631} \
3632int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \
3633 const bitset_container_t *src_2) { \
3634 const uint64_t * __restrict__ array_1 = src_1->array; \
3635 const uint64_t * __restrict__ array_2 = src_2->array; \
3636 int32_t sum = 0; \
3637 for (size_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 2) { \
3638 const uint64_t word_1 = (array_1[i])opsymbol(array_2[i]), \
3639 word_2 = (array_1[i + 1])opsymbol(array_2[i + 1]); \
3640 sum += hamming(word_1); \
3641 sum += hamming(word_2); \
3642 } \
3643 return sum; \
3644}
3645
3646#endif
3647
/* Instantiate the three functions (<op>, <op>_nocard, <op>_justcard) for
 * each operation name below. */
3648// "or" and "union" expand to identical code; both names exist because other containers use the "or" term, which keeps the API consistent
3649BITSET_CONTAINER_FN(or, |, _mm256_or_si256)
3650BITSET_CONTAINER_FN(union, |, _mm256_or_si256)
3651
3652// likewise "and" and "intersection" expand to identical code; other containers use the "intersection" term
3653BITSET_CONTAINER_FN(and, &, _mm256_and_si256)
3654BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256)
3655
3656BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256)
// andnot: the scalar form expands to (array_1[i]) &~ (array_2[i])
3657BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256)
3658// clang-format on
3659
3660
3661
3662int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) {
3663#ifdef USEAVX2FORDECODING
3664 if(cont->cardinality >= 8192)// heuristic
3665 return (int) bitset_extract_setbits_avx2(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base);
3666 else
3667 return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base);
3668#else
3669 return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base);
3670#endif
3671}
3672
3673/*
3674 * Print this container using printf (useful for debugging).
3675 */
3676void bitset_container_printf(const bitset_container_t * v) {
3677 printf("{");
3678 uint32_t base = 0;
3679 bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
3680 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
3681 uint64_t w = v->array[i];
3682 while (w != 0) {
3683 uint64_t t = w & (~w + 1);
3684 int r = __builtin_ctzll(w);
3685 if(iamfirst) {// predicted to be false
3686 printf("%u",base + r);
3687 iamfirst = false;
3688 } else {
3689 printf(",%u",base + r);
3690 }
3691 w ^= t;
3692 }
3693 base += 64;
3694 }
3695 printf("}");
3696}
3697
3698
3699/*
3700 * Print this container using printf as a comma-separated list of 32-bit integers starting at base.
3701 */
3702void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) {
3703 bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable
3704 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
3705 uint64_t w = v->array[i];
3706 while (w != 0) {
3707 uint64_t t = w & (~w + 1);
3708 int r = __builtin_ctzll(w);
3709 if(iamfirst) {// predicted to be false
3710 printf("%u", r + base);
3711 iamfirst = false;
3712 } else {
3713 printf(",%u",r + base);
3714 }
3715 w ^= t;
3716 }
3717 base += 64;
3718 }
3719}
3720
3721
3722// TODO: use the fast lower bound, also
3723int bitset_container_number_of_runs(bitset_container_t *b) {
3724 int num_runs = 0;
3725 uint64_t next_word = b->array[0];
3726
3727 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) {
3728 uint64_t word = next_word;
3729 next_word = b->array[i+1];
3730 num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word);
3731 }
3732
3733 uint64_t word = next_word;
3734 num_runs += hamming((~word) & (word << 1));
3735 if((word & 0x8000000000000000ULL) != 0)
3736 num_runs++;
3737 return num_runs;
3738}
3739
3740int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) {
3741 int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
3742 memcpy(buf, container->array, l);
3743 return(l);
3744}
3745
3746
3747
3748int32_t bitset_container_write(const bitset_container_t *container,
3749 char *buf) {
3750 memcpy(buf, container->array, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
3751 return bitset_container_size_in_bytes(container);
3752}
3753
3754
3755int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container,
3756 const char *buf) {
3757 container->cardinality = cardinality;
3758 memcpy(container->array, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
3759 return bitset_container_size_in_bytes(container);
3760}
3761
3762uint32_t bitset_container_serialization_len() {
3763 return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS);
3764}
3765
3766void* bitset_container_deserialize(const char *buf, size_t buf_len) {
3767 bitset_container_t *ptr;
3768 size_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS;
3769
3770 if(l != buf_len)
3771 return(NULL);
3772
3773 if((ptr = (bitset_container_t *)malloc(sizeof(bitset_container_t))) != NULL) {
3774 memcpy(ptr, buf, sizeof(bitset_container_t));
3775 // sizeof(__m256i) == 32
3776 ptr->array = (uint64_t *) aligned_malloc(32, l);
3777 if (! ptr->array) {
3778 free(ptr);
3779 return NULL;
3780 }
3781 memcpy(ptr->array, buf, l);
3782 ptr->cardinality = bitset_container_compute_cardinality(ptr);
3783 }
3784
3785 return((void*)ptr);
3786}
3787
3788bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) {
3789 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
3790 uint64_t w = cont->array[i];
3791 while (w != 0) {
3792 uint64_t t = w & (~w + 1);
3793 int r = __builtin_ctzll(w);
3794 if(!iterator(r + base, ptr)) return false;
3795 w ^= t;
3796 }
3797 base += 64;
3798 }
3799 return true;
3800}
3801
3802bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) {
3803 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
3804 uint64_t w = cont->array[i];
3805 while (w != 0) {
3806 uint64_t t = w & (~w + 1);
3807 int r = __builtin_ctzll(w);
3808 if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false;
3809 w ^= t;
3810 }
3811 base += 64;
3812 }
3813 return true;
3814}
3815
3816
3817bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) {
3818 if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
3819 if(container1->cardinality != container2->cardinality) {
3820 return false;
3821 }
3822 }
3823 for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
3824 if(container1->array[i] != container2->array[i]) {
3825 return false;
3826 }
3827 }
3828 return true;
3829}
3830
3831bool bitset_container_is_subset(const bitset_container_t *container1,
3832 const bitset_container_t *container2) {
3833 if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) {
3834 if(container1->cardinality > container2->cardinality) {
3835 return false;
3836 }
3837 }
3838 for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
3839 if((container1->array[i] & container2->array[i]) != container1->array[i]) {
3840 return false;
3841 }
3842 }
3843 return true;
3844}
3845
3846bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) {
3847 int card = bitset_container_cardinality(container);
3848 if(rank >= *start_rank + card) {
3849 *start_rank += card;
3850 return false;
3851 }
3852 const uint64_t *array = container->array;
3853 int32_t size;
3854 for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) {
3855 size = hamming(array[i]);
3856 if(rank <= *start_rank + size) {
3857 uint64_t w = container->array[i];
3858 uint16_t base = i*64;
3859 while (w != 0) {
3860 uint64_t t = w & (~w + 1);
3861 int r = __builtin_ctzll(w);
3862 if(*start_rank == rank) {
3863 *element = r+base;
3864 return true;
3865 }
3866 w ^= t;
3867 *start_rank += 1;
3868 }
3869 }
3870 else
3871 *start_rank += size;
3872 }
3873 assert(false);
3874 __builtin_unreachable();
3875}
3876
3877
3878/* Returns the smallest value (assumes not empty) */
3879uint16_t bitset_container_minimum(const bitset_container_t *container) {
3880 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) {
3881 uint64_t w = container->array[i];
3882 if (w != 0) {
3883 int r = __builtin_ctzll(w);
3884 return r + i * 64;
3885 }
3886 }
3887 return UINT16_MAX;
3888}
3889
3890/* Returns the largest value (assumes not empty) */
3891uint16_t bitset_container_maximum(const bitset_container_t *container) {
3892 for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) {
3893 uint64_t w = container->array[i];
3894 if (w != 0) {
3895 int r = __builtin_clzll(w);
3896 return i * 64 + 63 - r;
3897 }
3898 }
3899 return 0;
3900}
3901
3902/* Returns the number of values equal or smaller than x */
3903int bitset_container_rank(const bitset_container_t *container, uint16_t x) {
3904 uint32_t x32 = x;
3905 int sum = 0;
3906 uint32_t k = 0;
3907 for (; k + 63 <= x32; k += 64) {
3908 sum += hamming(container->array[k / 64]);
3909 }
3910 // at this point, we have covered everything up to k, k not included.
3911 // we have that k < x, but not so large that k+63<=x
3912 // k is a power of 64
3913 int bitsleft = x32 - k + 1;// will be in [0,64)
3914 uint64_t leftoverword = container->array[k / 64];// k / 64 should be within scope
3915 leftoverword = leftoverword & ((UINT64_C(1) << bitsleft) - 1);
3916 sum += hamming(leftoverword);
3917 return sum;
3918}
3919
3920/* Returns the index of the first value equal or larger than x, or -1 */
3921int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) {
3922 uint32_t x32 = x;
3923 uint32_t k = x32 / 64;
3924 uint64_t word = container->array[k];
3925 const int diff = x32 - k * 64; // in [0,64)
3926 word = (word >> diff) << diff; // a mask is faster, but we don't care
3927 while(word == 0) {
3928 k++;
3929 if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1;
3930 word = container->array[k];
3931 }
3932 return k * 64 + __builtin_ctzll(word);
3933}
3934/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */
3935/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */
3936
3937
3938extern inline const void *container_unwrap_shared(
3939 const void *candidate_shared_container, uint8_t *type);
3940extern inline void *container_mutable_unwrap_shared(
3941 void *candidate_shared_container, uint8_t *type);
3942
3943extern const char *get_container_name(uint8_t typecode);
3944
3945extern int container_get_cardinality(const void *container, uint8_t typecode);
3946
3947extern void *container_iand(void *c1, uint8_t type1, const void *c2,
3948 uint8_t type2, uint8_t *result_type);
3949
3950extern void *container_ior(void *c1, uint8_t type1, const void *c2,
3951 uint8_t type2, uint8_t *result_type);
3952
3953extern void *container_ixor(void *c1, uint8_t type1, const void *c2,
3954 uint8_t type2, uint8_t *result_type);
3955
3956extern void *container_iandnot(void *c1, uint8_t type1, const void *c2,
3957 uint8_t type2, uint8_t *result_type);
3958
3959void container_free(void *container, uint8_t typecode) {
3960 switch (typecode) {
3961 case BITSET_CONTAINER_TYPE_CODE:
3962 bitset_container_free((bitset_container_t *)container);
3963 break;
3964 case ARRAY_CONTAINER_TYPE_CODE:
3965 array_container_free((array_container_t *)container);
3966 break;
3967 case RUN_CONTAINER_TYPE_CODE:
3968 run_container_free((run_container_t *)container);
3969 break;
3970 case SHARED_CONTAINER_TYPE_CODE:
3971 shared_container_free((shared_container_t *)container);
3972 break;
3973 default:
3974 assert(false);
3975 __builtin_unreachable();
3976 }
3977}
3978
3979void container_printf(const void *container, uint8_t typecode) {
3980 container = container_unwrap_shared(container, &typecode);
3981 switch (typecode) {
3982 case BITSET_CONTAINER_TYPE_CODE:
3983 bitset_container_printf((const bitset_container_t *)container);
3984 return;
3985 case ARRAY_CONTAINER_TYPE_CODE:
3986 array_container_printf((const array_container_t *)container);
3987 return;
3988 case RUN_CONTAINER_TYPE_CODE:
3989 run_container_printf((const run_container_t *)container);
3990 return;
3991 default:
3992 __builtin_unreachable();
3993 }
3994}
3995
3996void container_printf_as_uint32_array(const void *container, uint8_t typecode,
3997 uint32_t base) {
3998 container = container_unwrap_shared(container, &typecode);
3999 switch (typecode) {
4000 case BITSET_CONTAINER_TYPE_CODE:
4001 bitset_container_printf_as_uint32_array(
4002 (const bitset_container_t *)container, base);
4003 return;
4004 case ARRAY_CONTAINER_TYPE_CODE:
4005 array_container_printf_as_uint32_array(
4006 (const array_container_t *)container, base);
4007 return;
4008 case RUN_CONTAINER_TYPE_CODE:
4009 run_container_printf_as_uint32_array(
4010 (const run_container_t *)container, base);
4011 return;
4012 return;
4013 default:
4014 __builtin_unreachable();
4015 }
4016}
4017
4018int32_t container_serialize(const void *container, uint8_t typecode,
4019 char *buf) {
4020 container = container_unwrap_shared(container, &typecode);
4021 switch (typecode) {
4022 case BITSET_CONTAINER_TYPE_CODE:
4023 return (bitset_container_serialize((const bitset_container_t *)container,
4024 buf));
4025 case ARRAY_CONTAINER_TYPE_CODE:
4026 return (
4027 array_container_serialize((const array_container_t *)container, buf));
4028 case RUN_CONTAINER_TYPE_CODE:
4029 return (run_container_serialize((const run_container_t *)container, buf));
4030 default:
4031 assert(0);
4032 __builtin_unreachable();
4033 return (-1);
4034 }
4035}
4036
4037uint32_t container_serialization_len(const void *container, uint8_t typecode) {
4038 container = container_unwrap_shared(container, &typecode);
4039 switch (typecode) {
4040 case BITSET_CONTAINER_TYPE_CODE:
4041 return bitset_container_serialization_len();
4042 case ARRAY_CONTAINER_TYPE_CODE:
4043 return array_container_serialization_len(
4044 (const array_container_t *)container);
4045 case RUN_CONTAINER_TYPE_CODE:
4046 return run_container_serialization_len(
4047 (const run_container_t *)container);
4048 default:
4049 assert(0);
4050 __builtin_unreachable();
4051 return (0);
4052 }
4053}
4054
4055void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) {
4056 switch (typecode) {
4057 case BITSET_CONTAINER_TYPE_CODE:
4058 return (bitset_container_deserialize(buf, buf_len));
4059 case ARRAY_CONTAINER_TYPE_CODE:
4060 return (array_container_deserialize(buf, buf_len));
4061 case RUN_CONTAINER_TYPE_CODE:
4062 return (run_container_deserialize(buf, buf_len));
4063 case SHARED_CONTAINER_TYPE_CODE:
4064 printf("this should never happen.\n");
4065 assert(0);
4066 __builtin_unreachable();
4067 return (NULL);
4068 default:
4069 assert(0);
4070 __builtin_unreachable();
4071 return (NULL);
4072 }
4073}
4074
4075extern bool container_nonzero_cardinality(const void *container,
4076 uint8_t typecode);
4077
4078extern void container_free(void *container, uint8_t typecode);
4079
4080extern int container_to_uint32_array(uint32_t *output, const void *container,
4081 uint8_t typecode, uint32_t base);
4082
4083extern void *container_add(void *container, uint16_t val, uint8_t typecode,
4084 uint8_t *new_typecode);
4085
4086extern inline bool container_contains(const void *container, uint16_t val,
4087 uint8_t typecode);
4088
4089extern void *container_clone(const void *container, uint8_t typecode);
4090
4091extern void *container_and(const void *c1, uint8_t type1, const void *c2,
4092 uint8_t type2, uint8_t *result_type);
4093
4094extern void *container_or(const void *c1, uint8_t type1, const void *c2,
4095 uint8_t type2, uint8_t *result_type);
4096
4097extern void *container_xor(const void *c1, uint8_t type1, const void *c2,
4098 uint8_t type2, uint8_t *result_type);
4099
4100void *get_copy_of_container(void *container, uint8_t *typecode,
4101 bool copy_on_write) {
4102 if (copy_on_write) {
4103 shared_container_t *shared_container;
4104 if (*typecode == SHARED_CONTAINER_TYPE_CODE) {
4105 shared_container = (shared_container_t *)container;
4106 shared_container->counter += 1;
4107 return shared_container;
4108 }
4109 assert(*typecode != SHARED_CONTAINER_TYPE_CODE);
4110
4111 if ((shared_container = (shared_container_t *)malloc(
4112 sizeof(shared_container_t))) == NULL) {
4113 return NULL;
4114 }
4115
4116 shared_container->container = container;
4117 shared_container->typecode = *typecode;
4118
4119 shared_container->counter = 2;
4120 *typecode = SHARED_CONTAINER_TYPE_CODE;
4121
4122 return shared_container;
4123 } // copy_on_write
4124 // otherwise, no copy on write...
4125 const void *actualcontainer =
4126 container_unwrap_shared((const void *)container, typecode);
4127 assert(*typecode != SHARED_CONTAINER_TYPE_CODE);
4128 return container_clone(actualcontainer, *typecode);
4129}
4130/**
4131 * Copies a container, requires a typecode. This allocates new memory, caller
4132 * is responsible for deallocation.
4133 */
4134void *container_clone(const void *container, uint8_t typecode) {
4135 container = container_unwrap_shared(container, &typecode);
4136 switch (typecode) {
4137 case BITSET_CONTAINER_TYPE_CODE:
4138 return bitset_container_clone((const bitset_container_t *)container);
4139 case ARRAY_CONTAINER_TYPE_CODE:
4140 return array_container_clone((const array_container_t *)container);
4141 case RUN_CONTAINER_TYPE_CODE:
4142 return run_container_clone((const run_container_t *)container);
4143 case SHARED_CONTAINER_TYPE_CODE:
4144 printf("shared containers are not cloneable\n");
4145 assert(false);
4146 return NULL;
4147 default:
4148 assert(false);
4149 __builtin_unreachable();
4150 return NULL;
4151 }
4152}
4153
4154void *shared_container_extract_copy(shared_container_t *container,
4155 uint8_t *typecode) {
4156 assert(container->counter > 0);
4157 assert(container->typecode != SHARED_CONTAINER_TYPE_CODE);
4158 container->counter--;
4159 *typecode = container->typecode;
4160 void *answer;
4161 if (container->counter == 0) {
4162 answer = container->container;
4163 container->container = NULL; // paranoid
4164 free(container);
4165 } else {
4166 answer = container_clone(container->container, *typecode);
4167 }
4168 assert(*typecode != SHARED_CONTAINER_TYPE_CODE);
4169 return answer;
4170}
4171
4172void shared_container_free(shared_container_t *container) {
4173 assert(container->counter > 0);
4174 container->counter--;
4175 if (container->counter == 0) {
4176 assert(container->typecode != SHARED_CONTAINER_TYPE_CODE);
4177 container_free(container->container, container->typecode);
4178 container->container = NULL; // paranoid
4179 free(container);
4180 }
4181}
4182
4183extern void *container_not(const void *c1, uint8_t type1, uint8_t *result_type);
4184
4185extern void *container_not_range(const void *c1, uint8_t type1,
4186 uint32_t range_start, uint32_t range_end,
4187 uint8_t *result_type);
4188
4189extern void *container_inot(void *c1, uint8_t type1, uint8_t *result_type);
4190
4191extern void *container_inot_range(void *c1, uint8_t type1, uint32_t range_start,
4192 uint32_t range_end, uint8_t *result_type);
4193
4194extern void *container_range_of_ones(uint32_t range_start, uint32_t range_end,
4195 uint8_t *result_type);
4196
// where are the corresponding things for union and intersection??
4198extern void *container_lazy_xor(const void *c1, uint8_t type1, const void *c2,
4199 uint8_t type2, uint8_t *result_type);
4200
4201extern void *container_lazy_ixor(void *c1, uint8_t type1, const void *c2,
4202 uint8_t type2, uint8_t *result_type);
4203
4204extern void *container_andnot(const void *c1, uint8_t type1, const void *c2,
4205 uint8_t type2, uint8_t *result_type);
4206/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */
4207/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */
4208#include <stdio.h>
4209
4210
4211// file contains grubby stuff that must know impl. details of all container
4212// types.
4213bitset_container_t *bitset_container_from_array(const array_container_t *a) {
4214 bitset_container_t *ans = bitset_container_create();
4215 int limit = array_container_cardinality(a);
4216 for (int i = 0; i < limit; ++i) bitset_container_set(ans, a->array[i]);
4217 return ans;
4218}
4219
4220bitset_container_t *bitset_container_from_run(const run_container_t *arr) {
4221 int card = run_container_cardinality(arr);
4222 bitset_container_t *answer = bitset_container_create();
4223 for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
4224 rle16_t vl = arr->runs[rlepos];
4225 bitset_set_lenrange(answer->array, vl.value, vl.length);
4226 }
4227 answer->cardinality = card;
4228 return answer;
4229}
4230
4231array_container_t *array_container_from_run(const run_container_t *arr) {
4232 array_container_t *answer =
4233 array_container_create_given_capacity(run_container_cardinality(arr));
4234 answer->cardinality = 0;
4235 for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) {
4236 int run_start = arr->runs[rlepos].value;
4237 int run_end = run_start + arr->runs[rlepos].length;
4238
4239 for (int run_value = run_start; run_value <= run_end; ++run_value) {
4240 answer->array[answer->cardinality++] = (uint16_t)run_value;
4241 }
4242 }
4243 return answer;
4244}
4245
4246array_container_t *array_container_from_bitset(const bitset_container_t *bits) {
4247 array_container_t *result =
4248 array_container_create_given_capacity(bits->cardinality);
4249 result->cardinality = bits->cardinality;
4250 // sse version ends up being slower here
4251 // (bitset_extract_setbits_sse_uint16)
4252 // because of the sparsity of the data
4253 bitset_extract_setbits_uint16(bits->array, BITSET_CONTAINER_SIZE_IN_WORDS,
4254 result->array, 0);
4255 return result;
4256}
4257
4258/* assumes that container has adequate space. Run from [s,e] (inclusive) */
4259static void add_run(run_container_t *r, int s, int e) {
4260 r->runs[r->n_runs].value = s;
4261 r->runs[r->n_runs].length = e - s;
4262 r->n_runs++;
4263}
4264
4265run_container_t *run_container_from_array(const array_container_t *c) {
4266 int32_t n_runs = array_container_number_of_runs(c);
4267 run_container_t *answer = run_container_create_given_capacity(n_runs);
4268 int prev = -2;
4269 int run_start = -1;
4270 int32_t card = c->cardinality;
4271 if (card == 0) return answer;
4272 for (int i = 0; i < card; ++i) {
4273 const uint16_t cur_val = c->array[i];
4274 if (cur_val != prev + 1) {
4275 // new run starts; flush old one, if any
4276 if (run_start != -1) add_run(answer, run_start, prev);
4277 run_start = cur_val;
4278 }
4279 prev = c->array[i];
4280 }
4281 // now prev is the last seen value
4282 add_run(answer, run_start, prev);
4283 // assert(run_container_cardinality(answer) == c->cardinality);
4284 return answer;
4285}
4286
4287/**
4288 * Convert the runcontainer to either a Bitmap or an Array Container, depending
4289 * on the cardinality. Frees the container.
4290 * Allocates and returns new container, which caller is responsible for freeing
4291 */
4292
4293void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card,
4294 uint8_t *resulttype) {
4295 if (card <= DEFAULT_MAX_SIZE) {
4296 array_container_t *answer = array_container_create_given_capacity(card);
4297 answer->cardinality = 0;
4298 for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) {
4299 uint16_t run_start = r->runs[rlepos].value;
4300 uint16_t run_end = run_start + r->runs[rlepos].length;
4301 for (uint16_t run_value = run_start; run_value <= run_end;
4302 ++run_value) {
4303 answer->array[answer->cardinality++] = run_value;
4304 }
4305 }
4306 assert(card == answer->cardinality);
4307 *resulttype = ARRAY_CONTAINER_TYPE_CODE;
4308 run_container_free(r);
4309 return answer;
4310 }
4311 bitset_container_t *answer = bitset_container_create();
4312 for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) {
4313 uint16_t run_start = r->runs[rlepos].value;
4314 bitset_set_lenrange(answer->array, run_start, r->runs[rlepos].length);
4315 }
4316 answer->cardinality = card;
4317 *resulttype = BITSET_CONTAINER_TYPE_CODE;
4318 run_container_free(r);
4319 return answer;
4320}
4321
4322/* Converts a run container to either an array or a bitset, IF it saves space.
4323 */
4324/* If a conversion occurs, the caller is responsible to free the original
4325 * container and
4326 * he becomes responsible to free the new one. */
4327void *convert_run_to_efficient_container(run_container_t *c,
4328 uint8_t *typecode_after) {
4329 int32_t size_as_run_container =
4330 run_container_serialized_size_in_bytes(c->n_runs);
4331
4332 int32_t size_as_bitset_container =
4333 bitset_container_serialized_size_in_bytes();
4334 int32_t card = run_container_cardinality(c);
4335 int32_t size_as_array_container =
4336 array_container_serialized_size_in_bytes(card);
4337
4338 int32_t min_size_non_run =
4339 size_as_bitset_container < size_as_array_container
4340 ? size_as_bitset_container
4341 : size_as_array_container;
4342 if (size_as_run_container <= min_size_non_run) { // no conversion
4343 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4344 return c;
4345 }
4346 if (card <= DEFAULT_MAX_SIZE) {
4347 // to array
4348 array_container_t *answer = array_container_create_given_capacity(card);
4349 answer->cardinality = 0;
4350 for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
4351 int run_start = c->runs[rlepos].value;
4352 int run_end = run_start + c->runs[rlepos].length;
4353
4354 for (int run_value = run_start; run_value <= run_end; ++run_value) {
4355 answer->array[answer->cardinality++] = (uint16_t)run_value;
4356 }
4357 }
4358 *typecode_after = ARRAY_CONTAINER_TYPE_CODE;
4359 return answer;
4360 }
4361
4362 // else to bitset
4363 bitset_container_t *answer = bitset_container_create();
4364
4365 for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) {
4366 int start = c->runs[rlepos].value;
4367 int end = start + c->runs[rlepos].length;
4368 bitset_set_range(answer->array, start, end + 1);
4369 }
4370 answer->cardinality = card;
4371 *typecode_after = BITSET_CONTAINER_TYPE_CODE;
4372 return answer;
4373}
4374
4375// like convert_run_to_efficient_container but frees the old result if needed
4376void *convert_run_to_efficient_container_and_free(run_container_t *c,
4377 uint8_t *typecode_after) {
4378 void *answer = convert_run_to_efficient_container(c, typecode_after);
4379 if (answer != c) run_container_free(c);
4380 return answer;
4381}
4382
4383/* once converted, the original container is disposed here, rather than
4384 in roaring_array
4385*/
4386
4387// TODO: split into run- array- and bitset- subfunctions for sanity;
4388// a few function calls won't really matter.
4389
4390void *convert_run_optimize(void *c, uint8_t typecode_original,
4391 uint8_t *typecode_after) {
4392 if (typecode_original == RUN_CONTAINER_TYPE_CODE) {
4393 void *newc = convert_run_to_efficient_container((run_container_t *)c,
4394 typecode_after);
4395 if (newc != c) {
4396 container_free(c, typecode_original);
4397 }
4398 return newc;
4399 } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) {
4400 // it might need to be converted to a run container.
4401 array_container_t *c_qua_array = (array_container_t *)c;
4402 int32_t n_runs = array_container_number_of_runs(c_qua_array);
4403 int32_t size_as_run_container =
4404 run_container_serialized_size_in_bytes(n_runs);
4405 int32_t card = array_container_cardinality(c_qua_array);
4406 int32_t size_as_array_container =
4407 array_container_serialized_size_in_bytes(card);
4408
4409 if (size_as_run_container >= size_as_array_container) {
4410 *typecode_after = ARRAY_CONTAINER_TYPE_CODE;
4411 return c;
4412 }
4413 // else convert array to run container
4414 run_container_t *answer = run_container_create_given_capacity(n_runs);
4415 int prev = -2;
4416 int run_start = -1;
4417
4418 assert(card > 0);
4419 for (int i = 0; i < card; ++i) {
4420 uint16_t cur_val = c_qua_array->array[i];
4421 if (cur_val != prev + 1) {
4422 // new run starts; flush old one, if any
4423 if (run_start != -1) add_run(answer, run_start, prev);
4424 run_start = cur_val;
4425 }
4426 prev = c_qua_array->array[i];
4427 }
4428 assert(run_start >= 0);
4429 // now prev is the last seen value
4430 add_run(answer, run_start, prev);
4431 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4432 array_container_free(c_qua_array);
4433 return answer;
4434 } else if (typecode_original ==
4435 BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset
4436 // does bitset need conversion to run?
4437 bitset_container_t *c_qua_bitset = (bitset_container_t *)c;
4438 int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset);
4439 int32_t size_as_run_container =
4440 run_container_serialized_size_in_bytes(n_runs);
4441 int32_t size_as_bitset_container =
4442 bitset_container_serialized_size_in_bytes();
4443
4444 if (size_as_bitset_container <= size_as_run_container) {
4445 // no conversion needed.
4446 *typecode_after = BITSET_CONTAINER_TYPE_CODE;
4447 return c;
4448 }
4449 // bitset to runcontainer (ported from Java RunContainer(
4450 // BitmapContainer bc, int nbrRuns))
4451 assert(n_runs > 0); // no empty bitmaps
4452 run_container_t *answer = run_container_create_given_capacity(n_runs);
4453
4454 int long_ctr = 0;
4455 uint64_t cur_word = c_qua_bitset->array[0];
4456 int run_count = 0;
4457 while (true) {
4458 while (cur_word == UINT64_C(0) &&
4459 long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
4460 cur_word = c_qua_bitset->array[++long_ctr];
4461
4462 if (cur_word == UINT64_C(0)) {
4463 bitset_container_free(c_qua_bitset);
4464 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4465 return answer;
4466 }
4467
4468 int local_run_start = __builtin_ctzll(cur_word);
4469 int run_start = local_run_start + 64 * long_ctr;
4470 uint64_t cur_word_with_1s = cur_word | (cur_word - 1);
4471
4472 int run_end = 0;
4473 while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) &&
4474 long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1)
4475 cur_word_with_1s = c_qua_bitset->array[++long_ctr];
4476
4477 if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) {
4478 run_end = 64 + long_ctr * 64; // exclusive, I guess
4479 add_run(answer, run_start, run_end - 1);
4480 bitset_container_free(c_qua_bitset);
4481 *typecode_after = RUN_CONTAINER_TYPE_CODE;
4482 return answer;
4483 }
4484 int local_run_end = __builtin_ctzll(~cur_word_with_1s);
4485 run_end = local_run_end + long_ctr * 64;
4486 add_run(answer, run_start, run_end - 1);
4487 run_count++;
4488 cur_word = cur_word_with_1s & (cur_word_with_1s + 1);
4489 }
4490 return answer;
4491 } else {
4492 assert(false);
4493 __builtin_unreachable();
4494 return NULL;
4495 }
4496}
4497
4498bitset_container_t *bitset_container_from_run_range(const run_container_t *run,
4499 uint32_t min, uint32_t max) {
4500 bitset_container_t *bitset = bitset_container_create();
4501 int32_t union_cardinality = 0;
4502 for (int32_t i = 0; i < run->n_runs; ++i) {
4503 uint32_t rle_min = run->runs[i].value;
4504 uint32_t rle_max = rle_min + run->runs[i].length;
4505 bitset_set_lenrange(bitset->array, rle_min, rle_max - rle_min);
4506 union_cardinality += run->runs[i].length + 1;
4507 }
4508 union_cardinality += max - min + 1;
4509 union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min);
4510 bitset_set_lenrange(bitset->array, min, max - min);
4511 bitset->cardinality = union_cardinality;
4512 return bitset;
4513}
4514/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */
4515/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */
4516/*
4517 * mixed_andnot.c. More methods since operation is not symmetric,
4518 * except no "wide" andnot , so no lazy options motivated.
4519 */
4520
4521#include <assert.h>
4522#include <string.h>
4523
4524
4525/* Compute the andnot of src_1 and src_2 and write the result to
4526 * dst, a valid array container that could be the same as dst.*/
4527void array_bitset_container_andnot(const array_container_t *src_1,
4528 const bitset_container_t *src_2,
4529 array_container_t *dst) {
4530 // follows Java implementation as of June 2016
4531 if (dst->capacity < src_1->cardinality) {
4532 array_container_grow(dst, src_1->cardinality, false);
4533 }
4534 int32_t newcard = 0;
4535 const int32_t origcard = src_1->cardinality;
4536 for (int i = 0; i < origcard; ++i) {
4537 uint16_t key = src_1->array[i];
4538 dst->array[newcard] = key;
4539 newcard += 1 - bitset_container_contains(src_2, key);
4540 }
4541 dst->cardinality = newcard;
4542}
4543
4544/* Compute the andnot of src_1 and src_2 and write the result to
4545 * src_1 */
4546
4547void array_bitset_container_iandnot(array_container_t *src_1,
4548 const bitset_container_t *src_2) {
4549 array_bitset_container_andnot(src_1, src_2, src_1);
4550}
4551
4552/* Compute the andnot of src_1 and src_2 and write the result to
4553 * dst, which does not initially have a valid container.
4554 * Return true for a bitset result; false for array
4555 */
4556
4557bool bitset_array_container_andnot(const bitset_container_t *src_1,
4558 const array_container_t *src_2, void **dst) {
4559 // Java did this directly, but we have option of asm or avx
4560 bitset_container_t *result = bitset_container_create();
4561 bitset_container_copy(src_1, result);
4562 result->cardinality =
4563 (int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality,
4564 src_2->array, (uint64_t)src_2->cardinality);
4565
4566 // do required type conversions.
4567 if (result->cardinality <= DEFAULT_MAX_SIZE) {
4568 *dst = array_container_from_bitset(result);
4569 bitset_container_free(result);
4570 return false;
4571 }
4572 *dst = result;
4573 return true;
4574}
4575
4576/* Compute the andnot of src_1 and src_2 and write the result to
4577 * dst (which has no container initially). It will modify src_1
4578 * to be dst if the result is a bitset. Otherwise, it will
4579 * free src_1 and dst will be a new array container. In both
4580 * cases, the caller is responsible for deallocating dst.
4581 * Returns true iff dst is a bitset */
4582
4583bool bitset_array_container_iandnot(bitset_container_t *src_1,
4584 const array_container_t *src_2,
4585 void **dst) {
4586 *dst = src_1;
4587 src_1->cardinality =
4588 (int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality,
4589 src_2->array, (uint64_t)src_2->cardinality);
4590
4591 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
4592 *dst = array_container_from_bitset(src_1);
4593 bitset_container_free(src_1);
4594 return false; // not bitset
4595 } else
4596 return true;
4597}
4598
4599/* Compute the andnot of src_1 and src_2 and write the result to
4600 * dst. Result may be either a bitset or an array container
4601 * (returns "result is bitset"). dst does not initially have
4602 * any container, but becomes either a bitset container (return
4603 * result true) or an array container.
4604 */
4605
4606bool run_bitset_container_andnot(const run_container_t *src_1,
4607 const bitset_container_t *src_2, void **dst) {
4608 // follows the Java implementation as of June 2016
4609 int card = run_container_cardinality(src_1);
4610 if (card <= DEFAULT_MAX_SIZE) {
4611 // must be an array
4612 array_container_t *answer = array_container_create_given_capacity(card);
4613 answer->cardinality = 0;
4614 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
4615 rle16_t rle = src_1->runs[rlepos];
4616 for (int run_value = rle.value; run_value <= rle.value + rle.length;
4617 ++run_value) {
4618 if (!bitset_container_get(src_2, (uint16_t)run_value)) {
4619 answer->array[answer->cardinality++] = (uint16_t)run_value;
4620 }
4621 }
4622 }
4623 *dst = answer;
4624 return false;
4625 } else { // we guess it will be a bitset, though have to check guess when
4626 // done
4627 bitset_container_t *answer = bitset_container_clone(src_2);
4628
4629 uint32_t last_pos = 0;
4630 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
4631 rle16_t rle = src_1->runs[rlepos];
4632
4633 uint32_t start = rle.value;
4634 uint32_t end = start + rle.length + 1;
4635 bitset_reset_range(answer->array, last_pos, start);
4636 bitset_flip_range(answer->array, start, end);
4637 last_pos = end;
4638 }
4639 bitset_reset_range(answer->array, last_pos, (uint32_t)(1 << 16));
4640
4641 answer->cardinality = bitset_container_compute_cardinality(answer);
4642
4643 if (answer->cardinality <= DEFAULT_MAX_SIZE) {
4644 *dst = array_container_from_bitset(answer);
4645 bitset_container_free(answer);
4646 return false; // not bitset
4647 }
4648 *dst = answer;
4649 return true; // bitset
4650 }
4651}
4652
4653/* Compute the andnot of src_1 and src_2 and write the result to
4654 * dst. Result may be either a bitset or an array container
4655 * (returns "result is bitset"). dst does not initially have
4656 * any container, but becomes either a bitset container (return
4657 * result true) or an array container.
4658 */
4659
4660bool run_bitset_container_iandnot(run_container_t *src_1,
4661 const bitset_container_t *src_2, void **dst) {
4662 // dummy implementation
4663 bool ans = run_bitset_container_andnot(src_1, src_2, dst);
4664 run_container_free(src_1);
4665 return ans;
4666}
4667
4668/* Compute the andnot of src_1 and src_2 and write the result to
4669 * dst. Result may be either a bitset or an array container
4670 * (returns "result is bitset"). dst does not initially have
4671 * any container, but becomes either a bitset container (return
4672 * result true) or an array container.
4673 */
4674
4675bool bitset_run_container_andnot(const bitset_container_t *src_1,
4676 const run_container_t *src_2, void **dst) {
4677 // follows Java implementation
4678 bitset_container_t *result = bitset_container_create();
4679
4680 bitset_container_copy(src_1, result);
4681 for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
4682 rle16_t rle = src_2->runs[rlepos];
4683 bitset_reset_range(result->array, rle.value,
4684 rle.value + rle.length + UINT32_C(1));
4685 }
4686 result->cardinality = bitset_container_compute_cardinality(result);
4687
4688 if (result->cardinality <= DEFAULT_MAX_SIZE) {
4689 *dst = array_container_from_bitset(result);
4690 bitset_container_free(result);
4691 return false; // not bitset
4692 }
4693 *dst = result;
4694 return true; // bitset
4695}
4696
4697/* Compute the andnot of src_1 and src_2 and write the result to
4698 * dst (which has no container initially). It will modify src_1
4699 * to be dst if the result is a bitset. Otherwise, it will
4700 * free src_1 and dst will be a new array container. In both
4701 * cases, the caller is responsible for deallocating dst.
4702 * Returns true iff dst is a bitset */
4703
4704bool bitset_run_container_iandnot(bitset_container_t *src_1,
4705 const run_container_t *src_2, void **dst) {
4706 *dst = src_1;
4707
4708 for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) {
4709 rle16_t rle = src_2->runs[rlepos];
4710 bitset_reset_range(src_1->array, rle.value,
4711 rle.value + rle.length + UINT32_C(1));
4712 }
4713 src_1->cardinality = bitset_container_compute_cardinality(src_1);
4714
4715 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
4716 *dst = array_container_from_bitset(src_1);
4717 bitset_container_free(src_1);
4718 return false; // not bitset
4719 } else
4720 return true;
4721}
4722
4723/* helper. a_out must be a valid array container with adequate capacity.
4724 * Returns the cardinality of the output container. Partly Based on Java
4725 * implementation Util.unsignedDifference.
4726 *
4727 * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper
4728 * to avoid advanceUntil?
4729 */
4730
4731static int run_array_array_subtract(const run_container_t *r,
4732 const array_container_t *a_in,
4733 array_container_t *a_out) {
4734 int out_card = 0;
4735 int32_t in_array_pos =
4736 -1; // since advanceUntil always assumes we start the search AFTER this
4737
4738 for (int rlepos = 0; rlepos < r->n_runs; rlepos++) {
4739 int32_t start = r->runs[rlepos].value;
4740 int32_t end = start + r->runs[rlepos].length + 1;
4741
4742 in_array_pos = advanceUntil(a_in->array, in_array_pos,
4743 a_in->cardinality, (uint16_t)start);
4744
4745 if (in_array_pos >= a_in->cardinality) { // run has no items subtracted
4746 for (int32_t i = start; i < end; ++i)
4747 a_out->array[out_card++] = (uint16_t)i;
4748 } else {
4749 uint16_t next_nonincluded = a_in->array[in_array_pos];
4750 if (next_nonincluded >= end) {
4751 // another case when run goes unaltered
4752 for (int32_t i = start; i < end; ++i)
4753 a_out->array[out_card++] = (uint16_t)i;
4754 in_array_pos--; // ensure we see this item again if necessary
4755 } else {
4756 for (int32_t i = start; i < end; ++i)
4757 if (i != next_nonincluded)
4758 a_out->array[out_card++] = (uint16_t)i;
4759 else // 0 should ensure we don't match
4760 next_nonincluded =
4761 (in_array_pos + 1 >= a_in->cardinality)
4762 ? 0
4763 : a_in->array[++in_array_pos];
4764 in_array_pos--; // see again
4765 }
4766 }
4767 }
4768 return out_card;
4769}
4770
4771/* dst does not indicate a valid container initially. Eventually it
4772 * can become any type of container.
4773 */
4774
4775int run_array_container_andnot(const run_container_t *src_1,
4776 const array_container_t *src_2, void **dst) {
4777 // follows the Java impl as of June 2016
4778
4779 int card = run_container_cardinality(src_1);
4780 const int arbitrary_threshold = 32;
4781
4782 if (card <= arbitrary_threshold) {
4783 if (src_2->cardinality == 0) {
4784 *dst = run_container_clone(src_1);
4785 return RUN_CONTAINER_TYPE_CODE;
4786 }
4787 // Java's "lazyandNot.toEfficientContainer" thing
4788 run_container_t *answer = run_container_create_given_capacity(
4789 card + array_container_cardinality(src_2));
4790
4791 int rlepos = 0;
4792 int xrlepos = 0; // "x" is src_2
4793 rle16_t rle = src_1->runs[rlepos];
4794 int32_t start = rle.value;
4795 int32_t end = start + rle.length + 1;
4796 int32_t xstart = src_2->array[xrlepos];
4797
4798 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) {
4799 if (end <= xstart) {
4800 // output the first run
4801 answer->runs[answer->n_runs++] =
4802 (rle16_t){.value = (uint16_t)start,
4803 .length = (uint16_t)(end - start - 1)};
4804 rlepos++;
4805 if (rlepos < src_1->n_runs) {
4806 start = src_1->runs[rlepos].value;
4807 end = start + src_1->runs[rlepos].length + 1;
4808 }
4809 } else if (xstart + 1 <= start) {
4810 // exit the second run
4811 xrlepos++;
4812 if (xrlepos < src_2->cardinality) {
4813 xstart = src_2->array[xrlepos];
4814 }
4815 } else {
4816 if (start < xstart) {
4817 answer->runs[answer->n_runs++] =
4818 (rle16_t){.value = (uint16_t)start,
4819 .length = (uint16_t)(xstart - start - 1)};
4820 }
4821 if (xstart + 1 < end) {
4822 start = xstart + 1;
4823 } else {
4824 rlepos++;
4825 if (rlepos < src_1->n_runs) {
4826 start = src_1->runs[rlepos].value;
4827 end = start + src_1->runs[rlepos].length + 1;
4828 }
4829 }
4830 }
4831 }
4832 if (rlepos < src_1->n_runs) {
4833 answer->runs[answer->n_runs++] =
4834 (rle16_t){.value = (uint16_t)start,
4835 .length = (uint16_t)(end - start - 1)};
4836 rlepos++;
4837 if (rlepos < src_1->n_runs) {
4838 memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos,
4839 (src_1->n_runs - rlepos) * sizeof(rle16_t));
4840 answer->n_runs += (src_1->n_runs - rlepos);
4841 }
4842 }
4843 uint8_t return_type;
4844 *dst = convert_run_to_efficient_container(answer, &return_type);
4845 if (answer != *dst) run_container_free(answer);
4846 return return_type;
4847 }
4848 // else it's a bitmap or array
4849
4850 if (card <= DEFAULT_MAX_SIZE) {
4851 array_container_t *ac = array_container_create_given_capacity(card);
4852 // nb Java code used a generic iterator-based merge to compute
4853 // difference
4854 ac->cardinality = run_array_array_subtract(src_1, src_2, ac);
4855 *dst = ac;
4856 return ARRAY_CONTAINER_TYPE_CODE;
4857 }
4858 bitset_container_t *ans = bitset_container_from_run(src_1);
4859 bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst);
4860 return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE
4861 : ARRAY_CONTAINER_TYPE_CODE);
4862}
4863
4864/* Compute the andnot of src_1 and src_2 and write the result to
4865 * dst (which has no container initially). It will modify src_1
4866 * to be dst if the result is a bitset. Otherwise, it will
4867 * free src_1 and dst will be a new array container. In both
4868 * cases, the caller is responsible for deallocating dst.
4869 * Returns true iff dst is a bitset */
4870
4871int run_array_container_iandnot(run_container_t *src_1,
4872 const array_container_t *src_2, void **dst) {
4873 // dummy implementation same as June 2016 Java
4874 int ans = run_array_container_andnot(src_1, src_2, dst);
4875 run_container_free(src_1);
4876 return ans;
4877}
4878
4879/* dst must be a valid array container, allowed to be src_1 */
4880
4881void array_run_container_andnot(const array_container_t *src_1,
4882 const run_container_t *src_2,
4883 array_container_t *dst) {
4884 // basically following Java impl as of June 2016
4885 if (src_1->cardinality > dst->capacity) {
4886 array_container_grow(dst, src_1->cardinality, false);
4887 }
4888
4889 if (src_2->n_runs == 0) {
4890 memmove(dst->array, src_1->array,
4891 sizeof(uint16_t) * src_1->cardinality);
4892 dst->cardinality = src_1->cardinality;
4893 return;
4894 }
4895 int32_t run_start = src_2->runs[0].value;
4896 int32_t run_end = run_start + src_2->runs[0].length;
4897 int which_run = 0;
4898
4899 uint16_t val = 0;
4900 int dest_card = 0;
4901 for (int i = 0; i < src_1->cardinality; ++i) {
4902 val = src_1->array[i];
4903 if (val < run_start)
4904 dst->array[dest_card++] = val;
4905 else if (val <= run_end) {
4906 ; // omitted item
4907 } else {
4908 do {
4909 if (which_run + 1 < src_2->n_runs) {
4910 ++which_run;
4911 run_start = src_2->runs[which_run].value;
4912 run_end = run_start + src_2->runs[which_run].length;
4913
4914 } else
4915 run_start = run_end = (1 << 16) + 1;
4916 } while (val > run_end);
4917 --i;
4918 }
4919 }
4920 dst->cardinality = dest_card;
4921}
4922
4923/* dst does not indicate a valid container initially. Eventually it
4924 * can become any kind of container.
4925 */
4926
4927void array_run_container_iandnot(array_container_t *src_1,
4928 const run_container_t *src_2) {
4929 array_run_container_andnot(src_1, src_2, src_1);
4930}
4931
4932/* dst does not indicate a valid container initially. Eventually it
4933 * can become any kind of container.
4934 */
4935
4936int run_run_container_andnot(const run_container_t *src_1,
4937 const run_container_t *src_2, void **dst) {
4938 run_container_t *ans = run_container_create();
4939 run_container_andnot(src_1, src_2, ans);
4940 uint8_t typecode_after;
4941 *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
4942 return typecode_after;
4943}
4944
4945/* Compute the andnot of src_1 and src_2 and write the result to
4946 * dst (which has no container initially). It will modify src_1
4947 * to be dst if the result is a bitset. Otherwise, it will
4948 * free src_1 and dst will be a new array container. In both
4949 * cases, the caller is responsible for deallocating dst.
4950 * Returns true iff dst is a bitset */
4951
4952int run_run_container_iandnot(run_container_t *src_1,
4953 const run_container_t *src_2, void **dst) {
4954 // following Java impl as of June 2016 (dummy)
4955 int ans = run_run_container_andnot(src_1, src_2, dst);
4956 run_container_free(src_1);
4957 return ans;
4958}
4959
4960/*
4961 * dst is a valid array container and may be the same as src_1
4962 */
4963
4964void array_array_container_andnot(const array_container_t *src_1,
4965 const array_container_t *src_2,
4966 array_container_t *dst) {
4967 array_container_andnot(src_1, src_2, dst);
4968}
4969
4970/* inplace array-array andnot will always be able to reuse the space of
4971 * src_1 */
4972void array_array_container_iandnot(array_container_t *src_1,
4973 const array_container_t *src_2) {
4974 array_container_andnot(src_1, src_2, src_1);
4975}
4976
4977/* Compute the andnot of src_1 and src_2 and write the result to
4978 * dst (which has no container initially). Return value is
4979 * "dst is a bitset"
4980 */
4981
4982bool bitset_bitset_container_andnot(const bitset_container_t *src_1,
4983 const bitset_container_t *src_2,
4984 void **dst) {
4985 bitset_container_t *ans = bitset_container_create();
4986 int card = bitset_container_andnot(src_1, src_2, ans);
4987 if (card <= DEFAULT_MAX_SIZE) {
4988 *dst = array_container_from_bitset(ans);
4989 bitset_container_free(ans);
4990 return false; // not bitset
4991 } else {
4992 *dst = ans;
4993 return true;
4994 }
4995}
4996
4997/* Compute the andnot of src_1 and src_2 and write the result to
4998 * dst (which has no container initially). It will modify src_1
4999 * to be dst if the result is a bitset. Otherwise, it will
5000 * free src_1 and dst will be a new array container. In both
5001 * cases, the caller is responsible for deallocating dst.
5002 * Returns true iff dst is a bitset */
5003
5004bool bitset_bitset_container_iandnot(bitset_container_t *src_1,
5005 const bitset_container_t *src_2,
5006 void **dst) {
5007 int card = bitset_container_andnot(src_1, src_2, src_1);
5008 if (card <= DEFAULT_MAX_SIZE) {
5009 *dst = array_container_from_bitset(src_1);
5010 bitset_container_free(src_1);
5011 return false; // not bitset
5012 } else {
5013 *dst = src_1;
5014 return true;
5015 }
5016}
5017/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */
5018/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */
5019
5020bool array_container_equal_bitset(const array_container_t* container1,
5021 const bitset_container_t* container2) {
5022 if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5023 if (container2->cardinality != container1->cardinality) {
5024 return false;
5025 }
5026 }
5027 int32_t pos = 0;
5028 for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) {
5029 uint64_t w = container2->array[i];
5030 while (w != 0) {
5031 uint64_t t = w & (~w + 1);
5032 uint16_t r = i * 64 + __builtin_ctzll(w);
5033 if (pos >= container1->cardinality) {
5034 return false;
5035 }
5036 if (container1->array[pos] != r) {
5037 return false;
5038 }
5039 ++pos;
5040 w ^= t;
5041 }
5042 }
5043 return (pos == container1->cardinality);
5044}
5045
5046bool run_container_equals_array(const run_container_t* container1,
5047 const array_container_t* container2) {
5048 if (run_container_cardinality(container1) != container2->cardinality)
5049 return false;
5050 int32_t pos = 0;
5051 for (int i = 0; i < container1->n_runs; ++i) {
5052 const uint32_t run_start = container1->runs[i].value;
5053 const uint32_t le = container1->runs[i].length;
5054
5055 if (container2->array[pos] != run_start) {
5056 return false;
5057 }
5058
5059 if (container2->array[pos + le] != run_start + le) {
5060 return false;
5061 }
5062
5063 pos += le + 1;
5064 }
5065 return true;
5066}
5067
5068bool run_container_equals_bitset(const run_container_t* container1,
5069 const bitset_container_t* container2) {
5070 if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5071 if (container2->cardinality != run_container_cardinality(container1)) {
5072 return false;
5073 }
5074 } else {
5075 int32_t card = bitset_container_compute_cardinality(
5076 container2); // modify container2?
5077 if (card != run_container_cardinality(container1)) {
5078 return false;
5079 }
5080 }
5081 for (int i = 0; i < container1->n_runs; ++i) {
5082 uint32_t run_start = container1->runs[i].value;
5083 uint32_t le = container1->runs[i].length;
5084 for (uint32_t j = run_start; j <= run_start + le; ++j) {
5085 // todo: this code could be much faster
5086 if (!bitset_container_contains(container2, j)) {
5087 return false;
5088 }
5089 }
5090 }
5091 return true;
5092}
5093/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */
5094/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */
5095/*
5096 * mixed_intersection.c
5097 *
5098 */
5099
5100
5101/* Compute the intersection of src_1 and src_2 and write the result to
5102 * dst. */
5103void array_bitset_container_intersection(const array_container_t *src_1,
5104 const bitset_container_t *src_2,
5105 array_container_t *dst) {
5106 if (dst->capacity < src_1->cardinality) {
5107 array_container_grow(dst, src_1->cardinality, false);
5108 }
5109 int32_t newcard = 0; // dst could be src_1
5110 const int32_t origcard = src_1->cardinality;
5111 for (int i = 0; i < origcard; ++i) {
5112 uint16_t key = src_1->array[i];
5113 // this branchless approach is much faster...
5114 dst->array[newcard] = key;
5115 newcard += bitset_container_contains(src_2, key);
5116 /**
5117 * we could do it this way instead...
5118 * if (bitset_container_contains(src_2, key)) {
5119 * dst->array[newcard++] = key;
5120 * }
5121 * but if the result is unpredictible, the processor generates
5122 * many mispredicted branches.
5123 * Difference can be huge (from 3 cycles when predictible all the way
5124 * to 16 cycles when unpredictible.
5125 * See
5126 * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c
5127 */
5128 }
5129 dst->cardinality = newcard;
5130}
5131
5132/* Compute the size of the intersection of src_1 and src_2. */
5133int array_bitset_container_intersection_cardinality(
5134 const array_container_t *src_1, const bitset_container_t *src_2) {
5135 int32_t newcard = 0;
5136 const int32_t origcard = src_1->cardinality;
5137 for (int i = 0; i < origcard; ++i) {
5138 uint16_t key = src_1->array[i];
5139 newcard += bitset_container_contains(src_2, key);
5140 }
5141 return newcard;
5142}
5143
5144
5145bool array_bitset_container_intersect(const array_container_t *src_1,
5146 const bitset_container_t *src_2) {
5147 const int32_t origcard = src_1->cardinality;
5148 for (int i = 0; i < origcard; ++i) {
5149 uint16_t key = src_1->array[i];
5150 if(bitset_container_contains(src_2, key)) return true;
5151 }
5152 return false;
5153}
5154
5155/* Compute the intersection of src_1 and src_2 and write the result to
5156 * dst. It is allowed for dst to be equal to src_1. We assume that dst is a
5157 * valid container. */
5158void array_run_container_intersection(const array_container_t *src_1,
5159 const run_container_t *src_2,
5160 array_container_t *dst) {
5161 if (run_container_is_full(src_2)) {
5162 if (dst != src_1) array_container_copy(src_1, dst);
5163 return;
5164 }
5165 if (dst->capacity < src_1->cardinality) {
5166 array_container_grow(dst, src_1->cardinality, false);
5167 }
5168 if (src_2->n_runs == 0) {
5169 return;
5170 }
5171 int32_t rlepos = 0;
5172 int32_t arraypos = 0;
5173 rle16_t rle = src_2->runs[rlepos];
5174 int32_t newcard = 0;
5175 while (arraypos < src_1->cardinality) {
5176 const uint16_t arrayval = src_1->array[arraypos];
5177 while (rle.value + rle.length <
5178 arrayval) { // this will frequently be false
5179 ++rlepos;
5180 if (rlepos == src_2->n_runs) {
5181 dst->cardinality = newcard;
5182 return; // we are done
5183 }
5184 rle = src_2->runs[rlepos];
5185 }
5186 if (rle.value > arrayval) {
5187 arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
5188 rle.value);
5189 } else {
5190 dst->array[newcard] = arrayval;
5191 newcard++;
5192 arraypos++;
5193 }
5194 }
5195 dst->cardinality = newcard;
5196}
5197
5198/* Compute the intersection of src_1 and src_2 and write the result to
5199 * *dst. If the result is true then the result is a bitset_container_t
5200 * otherwise is a array_container_t. If *dst == src_2, an in-place processing
5201 * is attempted.*/
5202bool run_bitset_container_intersection(const run_container_t *src_1,
5203 const bitset_container_t *src_2,
5204 void **dst) {
5205 if (run_container_is_full(src_1)) {
5206 if (*dst != src_2) *dst = bitset_container_clone(src_2);
5207 return true;
5208 }
5209 int32_t card = run_container_cardinality(src_1);
5210 if (card <= DEFAULT_MAX_SIZE) {
5211 // result can only be an array (assuming that we never make a
5212 // RunContainer)
5213 if (card > src_2->cardinality) {
5214 card = src_2->cardinality;
5215 }
5216 array_container_t *answer = array_container_create_given_capacity(card);
5217 *dst = answer;
5218 if (*dst == NULL) {
5219 return false;
5220 }
5221 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5222 rle16_t rle = src_1->runs[rlepos];
5223 uint32_t endofrun = (uint32_t)rle.value + rle.length;
5224 for (uint32_t runValue = rle.value; runValue <= endofrun;
5225 ++runValue) {
5226 answer->array[answer->cardinality] = (uint16_t)runValue;
5227 answer->cardinality +=
5228 bitset_container_contains(src_2, runValue);
5229 }
5230 }
5231 return false;
5232 }
5233 if (*dst == src_2) { // we attempt in-place
5234 bitset_container_t *answer = (bitset_container_t *)*dst;
5235 uint32_t start = 0;
5236 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5237 const rle16_t rle = src_1->runs[rlepos];
5238 uint32_t end = rle.value;
5239 bitset_reset_range(src_2->array, start, end);
5240
5241 start = end + rle.length + 1;
5242 }
5243 bitset_reset_range(src_2->array, start, UINT32_C(1) << 16);
5244 answer->cardinality = bitset_container_compute_cardinality(answer);
5245 if (src_2->cardinality > DEFAULT_MAX_SIZE) {
5246 return true;
5247 } else {
5248 array_container_t *newanswer = array_container_from_bitset(src_2);
5249 if (newanswer == NULL) {
5250 *dst = NULL;
5251 return false;
5252 }
5253 *dst = newanswer;
5254 return false;
5255 }
5256 } else { // no inplace
5257 // we expect the answer to be a bitmap (if we are lucky)
5258 bitset_container_t *answer = bitset_container_clone(src_2);
5259
5260 *dst = answer;
5261 if (answer == NULL) {
5262 return true;
5263 }
5264 uint32_t start = 0;
5265 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5266 const rle16_t rle = src_1->runs[rlepos];
5267 uint32_t end = rle.value;
5268 bitset_reset_range(answer->array, start, end);
5269 start = end + rle.length + 1;
5270 }
5271 bitset_reset_range(answer->array, start, UINT32_C(1) << 16);
5272 answer->cardinality = bitset_container_compute_cardinality(answer);
5273
5274 if (answer->cardinality > DEFAULT_MAX_SIZE) {
5275 return true;
5276 } else {
5277 array_container_t *newanswer = array_container_from_bitset(answer);
5278 bitset_container_free((bitset_container_t *)*dst);
5279 if (newanswer == NULL) {
5280 *dst = NULL;
5281 return false;
5282 }
5283 *dst = newanswer;
5284 return false;
5285 }
5286 }
5287}
5288
5289/* Compute the size of the intersection between src_1 and src_2 . */
5290int array_run_container_intersection_cardinality(const array_container_t *src_1,
5291 const run_container_t *src_2) {
5292 if (run_container_is_full(src_2)) {
5293 return src_1->cardinality;
5294 }
5295 if (src_2->n_runs == 0) {
5296 return 0;
5297 }
5298 int32_t rlepos = 0;
5299 int32_t arraypos = 0;
5300 rle16_t rle = src_2->runs[rlepos];
5301 int32_t newcard = 0;
5302 while (arraypos < src_1->cardinality) {
5303 const uint16_t arrayval = src_1->array[arraypos];
5304 while (rle.value + rle.length <
5305 arrayval) { // this will frequently be false
5306 ++rlepos;
5307 if (rlepos == src_2->n_runs) {
5308 return newcard; // we are done
5309 }
5310 rle = src_2->runs[rlepos];
5311 }
5312 if (rle.value > arrayval) {
5313 arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
5314 rle.value);
5315 } else {
5316 newcard++;
5317 arraypos++;
5318 }
5319 }
5320 return newcard;
5321}
5322
5323/* Compute the intersection between src_1 and src_2
5324 **/
5325int run_bitset_container_intersection_cardinality(
5326 const run_container_t *src_1, const bitset_container_t *src_2) {
5327 if (run_container_is_full(src_1)) {
5328 return bitset_container_cardinality(src_2);
5329 }
5330 int answer = 0;
5331 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5332 rle16_t rle = src_1->runs[rlepos];
5333 answer +=
5334 bitset_lenrange_cardinality(src_2->array, rle.value, rle.length);
5335 }
5336 return answer;
5337}
5338
5339
5340bool array_run_container_intersect(const array_container_t *src_1,
5341 const run_container_t *src_2) {
5342 if( run_container_is_full(src_2) ) {
5343 return !array_container_empty(src_1);
5344 }
5345 if (src_2->n_runs == 0) {
5346 return false;
5347 }
5348 int32_t rlepos = 0;
5349 int32_t arraypos = 0;
5350 rle16_t rle = src_2->runs[rlepos];
5351 while (arraypos < src_1->cardinality) {
5352 const uint16_t arrayval = src_1->array[arraypos];
5353 while (rle.value + rle.length <
5354 arrayval) { // this will frequently be false
5355 ++rlepos;
5356 if (rlepos == src_2->n_runs) {
5357 return false; // we are done
5358 }
5359 rle = src_2->runs[rlepos];
5360 }
5361 if (rle.value > arrayval) {
5362 arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality,
5363 rle.value);
5364 } else {
5365 return true;
5366 }
5367 }
5368 return false;
5369}
5370
5371/* Compute the intersection between src_1 and src_2
5372 **/
5373bool run_bitset_container_intersect(const run_container_t *src_1,
5374 const bitset_container_t *src_2) {
5375 if( run_container_is_full(src_1) ) {
5376 return !bitset_container_empty(src_2);
5377 }
5378 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5379 rle16_t rle = src_1->runs[rlepos];
5380 if(!bitset_lenrange_empty(src_2->array, rle.value,rle.length)) return true;
5381 }
5382 return false;
5383}
5384
5385/*
5386 * Compute the intersection between src_1 and src_2 and write the result
5387 * to *dst. If the return function is true, the result is a bitset_container_t
5388 * otherwise is a array_container_t.
5389 */
5390bool bitset_bitset_container_intersection(const bitset_container_t *src_1,
5391 const bitset_container_t *src_2,
5392 void **dst) {
5393 const int newCardinality = bitset_container_and_justcard(src_1, src_2);
5394 if (newCardinality > DEFAULT_MAX_SIZE) {
5395 *dst = bitset_container_create();
5396 if (*dst != NULL) {
5397 bitset_container_and_nocard(src_1, src_2,
5398 (bitset_container_t *)*dst);
5399 ((bitset_container_t *)*dst)->cardinality = newCardinality;
5400 }
5401 return true; // it is a bitset
5402 }
5403 *dst = array_container_create_given_capacity(newCardinality);
5404 if (*dst != NULL) {
5405 ((array_container_t *)*dst)->cardinality = newCardinality;
5406 bitset_extract_intersection_setbits_uint16(
5407 ((const bitset_container_t *)src_1)->array,
5408 ((const bitset_container_t *)src_2)->array,
5409 BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array,
5410 0);
5411 }
5412 return false; // not a bitset
5413}
5414
5415bool bitset_bitset_container_intersection_inplace(
5416 bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) {
5417 const int newCardinality = bitset_container_and_justcard(src_1, src_2);
5418 if (newCardinality > DEFAULT_MAX_SIZE) {
5419 *dst = src_1;
5420 bitset_container_and_nocard(src_1, src_2, src_1);
5421 ((bitset_container_t *)*dst)->cardinality = newCardinality;
5422 return true; // it is a bitset
5423 }
5424 *dst = array_container_create_given_capacity(newCardinality);
5425 if (*dst != NULL) {
5426 ((array_container_t *)*dst)->cardinality = newCardinality;
5427 bitset_extract_intersection_setbits_uint16(
5428 ((const bitset_container_t *)src_1)->array,
5429 ((const bitset_container_t *)src_2)->array,
5430 BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array,
5431 0);
5432 }
5433 return false; // not a bitset
5434}
5435/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */
5436/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */
5437/*
5438 * mixed_negation.c
5439 *
5440 */
5441
5442#include <assert.h>
5443#include <string.h>
5444
5445
5446// TODO: make simplified and optimized negation code across
5447// the full range.
5448
5449/* Negation across the entire range of the container.
5450 * Compute the negation of src and write the result
5451 * to *dst. The complement of a
5452 * sufficiently sparse set will always be dense and a hence a bitmap
5453' * We assume that dst is pre-allocated and a valid bitset container
5454 * There can be no in-place version.
5455 */
5456void array_container_negation(const array_container_t *src,
5457 bitset_container_t *dst) {
5458 uint64_t card = UINT64_C(1 << 16);
5459 bitset_container_set_all(dst);
5460
5461 dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array,
5462 (uint64_t)src->cardinality);
5463}
5464
5465/* Negation across the entire range of the container
5466 * Compute the negation of src and write the result
5467 * to *dst. A true return value indicates a bitset result,
5468 * otherwise the result is an array container.
5469 * We assume that dst is not pre-allocated. In
5470 * case of failure, *dst will be NULL.
5471 */
5472bool bitset_container_negation(const bitset_container_t *src, void **dst) {
5473 return bitset_container_negation_range(src, 0, (1 << 16), dst);
5474}
5475
5476/* inplace version */
5477/*
5478 * Same as bitset_container_negation except that if the output is to
5479 * be a
5480 * bitset_container_t, then src is modified and no allocation is made.
5481 * If the output is to be an array_container_t, then caller is responsible
5482 * to free the container.
5483 * In all cases, the result is in *dst.
5484 */
5485bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) {
5486 return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst);
5487}
5488
5489/* Negation across the entire range of container
5490 * Compute the negation of src and write the result
5491 * to *dst. Return values are the *_TYPECODES as defined * in containers.h
5492 * We assume that dst is not pre-allocated. In
5493 * case of failure, *dst will be NULL.
5494 */
5495int run_container_negation(const run_container_t *src, void **dst) {
5496 return run_container_negation_range(src, 0, (1 << 16), dst);
5497}
5498
5499/*
5500 * Same as run_container_negation except that if the output is to
5501 * be a
5502 * run_container_t, and has the capacity to hold the result,
5503 * then src is modified and no allocation is made.
5504 * In all cases, the result is in *dst.
5505 */
5506int run_container_negation_inplace(run_container_t *src, void **dst) {
5507 return run_container_negation_range_inplace(src, 0, (1 << 16), dst);
5508}
5509
5510/* Negation across a range of the container.
5511 * Compute the negation of src and write the result
5512 * to *dst. Returns true if the result is a bitset container
5513 * and false for an array container. *dst is not preallocated.
5514 */
5515bool array_container_negation_range(const array_container_t *src,
5516 const int range_start, const int range_end,
5517 void **dst) {
5518 /* close port of the Java implementation */
5519 if (range_start >= range_end) {
5520 *dst = array_container_clone(src);
5521 return false;
5522 }
5523
5524 int32_t start_index =
5525 binarySearch(src->array, src->cardinality, (uint16_t)range_start);
5526 if (start_index < 0) start_index = -start_index - 1;
5527
5528 int32_t last_index =
5529 binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1));
5530 if (last_index < 0) last_index = -last_index - 2;
5531
5532 const int32_t current_values_in_range = last_index - start_index + 1;
5533 const int32_t span_to_be_flipped = range_end - range_start;
5534 const int32_t new_values_in_range =
5535 span_to_be_flipped - current_values_in_range;
5536 const int32_t cardinality_change =
5537 new_values_in_range - current_values_in_range;
5538 const int32_t new_cardinality = src->cardinality + cardinality_change;
5539
5540 if (new_cardinality > DEFAULT_MAX_SIZE) {
5541 bitset_container_t *temp = bitset_container_from_array(src);
5542 bitset_flip_range(temp->array, (uint32_t)range_start,
5543 (uint32_t)range_end);
5544 temp->cardinality = new_cardinality;
5545 *dst = temp;
5546 return true;
5547 }
5548
5549 array_container_t *arr =
5550 array_container_create_given_capacity(new_cardinality);
5551 *dst = (void *)arr;
5552 if(new_cardinality == 0) {
5553 arr->cardinality = new_cardinality;
5554 return false; // we are done.
5555 }
5556 // copy stuff before the active area
5557 memcpy(arr->array, src->array, start_index * sizeof(uint16_t));
5558
5559 // work on the range
5560 int32_t out_pos = start_index, in_pos = start_index;
5561 int32_t val_in_range = range_start;
5562 for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) {
5563 if ((uint16_t)val_in_range != src->array[in_pos]) {
5564 arr->array[out_pos++] = (uint16_t)val_in_range;
5565 } else {
5566 ++in_pos;
5567 }
5568 }
5569 for (; val_in_range < range_end; ++val_in_range)
5570 arr->array[out_pos++] = (uint16_t)val_in_range;
5571
5572 // content after the active range
5573 memcpy(arr->array + out_pos, src->array + (last_index + 1),
5574 (src->cardinality - (last_index + 1)) * sizeof(uint16_t));
5575 arr->cardinality = new_cardinality;
5576 return false;
5577}
5578
5579/* Even when the result would fit, it is unclear how to make an
5580 * inplace version without inefficient copying.
5581 */
5582
5583bool array_container_negation_range_inplace(array_container_t *src,
5584 const int range_start,
5585 const int range_end, void **dst) {
5586 bool ans = array_container_negation_range(src, range_start, range_end, dst);
5587 // TODO : try a real inplace version
5588 array_container_free(src);
5589 return ans;
5590}
5591
5592/* Negation across a range of the container
5593 * Compute the negation of src and write the result
5594 * to *dst. A true return value indicates a bitset result,
5595 * otherwise the result is an array container.
5596 * We assume that dst is not pre-allocated. In
5597 * case of failure, *dst will be NULL.
5598 */
5599bool bitset_container_negation_range(const bitset_container_t *src,
5600 const int range_start, const int range_end,
5601 void **dst) {
5602 // TODO maybe consider density-based estimate
5603 // and sometimes build result directly as array, with
5604 // conversion back to bitset if wrong. Or determine
5605 // actual result cardinality, then go directly for the known final cont.
5606
5607 // keep computation using bitsets as long as possible.
5608 bitset_container_t *t = bitset_container_clone(src);
5609 bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end);
5610 t->cardinality = bitset_container_compute_cardinality(t);
5611
5612 if (t->cardinality > DEFAULT_MAX_SIZE) {
5613 *dst = t;
5614 return true;
5615 } else {
5616 *dst = array_container_from_bitset(t);
5617 bitset_container_free(t);
5618 return false;
5619 }
5620}
5621
5622/* inplace version */
5623/*
5624 * Same as bitset_container_negation except that if the output is to
5625 * be a
5626 * bitset_container_t, then src is modified and no allocation is made.
5627 * If the output is to be an array_container_t, then caller is responsible
5628 * to free the container.
5629 * In all cases, the result is in *dst.
5630 */
5631bool bitset_container_negation_range_inplace(bitset_container_t *src,
5632 const int range_start,
5633 const int range_end, void **dst) {
5634 bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end);
5635 src->cardinality = bitset_container_compute_cardinality(src);
5636 if (src->cardinality > DEFAULT_MAX_SIZE) {
5637 *dst = src;
5638 return true;
5639 }
5640 *dst = array_container_from_bitset(src);
5641 bitset_container_free(src);
5642 return false;
5643}
5644
5645/* Negation across a range of container
5646 * Compute the negation of src and write the result
5647 * to *dst. Return values are the *_TYPECODES as defined * in containers.h
5648 * We assume that dst is not pre-allocated. In
5649 * case of failure, *dst will be NULL.
5650 */
5651int run_container_negation_range(const run_container_t *src,
5652 const int range_start, const int range_end,
5653 void **dst) {
5654 uint8_t return_typecode;
5655
5656 // follows the Java implementation
5657 if (range_end <= range_start) {
5658 *dst = run_container_clone(src);
5659 return RUN_CONTAINER_TYPE_CODE;
5660 }
5661
5662 run_container_t *ans = run_container_create_given_capacity(
5663 src->n_runs + 1); // src->n_runs + 1);
5664 int k = 0;
5665 for (; k < src->n_runs && src->runs[k].value < range_start; ++k) {
5666 ans->runs[k] = src->runs[k];
5667 ans->n_runs++;
5668 }
5669
5670 run_container_smart_append_exclusive(
5671 ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));
5672
5673 for (; k < src->n_runs; ++k) {
5674 run_container_smart_append_exclusive(ans, src->runs[k].value,
5675 src->runs[k].length);
5676 }
5677
5678 *dst = convert_run_to_efficient_container(ans, &return_typecode);
5679 if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);
5680
5681 return return_typecode;
5682}
5683
/*
 * In-place counterpart of run_container_negation_range: negates membership
 * of the values in [range_start, range_end) and stores the resulting
 * container in *dst. Returns the type code of *dst.
 *
 *  - Empty range (range_end <= range_start): *dst is src, unmodified.
 *  - src has no spare capacity and the boundary checks below conclude that
 *    the result may not fit: the non-inplace routine builds a new container
 *    and src is freed.
 *  - Otherwise the runs of src are rewritten in place (no allocation for the
 *    run array). If the final conversion picks a non-run container, src is
 *    freed and *dst is that new container; else *dst is src.
 * In all cases, the result is in *dst.
 */
int run_container_negation_range_inplace(run_container_t *src,
                                         const int range_start,
                                         const int range_end, void **dst) {
    uint8_t return_typecode;

    if (range_end <= range_start) {
        *dst = src;
        return RUN_CONTAINER_TYPE_CODE;
    }

    // TODO: efficient special case when range is 0 to 65535 inclusive

    if (src->capacity == src->n_runs) {
        // no excess room.  More checking to see if result can fit.
        // Membership is sampled just outside and just inside each end of
        // the range. NOTE(review): equal membership across a boundary
        // presumably means the flip creates a new run edge there; only when
        // that holds at both ends is the result treated as not fitting.
        bool last_val_before_range = false;
        bool first_val_in_range = false;
        bool last_val_in_range = false;
        bool first_val_past_range = false;

        // range_start == 0 has no predecessor; it is treated as "absent"
        if (range_start > 0)
            last_val_before_range =
                run_container_contains(src, (uint16_t)(range_start - 1));
        first_val_in_range = run_container_contains(src, (uint16_t)range_start);

        if (last_val_before_range == first_val_in_range) {
            last_val_in_range =
                run_container_contains(src, (uint16_t)(range_end - 1));
            // range_end == 0x10000 has no successor; treated as "absent"
            if (range_end != 0x10000)
                first_val_past_range =
                    run_container_contains(src, (uint16_t)range_end);

            if (last_val_in_range ==
                first_val_past_range) {  // no space for inplace
                // fall back to the allocating version, then release src
                int ans = run_container_negation_range(src, range_start,
                                                       range_end, dst);
                run_container_free(src);
                return ans;
            }
        }
    }
    // all other cases: result will fit

    // ans aliases src: runs are read from and appended to the same array
    run_container_t *ans = src;
    int my_nbr_runs = src->n_runs;

    ans->n_runs = 0;
    int k = 0;
    for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
        // runs starting before the range are already in place (copying
        // ans->runs[k] = src->runs[k] would be a self-copy); just count them
        ans->n_runs++;
    }

    // as with Java implementation, use locals to give self a buffer of depth 1
    rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0};
    rle16_t next = buffered;
    if (k < my_nbr_runs) buffered = src->runs[k];

    // XOR-in the range itself (length is stored as "count - 1")
    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    for (; k < my_nbr_runs; ++k) {
        // read run k + 1 before appending: the append below writes into the
        // same array and may overwrite that slot
        if (k + 1 < my_nbr_runs) next = src->runs[k + 1];

        run_container_smart_append_exclusive(ans, buffered.value,
                                             buffered.length);
        buffered = next;
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    // if a different container type was produced, the run (src) is obsolete
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}
5764/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */
5765/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */
5766
5767bool array_container_is_subset_bitset(const array_container_t* container1,
5768 const bitset_container_t* container2) {
5769 if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5770 if (container2->cardinality < container1->cardinality) {
5771 return false;
5772 }
5773 }
5774 for (int i = 0; i < container1->cardinality; ++i) {
5775 if (!bitset_container_contains(container2, container1->array[i])) {
5776 return false;
5777 }
5778 }
5779 return true;
5780}
5781
5782bool run_container_is_subset_array(const run_container_t* container1,
5783 const array_container_t* container2) {
5784 if (run_container_cardinality(container1) > container2->cardinality)
5785 return false;
5786 int32_t start_pos = -1, stop_pos = -1;
5787 for (int i = 0; i < container1->n_runs; ++i) {
5788 int32_t start = container1->runs[i].value;
5789 int32_t stop = start + container1->runs[i].length;
5790 start_pos = advanceUntil(container2->array, stop_pos,
5791 container2->cardinality, start);
5792 stop_pos = advanceUntil(container2->array, stop_pos,
5793 container2->cardinality, stop);
5794 if (start_pos == container2->cardinality) {
5795 return false;
5796 } else if (stop_pos - start_pos != stop - start ||
5797 container2->array[start_pos] != start ||
5798 container2->array[stop_pos] != stop) {
5799 return false;
5800 }
5801 }
5802 return true;
5803}
5804
5805bool array_container_is_subset_run(const array_container_t* container1,
5806 const run_container_t* container2) {
5807 if (container1->cardinality > run_container_cardinality(container2))
5808 return false;
5809 int i_array = 0, i_run = 0;
5810 while (i_array < container1->cardinality && i_run < container2->n_runs) {
5811 uint32_t start = container2->runs[i_run].value;
5812 uint32_t stop = start + container2->runs[i_run].length;
5813 if (container1->array[i_array] < start) {
5814 return false;
5815 } else if (container1->array[i_array] > stop) {
5816 i_run++;
5817 } else { // the value of the array is in the run
5818 i_array++;
5819 }
5820 }
5821 if (i_array == container1->cardinality) {
5822 return true;
5823 } else {
5824 return false;
5825 }
5826}
5827
5828bool run_container_is_subset_bitset(const run_container_t* container1,
5829 const bitset_container_t* container2) {
5830 // todo: this code could be much faster
5831 if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5832 if (container2->cardinality < run_container_cardinality(container1)) {
5833 return false;
5834 }
5835 } else {
5836 int32_t card = bitset_container_compute_cardinality(
5837 container2); // modify container2?
5838 if (card < run_container_cardinality(container1)) {
5839 return false;
5840 }
5841 }
5842 for (int i = 0; i < container1->n_runs; ++i) {
5843 uint32_t run_start = container1->runs[i].value;
5844 uint32_t le = container1->runs[i].length;
5845 for (uint32_t j = run_start; j <= run_start + le; ++j) {
5846 if (!bitset_container_contains(container2, j)) {
5847 return false;
5848 }
5849 }
5850 }
5851 return true;
5852}
5853
5854bool bitset_container_is_subset_run(const bitset_container_t* container1,
5855 const run_container_t* container2) {
5856 // todo: this code could be much faster
5857 if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) {
5858 if (container1->cardinality > run_container_cardinality(container2)) {
5859 return false;
5860 }
5861 }
5862 int32_t i_bitset = 0, i_run = 0;
5863 while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS &&
5864 i_run < container2->n_runs) {
5865 uint64_t w = container1->array[i_bitset];
5866 while (w != 0 && i_run < container2->n_runs) {
5867 uint32_t start = container2->runs[i_run].value;
5868 uint32_t stop = start + container2->runs[i_run].length;
5869 uint64_t t = w & (~w + 1);
5870 uint16_t r = i_bitset * 64 + __builtin_ctzll(w);
5871 if (r < start) {
5872 return false;
5873 } else if (r > stop) {
5874 i_run++;
5875 continue;
5876 } else {
5877 w ^= t;
5878 }
5879 }
5880 if (w == 0) {
5881 i_bitset++;
5882 } else {
5883 return false;
5884 }
5885 }
5886 if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) {
5887 // terminated iterating on the run containers, check that rest of bitset
5888 // is empty
5889 for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) {
5890 if (container1->array[i_bitset] != 0) {
5891 return false;
5892 }
5893 }
5894 }
5895 return true;
5896}
5897/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */
5898/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */
5899/*
5900 * mixed_union.c
5901 *
5902 */
5903
5904#include <assert.h>
5905#include <string.h>
5906
5907
5908/* Compute the union of src_1 and src_2 and write the result to
5909 * dst. */
5910void array_bitset_container_union(const array_container_t *src_1,
5911 const bitset_container_t *src_2,
5912 bitset_container_t *dst) {
5913 if (src_2 != dst) bitset_container_copy(src_2, dst);
5914 dst->cardinality = (int32_t)bitset_set_list_withcard(
5915 dst->array, dst->cardinality, src_1->array, src_1->cardinality);
5916}
5917
5918/* Compute the union of src_1 and src_2 and write the result to
5919 * dst. It is allowed for src_2 to be dst. This version does not
5920 * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */
5921void array_bitset_container_lazy_union(const array_container_t *src_1,
5922 const bitset_container_t *src_2,
5923 bitset_container_t *dst) {
5924 if (src_2 != dst) bitset_container_copy(src_2, dst);
5925 bitset_set_list(dst->array, src_1->array, src_1->cardinality);
5926 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
5927}
5928
5929void run_bitset_container_union(const run_container_t *src_1,
5930 const bitset_container_t *src_2,
5931 bitset_container_t *dst) {
5932 assert(!run_container_is_full(src_1)); // catch this case upstream
5933 if (src_2 != dst) bitset_container_copy(src_2, dst);
5934 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5935 rle16_t rle = src_1->runs[rlepos];
5936 bitset_set_lenrange(dst->array, rle.value, rle.length);
5937 }
5938 dst->cardinality = bitset_container_compute_cardinality(dst);
5939}
5940
5941void run_bitset_container_lazy_union(const run_container_t *src_1,
5942 const bitset_container_t *src_2,
5943 bitset_container_t *dst) {
5944 assert(!run_container_is_full(src_1)); // catch this case upstream
5945 if (src_2 != dst) bitset_container_copy(src_2, dst);
5946 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
5947 rle16_t rle = src_1->runs[rlepos];
5948 bitset_set_lenrange(dst->array, rle.value, rle.length);
5949 }
5950 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
5951}
5952
5953// why do we leave the result as a run container??
5954void array_run_container_union(const array_container_t *src_1,
5955 const run_container_t *src_2,
5956 run_container_t *dst) {
5957 if (run_container_is_full(src_2)) {
5958 run_container_copy(src_2, dst);
5959 return;
5960 }
5961 // TODO: see whether the "2*" is spurious
5962 run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false);
5963 int32_t rlepos = 0;
5964 int32_t arraypos = 0;
5965 rle16_t previousrle;
5966 if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
5967 previousrle = run_container_append_first(dst, src_2->runs[rlepos]);
5968 rlepos++;
5969 } else {
5970 previousrle =
5971 run_container_append_value_first(dst, src_1->array[arraypos]);
5972 arraypos++;
5973 }
5974 while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
5975 if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
5976 run_container_append(dst, src_2->runs[rlepos], &previousrle);
5977 rlepos++;
5978 } else {
5979 run_container_append_value(dst, src_1->array[arraypos],
5980 &previousrle);
5981 arraypos++;
5982 }
5983 }
5984 if (arraypos < src_1->cardinality) {
5985 while (arraypos < src_1->cardinality) {
5986 run_container_append_value(dst, src_1->array[arraypos],
5987 &previousrle);
5988 arraypos++;
5989 }
5990 } else {
5991 while (rlepos < src_2->n_runs) {
5992 run_container_append(dst, src_2->runs[rlepos], &previousrle);
5993 rlepos++;
5994 }
5995 }
5996}
5997
5998void array_run_container_inplace_union(const array_container_t *src_1,
5999 run_container_t *src_2) {
6000 if (run_container_is_full(src_2)) {
6001 return;
6002 }
6003 const int32_t maxoutput = src_1->cardinality + src_2->n_runs;
6004 const int32_t neededcapacity = maxoutput + src_2->n_runs;
6005 if (src_2->capacity < neededcapacity)
6006 run_container_grow(src_2, neededcapacity, true);
6007 memmove(src_2->runs + maxoutput, src_2->runs,
6008 src_2->n_runs * sizeof(rle16_t));
6009 rle16_t *inputsrc2 = src_2->runs + maxoutput;
6010 int32_t rlepos = 0;
6011 int32_t arraypos = 0;
6012 int src2nruns = src_2->n_runs;
6013 src_2->n_runs = 0;
6014
6015 rle16_t previousrle;
6016
6017 if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
6018 previousrle = run_container_append_first(src_2, inputsrc2[rlepos]);
6019 rlepos++;
6020 } else {
6021 previousrle =
6022 run_container_append_value_first(src_2, src_1->array[arraypos]);
6023 arraypos++;
6024 }
6025
6026 while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) {
6027 if (inputsrc2[rlepos].value <= src_1->array[arraypos]) {
6028 run_container_append(src_2, inputsrc2[rlepos], &previousrle);
6029 rlepos++;
6030 } else {
6031 run_container_append_value(src_2, src_1->array[arraypos],
6032 &previousrle);
6033 arraypos++;
6034 }
6035 }
6036 if (arraypos < src_1->cardinality) {
6037 while (arraypos < src_1->cardinality) {
6038 run_container_append_value(src_2, src_1->array[arraypos],
6039 &previousrle);
6040 arraypos++;
6041 }
6042 } else {
6043 while (rlepos < src2nruns) {
6044 run_container_append(src_2, inputsrc2[rlepos], &previousrle);
6045 rlepos++;
6046 }
6047 }
6048}
6049
6050bool array_array_container_union(const array_container_t *src_1,
6051 const array_container_t *src_2, void **dst) {
6052 int totalCardinality = src_1->cardinality + src_2->cardinality;
6053 if (totalCardinality <= DEFAULT_MAX_SIZE) {
6054 *dst = array_container_create_given_capacity(totalCardinality);
6055 if (*dst != NULL) {
6056 array_container_union(src_1, src_2, (array_container_t *)*dst);
6057 } else {
6058 return true; // otherwise failure won't be caught
6059 }
6060 return false; // not a bitset
6061 }
6062 *dst = bitset_container_create();
6063 bool returnval = true; // expect a bitset
6064 if (*dst != NULL) {
6065 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6066 bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
6067 ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
6068 ourbitset->array, src_1->cardinality, src_2->array,
6069 src_2->cardinality);
6070 if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
6071 // need to convert!
6072 *dst = array_container_from_bitset(ourbitset);
6073 bitset_container_free(ourbitset);
6074 returnval = false; // not going to be a bitset
6075 }
6076 }
6077 return returnval;
6078}
6079
6080bool array_array_container_inplace_union(array_container_t *src_1,
6081 const array_container_t *src_2, void **dst) {
6082 int totalCardinality = src_1->cardinality + src_2->cardinality;
6083 *dst = NULL;
6084 if (totalCardinality <= DEFAULT_MAX_SIZE) {
6085 if(src_1->capacity < totalCardinality) {
6086 *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
6087 if (*dst != NULL) {
6088 array_container_union(src_1, src_2, (array_container_t *)*dst);
6089 } else {
6090 return true; // otherwise failure won't be caught
6091 }
6092 return false; // not a bitset
6093 } else {
6094 memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
6095 src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
6096 src_2->array, src_2->cardinality, src_1->array);
6097 return false; // not a bitset
6098 }
6099 }
6100 *dst = bitset_container_create();
6101 bool returnval = true; // expect a bitset
6102 if (*dst != NULL) {
6103 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6104 bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
6105 ourbitset->cardinality = (int32_t)bitset_set_list_withcard(
6106 ourbitset->array, src_1->cardinality, src_2->array,
6107 src_2->cardinality);
6108 if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
6109 // need to convert!
6110 if(src_1->capacity < ourbitset->cardinality) {
6111 array_container_grow(src_1, ourbitset->cardinality, false);
6112 }
6113
6114 bitset_extract_setbits_uint16(ourbitset->array, BITSET_CONTAINER_SIZE_IN_WORDS,
6115 src_1->array, 0);
6116 src_1->cardinality = ourbitset->cardinality;
6117 *dst = src_1;
6118 bitset_container_free(ourbitset);
6119 returnval = false; // not going to be a bitset
6120 }
6121 }
6122 return returnval;
6123}
6124
6125
6126bool array_array_container_lazy_union(const array_container_t *src_1,
6127 const array_container_t *src_2,
6128 void **dst) {
6129 int totalCardinality = src_1->cardinality + src_2->cardinality;
6130 if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
6131 *dst = array_container_create_given_capacity(totalCardinality);
6132 if (*dst != NULL) {
6133 array_container_union(src_1, src_2, (array_container_t *)*dst);
6134 } else {
6135 return true; // otherwise failure won't be caught
6136 }
6137 return false; // not a bitset
6138 }
6139 *dst = bitset_container_create();
6140 bool returnval = true; // expect a bitset
6141 if (*dst != NULL) {
6142 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6143 bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
6144 bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality);
6145 ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
6146 }
6147 return returnval;
6148}
6149
6150
6151bool array_array_container_lazy_inplace_union(array_container_t *src_1,
6152 const array_container_t *src_2,
6153 void **dst) {
6154 int totalCardinality = src_1->cardinality + src_2->cardinality;
6155 *dst = NULL;
6156 if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
6157 if(src_1->capacity < totalCardinality) {
6158 *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous
6159 if (*dst != NULL) {
6160 array_container_union(src_1, src_2, (array_container_t *)*dst);
6161 } else {
6162 return true; // otherwise failure won't be caught
6163 }
6164 return false; // not a bitset
6165 } else {
6166 memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t));
6167 src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality,
6168 src_2->array, src_2->cardinality, src_1->array);
6169 return false; // not a bitset
6170 }
6171 }
6172 *dst = bitset_container_create();
6173 bool returnval = true; // expect a bitset
6174 if (*dst != NULL) {
6175 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6176 bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality);
6177 bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality);
6178 ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
6179 }
6180 return returnval;
6181}
6182/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */
6183/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */
6184/*
6185 * mixed_xor.c
6186 */
6187
6188#include <assert.h>
6189#include <string.h>
6190
6191
6192/* Compute the xor of src_1 and src_2 and write the result to
6193 * dst (which has no container initially).
6194 * Result is true iff dst is a bitset */
6195bool array_bitset_container_xor(const array_container_t *src_1,
6196 const bitset_container_t *src_2, void **dst) {
6197 bitset_container_t *result = bitset_container_create();
6198 bitset_container_copy(src_2, result);
6199 result->cardinality = (int32_t)bitset_flip_list_withcard(
6200 result->array, result->cardinality, src_1->array, src_1->cardinality);
6201
6202 // do required type conversions.
6203 if (result->cardinality <= DEFAULT_MAX_SIZE) {
6204 *dst = array_container_from_bitset(result);
6205 bitset_container_free(result);
6206 return false; // not bitset
6207 }
6208 *dst = result;
6209 return true; // bitset
6210}
6211
6212/* Compute the xor of src_1 and src_2 and write the result to
6213 * dst. It is allowed for src_2 to be dst. This version does not
6214 * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY).
6215 */
6216
6217void array_bitset_container_lazy_xor(const array_container_t *src_1,
6218 const bitset_container_t *src_2,
6219 bitset_container_t *dst) {
6220 if (src_2 != dst) bitset_container_copy(src_2, dst);
6221 bitset_flip_list(dst->array, src_1->array, src_1->cardinality);
6222 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
6223}
6224
6225/* Compute the xor of src_1 and src_2 and write the result to
6226 * dst. Result may be either a bitset or an array container
6227 * (returns "result is bitset"). dst does not initially have
6228 * any container, but becomes either a bitset container (return
6229 * result true) or an array container.
6230 */
6231
6232bool run_bitset_container_xor(const run_container_t *src_1,
6233 const bitset_container_t *src_2, void **dst) {
6234 bitset_container_t *result = bitset_container_create();
6235
6236 bitset_container_copy(src_2, result);
6237 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
6238 rle16_t rle = src_1->runs[rlepos];
6239 bitset_flip_range(result->array, rle.value,
6240 rle.value + rle.length + UINT32_C(1));
6241 }
6242 result->cardinality = bitset_container_compute_cardinality(result);
6243
6244 if (result->cardinality <= DEFAULT_MAX_SIZE) {
6245 *dst = array_container_from_bitset(result);
6246 bitset_container_free(result);
6247 return false; // not bitset
6248 }
6249 *dst = result;
6250 return true; // bitset
6251}
6252
6253/* lazy xor. Dst is initialized and may be equal to src_2.
6254 * Result is left as a bitset container, even if actual
6255 * cardinality would dictate an array container.
6256 */
6257
6258void run_bitset_container_lazy_xor(const run_container_t *src_1,
6259 const bitset_container_t *src_2,
6260 bitset_container_t *dst) {
6261 if (src_2 != dst) bitset_container_copy(src_2, dst);
6262 for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) {
6263 rle16_t rle = src_1->runs[rlepos];
6264 bitset_flip_range(dst->array, rle.value,
6265 rle.value + rle.length + UINT32_C(1));
6266 }
6267 dst->cardinality = BITSET_UNKNOWN_CARDINALITY;
6268}
6269
6270/* dst does not indicate a valid container initially. Eventually it
6271 * can become any kind of container.
6272 */
6273
6274int array_run_container_xor(const array_container_t *src_1,
6275 const run_container_t *src_2, void **dst) {
6276 // semi following Java XOR implementation as of May 2016
6277 // the C OR implementation works quite differently and can return a run
6278 // container
6279 // TODO could optimize for full run containers.
6280
6281 // use of lazy following Java impl.
6282 const int arbitrary_threshold = 32;
6283 if (src_1->cardinality < arbitrary_threshold) {
6284 run_container_t *ans = run_container_create();
6285 array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs.
6286 uint8_t typecode_after;
6287 *dst =
6288 convert_run_to_efficient_container_and_free(ans, &typecode_after);
6289 return typecode_after;
6290 }
6291
6292 int card = run_container_cardinality(src_2);
6293 if (card <= DEFAULT_MAX_SIZE) {
6294 // Java implementation works with the array, xoring the run elements via
6295 // iterator
6296 array_container_t *temp = array_container_from_run(src_2);
6297 bool ret_is_bitset = array_array_container_xor(temp, src_1, dst);
6298 array_container_free(temp);
6299 return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE
6300 : ARRAY_CONTAINER_TYPE_CODE;
6301
6302 } else { // guess that it will end up as a bitset
6303 bitset_container_t *result = bitset_container_from_run(src_2);
6304 bool is_bitset = bitset_array_container_ixor(result, src_1, dst);
6305 // any necessary type conversion has been done by the ixor
6306 int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE
6307 : ARRAY_CONTAINER_TYPE_CODE);
6308 return retval;
6309 }
6310}
6311
6312/* Dst is a valid run container. (Can it be src_2? Let's say not.)
6313 * Leaves result as run container, even if other options are
6314 * smaller.
6315 */
6316
6317void array_run_container_lazy_xor(const array_container_t *src_1,
6318 const run_container_t *src_2,
6319 run_container_t *dst) {
6320 run_container_grow(dst, src_1->cardinality + src_2->n_runs, false);
6321 int32_t rlepos = 0;
6322 int32_t arraypos = 0;
6323 dst->n_runs = 0;
6324
6325 while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) {
6326 if (src_2->runs[rlepos].value <= src_1->array[arraypos]) {
6327 run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
6328 src_2->runs[rlepos].length);
6329 rlepos++;
6330 } else {
6331 run_container_smart_append_exclusive(dst, src_1->array[arraypos],
6332 0);
6333 arraypos++;
6334 }
6335 }
6336 while (arraypos < src_1->cardinality) {
6337 run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0);
6338 arraypos++;
6339 }
6340 while (rlepos < src_2->n_runs) {
6341 run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value,
6342 src_2->runs[rlepos].length);
6343 rlepos++;
6344 }
6345}
6346
6347/* dst does not indicate a valid container initially. Eventually it
6348 * can become any kind of container.
6349 */
6350
6351int run_run_container_xor(const run_container_t *src_1,
6352 const run_container_t *src_2, void **dst) {
6353 run_container_t *ans = run_container_create();
6354 run_container_xor(src_1, src_2, ans);
6355 uint8_t typecode_after;
6356 *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after);
6357 return typecode_after;
6358}
6359
6360/*
6361 * Java implementation (as of May 2016) for array_run, run_run
6362 * and bitset_run don't do anything different for inplace.
6363 * Could adopt the mixed_union.c approach instead (ie, using
6364 * smart_append_exclusive)
6365 *
6366 */
6367
6368bool array_array_container_xor(const array_container_t *src_1,
6369 const array_container_t *src_2, void **dst) {
6370 int totalCardinality =
6371 src_1->cardinality + src_2->cardinality; // upper bound
6372 if (totalCardinality <= DEFAULT_MAX_SIZE) {
6373 *dst = array_container_create_given_capacity(totalCardinality);
6374 array_container_xor(src_1, src_2, (array_container_t *)*dst);
6375 return false; // not a bitset
6376 }
6377 *dst = bitset_container_from_array(src_1);
6378 bool returnval = true; // expect a bitset
6379 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6380 ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard(
6381 ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality);
6382 if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) {
6383 // need to convert!
6384 *dst = array_container_from_bitset(ourbitset);
6385 bitset_container_free(ourbitset);
6386 returnval = false; // not going to be a bitset
6387 }
6388
6389 return returnval;
6390}
6391
6392bool array_array_container_lazy_xor(const array_container_t *src_1,
6393 const array_container_t *src_2,
6394 void **dst) {
6395 int totalCardinality = src_1->cardinality + src_2->cardinality;
6396 // upper bound, but probably poor estimate for xor
6397 if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) {
6398 *dst = array_container_create_given_capacity(totalCardinality);
6399 if (*dst != NULL)
6400 array_container_xor(src_1, src_2, (array_container_t *)*dst);
6401 return false; // not a bitset
6402 }
6403 *dst = bitset_container_from_array(src_1);
6404 bool returnval = true; // expect a bitset (maybe, for XOR??)
6405 if (*dst != NULL) {
6406 bitset_container_t *ourbitset = (bitset_container_t *)*dst;
6407 bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality);
6408 ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY;
6409 }
6410 return returnval;
6411}
6412
6413/* Compute the xor of src_1 and src_2 and write the result to
6414 * dst (which has no container initially). Return value is
6415 * "dst is a bitset"
6416 */
6417
6418bool bitset_bitset_container_xor(const bitset_container_t *src_1,
6419 const bitset_container_t *src_2, void **dst) {
6420 bitset_container_t *ans = bitset_container_create();
6421 int card = bitset_container_xor(src_1, src_2, ans);
6422 if (card <= DEFAULT_MAX_SIZE) {
6423 *dst = array_container_from_bitset(ans);
6424 bitset_container_free(ans);
6425 return false; // not bitset
6426 } else {
6427 *dst = ans;
6428 return true;
6429 }
6430}
6431
6432/* Compute the xor of src_1 and src_2 and write the result to
6433 * dst (which has no container initially). It will modify src_1
6434 * to be dst if the result is a bitset. Otherwise, it will
6435 * free src_1 and dst will be a new array container. In both
6436 * cases, the caller is responsible for deallocating dst.
6437 * Returns true iff dst is a bitset */
6438
6439bool bitset_array_container_ixor(bitset_container_t *src_1,
6440 const array_container_t *src_2, void **dst) {
6441 *dst = src_1;
6442 src_1->cardinality = (uint32_t)bitset_flip_list_withcard(
6443 src_1->array, src_1->cardinality, src_2->array, src_2->cardinality);
6444
6445 if (src_1->cardinality <= DEFAULT_MAX_SIZE) {
6446 *dst = array_container_from_bitset(src_1);
6447 bitset_container_free(src_1);
6448 return false; // not bitset
6449 } else
6450 return true;
6451}
6452
6453/* a bunch of in-place, some of which may not *really* be inplace.
6454 * TODO: write actual inplace routine if efficiency warrants it
6455 * Anything inplace with a bitset is a good candidate
6456 */
6457
6458bool bitset_bitset_container_ixor(bitset_container_t *src_1,
6459 const bitset_container_t *src_2, void **dst) {
6460 bool ans = bitset_bitset_container_xor(src_1, src_2, dst);
6461 bitset_container_free(src_1);
6462 return ans;
6463}
6464
6465bool array_bitset_container_ixor(array_container_t *src_1,
6466 const bitset_container_t *src_2, void **dst) {
6467 bool ans = array_bitset_container_xor(src_1, src_2, dst);
6468 array_container_free(src_1);
6469 return ans;
6470}
6471
6472/* Compute the xor of src_1 and src_2 and write the result to
6473 * dst. Result may be either a bitset or an array container
6474 * (returns "result is bitset"). dst does not initially have
6475 * any container, but becomes either a bitset container (return
6476 * result true) or an array container.
6477 */
6478
6479bool run_bitset_container_ixor(run_container_t *src_1,
6480 const bitset_container_t *src_2, void **dst) {
6481 bool ans = run_bitset_container_xor(src_1, src_2, dst);
6482 run_container_free(src_1);
6483 return ans;
6484}
6485
6486bool bitset_run_container_ixor(bitset_container_t *src_1,
6487 const run_container_t *src_2, void **dst) {
6488 bool ans = run_bitset_container_xor(src_2, src_1, dst);
6489 bitset_container_free(src_1);
6490 return ans;
6491}
6492
6493/* dst does not indicate a valid container initially. Eventually it
6494 * can become any kind of container.
6495 */
6496
6497int array_run_container_ixor(array_container_t *src_1,
6498 const run_container_t *src_2, void **dst) {
6499 int ans = array_run_container_xor(src_1, src_2, dst);
6500 array_container_free(src_1);
6501 return ans;
6502}
6503
6504int run_array_container_ixor(run_container_t *src_1,
6505 const array_container_t *src_2, void **dst) {
6506 int ans = array_run_container_xor(src_2, src_1, dst);
6507 run_container_free(src_1);
6508 return ans;
6509}
6510
6511bool array_array_container_ixor(array_container_t *src_1,
6512 const array_container_t *src_2, void **dst) {
6513 bool ans = array_array_container_xor(src_1, src_2, dst);
6514 array_container_free(src_1);
6515 return ans;
6516}
6517
6518int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2,
6519 void **dst) {
6520 int ans = run_run_container_xor(src_1, src_2, dst);
6521 run_container_free(src_1);
6522 return ans;
6523}
6524/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */
6525/* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */
6526#include <stdio.h>
6527#include <stdlib.h>
6528
6529
/* Re-declarations of run-container helpers whose bodies are not in this
 * part of the file.
 * NOTE(review): presumably these helpers are defined `inline` in the run
 * container header, and these `extern` declarations make this translation
 * unit provide their external definitions (C99 inline rules) -- confirm
 * against the header. */
extern inline uint16_t run_container_minimum(const run_container_t *run);
extern inline uint16_t run_container_maximum(const run_container_t *run);
extern inline int32_t interleavedBinarySearch(const rle16_t *array,
                                              int32_t lenarray, uint16_t ikey);
extern inline bool run_container_contains(const run_container_t *run,
                                          uint16_t pos);
extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x);
extern bool run_container_is_full(const run_container_t *run);
extern bool run_container_nonzero_cardinality(const run_container_t *r);
extern void run_container_clear(run_container_t *run);
extern int32_t run_container_serialized_size_in_bytes(int32_t num_runs);
extern run_container_t *run_container_create_range(uint32_t start,
                                                   uint32_t stop);
6543
6544bool run_container_add(run_container_t *run, uint16_t pos) {
6545 int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos);
6546 if (index >= 0) return false; // already there
6547 index = -index - 2; // points to preceding value, possibly -1
6548 if (index >= 0) { // possible match
6549 int32_t offset = pos - run->runs[index].value;
6550 int32_t le = run->runs[index].length;
6551 if (offset <= le) return false; // already there
6552 if (offset == le + 1) {
6553 // we may need to fuse
6554 if (index + 1 < run->n_runs) {
6555 if (run->runs[index + 1].value == pos + 1) {
6556 // indeed fusion is needed
6557 run->runs[index].length = run->runs[index + 1].value +
6558 run->runs[index + 1].length -
6559 run->runs[index].value;
6560 recoverRoomAtIndex(run, (uint16_t)(index + 1));
6561 return true;
6562 }
6563 }
6564 run->runs[index].length++;
6565 return true;
6566 }
6567 if (index + 1 < run->n_runs) {
6568 // we may need to fuse
6569 if (run->runs[index + 1].value == pos + 1) {
6570 // indeed fusion is needed
6571 run->runs[index + 1].value = pos;
6572 run->runs[index + 1].length = run->runs[index + 1].length + 1;
6573 return true;
6574 }
6575 }
6576 }
6577 if (index == -1) {
6578 // we may need to extend the first run
6579 if (0 < run->n_runs) {
6580 if (run->runs[0].value == pos + 1) {
6581 run->runs[0].length++;
6582 run->runs[0].value--;
6583 return true;
6584 }
6585 }
6586 }
6587 makeRoomAtIndex(run, (uint16_t)(index + 1));
6588 run->runs[index + 1].value = pos;
6589 run->runs[index + 1].length = 0;
6590 return true;
6591}
6592
6593/* Create a new run container. Return NULL in case of failure. */
6594run_container_t *run_container_create_given_capacity(int32_t size) {
6595 run_container_t *run;
6596 /* Allocate the run container itself. */
6597 if ((run = (run_container_t *)malloc(sizeof(run_container_t))) == NULL) {
6598 return NULL;
6599 }
6600 if (size <= 0 ) { // we don't want to rely on malloc(0)
6601 run->runs = NULL;
6602 } else if ((run->runs = (rle16_t *)malloc(sizeof(rle16_t) * size)) == NULL) {
6603 free(run);
6604 return NULL;
6605 }
6606 run->capacity = size;
6607 run->n_runs = 0;
6608 return run;
6609}
6610
6611int run_container_shrink_to_fit(run_container_t *src) {
6612 if (src->n_runs == src->capacity) return 0; // nothing to do
6613 int savings = src->capacity - src->n_runs;
6614 src->capacity = src->n_runs;
6615 rle16_t *oldruns = src->runs;
6616 src->runs = (rle16_t *)realloc(oldruns, src->capacity * sizeof(rle16_t));
6617 if (src->runs == NULL) free(oldruns); // should never happen?
6618 return savings;
6619}
6620/* Create a new run container. Return NULL in case of failure. */
6621run_container_t *run_container_create(void) {
6622 return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE);
6623}
6624
6625run_container_t *run_container_clone(const run_container_t *src) {
6626 run_container_t *run = run_container_create_given_capacity(src->capacity);
6627 if (run == NULL) return NULL;
6628 run->capacity = src->capacity;
6629 run->n_runs = src->n_runs;
6630 memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t));
6631 return run;
6632}
6633
6634/* Free memory. */
6635void run_container_free(run_container_t *run) {
6636 if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise
6637 free(run->runs);
6638 run->runs = NULL; // pedantic
6639 }
6640 free(run);
6641}
6642
6643void run_container_grow(run_container_t *run, int32_t min, bool copy) {
6644 int32_t newCapacity =
6645 (run->capacity == 0)
6646 ? RUN_DEFAULT_INIT_SIZE
6647 : run->capacity < 64 ? run->capacity * 2
6648 : run->capacity < 1024 ? run->capacity * 3 / 2
6649 : run->capacity * 5 / 4;
6650 if (newCapacity < min) newCapacity = min;
6651 run->capacity = newCapacity;
6652 assert(run->capacity >= min);
6653 if (copy) {
6654 rle16_t *oldruns = run->runs;
6655 run->runs =
6656 (rle16_t *)realloc(oldruns, run->capacity * sizeof(rle16_t));
6657 if (run->runs == NULL) free(oldruns);
6658 } else {
6659 // Jon Strabala reports that some tools complain otherwise
6660 if (run->runs != NULL) {
6661 free(run->runs);
6662 }
6663 run->runs = (rle16_t *)malloc(run->capacity * sizeof(rle16_t));
6664 }
6665 // handle the case where realloc fails
6666 if (run->runs == NULL) {
6667 fprintf(stderr, "could not allocate memory\n");
6668 }
6669 assert(run->runs != NULL);
6670}
6671
6672/* copy one container into another */
6673void run_container_copy(const run_container_t *src, run_container_t *dst) {
6674 const int32_t n_runs = src->n_runs;
6675 if (src->n_runs > dst->capacity) {
6676 run_container_grow(dst, n_runs, false);
6677 }
6678 dst->n_runs = n_runs;
6679 memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs);
6680}
6681
6682/* Compute the union of `src_1' and `src_2' and write the result to `dst'
6683 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
6684void run_container_union(const run_container_t *src_1,
6685 const run_container_t *src_2, run_container_t *dst) {
6686 // TODO: this could be a lot more efficient
6687
6688 // we start out with inexpensive checks
6689 const bool if1 = run_container_is_full(src_1);
6690 const bool if2 = run_container_is_full(src_2);
6691 if (if1 || if2) {
6692 if (if1) {
6693 run_container_copy(src_1, dst);
6694 return;
6695 }
6696 if (if2) {
6697 run_container_copy(src_2, dst);
6698 return;
6699 }
6700 }
6701 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
6702 if (dst->capacity < neededcapacity)
6703 run_container_grow(dst, neededcapacity, false);
6704 dst->n_runs = 0;
6705 int32_t rlepos = 0;
6706 int32_t xrlepos = 0;
6707
6708 rle16_t previousrle;
6709 if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
6710 previousrle = run_container_append_first(dst, src_1->runs[rlepos]);
6711 rlepos++;
6712 } else {
6713 previousrle = run_container_append_first(dst, src_2->runs[xrlepos]);
6714 xrlepos++;
6715 }
6716
6717 while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) {
6718 rle16_t newrl;
6719 if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) {
6720 newrl = src_1->runs[rlepos];
6721 rlepos++;
6722 } else {
6723 newrl = src_2->runs[xrlepos];
6724 xrlepos++;
6725 }
6726 run_container_append(dst, newrl, &previousrle);
6727 }
6728 while (xrlepos < src_2->n_runs) {
6729 run_container_append(dst, src_2->runs[xrlepos], &previousrle);
6730 xrlepos++;
6731 }
6732 while (rlepos < src_1->n_runs) {
6733 run_container_append(dst, src_1->runs[rlepos], &previousrle);
6734 rlepos++;
6735 }
6736}
6737
6738/* Compute the union of `src_1' and `src_2' and write the result to `src_1'
6739 */
6740void run_container_union_inplace(run_container_t *src_1,
6741 const run_container_t *src_2) {
6742 // TODO: this could be a lot more efficient
6743
6744 // we start out with inexpensive checks
6745 const bool if1 = run_container_is_full(src_1);
6746 const bool if2 = run_container_is_full(src_2);
6747 if (if1 || if2) {
6748 if (if1) {
6749 return;
6750 }
6751 if (if2) {
6752 run_container_copy(src_2, src_1);
6753 return;
6754 }
6755 }
6756 // we move the data to the end of the current array
6757 const int32_t maxoutput = src_1->n_runs + src_2->n_runs;
6758 const int32_t neededcapacity = maxoutput + src_1->n_runs;
6759 if (src_1->capacity < neededcapacity)
6760 run_container_grow(src_1, neededcapacity, true);
6761 memmove(src_1->runs + maxoutput, src_1->runs,
6762 src_1->n_runs * sizeof(rle16_t));
6763 rle16_t *inputsrc1 = src_1->runs + maxoutput;
6764 const int32_t input1nruns = src_1->n_runs;
6765 src_1->n_runs = 0;
6766 int32_t rlepos = 0;
6767 int32_t xrlepos = 0;
6768
6769 rle16_t previousrle;
6770 if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
6771 previousrle = run_container_append_first(src_1, inputsrc1[rlepos]);
6772 rlepos++;
6773 } else {
6774 previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]);
6775 xrlepos++;
6776 }
6777 while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) {
6778 rle16_t newrl;
6779 if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) {
6780 newrl = inputsrc1[rlepos];
6781 rlepos++;
6782 } else {
6783 newrl = src_2->runs[xrlepos];
6784 xrlepos++;
6785 }
6786 run_container_append(src_1, newrl, &previousrle);
6787 }
6788 while (xrlepos < src_2->n_runs) {
6789 run_container_append(src_1, src_2->runs[xrlepos], &previousrle);
6790 xrlepos++;
6791 }
6792 while (rlepos < input1nruns) {
6793 run_container_append(src_1, inputsrc1[rlepos], &previousrle);
6794 rlepos++;
6795 }
6796}
6797
6798/* Compute the symmetric difference of `src_1' and `src_2' and write the result
6799 * to `dst'
6800 * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */
6801void run_container_xor(const run_container_t *src_1,
6802 const run_container_t *src_2, run_container_t *dst) {
6803 // don't bother to convert xor with full range into negation
6804 // since negation is implemented similarly
6805
6806 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
6807 if (dst->capacity < neededcapacity)
6808 run_container_grow(dst, neededcapacity, false);
6809
6810 int32_t pos1 = 0;
6811 int32_t pos2 = 0;
6812 dst->n_runs = 0;
6813
6814 while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) {
6815 if (src_1->runs[pos1].value <= src_2->runs[pos2].value) {
6816 run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
6817 src_1->runs[pos1].length);
6818 pos1++;
6819 } else {
6820 run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
6821 src_2->runs[pos2].length);
6822 pos2++;
6823 }
6824 }
6825 while (pos1 < src_1->n_runs) {
6826 run_container_smart_append_exclusive(dst, src_1->runs[pos1].value,
6827 src_1->runs[pos1].length);
6828 pos1++;
6829 }
6830
6831 while (pos2 < src_2->n_runs) {
6832 run_container_smart_append_exclusive(dst, src_2->runs[pos2].value,
6833 src_2->runs[pos2].length);
6834 pos2++;
6835 }
6836}
6837
6838/* Compute the intersection of src_1 and src_2 and write the result to
6839 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
6840void run_container_intersection(const run_container_t *src_1,
6841 const run_container_t *src_2,
6842 run_container_t *dst) {
6843 const bool if1 = run_container_is_full(src_1);
6844 const bool if2 = run_container_is_full(src_2);
6845 if (if1 || if2) {
6846 if (if1) {
6847 run_container_copy(src_2, dst);
6848 return;
6849 }
6850 if (if2) {
6851 run_container_copy(src_1, dst);
6852 return;
6853 }
6854 }
6855 // TODO: this could be a lot more efficient, could use SIMD optimizations
6856 const int32_t neededcapacity = src_1->n_runs + src_2->n_runs;
6857 if (dst->capacity < neededcapacity)
6858 run_container_grow(dst, neededcapacity, false);
6859 dst->n_runs = 0;
6860 int32_t rlepos = 0;
6861 int32_t xrlepos = 0;
6862 int32_t start = src_1->runs[rlepos].value;
6863 int32_t end = start + src_1->runs[rlepos].length + 1;
6864 int32_t xstart = src_2->runs[xrlepos].value;
6865 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
6866 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
6867 if (end <= xstart) {
6868 ++rlepos;
6869 if (rlepos < src_1->n_runs) {
6870 start = src_1->runs[rlepos].value;
6871 end = start + src_1->runs[rlepos].length + 1;
6872 }
6873 } else if (xend <= start) {
6874 ++xrlepos;
6875 if (xrlepos < src_2->n_runs) {
6876 xstart = src_2->runs[xrlepos].value;
6877 xend = xstart + src_2->runs[xrlepos].length + 1;
6878 }
6879 } else { // they overlap
6880 const int32_t lateststart = start > xstart ? start : xstart;
6881 int32_t earliestend;
6882 if (end == xend) { // improbable
6883 earliestend = end;
6884 rlepos++;
6885 xrlepos++;
6886 if (rlepos < src_1->n_runs) {
6887 start = src_1->runs[rlepos].value;
6888 end = start + src_1->runs[rlepos].length + 1;
6889 }
6890 if (xrlepos < src_2->n_runs) {
6891 xstart = src_2->runs[xrlepos].value;
6892 xend = xstart + src_2->runs[xrlepos].length + 1;
6893 }
6894 } else if (end < xend) {
6895 earliestend = end;
6896 rlepos++;
6897 if (rlepos < src_1->n_runs) {
6898 start = src_1->runs[rlepos].value;
6899 end = start + src_1->runs[rlepos].length + 1;
6900 }
6901
6902 } else { // end > xend
6903 earliestend = xend;
6904 xrlepos++;
6905 if (xrlepos < src_2->n_runs) {
6906 xstart = src_2->runs[xrlepos].value;
6907 xend = xstart + src_2->runs[xrlepos].length + 1;
6908 }
6909 }
6910 dst->runs[dst->n_runs].value = (uint16_t)lateststart;
6911 dst->runs[dst->n_runs].length =
6912 (uint16_t)(earliestend - lateststart - 1);
6913 dst->n_runs++;
6914 }
6915 }
6916}
6917
6918/* Compute the size of the intersection of src_1 and src_2 . */
6919int run_container_intersection_cardinality(const run_container_t *src_1,
6920 const run_container_t *src_2) {
6921 const bool if1 = run_container_is_full(src_1);
6922 const bool if2 = run_container_is_full(src_2);
6923 if (if1 || if2) {
6924 if (if1) {
6925 return run_container_cardinality(src_2);
6926 }
6927 if (if2) {
6928 return run_container_cardinality(src_1);
6929 }
6930 }
6931 int answer = 0;
6932 int32_t rlepos = 0;
6933 int32_t xrlepos = 0;
6934 int32_t start = src_1->runs[rlepos].value;
6935 int32_t end = start + src_1->runs[rlepos].length + 1;
6936 int32_t xstart = src_2->runs[xrlepos].value;
6937 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
6938 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
6939 if (end <= xstart) {
6940 ++rlepos;
6941 if (rlepos < src_1->n_runs) {
6942 start = src_1->runs[rlepos].value;
6943 end = start + src_1->runs[rlepos].length + 1;
6944 }
6945 } else if (xend <= start) {
6946 ++xrlepos;
6947 if (xrlepos < src_2->n_runs) {
6948 xstart = src_2->runs[xrlepos].value;
6949 xend = xstart + src_2->runs[xrlepos].length + 1;
6950 }
6951 } else { // they overlap
6952 const int32_t lateststart = start > xstart ? start : xstart;
6953 int32_t earliestend;
6954 if (end == xend) { // improbable
6955 earliestend = end;
6956 rlepos++;
6957 xrlepos++;
6958 if (rlepos < src_1->n_runs) {
6959 start = src_1->runs[rlepos].value;
6960 end = start + src_1->runs[rlepos].length + 1;
6961 }
6962 if (xrlepos < src_2->n_runs) {
6963 xstart = src_2->runs[xrlepos].value;
6964 xend = xstart + src_2->runs[xrlepos].length + 1;
6965 }
6966 } else if (end < xend) {
6967 earliestend = end;
6968 rlepos++;
6969 if (rlepos < src_1->n_runs) {
6970 start = src_1->runs[rlepos].value;
6971 end = start + src_1->runs[rlepos].length + 1;
6972 }
6973
6974 } else { // end > xend
6975 earliestend = xend;
6976 xrlepos++;
6977 if (xrlepos < src_2->n_runs) {
6978 xstart = src_2->runs[xrlepos].value;
6979 xend = xstart + src_2->runs[xrlepos].length + 1;
6980 }
6981 }
6982 answer += earliestend - lateststart;
6983 }
6984 }
6985 return answer;
6986}
6987
6988bool run_container_intersect(const run_container_t *src_1,
6989 const run_container_t *src_2) {
6990 const bool if1 = run_container_is_full(src_1);
6991 const bool if2 = run_container_is_full(src_2);
6992 if (if1 || if2) {
6993 if (if1) {
6994 return !run_container_empty(src_2);
6995 }
6996 if (if2) {
6997 return !run_container_empty(src_1);
6998 }
6999 }
7000 int32_t rlepos = 0;
7001 int32_t xrlepos = 0;
7002 int32_t start = src_1->runs[rlepos].value;
7003 int32_t end = start + src_1->runs[rlepos].length + 1;
7004 int32_t xstart = src_2->runs[xrlepos].value;
7005 int32_t xend = xstart + src_2->runs[xrlepos].length + 1;
7006 while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) {
7007 if (end <= xstart) {
7008 ++rlepos;
7009 if (rlepos < src_1->n_runs) {
7010 start = src_1->runs[rlepos].value;
7011 end = start + src_1->runs[rlepos].length + 1;
7012 }
7013 } else if (xend <= start) {
7014 ++xrlepos;
7015 if (xrlepos < src_2->n_runs) {
7016 xstart = src_2->runs[xrlepos].value;
7017 xend = xstart + src_2->runs[xrlepos].length + 1;
7018 }
7019 } else { // they overlap
7020 return true;
7021 }
7022 }
7023 return false;
7024}
7025
7026
7027/* Compute the difference of src_1 and src_2 and write the result to
7028 * dst. It is assumed that dst is distinct from both src_1 and src_2. */
7029void run_container_andnot(const run_container_t *src_1,
7030 const run_container_t *src_2, run_container_t *dst) {
7031 // following Java implementation as of June 2016
7032
7033 if (dst->capacity < src_1->n_runs + src_2->n_runs)
7034 run_container_grow(dst, src_1->n_runs + src_2->n_runs, false);
7035
7036 dst->n_runs = 0;
7037
7038 int rlepos1 = 0;
7039 int rlepos2 = 0;
7040 int32_t start = src_1->runs[rlepos1].value;
7041 int32_t end = start + src_1->runs[rlepos1].length + 1;
7042 int32_t start2 = src_2->runs[rlepos2].value;
7043 int32_t end2 = start2 + src_2->runs[rlepos2].length + 1;
7044
7045 while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) {
7046 if (end <= start2) {
7047 // output the first run
7048 dst->runs[dst->n_runs++] =
7049 (rle16_t){.value = (uint16_t)start,
7050 .length = (uint16_t)(end - start - 1)};
7051 rlepos1++;
7052 if (rlepos1 < src_1->n_runs) {
7053 start = src_1->runs[rlepos1].value;
7054 end = start + src_1->runs[rlepos1].length + 1;
7055 }
7056 } else if (end2 <= start) {
7057 // exit the second run
7058 rlepos2++;
7059 if (rlepos2 < src_2->n_runs) {
7060 start2 = src_2->runs[rlepos2].value;
7061 end2 = start2 + src_2->runs[rlepos2].length + 1;
7062 }
7063 } else {
7064 if (start < start2) {
7065 dst->runs[dst->n_runs++] =
7066 (rle16_t){.value = (uint16_t)start,
7067 .length = (uint16_t)(start2 - start - 1)};
7068 }
7069 if (end2 < end) {
7070 start = end2;
7071 } else {
7072 rlepos1++;
7073 if (rlepos1 < src_1->n_runs) {
7074 start = src_1->runs[rlepos1].value;
7075 end = start + src_1->runs[rlepos1].length + 1;
7076 }
7077 }
7078 }
7079 }
7080 if (rlepos1 < src_1->n_runs) {
7081 dst->runs[dst->n_runs++] = (rle16_t){
7082 .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)};
7083 rlepos1++;
7084 if (rlepos1 < src_1->n_runs) {
7085 memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1,
7086 sizeof(rle16_t) * (src_1->n_runs - rlepos1));
7087 dst->n_runs += src_1->n_runs - rlepos1;
7088 }
7089 }
7090}
7091
7092int run_container_to_uint32_array(void *vout, const run_container_t *cont,
7093 uint32_t base) {
7094 int outpos = 0;
7095 uint32_t *out = (uint32_t *)vout;
7096 for (int i = 0; i < cont->n_runs; ++i) {
7097 uint32_t run_start = base + cont->runs[i].value;
7098 uint16_t le = cont->runs[i].length;
7099 for (int j = 0; j <= le; ++j) {
7100 uint32_t val = run_start + j;
7101 memcpy(out + outpos, &val,
7102 sizeof(uint32_t)); // should be compiled as a MOV on x64
7103 outpos++;
7104 }
7105 }
7106 return outpos;
7107}
7108
7109/*
7110 * Print this container using printf (useful for debugging).
7111 */
7112void run_container_printf(const run_container_t *cont) {
7113 for (int i = 0; i < cont->n_runs; ++i) {
7114 uint16_t run_start = cont->runs[i].value;
7115 uint16_t le = cont->runs[i].length;
7116 printf("[%d,%d]", run_start, run_start + le);
7117 }
7118}
7119
7120/*
7121 * Print this container using printf as a comma-separated list of 32-bit
7122 * integers starting at base.
7123 */
7124void run_container_printf_as_uint32_array(const run_container_t *cont,
7125 uint32_t base) {
7126 if (cont->n_runs == 0) return;
7127 {
7128 uint32_t run_start = base + cont->runs[0].value;
7129 uint16_t le = cont->runs[0].length;
7130 printf("%u", run_start);
7131 for (uint32_t j = 1; j <= le; ++j) printf(",%u", run_start + j);
7132 }
7133 for (int32_t i = 1; i < cont->n_runs; ++i) {
7134 uint32_t run_start = base + cont->runs[i].value;
7135 uint16_t le = cont->runs[i].length;
7136 for (uint32_t j = 0; j <= le; ++j) printf(",%u", run_start + j);
7137 }
7138}
7139
7140int32_t run_container_serialize(const run_container_t *container, char *buf) {
7141 int32_t l, off;
7142
7143 memcpy(buf, &container->n_runs, off = sizeof(container->n_runs));
7144 memcpy(&buf[off], &container->capacity, sizeof(container->capacity));
7145 off += sizeof(container->capacity);
7146
7147 l = sizeof(rle16_t) * container->n_runs;
7148 memcpy(&buf[off], container->runs, l);
7149 return (off + l);
7150}
7151
7152int32_t run_container_write(const run_container_t *container, char *buf) {
7153 memcpy(buf, &container->n_runs, sizeof(uint16_t));
7154 memcpy(buf + sizeof(uint16_t), container->runs,
7155 container->n_runs * sizeof(rle16_t));
7156 return run_container_size_in_bytes(container);
7157}
7158
7159int32_t run_container_read(int32_t cardinality, run_container_t *container,
7160 const char *buf) {
7161 (void)cardinality;
7162 memcpy(&container->n_runs, buf, sizeof(uint16_t));
7163 if (container->n_runs > container->capacity)
7164 run_container_grow(container, container->n_runs, false);
7165 if(container->n_runs > 0) {
7166 memcpy(container->runs, buf + sizeof(uint16_t),
7167 container->n_runs * sizeof(rle16_t));
7168 }
7169 return run_container_size_in_bytes(container);
7170}
7171
7172uint32_t run_container_serialization_len(const run_container_t *container) {
7173 return (sizeof(container->n_runs) + sizeof(container->capacity) +
7174 sizeof(rle16_t) * container->n_runs);
7175}
7176
7177void *run_container_deserialize(const char *buf, size_t buf_len) {
7178 run_container_t *ptr;
7179
7180 if (buf_len < 8 /* n_runs + capacity */)
7181 return (NULL);
7182 else
7183 buf_len -= 8;
7184
7185 if ((ptr = (run_container_t *)malloc(sizeof(run_container_t))) != NULL) {
7186 size_t len;
7187 int32_t off;
7188
7189 memcpy(&ptr->n_runs, buf, off = 4);
7190 memcpy(&ptr->capacity, &buf[off], 4);
7191 off += 4;
7192
7193 len = sizeof(rle16_t) * ptr->n_runs;
7194
7195 if (len != buf_len) {
7196 free(ptr);
7197 return (NULL);
7198 }
7199
7200 if ((ptr->runs = (rle16_t *)malloc(len)) == NULL) {
7201 free(ptr);
7202 return (NULL);
7203 }
7204
7205 memcpy(ptr->runs, &buf[off], len);
7206
7207 /* Check if returned values are monotonically increasing */
7208 for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) {
7209 if (ptr->runs[i].value < j) {
7210 free(ptr->runs);
7211 free(ptr);
7212 return (NULL);
7213 } else
7214 j = ptr->runs[i].value;
7215 }
7216 }
7217
7218 return (ptr);
7219}
7220
7221bool run_container_iterate(const run_container_t *cont, uint32_t base,
7222 roaring_iterator iterator, void *ptr) {
7223 for (int i = 0; i < cont->n_runs; ++i) {
7224 uint32_t run_start = base + cont->runs[i].value;
7225 uint16_t le = cont->runs[i].length;
7226
7227 for (int j = 0; j <= le; ++j)
7228 if (!iterator(run_start + j, ptr)) return false;
7229 }
7230 return true;
7231}
7232
7233bool run_container_iterate64(const run_container_t *cont, uint32_t base,
7234 roaring_iterator64 iterator, uint64_t high_bits,
7235 void *ptr) {
7236 for (int i = 0; i < cont->n_runs; ++i) {
7237 uint32_t run_start = base + cont->runs[i].value;
7238 uint16_t le = cont->runs[i].length;
7239
7240 for (int j = 0; j <= le; ++j)
7241 if (!iterator(high_bits | (uint64_t)(run_start + j), ptr))
7242 return false;
7243 }
7244 return true;
7245}
7246
7247bool run_container_equals(const run_container_t *container1,
7248 const run_container_t *container2) {
7249 if (container1->n_runs != container2->n_runs) {
7250 return false;
7251 }
7252 for (int32_t i = 0; i < container1->n_runs; ++i) {
7253 if ((container1->runs[i].value != container2->runs[i].value) ||
7254 (container1->runs[i].length != container2->runs[i].length))
7255 return false;
7256 }
7257 return true;
7258}
7259
7260bool run_container_is_subset(const run_container_t *container1,
7261 const run_container_t *container2) {
7262 int i1 = 0, i2 = 0;
7263 while (i1 < container1->n_runs && i2 < container2->n_runs) {
7264 int start1 = container1->runs[i1].value;
7265 int stop1 = start1 + container1->runs[i1].length;
7266 int start2 = container2->runs[i2].value;
7267 int stop2 = start2 + container2->runs[i2].length;
7268 if (start1 < start2) {
7269 return false;
7270 } else { // start1 >= start2
7271 if (stop1 < stop2) {
7272 i1++;
7273 } else if (stop1 == stop2) {
7274 i1++;
7275 i2++;
7276 } else { // stop1 > stop2
7277 i2++;
7278 }
7279 }
7280 }
7281 if (i1 == container1->n_runs) {
7282 return true;
7283 } else {
7284 return false;
7285 }
7286}
7287
7288// TODO: write smart_append_exclusive version to match the overloaded 1 param
7289// Java version (or is it even used?)
7290
7291// follows the Java implementation closely
7292// length is the rle-value. Ie, run [10,12) uses a length value 1.
7293void run_container_smart_append_exclusive(run_container_t *src,
7294 const uint16_t start,
7295 const uint16_t length) {
7296 int old_end;
7297 rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL;
7298 rle16_t *appended_last_run = src->runs + src->n_runs;
7299
7300 if (!src->n_runs ||
7301 (start > (old_end = last_run->value + last_run->length + 1))) {
7302 *appended_last_run = (rle16_t){.value = start, .length = length};
7303 src->n_runs++;
7304 return;
7305 }
7306 if (old_end == start) {
7307 // we merge
7308 last_run->length += (length + 1);
7309 return;
7310 }
7311 int new_end = start + length + 1;
7312
7313 if (start == last_run->value) {
7314 // wipe out previous
7315 if (new_end < old_end) {
7316 *last_run = (rle16_t){.value = (uint16_t)new_end,
7317 .length = (uint16_t)(old_end - new_end - 1)};
7318 return;
7319 } else if (new_end > old_end) {
7320 *last_run = (rle16_t){.value = (uint16_t)old_end,
7321 .length = (uint16_t)(new_end - old_end - 1)};
7322 return;
7323 } else {
7324 src->n_runs--;
7325 return;
7326 }
7327 }
7328 last_run->length = start - last_run->value - 1;
7329 if (new_end < old_end) {
7330 *appended_last_run =
7331 (rle16_t){.value = (uint16_t)new_end,
7332 .length = (uint16_t)(old_end - new_end - 1)};
7333 src->n_runs++;
7334 } else if (new_end > old_end) {
7335 *appended_last_run =
7336 (rle16_t){.value = (uint16_t)old_end,
7337 .length = (uint16_t)(new_end - old_end - 1)};
7338 src->n_runs++;
7339 }
7340}
7341
7342bool run_container_select(const run_container_t *container,
7343 uint32_t *start_rank, uint32_t rank,
7344 uint32_t *element) {
7345 for (int i = 0; i < container->n_runs; i++) {
7346 uint16_t length = container->runs[i].length;
7347 if (rank <= *start_rank + length) {
7348 uint16_t value = container->runs[i].value;
7349 *element = value + rank - (*start_rank);
7350 return true;
7351 } else
7352 *start_rank += length + 1;
7353 }
7354 return false;
7355}
7356
7357int run_container_rank(const run_container_t *container, uint16_t x) {
7358 int sum = 0;
7359 uint32_t x32 = x;
7360 for (int i = 0; i < container->n_runs; i++) {
7361 uint32_t startpoint = container->runs[i].value;
7362 uint32_t length = container->runs[i].length;
7363 uint32_t endpoint = length + startpoint;
7364 if (x <= endpoint) {
7365 if (x < startpoint) break;
7366 return sum + (x32 - startpoint) + 1;
7367 } else {
7368 sum += length + 1;
7369 }
7370 }
7371 return sum;
7372}
7373/* end file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */
7374/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */
7375#include <assert.h>
7376#include <stdarg.h>
7377#include <stdint.h>
7378#include <stdio.h>
7379#include <string.h>
7380#include <inttypes.h>
7381
/* Declaration only; the body is not in this part of the file.
 * NOTE(review): presumably defined `inline` in the roaring header and
 * re-declared here so this translation unit emits the external definition
 * (C99 inline semantics) -- confirm against the header. */
extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r,
                                           uint32_t val);
7384
7385// this is like roaring_bitmap_add, but it populates pointer arguments in such a
7386// way
7387// that we can recover the container touched, which, in turn can be used to
7388// accelerate some functions (when you repeatedly need to add to the same
7389// container)
7390void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r,
7391 uint32_t val,
7392 uint8_t *typecode,
7393 int *index) {
7394 uint16_t hb = val >> 16;
7395 const int i = ra_get_index(&r->high_low_container, hb);
7396 if (i >= 0) {
7397 ra_unshare_container_at_index(&r->high_low_container, i);
7398 void *container =
7399 ra_get_container_at_index(&r->high_low_container, i, typecode);
7400 uint8_t newtypecode = *typecode;
7401 void *container2 =
7402 container_add(container, val & 0xFFFF, *typecode, &newtypecode);
7403 *index = i;
7404 if (container2 != container) {
7405 container_free(container, *typecode);
7406 ra_set_container_at_index(&r->high_low_container, i, container2,
7407 newtypecode);
7408 *typecode = newtypecode;
7409 return container2;
7410 } else {
7411 return container;
7412 }
7413 } else {
7414 array_container_t *newac = array_container_create();
7415 void *container = container_add(newac, val & 0xFFFF,
7416 ARRAY_CONTAINER_TYPE_CODE, typecode);
7417 // we could just assume that it stays an array container
7418 ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
7419 container, *typecode);
7420 *index = -i - 1;
7421 return container;
7422 }
7423}
7424
7425roaring_bitmap_t *roaring_bitmap_create() {
7426 roaring_bitmap_t *ans =
7427 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
7428 if (!ans) {
7429 return NULL;
7430 }
7431 bool is_ok = ra_init(&ans->high_low_container);
7432 if (!is_ok) {
7433 free(ans);
7434 return NULL;
7435 }
7436 ans->copy_on_write = false;
7437 return ans;
7438}
7439
7440roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
7441 roaring_bitmap_t *ans =
7442 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
7443 if (!ans) {
7444 return NULL;
7445 }
7446 bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap);
7447 if (!is_ok) {
7448 free(ans);
7449 return NULL;
7450 }
7451 ans->copy_on_write = false;
7452 return ans;
7453}
7454
7455void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
7456 const uint32_t *vals) {
7457 void *container = NULL; // hold value of last container touched
7458 uint8_t typecode = 0; // typecode of last container touched
7459 uint32_t prev = 0; // previous valued inserted
7460 size_t i = 0; // index of value
7461 int containerindex = 0;
7462 if (n_args == 0) return;
7463 uint32_t val;
7464 memcpy(&val, vals + i, sizeof(val));
7465 container =
7466 containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex);
7467 prev = val;
7468 i++;
7469 for (; i < n_args; i++) {
7470 memcpy(&val, vals + i, sizeof(val));
7471 if (((prev ^ val) >> 16) ==
7472 0) { // no need to seek the container, it is at hand
7473 // because we already have the container at hand, we can do the
7474 // insertion
7475 // automatically, bypassing the roaring_bitmap_add call
7476 uint8_t newtypecode = typecode;
7477 void *container2 =
7478 container_add(container, val & 0xFFFF, typecode, &newtypecode);
7479 if (container2 != container) { // rare instance when we need to
7480 // change the container type
7481 container_free(container, typecode);
7482 ra_set_container_at_index(&r->high_low_container,
7483 containerindex, container2,
7484 newtypecode);
7485 typecode = newtypecode;
7486 container = container2;
7487 }
7488 } else {
7489 container = containerptr_roaring_bitmap_add(r, val, &typecode,
7490 &containerindex);
7491 }
7492 prev = val;
7493 }
7494}
7495
7496roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
7497 roaring_bitmap_t *answer = roaring_bitmap_create();
7498 roaring_bitmap_add_many(answer, n_args, vals);
7499 return answer;
7500}
7501
7502roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
7503 // todo: could be greatly optimized but we do not expect this call to ever
7504 // include long lists
7505 roaring_bitmap_t *answer = roaring_bitmap_create();
7506 va_list ap;
7507 va_start(ap, n_args);
7508 for (size_t i = 1; i <= n_args; i++) {
7509 uint32_t val = va_arg(ap, uint32_t);
7510 roaring_bitmap_add(answer, val);
7511 }
7512 va_end(ap);
7513 return answer;
7514}
7515
/* Return the smaller of two 32-bit unsigned values. */
static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
    if (b <= a) {
        return b;
    }
    return a;
}
7519
/* Return the smaller of two 64-bit unsigned values. */
static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
    if (b <= a) {
        return b;
    }
    return a;
}
7523
7524roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
7525 uint32_t step) {
7526 if(max >= UINT64_C(0x100000000)) {
7527 max = UINT64_C(0x100000000);
7528 }
7529 if (step == 0) return NULL;
7530 if (max <= min) return NULL;
7531 roaring_bitmap_t *answer = roaring_bitmap_create();
7532 if (step >= (1 << 16)) {
7533 for (uint32_t value = (uint32_t)min; value < max; value += step) {
7534 roaring_bitmap_add(answer, value);
7535 }
7536 return answer;
7537 }
7538 uint64_t min_tmp = min;
7539 do {
7540 uint32_t key = (uint32_t)min_tmp >> 16;
7541 uint32_t container_min = min_tmp & 0xFFFF;
7542 uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
7543 uint8_t type;
7544 void *container = container_from_range(&type, container_min,
7545 container_max, (uint16_t)step);
7546 ra_append(&answer->high_low_container, key, container, type);
7547 uint32_t gap = container_max - container_min + step - 1;
7548 min_tmp += gap - (gap % step);
7549 } while (min_tmp < max);
7550 // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
7551 return answer;
7552}
7553
7554void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
7555 if (min > max) {
7556 return;
7557 }
7558
7559 uint32_t min_key = min >> 16;
7560 uint32_t max_key = max >> 16;
7561
7562 int32_t num_required_containers = max_key - min_key + 1;
7563 int32_t suffix_length = count_greater(ra->high_low_container.keys,
7564 ra->high_low_container.size,
7565 max_key);
7566 int32_t prefix_length = count_less(ra->high_low_container.keys,
7567 ra->high_low_container.size - suffix_length,
7568 min_key);
7569 int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length;
7570
7571 if (num_required_containers > common_length) {
7572 ra_shift_tail(&ra->high_low_container, suffix_length,
7573 num_required_containers - common_length);
7574 }
7575
7576 int32_t src = prefix_length + common_length - 1;
7577 int32_t dst = ra->high_low_container.size - suffix_length - 1;
7578 for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0
7579 uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
7580 uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
7581 void* new_container;
7582 uint8_t new_type;
7583
7584 if (src >= 0 && ra->high_low_container.keys[src] == key) {
7585 ra_unshare_container_at_index(&ra->high_low_container, src);
7586 new_container = container_add_range(ra->high_low_container.containers[src],
7587 ra->high_low_container.typecodes[src],
7588 container_min, container_max, &new_type);
7589 if (new_container != ra->high_low_container.containers[src]) {
7590 container_free(ra->high_low_container.containers[src],
7591 ra->high_low_container.typecodes[src]);
7592 }
7593 src--;
7594 } else {
7595 new_container = container_from_range(&new_type, container_min,
7596 container_max+1, 1);
7597 }
7598 ra_replace_key_and_container_at_index(&ra->high_low_container, dst,
7599 key, new_container, new_type);
7600 dst--;
7601 }
7602}
7603
7604void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
7605 if (min > max) {
7606 return;
7607 }
7608
7609 uint32_t min_key = min >> 16;
7610 uint32_t max_key = max >> 16;
7611
7612 int32_t src = count_less(ra->high_low_container.keys, ra->high_low_container.size, min_key);
7613 int32_t dst = src;
7614 while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) {
7615 uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0;
7616 uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? (max & 0xffff) : 0xffff;
7617 ra_unshare_container_at_index(&ra->high_low_container, src);
7618 void *new_container;
7619 uint8_t new_type;
7620 new_container = container_remove_range(ra->high_low_container.containers[src],
7621 ra->high_low_container.typecodes[src],
7622 container_min, container_max,
7623 &new_type);
7624 if (new_container != ra->high_low_container.containers[src]) {
7625 container_free(ra->high_low_container.containers[src],
7626 ra->high_low_container.typecodes[src]);
7627 }
7628 if (new_container) {
7629 ra_replace_key_and_container_at_index(&ra->high_low_container, dst,
7630 ra->high_low_container.keys[src],
7631 new_container, new_type);
7632 dst++;
7633 }
7634 src++;
7635 }
7636 if (src > dst) {
7637 ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src);
7638 }
7639}
7640
7641void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max);
7642void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max);
7643
7644void roaring_bitmap_printf(const roaring_bitmap_t *ra) {
7645 printf("{");
7646 for (int i = 0; i < ra->high_low_container.size; ++i) {
7647 container_printf_as_uint32_array(
7648 ra->high_low_container.containers[i],
7649 ra->high_low_container.typecodes[i],
7650 ((uint32_t)ra->high_low_container.keys[i]) << 16);
7651 if (i + 1 < ra->high_low_container.size) printf(",");
7652 }
7653 printf("}");
7654}
7655
7656void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) {
7657 printf("{");
7658 for (int i = 0; i < ra->high_low_container.size; ++i) {
7659 printf("%d: %s (%d)", ra->high_low_container.keys[i],
7660 get_full_container_name(ra->high_low_container.containers[i],
7661 ra->high_low_container.typecodes[i]),
7662 container_get_cardinality(ra->high_low_container.containers[i],
7663 ra->high_low_container.typecodes[i]));
7664 if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) {
7665 printf(
7666 "(shared count = %" PRIu32 " )",
7667 ((shared_container_t *)(ra->high_low_container.containers[i]))
7668 ->counter);
7669 }
7670
7671 if (i + 1 < ra->high_low_container.size) printf(", ");
7672 }
7673 printf("}");
7674}
7675
/* Running minimum, maximum and sum of the values visited by an iteration. */
typedef struct min_max_sum_s {
    uint32_t min;  // smallest value seen so far
    uint32_t max;  // largest value seen so far
    uint64_t sum;  // sum of all values seen so far
} min_max_sum_t;

/**
 * Iteration callback: fold `value` into the min_max_sum_t pointed to by
 * `param`. Always returns true so that every value gets visited.
 */
static bool min_max_sum_fnc(uint32_t value, void *param) {
    min_max_sum_t *acc = (min_max_sum_t *)param;

    acc->sum += value;
    acc->min = (value < acc->min) ? value : acc->min;
    acc->max = (value > acc->max) ? value : acc->max;

    return true;
}
7689
7690/**
7691* (For advanced users.)
7692* Collect statistics about the bitmap
7693*/
7694void roaring_bitmap_statistics(const roaring_bitmap_t *ra,
7695 roaring_statistics_t *stat) {
7696 memset(stat, 0, sizeof(*stat));
7697 stat->n_containers = ra->high_low_container.size;
7698 stat->cardinality = roaring_bitmap_get_cardinality(ra);
7699 min_max_sum_t mms;
7700 mms.min = UINT32_C(0xFFFFFFFF);
7701 mms.max = UINT32_C(0);
7702 mms.sum = 0;
7703 roaring_iterate(ra, &min_max_sum_fnc, &mms);
7704 stat->min_value = mms.min;
7705 stat->max_value = mms.max;
7706 stat->sum_value = mms.sum;
7707
7708 for (int i = 0; i < ra->high_low_container.size; ++i) {
7709 uint8_t truetype =
7710 get_container_type(ra->high_low_container.containers[i],
7711 ra->high_low_container.typecodes[i]);
7712 uint32_t card =
7713 container_get_cardinality(ra->high_low_container.containers[i],
7714 ra->high_low_container.typecodes[i]);
7715 uint32_t sbytes =
7716 container_size_in_bytes(ra->high_low_container.containers[i],
7717 ra->high_low_container.typecodes[i]);
7718 switch (truetype) {
7719 case BITSET_CONTAINER_TYPE_CODE:
7720 stat->n_bitset_containers++;
7721 stat->n_values_bitset_containers += card;
7722 stat->n_bytes_bitset_containers += sbytes;
7723 break;
7724 case ARRAY_CONTAINER_TYPE_CODE:
7725 stat->n_array_containers++;
7726 stat->n_values_array_containers += card;
7727 stat->n_bytes_array_containers += sbytes;
7728 break;
7729 case RUN_CONTAINER_TYPE_CODE:
7730 stat->n_run_containers++;
7731 stat->n_values_run_containers += card;
7732 stat->n_bytes_run_containers += sbytes;
7733 break;
7734 default:
7735 assert(false);
7736 __builtin_unreachable();
7737 }
7738 }
7739}
7740
7741roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
7742 roaring_bitmap_t *ans =
7743 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
7744 if (!ans) {
7745 return NULL;
7746 }
7747 bool is_ok = ra_copy(&r->high_low_container, &ans->high_low_container,
7748 r->copy_on_write);
7749 if (!is_ok) {
7750 free(ans);
7751 return NULL;
7752 }
7753 ans->copy_on_write = r->copy_on_write;
7754 return ans;
7755}
7756
7757bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
7758 const roaring_bitmap_t *src) {
7759 return ra_overwrite(&src->high_low_container, &dest->high_low_container,
7760 src->copy_on_write);
7761}
7762
7763void roaring_bitmap_free(roaring_bitmap_t *r) {
7764 ra_clear(&r->high_low_container);
7765 free(r);
7766}
7767
7768void roaring_bitmap_clear(roaring_bitmap_t *r) {
7769 ra_reset(&r->high_low_container);
7770}
7771
7772void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
7773 const uint16_t hb = val >> 16;
7774 const int i = ra_get_index(&r->high_low_container, hb);
7775 uint8_t typecode;
7776 if (i >= 0) {
7777 ra_unshare_container_at_index(&r->high_low_container, i);
7778 void *container =
7779 ra_get_container_at_index(&r->high_low_container, i, &typecode);
7780 uint8_t newtypecode = typecode;
7781 void *container2 =
7782 container_add(container, val & 0xFFFF, typecode, &newtypecode);
7783 if (container2 != container) {
7784 container_free(container, typecode);
7785 ra_set_container_at_index(&r->high_low_container, i, container2,
7786 newtypecode);
7787 }
7788 } else {
7789 array_container_t *newac = array_container_create();
7790 void *container = container_add(newac, val & 0xFFFF,
7791 ARRAY_CONTAINER_TYPE_CODE, &typecode);
7792 // we could just assume that it stays an array container
7793 ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
7794 container, typecode);
7795 }
7796}
7797
7798bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
7799 const uint16_t hb = val >> 16;
7800 const int i = ra_get_index(&r->high_low_container, hb);
7801 uint8_t typecode;
7802 bool result = false;
7803 if (i >= 0) {
7804 ra_unshare_container_at_index(&r->high_low_container, i);
7805 void *container =
7806 ra_get_container_at_index(&r->high_low_container, i, &typecode);
7807
7808 const int oldCardinality =
7809 container_get_cardinality(container, typecode);
7810
7811 uint8_t newtypecode = typecode;
7812 void *container2 =
7813 container_add(container, val & 0xFFFF, typecode, &newtypecode);
7814 if (container2 != container) {
7815 container_free(container, typecode);
7816 ra_set_container_at_index(&r->high_low_container, i, container2,
7817 newtypecode);
7818 result = true;
7819 } else {
7820 const int newCardinality =
7821 container_get_cardinality(container, newtypecode);
7822
7823 result = oldCardinality != newCardinality;
7824 }
7825 } else {
7826 array_container_t *newac = array_container_create();
7827 void *container = container_add(newac, val & 0xFFFF,
7828 ARRAY_CONTAINER_TYPE_CODE, &typecode);
7829 // we could just assume that it stays an array container
7830 ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
7831 container, typecode);
7832 result = true;
7833 }
7834
7835 return result;
7836}
7837
7838void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
7839 const uint16_t hb = val >> 16;
7840 const int i = ra_get_index(&r->high_low_container, hb);
7841 uint8_t typecode;
7842 if (i >= 0) {
7843 ra_unshare_container_at_index(&r->high_low_container, i);
7844 void *container =
7845 ra_get_container_at_index(&r->high_low_container, i, &typecode);
7846 uint8_t newtypecode = typecode;
7847 void *container2 =
7848 container_remove(container, val & 0xFFFF, typecode, &newtypecode);
7849 if (container2 != container) {
7850 container_free(container, typecode);
7851 ra_set_container_at_index(&r->high_low_container, i, container2,
7852 newtypecode);
7853 }
7854 if (container_get_cardinality(container2, newtypecode) != 0) {
7855 ra_set_container_at_index(&r->high_low_container, i, container2,
7856 newtypecode);
7857 } else {
7858 ra_remove_at_index_and_free(&r->high_low_container, i);
7859 }
7860 }
7861}
7862
7863bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
7864 const uint16_t hb = val >> 16;
7865 const int i = ra_get_index(&r->high_low_container, hb);
7866 uint8_t typecode;
7867 bool result = false;
7868 if (i >= 0) {
7869 ra_unshare_container_at_index(&r->high_low_container, i);
7870 void *container =
7871 ra_get_container_at_index(&r->high_low_container, i, &typecode);
7872
7873 const int oldCardinality =
7874 container_get_cardinality(container, typecode);
7875
7876 uint8_t newtypecode = typecode;
7877 void *container2 =
7878 container_remove(container, val & 0xFFFF, typecode, &newtypecode);
7879 if (container2 != container) {
7880 container_free(container, typecode);
7881 ra_set_container_at_index(&r->high_low_container, i, container2,
7882 newtypecode);
7883 }
7884
7885 const int newCardinality =
7886 container_get_cardinality(container2, newtypecode);
7887
7888 if (newCardinality != 0) {
7889 ra_set_container_at_index(&r->high_low_container, i, container2,
7890 newtypecode);
7891 } else {
7892 ra_remove_at_index_and_free(&r->high_low_container, i);
7893 }
7894
7895 result = oldCardinality != newCardinality;
7896 }
7897 return result;
7898}
7899
7900void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
7901 const uint32_t *vals) {
7902 if (n_args == 0 || r->high_low_container.size == 0) {
7903 return;
7904 }
7905 int32_t pos = -1; // position of the container used in the previous iteration
7906 for (size_t i = 0; i < n_args; i++) {
7907 uint16_t key = (uint16_t)(vals[i] >> 16);
7908 if (pos < 0 || key != r->high_low_container.keys[pos]) {
7909 pos = ra_get_index(&r->high_low_container, key);
7910 }
7911 if (pos >= 0) {
7912 uint8_t new_typecode;
7913 void *new_container;
7914 new_container = container_remove(r->high_low_container.containers[pos],
7915 vals[i] & 0xffff,
7916 r->high_low_container.typecodes[pos],
7917 &new_typecode);
7918 if (new_container != r->high_low_container.containers[pos]) {
7919 container_free(r->high_low_container.containers[pos],
7920 r->high_low_container.typecodes[pos]);
7921 ra_replace_key_and_container_at_index(&r->high_low_container,
7922 pos, key, new_container,
7923 new_typecode);
7924 }
7925 if (!container_nonzero_cardinality(new_container, new_typecode)) {
7926 container_free(new_container, new_typecode);
7927 ra_remove_at_index(&r->high_low_container, pos);
7928 pos = -1;
7929 }
7930 }
7931 }
7932}
7933
7934// there should be some SIMD optimizations possible here
7935roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
7936 const roaring_bitmap_t *x2) {
7937 uint8_t container_result_type = 0;
7938 const int length1 = x1->high_low_container.size,
7939 length2 = x2->high_low_container.size;
7940 uint32_t neededcap = length1 > length2 ? length2 : length1;
7941 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
7942 answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
7943
7944 int pos1 = 0, pos2 = 0;
7945
7946 while (pos1 < length1 && pos2 < length2) {
7947 const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
7948 const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
7949
7950 if (s1 == s2) {
7951 uint8_t container_type_1, container_type_2;
7952 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
7953 &container_type_1);
7954 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
7955 &container_type_2);
7956 void *c = container_and(c1, container_type_1, c2, container_type_2,
7957 &container_result_type);
7958 if (container_nonzero_cardinality(c, container_result_type)) {
7959 ra_append(&answer->high_low_container, s1, c,
7960 container_result_type);
7961 } else {
7962 container_free(
7963 c, container_result_type); // otherwise:memory leak!
7964 }
7965 ++pos1;
7966 ++pos2;
7967 } else if (s1 < s2) { // s1 < s2
7968 pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
7969 } else { // s1 > s2
7970 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
7971 }
7972 }
7973 return answer;
7974}
7975
7976/**
7977 * Compute the union of 'number' bitmaps.
7978 */
7979roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
7980 const roaring_bitmap_t **x) {
7981 if (number == 0) {
7982 return roaring_bitmap_create();
7983 }
7984 if (number == 1) {
7985 return roaring_bitmap_copy(x[0]);
7986 }
7987 roaring_bitmap_t *answer =
7988 roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
7989 for (size_t i = 2; i < number; i++) {
7990 roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
7991 }
7992 roaring_bitmap_repair_after_lazy(answer);
7993 return answer;
7994}
7995
7996/**
7997 * Compute the xor of 'number' bitmaps.
7998 */
7999roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
8000 const roaring_bitmap_t **x) {
8001 if (number == 0) {
8002 return roaring_bitmap_create();
8003 }
8004 if (number == 1) {
8005 return roaring_bitmap_copy(x[0]);
8006 }
8007 roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
8008 for (size_t i = 2; i < number; i++) {
8009 roaring_bitmap_lazy_xor_inplace(answer, x[i]);
8010 }
8011 roaring_bitmap_repair_after_lazy(answer);
8012 return answer;
8013}
8014
8015// inplace and (modifies its first argument).
8016void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
8017 const roaring_bitmap_t *x2) {
8018 if (x1 == x2) return;
8019 int pos1 = 0, pos2 = 0, intersection_size = 0;
8020 const int length1 = ra_get_size(&x1->high_low_container);
8021 const int length2 = ra_get_size(&x2->high_low_container);
8022
8023 // any skipped-over or newly emptied containers in x1
8024 // have to be freed.
8025 while (pos1 < length1 && pos2 < length2) {
8026 const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8027 const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8028
8029 if (s1 == s2) {
8030 uint8_t typecode1, typecode2, typecode_result;
8031 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8032 &typecode1);
8033 c1 = get_writable_copy_if_shared(c1, &typecode1);
8034 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8035 &typecode2);
8036 void *c =
8037 container_iand(c1, typecode1, c2, typecode2, &typecode_result);
8038 if (c != c1) { // in this instance a new container was created, and
8039 // we need to free the old one
8040 container_free(c1, typecode1);
8041 }
8042 if (container_nonzero_cardinality(c, typecode_result)) {
8043 ra_replace_key_and_container_at_index(&x1->high_low_container,
8044 intersection_size, s1, c,
8045 typecode_result);
8046 intersection_size++;
8047 } else {
8048 container_free(c, typecode_result);
8049 }
8050 ++pos1;
8051 ++pos2;
8052 } else if (s1 < s2) {
8053 pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
8054 } else { // s1 > s2
8055 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
8056 }
8057 }
8058
8059 // if we ended early because x2 ran out, then all remaining in x1 should be
8060 // freed
8061 while (pos1 < length1) {
8062 container_free(x1->high_low_container.containers[pos1],
8063 x1->high_low_container.typecodes[pos1]);
8064 ++pos1;
8065 }
8066
8067 // all containers after this have either been copied or freed
8068 ra_downsize(&x1->high_low_container, intersection_size);
8069}
8070
8071roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
8072 const roaring_bitmap_t *x2) {
8073 uint8_t container_result_type = 0;
8074 const int length1 = x1->high_low_container.size,
8075 length2 = x2->high_low_container.size;
8076 if (0 == length1) {
8077 return roaring_bitmap_copy(x2);
8078 }
8079 if (0 == length2) {
8080 return roaring_bitmap_copy(x1);
8081 }
8082 roaring_bitmap_t *answer =
8083 roaring_bitmap_create_with_capacity(length1 + length2);
8084 answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
8085 int pos1 = 0, pos2 = 0;
8086 uint8_t container_type_1, container_type_2;
8087 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8088 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8089 while (true) {
8090 if (s1 == s2) {
8091 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8092 &container_type_1);
8093 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8094 &container_type_2);
8095 void *c = container_or(c1, container_type_1, c2, container_type_2,
8096 &container_result_type);
8097 // since we assume that the initial containers are non-empty, the
8098 // result here
8099 // can only be non-empty
8100 ra_append(&answer->high_low_container, s1, c,
8101 container_result_type);
8102 ++pos1;
8103 ++pos2;
8104 if (pos1 == length1) break;
8105 if (pos2 == length2) break;
8106 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8107 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8108
8109 } else if (s1 < s2) { // s1 < s2
8110 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8111 &container_type_1);
8112 // c1 = container_clone(c1, container_type_1);
8113 c1 =
8114 get_copy_of_container(c1, &container_type_1, x1->copy_on_write);
8115 if (x1->copy_on_write) {
8116 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
8117 container_type_1);
8118 }
8119 ra_append(&answer->high_low_container, s1, c1, container_type_1);
8120 pos1++;
8121 if (pos1 == length1) break;
8122 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8123
8124 } else { // s1 > s2
8125 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8126 &container_type_2);
8127 // c2 = container_clone(c2, container_type_2);
8128 c2 =
8129 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
8130 if (x2->copy_on_write) {
8131 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
8132 container_type_2);
8133 }
8134 ra_append(&answer->high_low_container, s2, c2, container_type_2);
8135 pos2++;
8136 if (pos2 == length2) break;
8137 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8138 }
8139 }
8140 if (pos1 == length1) {
8141 ra_append_copy_range(&answer->high_low_container,
8142 &x2->high_low_container, pos2, length2,
8143 x2->copy_on_write);
8144 } else if (pos2 == length2) {
8145 ra_append_copy_range(&answer->high_low_container,
8146 &x1->high_low_container, pos1, length1,
8147 x1->copy_on_write);
8148 }
8149 return answer;
8150}
8151
8152// inplace or (modifies its first argument).
8153void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
8154 const roaring_bitmap_t *x2) {
8155 uint8_t container_result_type = 0;
8156 int length1 = x1->high_low_container.size;
8157 const int length2 = x2->high_low_container.size;
8158
8159 if (0 == length2) return;
8160
8161 if (0 == length1) {
8162 roaring_bitmap_overwrite(x1, x2);
8163 return;
8164 }
8165 int pos1 = 0, pos2 = 0;
8166 uint8_t container_type_1, container_type_2;
8167 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8168 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8169 while (true) {
8170 if (s1 == s2) {
8171 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8172 &container_type_1);
8173 if (!container_is_full(c1, container_type_1)) {
8174 c1 = get_writable_copy_if_shared(c1, &container_type_1);
8175
8176 void *c2 = ra_get_container_at_index(&x2->high_low_container,
8177 pos2, &container_type_2);
8178 void *c =
8179 container_ior(c1, container_type_1, c2, container_type_2,
8180 &container_result_type);
8181 if (c !=
8182 c1) { // in this instance a new container was created, and
8183 // we need to free the old one
8184 container_free(c1, container_type_1);
8185 }
8186
8187 ra_set_container_at_index(&x1->high_low_container, pos1, c,
8188 container_result_type);
8189 }
8190 ++pos1;
8191 ++pos2;
8192 if (pos1 == length1) break;
8193 if (pos2 == length2) break;
8194 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8195 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8196
8197 } else if (s1 < s2) { // s1 < s2
8198 pos1++;
8199 if (pos1 == length1) break;
8200 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8201
8202 } else { // s1 > s2
8203 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8204 &container_type_2);
8205 c2 =
8206 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
8207 if (x2->copy_on_write) {
8208 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
8209 container_type_2);
8210 }
8211
8212 // void *c2_clone = container_clone(c2, container_type_2);
8213 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
8214 container_type_2);
8215 pos1++;
8216 length1++;
8217 pos2++;
8218 if (pos2 == length2) break;
8219 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8220 }
8221 }
8222 if (pos1 == length1) {
8223 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
8224 pos2, length2, x2->copy_on_write);
8225 }
8226}
8227
8228roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
8229 const roaring_bitmap_t *x2) {
8230 uint8_t container_result_type = 0;
8231 const int length1 = x1->high_low_container.size,
8232 length2 = x2->high_low_container.size;
8233 if (0 == length1) {
8234 return roaring_bitmap_copy(x2);
8235 }
8236 if (0 == length2) {
8237 return roaring_bitmap_copy(x1);
8238 }
8239 roaring_bitmap_t *answer =
8240 roaring_bitmap_create_with_capacity(length1 + length2);
8241 answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
8242 int pos1 = 0, pos2 = 0;
8243 uint8_t container_type_1, container_type_2;
8244 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8245 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8246 while (true) {
8247 if (s1 == s2) {
8248 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8249 &container_type_1);
8250 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8251 &container_type_2);
8252 void *c = container_xor(c1, container_type_1, c2, container_type_2,
8253 &container_result_type);
8254
8255 if (container_nonzero_cardinality(c, container_result_type)) {
8256 ra_append(&answer->high_low_container, s1, c,
8257 container_result_type);
8258 } else {
8259 container_free(c, container_result_type);
8260 }
8261 ++pos1;
8262 ++pos2;
8263 if (pos1 == length1) break;
8264 if (pos2 == length2) break;
8265 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8266 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8267
8268 } else if (s1 < s2) { // s1 < s2
8269 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8270 &container_type_1);
8271 c1 =
8272 get_copy_of_container(c1, &container_type_1, x1->copy_on_write);
8273 if (x1->copy_on_write) {
8274 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
8275 container_type_1);
8276 }
8277 ra_append(&answer->high_low_container, s1, c1, container_type_1);
8278 pos1++;
8279 if (pos1 == length1) break;
8280 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8281
8282 } else { // s1 > s2
8283 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8284 &container_type_2);
8285 c2 =
8286 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
8287 if (x2->copy_on_write) {
8288 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
8289 container_type_2);
8290 }
8291 ra_append(&answer->high_low_container, s2, c2, container_type_2);
8292 pos2++;
8293 if (pos2 == length2) break;
8294 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8295 }
8296 }
8297 if (pos1 == length1) {
8298 ra_append_copy_range(&answer->high_low_container,
8299 &x2->high_low_container, pos2, length2,
8300 x2->copy_on_write);
8301 } else if (pos2 == length2) {
8302 ra_append_copy_range(&answer->high_low_container,
8303 &x1->high_low_container, pos1, length1,
8304 x1->copy_on_write);
8305 }
8306 return answer;
8307}
8308
8309// inplace xor (modifies its first argument).
8310
8311void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
8312 const roaring_bitmap_t *x2) {
8313 assert(x1 != x2);
8314 uint8_t container_result_type = 0;
8315 int length1 = x1->high_low_container.size;
8316 const int length2 = x2->high_low_container.size;
8317
8318 if (0 == length2) return;
8319
8320 if (0 == length1) {
8321 roaring_bitmap_overwrite(x1, x2);
8322 return;
8323 }
8324
8325 // XOR can have new containers inserted from x2, but can also
8326 // lose containers when x1 and x2 are nonempty and identical.
8327
8328 int pos1 = 0, pos2 = 0;
8329 uint8_t container_type_1, container_type_2;
8330 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8331 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8332 while (true) {
8333 if (s1 == s2) {
8334 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8335 &container_type_1);
8336 c1 = get_writable_copy_if_shared(c1, &container_type_1);
8337
8338 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8339 &container_type_2);
8340 void *c = container_ixor(c1, container_type_1, c2, container_type_2,
8341 &container_result_type);
8342
8343 if (container_nonzero_cardinality(c, container_result_type)) {
8344 ra_set_container_at_index(&x1->high_low_container, pos1, c,
8345 container_result_type);
8346 ++pos1;
8347 } else {
8348 container_free(c, container_result_type);
8349 ra_remove_at_index(&x1->high_low_container, pos1);
8350 --length1;
8351 }
8352
8353 ++pos2;
8354 if (pos1 == length1) break;
8355 if (pos2 == length2) break;
8356 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8357 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8358
8359 } else if (s1 < s2) { // s1 < s2
8360 pos1++;
8361 if (pos1 == length1) break;
8362 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8363
8364 } else { // s1 > s2
8365 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8366 &container_type_2);
8367 c2 =
8368 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
8369 if (x2->copy_on_write) {
8370 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
8371 container_type_2);
8372 }
8373
8374 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
8375 container_type_2);
8376 pos1++;
8377 length1++;
8378 pos2++;
8379 if (pos2 == length2) break;
8380 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8381 }
8382 }
8383 if (pos1 == length1) {
8384 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
8385 pos2, length2, x2->copy_on_write);
8386 }
8387}
8388
8389roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
8390 const roaring_bitmap_t *x2) {
8391 uint8_t container_result_type = 0;
8392 const int length1 = x1->high_low_container.size,
8393 length2 = x2->high_low_container.size;
8394 if (0 == length1) {
8395 roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
8396 empty_bitmap->copy_on_write = x1->copy_on_write && x2->copy_on_write;
8397 return empty_bitmap;
8398 }
8399 if (0 == length2) {
8400 return roaring_bitmap_copy(x1);
8401 }
8402 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
8403 answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
8404
8405 int pos1 = 0, pos2 = 0;
8406 uint8_t container_type_1, container_type_2;
8407 uint16_t s1 = 0;
8408 uint16_t s2 = 0;
8409 while (true) {
8410 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8411 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8412
8413 if (s1 == s2) {
8414 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8415 &container_type_1);
8416 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8417 &container_type_2);
8418 void *c =
8419 container_andnot(c1, container_type_1, c2, container_type_2,
8420 &container_result_type);
8421
8422 if (container_nonzero_cardinality(c, container_result_type)) {
8423 ra_append(&answer->high_low_container, s1, c,
8424 container_result_type);
8425 } else {
8426 container_free(c, container_result_type);
8427 }
8428 ++pos1;
8429 ++pos2;
8430 if (pos1 == length1) break;
8431 if (pos2 == length2) break;
8432 } else if (s1 < s2) { // s1 < s2
8433 const int next_pos1 =
8434 ra_advance_until(&x1->high_low_container, s2, pos1);
8435 ra_append_copy_range(&answer->high_low_container,
8436 &x1->high_low_container, pos1, next_pos1,
8437 x1->copy_on_write);
8438 // TODO : perhaps some of the copy_on_write should be based on
8439 // answer rather than x1 (more stringent?). Many similar cases
8440 pos1 = next_pos1;
8441 if (pos1 == length1) break;
8442 } else { // s1 > s2
8443 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
8444 if (pos2 == length2) break;
8445 }
8446 }
8447 if (pos2 == length2) {
8448 ra_append_copy_range(&answer->high_low_container,
8449 &x1->high_low_container, pos1, length1,
8450 x1->copy_on_write);
8451 }
8452 return answer;
8453}
8454
8455// inplace andnot (modifies its first argument).
8456
8457void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
8458 const roaring_bitmap_t *x2) {
8459 assert(x1 != x2);
8460
8461 uint8_t container_result_type = 0;
8462 int length1 = x1->high_low_container.size;
8463 const int length2 = x2->high_low_container.size;
8464 int intersection_size = 0;
8465
8466 if (0 == length2) return;
8467
8468 if (0 == length1) {
8469 roaring_bitmap_clear(x1);
8470 return;
8471 }
8472
8473 int pos1 = 0, pos2 = 0;
8474 uint8_t container_type_1, container_type_2;
8475 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8476 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8477 while (true) {
8478 if (s1 == s2) {
8479 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
8480 &container_type_1);
8481 c1 = get_writable_copy_if_shared(c1, &container_type_1);
8482
8483 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
8484 &container_type_2);
8485 void *c =
8486 container_iandnot(c1, container_type_1, c2, container_type_2,
8487 &container_result_type);
8488
8489 if (container_nonzero_cardinality(c, container_result_type)) {
8490 ra_replace_key_and_container_at_index(&x1->high_low_container,
8491 intersection_size++, s1,
8492 c, container_result_type);
8493 } else {
8494 container_free(c, container_result_type);
8495 }
8496
8497 ++pos1;
8498 ++pos2;
8499 if (pos1 == length1) break;
8500 if (pos2 == length2) break;
8501 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8502 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8503
8504 } else if (s1 < s2) { // s1 < s2
8505 if (pos1 != intersection_size) {
8506 void *c1 = ra_get_container_at_index(&x1->high_low_container,
8507 pos1, &container_type_1);
8508
8509 ra_replace_key_and_container_at_index(&x1->high_low_container,
8510 intersection_size, s1, c1,
8511 container_type_1);
8512 }
8513 intersection_size++;
8514 pos1++;
8515 if (pos1 == length1) break;
8516 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
8517
8518 } else { // s1 > s2
8519 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
8520 if (pos2 == length2) break;
8521 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
8522 }
8523 }
8524
8525 if (pos1 < length1) {
8526 // all containers between intersection_size and
8527 // pos1 are junk. However, they have either been moved
8528 // (thus still referenced) or involved in an iandnot
8529 // that will clean up all containers that could not be reused.
8530 // Thus we should not free the junk containers between
8531 // intersection_size and pos1.
8532 if (pos1 > intersection_size) {
8533 // left slide of remaining items
8534 ra_copy_range(&x1->high_low_container, pos1, length1,
8535 intersection_size);
8536 }
8537 // else current placement is fine
8538 intersection_size += (length1 - pos1);
8539 }
8540 ra_downsize(&x1->high_low_container, intersection_size);
8541}
8542
8543uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) {
8544 uint64_t card = 0;
8545 for (int i = 0; i < ra->high_low_container.size; ++i)
8546 card += container_get_cardinality(ra->high_low_container.containers[i],
8547 ra->high_low_container.typecodes[i]);
8548 return card;
8549}
8550
8551uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra,
8552 uint64_t range_start,
8553 uint64_t range_end) {
8554 if (range_end > UINT32_MAX) {
8555 range_end = UINT32_MAX + UINT64_C(1);
8556 }
8557 if (range_start >= range_end) {
8558 return 0;
8559 }
8560 range_end--; // make range_end inclusive
8561 // now we have: 0 <= range_start <= range_end <= UINT32_MAX
8562
8563 int minhb = range_start >> 16;
8564 int maxhb = range_end >> 16;
8565
8566 uint64_t card = 0;
8567
8568 int i = ra_get_index(&ra->high_low_container, minhb);
8569 if (i >= 0) {
8570 if (minhb == maxhb) {
8571 card += container_rank(ra->high_low_container.containers[i],
8572 ra->high_low_container.typecodes[i],
8573 range_end & 0xffff);
8574 } else {
8575 card += container_get_cardinality(ra->high_low_container.containers[i],
8576 ra->high_low_container.typecodes[i]);
8577 }
8578 if ((range_start & 0xffff) != 0) {
8579 card -= container_rank(ra->high_low_container.containers[i],
8580 ra->high_low_container.typecodes[i],
8581 (range_start & 0xffff) - 1);
8582 }
8583 i++;
8584 } else {
8585 i = -i - 1;
8586 }
8587
8588 for (; i < ra->high_low_container.size; i++) {
8589 uint16_t key = ra->high_low_container.keys[i];
8590 if (key < maxhb) {
8591 card += container_get_cardinality(ra->high_low_container.containers[i],
8592 ra->high_low_container.typecodes[i]);
8593 } else if (key == maxhb) {
8594 card += container_rank(ra->high_low_container.containers[i],
8595 ra->high_low_container.typecodes[i],
8596 range_end & 0xffff);
8597 break;
8598 } else {
8599 break;
8600 }
8601 }
8602
8603 return card;
8604}
8605
8606
8607bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) {
8608 return ra->high_low_container.size == 0;
8609}
8610
8611void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) {
8612 ra_to_uint32_array(&ra->high_low_container, ans);
8613}
8614
8615bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) {
8616 return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans);
8617}
8618
8619/** convert array and bitmap containers to run containers when it is more
8620 * efficient;
8621 * also convert from run containers when more space efficient. Returns
8622 * true if the result has at least one run container.
8623*/
8624bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
8625 bool answer = false;
8626 for (int i = 0; i < r->high_low_container.size; i++) {
8627 uint8_t typecode_original, typecode_after;
8628 ra_unshare_container_at_index(
8629 &r->high_low_container, i); // TODO: this introduces extra cloning!
8630 void *c = ra_get_container_at_index(&r->high_low_container, i,
8631 &typecode_original);
8632 void *c1 = convert_run_optimize(c, typecode_original, &typecode_after);
8633 if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true;
8634 ra_set_container_at_index(&r->high_low_container, i, c1,
8635 typecode_after);
8636 }
8637 return answer;
8638}
8639
8640size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
8641 size_t answer = 0;
8642 for (int i = 0; i < r->high_low_container.size; i++) {
8643 uint8_t typecode_original;
8644 void *c = ra_get_container_at_index(&r->high_low_container, i,
8645 &typecode_original);
8646 answer += container_shrink_to_fit(c, typecode_original);
8647 }
8648 answer += ra_shrink_to_fit(&r->high_low_container);
8649 return answer;
8650}
8651
8652/**
8653 * Remove run-length encoding even when it is more space efficient
8654 * return whether a change was applied
8655 */
8656bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
8657 bool answer = false;
8658 for (int i = 0; i < r->high_low_container.size; i++) {
8659 uint8_t typecode_original, typecode_after;
8660 void *c = ra_get_container_at_index(&r->high_low_container, i,
8661 &typecode_original);
8662 if (get_container_type(c, typecode_original) ==
8663 RUN_CONTAINER_TYPE_CODE) {
8664 answer = true;
8665 if (typecode_original == SHARED_CONTAINER_TYPE_CODE) {
8666 run_container_t *truec =
8667 (run_container_t *)((shared_container_t *)c)->container;
8668 int32_t card = run_container_cardinality(truec);
8669 void *c1 = convert_to_bitset_or_array_container(
8670 truec, card, &typecode_after);
8671 shared_container_free((shared_container_t *)c);
8672 ra_set_container_at_index(&r->high_low_container, i, c1,
8673 typecode_after);
8674
8675 } else {
8676 int32_t card = run_container_cardinality((run_container_t *)c);
8677 void *c1 = convert_to_bitset_or_array_container(
8678 (run_container_t *)c, card, &typecode_after);
8679 ra_set_container_at_index(&r->high_low_container, i, c1,
8680 typecode_after);
8681 }
8682 }
8683 }
8684 return answer;
8685}
8686
8687size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) {
8688 size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
8689 uint64_t cardinality = roaring_bitmap_get_cardinality(ra);
8690 uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
8691 if (portablesize < sizeasarray) {
8692 buf[0] = SERIALIZATION_CONTAINER;
8693 return roaring_bitmap_portable_serialize(ra, buf + 1) + 1;
8694 } else {
8695 buf[0] = SERIALIZATION_ARRAY_UINT32;
8696 memcpy(buf + 1, &cardinality, sizeof(uint32_t));
8697 roaring_bitmap_to_uint32_array(
8698 ra, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
8699 return 1 + (size_t)sizeasarray;
8700 }
8701}
8702
8703size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) {
8704 size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra);
8705 uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) +
8706 sizeof(uint32_t);
8707 return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
8708}
8709
8710size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) {
8711 return ra_portable_size_in_bytes(&ra->high_low_container);
8712}
8713
8714
8715roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
8716 roaring_bitmap_t *ans =
8717 (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t));
8718 if (ans == NULL) {
8719 return NULL;
8720 }
8721 size_t bytesread;
8722 bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
8723 if(is_ok) assert(bytesread <= maxbytes);
8724 ans->copy_on_write = false;
8725 if (!is_ok) {
8726 free(ans);
8727 return NULL;
8728 }
8729 return ans;
8730}
8731
8732roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
8733 return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
8734}
8735
8736
/** Thin wrapper: returns the result of ra_portable_deserialize_size for the
 *  given buffer and byte limit. */
size_t roaring_bitmap_portable_deserialize_size(const char *buf,
                                                size_t maxbytes) {
    const size_t nbytes = ra_portable_deserialize_size(buf, maxbytes);
    return nbytes;
}
8740
8741
8742size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra,
8743 char *buf) {
8744 return ra_portable_serialize(&ra->high_low_container, buf);
8745}
8746
8747roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
8748 const char *bufaschar = (const char *)buf;
8749 if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) {
8750 /* This looks like a compressed set of uint32_t elements */
8751 uint32_t card;
8752 memcpy(&card, bufaschar + 1, sizeof(uint32_t));
8753 const uint32_t *elems =
8754 (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
8755
8756 return roaring_bitmap_of_ptr(card, elems);
8757 } else if (bufaschar[0] == SERIALIZATION_CONTAINER) {
8758 return roaring_bitmap_portable_deserialize(bufaschar + 1);
8759 } else
8760 return (NULL);
8761}
8762
8763bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator,
8764 void *ptr) {
8765 for (int i = 0; i < ra->high_low_container.size; ++i)
8766 if (!container_iterate(ra->high_low_container.containers[i],
8767 ra->high_low_container.typecodes[i],
8768 ((uint32_t)ra->high_low_container.keys[i]) << 16,
8769 iterator, ptr)) {
8770 return false;
8771 }
8772 return true;
8773}
8774
8775bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator,
8776 uint64_t high_bits, void *ptr) {
8777 for (int i = 0; i < ra->high_low_container.size; ++i)
8778 if (!container_iterate64(
8779 ra->high_low_container.containers[i],
8780 ra->high_low_container.typecodes[i],
8781 ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator,
8782 high_bits, ptr)) {
8783 return false;
8784 }
8785 return true;
8786}
8787
8788/****
8789* begin roaring_uint32_iterator_t
8790*****/
8791
8792static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
8793 newit->in_container_index = 0;
8794 newit->run_index = 0;
8795 newit->current_value = 0;
8796 if (newit->container_index >=
8797 newit->parent->high_low_container.size) { // otherwise nothing
8798 newit->current_value = UINT32_MAX;
8799 return (newit->has_value = false);
8800 }
8801 // assume not empty
8802 newit->has_value = true;
8803 // we precompute container, typecode and highbits so that successive
8804 // iterators do not have to grab them from odd memory locations
8805 // and have to worry about the (easily predicted) container_unwrap_shared
8806 // call.
8807 newit->container =
8808 newit->parent->high_low_container.containers[newit->container_index];
8809 newit->typecode =
8810 newit->parent->high_low_container.typecodes[newit->container_index];
8811 newit->highbits =
8812 ((uint32_t)
8813 newit->parent->high_low_container.keys[newit->container_index])
8814 << 16;
8815 newit->container =
8816 container_unwrap_shared(newit->container, &(newit->typecode));
8817 uint32_t wordindex;
8818 uint64_t word; // used for bitsets
8819 switch (newit->typecode) {
8820 case BITSET_CONTAINER_TYPE_CODE:
8821 wordindex = 0;
8822 while ((word = ((const bitset_container_t *)(newit->container))
8823 ->array[wordindex]) == 0)
8824 wordindex++; // advance
8825 // here "word" is non-zero
8826 newit->in_container_index = wordindex * 64 + __builtin_ctzll(word);
8827 newit->current_value = newit->highbits | newit->in_container_index;
8828 break;
8829 case ARRAY_CONTAINER_TYPE_CODE:
8830 newit->current_value =
8831 newit->highbits |
8832 ((const array_container_t *)(newit->container))->array[0];
8833 break;
8834 case RUN_CONTAINER_TYPE_CODE:
8835 newit->current_value =
8836 newit->highbits |
8837 (((const run_container_t *)(newit->container))->runs[0].value);
8838 newit->in_run_index =
8839 newit->current_value +
8840 (((const run_container_t *)(newit->container))->runs[0].length);
8841 break;
8842 default:
8843 // if this ever happens, bug!
8844 assert(false);
8845 } // switch (typecode)
8846 return true;
8847}
8848
8849// prerequesite: the value should be in range of the container
8850static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
8851 uint16_t lb = val & 0xFFFF;
8852 newit->in_container_index = 0;
8853 newit->run_index = 0;
8854 newit->current_value = 0;
8855 // assume it is found
8856 newit->has_value = true;
8857 newit->container =
8858 newit->parent->high_low_container.containers[newit->container_index];
8859 newit->typecode =
8860 newit->parent->high_low_container.typecodes[newit->container_index];
8861 newit->highbits =
8862 ((uint32_t)
8863 newit->parent->high_low_container.keys[newit->container_index])
8864 << 16;
8865 newit->container =
8866 container_unwrap_shared(newit->container, &(newit->typecode));
8867 switch (newit->typecode) {
8868 case BITSET_CONTAINER_TYPE_CODE:
8869 newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb);
8870 newit->current_value = newit->highbits | newit->in_container_index;
8871 break;
8872 case ARRAY_CONTAINER_TYPE_CODE:
8873 newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb);
8874 newit->current_value =
8875 newit->highbits |
8876 ((const array_container_t *)(newit->container))->array[newit->in_container_index];
8877 break;
8878 case RUN_CONTAINER_TYPE_CODE:
8879 newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb);
8880 if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) {
8881 newit->current_value = val;
8882 } else {
8883 newit->current_value =
8884 newit->highbits |
8885 (((const run_container_t *)(newit->container))->runs[newit->run_index].value);
8886 }
8887 newit->in_run_index =
8888 (newit->highbits | (((const run_container_t *)(newit->container))->runs[newit->run_index].value)) +
8889 (((const run_container_t *)(newit->container))->runs[newit->run_index].length);
8890
8891 break;
8892 default:
8893 // if this ever happens, bug!
8894 assert(false);
8895 } // switch (typecode)
8896 return true;
8897}
8898
8899void roaring_init_iterator(const roaring_bitmap_t *ra,
8900 roaring_uint32_iterator_t *newit) {
8901 newit->parent = ra;
8902 newit->container_index = 0;
8903 newit->has_value = loadfirstvalue(newit);
8904}
8905
8906roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) {
8907 roaring_uint32_iterator_t *newit =
8908 (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t));
8909 if (newit == NULL) return NULL;
8910 roaring_init_iterator(ra, newit);
8911 return newit;
8912}
8913
8914roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
8915 const roaring_uint32_iterator_t *it) {
8916 roaring_uint32_iterator_t *newit =
8917 (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t));
8918 memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
8919 return newit;
8920}
8921
8922bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
8923 uint16_t hb = val >> 16;
8924 const int i = ra_get_index(& it->parent->high_low_container, hb);
8925 if (i >= 0) {
8926 uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]);
8927 uint16_t lb = val & 0xFFFF;
8928 if(lowvalue < lb ) {
8929 it->container_index = i+1; // will have to load first value of next container
8930 } else {// the value is necessarily within the range of the container
8931 it->container_index = i;
8932 it->has_value = loadfirstvalue_largeorequal(it, val);
8933 return it->has_value;
8934 }
8935 } else {
8936 // there is no matching, so we are going for the next container
8937 it->container_index = -i-1;
8938 }
8939 it->has_value = loadfirstvalue(it);
8940 return it->has_value;
8941}
8942
8943
8944bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
8945 if (it->container_index >= it->parent->high_low_container.size) {
8946 return (it->has_value = false);
8947 }
8948 uint32_t wordindex; // used for bitsets
8949 uint64_t word; // used for bitsets
8950 switch (it->typecode) {
8951 case BITSET_CONTAINER_TYPE_CODE:
8952 it->in_container_index++;
8953 wordindex = it->in_container_index / 64;
8954 if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
8955 word = ((const bitset_container_t *)(it->container))
8956 ->array[wordindex] &
8957 (UINT64_MAX << (it->in_container_index % 64));
8958 // next part could be optimized/simplified
8959 while ((word == 0) &&
8960 (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
8961 wordindex++;
8962 word = ((const bitset_container_t *)(it->container))
8963 ->array[wordindex];
8964 }
8965 if (word != 0) {
8966 it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
8967 it->current_value = it->highbits | it->in_container_index;
8968 return (it->has_value = true);
8969 }
8970 break;
8971 case ARRAY_CONTAINER_TYPE_CODE:
8972 it->in_container_index++;
8973 if (it->in_container_index <
8974 ((const array_container_t *)(it->container))->cardinality) {
8975 it->current_value = it->highbits |
8976 ((const array_container_t *)(it->container))
8977 ->array[it->in_container_index];
8978 return true;
8979 }
8980 break;
8981 case RUN_CONTAINER_TYPE_CODE:
8982 if(it->current_value == UINT32_MAX) {
8983 return (it->has_value = false); // without this, we risk an overflow to zero
8984 }
8985 it->current_value++;
8986 if (it->current_value <= it->in_run_index) {
8987 return (it->has_value = true);
8988 }
8989 it->run_index++;
8990 if (it->run_index <
8991 ((const run_container_t *)(it->container))->n_runs) {
8992 it->current_value =
8993 it->highbits | (((const run_container_t *)(it->container))
8994 ->runs[it->run_index]
8995 .value);
8996 it->in_run_index = it->current_value +
8997 ((const run_container_t *)(it->container))
8998 ->runs[it->run_index]
8999 .length;
9000 return (it->has_value = true);
9001 }
9002 break;
9003 default:
9004 // if this ever happens, bug!
9005 assert(false);
9006 } // switch (typecode)
9007 // moving to next container
9008 it->container_index++;
9009 return (it->has_value = loadfirstvalue(it));
9010}
9011
9012uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
9013 uint32_t ret = 0;
9014 uint32_t num_values;
9015 uint32_t wordindex; // used for bitsets
9016 uint64_t word; // used for bitsets
9017 const array_container_t* acont; //TODO remove
9018 const run_container_t* rcont; //TODO remove
9019 const bitset_container_t* bcont; //TODO remove
9020
9021 while (it->has_value && ret < count) {
9022 switch (it->typecode) {
9023 case BITSET_CONTAINER_TYPE_CODE:
9024 bcont = (const bitset_container_t*)(it->container);
9025 wordindex = it->in_container_index / 64;
9026 word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
9027 do {
9028 while (word != 0 && ret < count) {
9029 buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word));
9030 word = word & (word - 1);
9031 buf++;
9032 ret++;
9033 }
9034 while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
9035 wordindex++;
9036 word = bcont->array[wordindex];
9037 }
9038 } while (word != 0 && ret < count);
9039 it->has_value = (word != 0);
9040 if (it->has_value) {
9041 it->in_container_index = wordindex * 64 + __builtin_ctzll(word);
9042 it->current_value = it->highbits | it->in_container_index;
9043 }
9044 break;
9045 case ARRAY_CONTAINER_TYPE_CODE:
9046 acont = (const array_container_t *)(it->container);
9047 num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret);
9048 for (uint32_t i = 0; i < num_values; i++) {
9049 buf[i] = it->highbits | acont->array[it->in_container_index + i];
9050 }
9051 buf += num_values;
9052 ret += num_values;
9053 it->in_container_index += num_values;
9054 it->has_value = (it->in_container_index < acont->cardinality);
9055 if (it->has_value) {
9056 it->current_value = it->highbits | acont->array[it->in_container_index];
9057 }
9058 break;
9059 case RUN_CONTAINER_TYPE_CODE:
9060 rcont = (const run_container_t*)(it->container);
9061 //"in_run_index" name is misleading, read it as "max_value_in_current_run"
9062 do {
9063 num_values = minimum_uint32(it->in_run_index - it->current_value + 1, count - ret);
9064 for (uint32_t i = 0; i < num_values; i++) {
9065 buf[i] = it->current_value + i;
9066 }
9067 it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
9068 buf += num_values;
9069 ret += num_values;
9070
9071 if (it->current_value > it->in_run_index || it->current_value == 0) {
9072 it->run_index++;
9073 if (it->run_index < rcont->n_runs) {
9074 it->current_value = it->highbits | rcont->runs[it->run_index].value;
9075 it->in_run_index = it->current_value + rcont->runs[it->run_index].length;
9076 } else {
9077 it->has_value = false;
9078 }
9079 }
9080 } while ((ret < count) && it->has_value);
9081 break;
9082 default:
9083 assert(false);
9084 }
9085 if (it->has_value) {
9086 assert(ret == count);
9087 return ret;
9088 }
9089 it->container_index++;
9090 it->has_value = loadfirstvalue(it);
9091 }
9092 return ret;
9093}
9094
9095
9096
9097void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); }
9098
9099/****
9100* end of roaring_uint32_iterator_t
9101*****/
9102
9103bool roaring_bitmap_equals(const roaring_bitmap_t *ra1,
9104 const roaring_bitmap_t *ra2) {
9105 if (ra1->high_low_container.size != ra2->high_low_container.size) {
9106 return false;
9107 }
9108 for (int i = 0; i < ra1->high_low_container.size; ++i) {
9109 if (ra1->high_low_container.keys[i] !=
9110 ra2->high_low_container.keys[i]) {
9111 return false;
9112 }
9113 }
9114 for (int i = 0; i < ra1->high_low_container.size; ++i) {
9115 bool areequal = container_equals(ra1->high_low_container.containers[i],
9116 ra1->high_low_container.typecodes[i],
9117 ra2->high_low_container.containers[i],
9118 ra2->high_low_container.typecodes[i]);
9119 if (!areequal) {
9120 return false;
9121 }
9122 }
9123 return true;
9124}
9125
9126bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1,
9127 const roaring_bitmap_t *ra2) {
9128 const int length1 = ra1->high_low_container.size,
9129 length2 = ra2->high_low_container.size;
9130
9131 int pos1 = 0, pos2 = 0;
9132
9133 while (pos1 < length1 && pos2 < length2) {
9134 const uint16_t s1 = ra_get_key_at_index(&ra1->high_low_container, pos1);
9135 const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2);
9136
9137 if (s1 == s2) {
9138 uint8_t container_type_1, container_type_2;
9139 void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1,
9140 &container_type_1);
9141 void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2,
9142 &container_type_2);
9143 bool subset =
9144 container_is_subset(c1, container_type_1, c2, container_type_2);
9145 if (!subset) return false;
9146 ++pos1;
9147 ++pos2;
9148 } else if (s1 < s2) { // s1 < s2
9149 return false;
9150 } else { // s1 > s2
9151 pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2);
9152 }
9153 }
9154 if (pos1 == length1)
9155 return true;
9156 else
9157 return false;
9158}
9159
9160static void insert_flipped_container(roaring_array_t *ans_arr,
9161 const roaring_array_t *x1_arr, uint16_t hb,
9162 uint16_t lb_start, uint16_t lb_end) {
9163 const int i = ra_get_index(x1_arr, hb);
9164 const int j = ra_get_index(ans_arr, hb);
9165 uint8_t ctype_in, ctype_out;
9166 void *flipped_container = NULL;
9167 if (i >= 0) {
9168 void *container_to_flip =
9169 ra_get_container_at_index(x1_arr, i, &ctype_in);
9170 flipped_container =
9171 container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
9172 (uint32_t)(lb_end + 1), &ctype_out);
9173
9174 if (container_get_cardinality(flipped_container, ctype_out))
9175 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
9176 ctype_out);
9177 else {
9178 container_free(flipped_container, ctype_out);
9179 }
9180 } else {
9181 flipped_container = container_range_of_ones(
9182 (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
9183 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
9184 ctype_out);
9185 }
9186}
9187
9188static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
9189 uint16_t lb_start, uint16_t lb_end) {
9190 const int i = ra_get_index(x1_arr, hb);
9191 uint8_t ctype_in, ctype_out;
9192 void *flipped_container = NULL;
9193 if (i >= 0) {
9194 void *container_to_flip =
9195 ra_get_container_at_index(x1_arr, i, &ctype_in);
9196 flipped_container = container_inot_range(
9197 container_to_flip, ctype_in, (uint32_t)lb_start,
9198 (uint32_t)(lb_end + 1), &ctype_out);
9199 // if a new container was created, the old one was already freed
9200 if (container_get_cardinality(flipped_container, ctype_out)) {
9201 ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
9202 } else {
9203 container_free(flipped_container, ctype_out);
9204 ra_remove_at_index(x1_arr, i);
9205 }
9206
9207 } else {
9208 flipped_container = container_range_of_ones(
9209 (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
9210 ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
9211 ctype_out);
9212 }
9213}
9214
9215static void insert_fully_flipped_container(roaring_array_t *ans_arr,
9216 const roaring_array_t *x1_arr,
9217 uint16_t hb) {
9218 const int i = ra_get_index(x1_arr, hb);
9219 const int j = ra_get_index(ans_arr, hb);
9220 uint8_t ctype_in, ctype_out;
9221 void *flipped_container = NULL;
9222 if (i >= 0) {
9223 void *container_to_flip =
9224 ra_get_container_at_index(x1_arr, i, &ctype_in);
9225 flipped_container =
9226 container_not(container_to_flip, ctype_in, &ctype_out);
9227 if (container_get_cardinality(flipped_container, ctype_out))
9228 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
9229 ctype_out);
9230 else {
9231 container_free(flipped_container, ctype_out);
9232 }
9233 } else {
9234 flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
9235 ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
9236 ctype_out);
9237 }
9238}
9239
9240static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
9241 const int i = ra_get_index(x1_arr, hb);
9242 uint8_t ctype_in, ctype_out;
9243 void *flipped_container = NULL;
9244 if (i >= 0) {
9245 void *container_to_flip =
9246 ra_get_container_at_index(x1_arr, i, &ctype_in);
9247 flipped_container =
9248 container_inot(container_to_flip, ctype_in, &ctype_out);
9249
9250 if (container_get_cardinality(flipped_container, ctype_out)) {
9251 ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
9252 } else {
9253 container_free(flipped_container, ctype_out);
9254 ra_remove_at_index(x1_arr, i);
9255 }
9256
9257 } else {
9258 flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
9259 ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
9260 ctype_out);
9261 }
9262}
9263
9264roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
9265 uint64_t range_start,
9266 uint64_t range_end) {
9267 if (range_start >= range_end) {
9268 return roaring_bitmap_copy(x1);
9269 }
9270 if(range_end >= UINT64_C(0x100000000)) {
9271 range_end = UINT64_C(0x100000000);
9272 }
9273
9274 roaring_bitmap_t *ans = roaring_bitmap_create();
9275 ans->copy_on_write = x1->copy_on_write;
9276
9277 uint16_t hb_start = (uint16_t)(range_start >> 16);
9278 const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF;
9279 uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
9280 const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF;
9281
9282 ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
9283 hb_start, x1->copy_on_write);
9284 if (hb_start == hb_end) {
9285 insert_flipped_container(&ans->high_low_container,
9286 &x1->high_low_container, hb_start, lb_start,
9287 lb_end);
9288 } else {
9289 // start and end containers are distinct
9290 if (lb_start > 0) {
9291 // handle first (partial) container
9292 insert_flipped_container(&ans->high_low_container,
9293 &x1->high_low_container, hb_start,
9294 lb_start, 0xFFFF);
9295 ++hb_start; // for the full containers. Can't wrap.
9296 }
9297
9298 if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block
9299
9300 for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
9301 insert_fully_flipped_container(&ans->high_low_container,
9302 &x1->high_low_container, hb);
9303 }
9304
9305 // handle a partial final container
9306 if (lb_end != 0xFFFF) {
9307 insert_flipped_container(&ans->high_low_container,
9308 &x1->high_low_container, hb_end + 1, 0,
9309 lb_end);
9310 ++hb_end;
9311 }
9312 }
9313 ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
9314 hb_end, x1->copy_on_write);
9315 return ans;
9316}
9317
9318void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
9319 uint64_t range_end) {
9320 if (range_start >= range_end) {
9321 return; // empty range
9322 }
9323 if(range_end >= UINT64_C(0x100000000)) {
9324 range_end = UINT64_C(0x100000000);
9325 }
9326
9327 uint16_t hb_start = (uint16_t)(range_start >> 16);
9328 const uint16_t lb_start = (uint16_t)range_start;
9329 uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
9330 const uint16_t lb_end = (uint16_t)(range_end - 1);
9331
9332 if (hb_start == hb_end) {
9333 inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
9334 lb_end);
9335 } else {
9336 // start and end containers are distinct
9337 if (lb_start > 0) {
9338 // handle first (partial) container
9339 inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
9340 0xFFFF);
9341 ++hb_start; // for the full containers. Can't wrap.
9342 }
9343
9344 if (lb_end != 0xFFFF) --hb_end;
9345
9346 for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
9347 inplace_fully_flip_container(&x1->high_low_container, hb);
9348 }
9349 // handle a partial final container
9350 if (lb_end != 0xFFFF) {
9351 inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
9352 lb_end);
9353 ++hb_end;
9354 }
9355 }
9356}
9357
9358roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
9359 const roaring_bitmap_t *x2,
9360 const bool bitsetconversion) {
9361 uint8_t container_result_type = 0;
9362 const int length1 = x1->high_low_container.size,
9363 length2 = x2->high_low_container.size;
9364 if (0 == length1) {
9365 return roaring_bitmap_copy(x2);
9366 }
9367 if (0 == length2) {
9368 return roaring_bitmap_copy(x1);
9369 }
9370 roaring_bitmap_t *answer =
9371 roaring_bitmap_create_with_capacity(length1 + length2);
9372 answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
9373 int pos1 = 0, pos2 = 0;
9374 uint8_t container_type_1, container_type_2;
9375 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9376 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9377 while (true) {
9378 if (s1 == s2) {
9379 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
9380 &container_type_1);
9381 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9382 &container_type_2);
9383 void *c;
9384 if (bitsetconversion && (get_container_type(c1, container_type_1) !=
9385 BITSET_CONTAINER_TYPE_CODE) &&
9386 (get_container_type(c2, container_type_2) !=
9387 BITSET_CONTAINER_TYPE_CODE)) {
9388 void *newc1 =
9389 container_mutable_unwrap_shared(c1, &container_type_1);
9390 newc1 = container_to_bitset(newc1, container_type_1);
9391 container_type_1 = BITSET_CONTAINER_TYPE_CODE;
9392 c = container_lazy_ior(newc1, container_type_1, c2,
9393 container_type_2,
9394 &container_result_type);
9395 if (c != newc1) { // should not happen
9396 container_free(newc1, container_type_1);
9397 }
9398 } else {
9399 c = container_lazy_or(c1, container_type_1, c2,
9400 container_type_2, &container_result_type);
9401 }
9402 // since we assume that the initial containers are non-empty,
9403 // the
9404 // result here
9405 // can only be non-empty
9406 ra_append(&answer->high_low_container, s1, c,
9407 container_result_type);
9408 ++pos1;
9409 ++pos2;
9410 if (pos1 == length1) break;
9411 if (pos2 == length2) break;
9412 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9413 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9414
9415 } else if (s1 < s2) { // s1 < s2
9416 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
9417 &container_type_1);
9418 c1 =
9419 get_copy_of_container(c1, &container_type_1, x1->copy_on_write);
9420 if (x1->copy_on_write) {
9421 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
9422 container_type_1);
9423 }
9424 ra_append(&answer->high_low_container, s1, c1, container_type_1);
9425 pos1++;
9426 if (pos1 == length1) break;
9427 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9428
9429 } else { // s1 > s2
9430 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9431 &container_type_2);
9432 c2 =
9433 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
9434 if (x2->copy_on_write) {
9435 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
9436 container_type_2);
9437 }
9438 ra_append(&answer->high_low_container, s2, c2, container_type_2);
9439 pos2++;
9440 if (pos2 == length2) break;
9441 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9442 }
9443 }
9444 if (pos1 == length1) {
9445 ra_append_copy_range(&answer->high_low_container,
9446 &x2->high_low_container, pos2, length2,
9447 x2->copy_on_write);
9448 } else if (pos2 == length2) {
9449 ra_append_copy_range(&answer->high_low_container,
9450 &x1->high_low_container, pos1, length1,
9451 x1->copy_on_write);
9452 }
9453 return answer;
9454}
9455
9456void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
9457 const roaring_bitmap_t *x2,
9458 const bool bitsetconversion) {
9459 uint8_t container_result_type = 0;
9460 int length1 = x1->high_low_container.size;
9461 const int length2 = x2->high_low_container.size;
9462
9463 if (0 == length2) return;
9464
9465 if (0 == length1) {
9466 roaring_bitmap_overwrite(x1, x2);
9467 return;
9468 }
9469 int pos1 = 0, pos2 = 0;
9470 uint8_t container_type_1, container_type_2;
9471 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9472 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9473 while (true) {
9474 if (s1 == s2) {
9475 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
9476 &container_type_1);
9477 if (!container_is_full(c1, container_type_1)) {
9478 if ((bitsetconversion == false) ||
9479 (get_container_type(c1, container_type_1) ==
9480 BITSET_CONTAINER_TYPE_CODE)) {
9481 c1 = get_writable_copy_if_shared(c1, &container_type_1);
9482 } else {
9483 // convert to bitset
9484 void *oldc1 = c1;
9485 uint8_t oldt1 = container_type_1;
9486 c1 = container_mutable_unwrap_shared(c1, &container_type_1);
9487 c1 = container_to_bitset(c1, container_type_1);
9488 container_free(oldc1, oldt1);
9489 container_type_1 = BITSET_CONTAINER_TYPE_CODE;
9490 }
9491
9492 void *c2 = ra_get_container_at_index(&x2->high_low_container,
9493 pos2, &container_type_2);
9494 void *c = container_lazy_ior(c1, container_type_1, c2,
9495 container_type_2,
9496 &container_result_type);
9497 if (c !=
9498 c1) { // in this instance a new container was created, and
9499 // we need to free the old one
9500 container_free(c1, container_type_1);
9501 }
9502
9503 ra_set_container_at_index(&x1->high_low_container, pos1, c,
9504 container_result_type);
9505 }
9506 ++pos1;
9507 ++pos2;
9508 if (pos1 == length1) break;
9509 if (pos2 == length2) break;
9510 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9511 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9512
9513 } else if (s1 < s2) { // s1 < s2
9514 pos1++;
9515 if (pos1 == length1) break;
9516 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9517
9518 } else { // s1 > s2
9519 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9520 &container_type_2);
9521 // void *c2_clone = container_clone(c2, container_type_2);
9522 c2 =
9523 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
9524 if (x2->copy_on_write) {
9525 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
9526 container_type_2);
9527 }
9528 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
9529 container_type_2);
9530 pos1++;
9531 length1++;
9532 pos2++;
9533 if (pos2 == length2) break;
9534 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9535 }
9536 }
9537 if (pos1 == length1) {
9538 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
9539 pos2, length2, x2->copy_on_write);
9540 }
9541}
9542
9543roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
9544 const roaring_bitmap_t *x2) {
9545 uint8_t container_result_type = 0;
9546 const int length1 = x1->high_low_container.size,
9547 length2 = x2->high_low_container.size;
9548 if (0 == length1) {
9549 return roaring_bitmap_copy(x2);
9550 }
9551 if (0 == length2) {
9552 return roaring_bitmap_copy(x1);
9553 }
9554 roaring_bitmap_t *answer =
9555 roaring_bitmap_create_with_capacity(length1 + length2);
9556 answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
9557 int pos1 = 0, pos2 = 0;
9558 uint8_t container_type_1, container_type_2;
9559 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9560 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9561 while (true) {
9562 if (s1 == s2) {
9563 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
9564 &container_type_1);
9565 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9566 &container_type_2);
9567 void *c =
9568 container_lazy_xor(c1, container_type_1, c2, container_type_2,
9569 &container_result_type);
9570
9571 if (container_nonzero_cardinality(c, container_result_type)) {
9572 ra_append(&answer->high_low_container, s1, c,
9573 container_result_type);
9574 } else {
9575 container_free(c, container_result_type);
9576 }
9577
9578 ++pos1;
9579 ++pos2;
9580 if (pos1 == length1) break;
9581 if (pos2 == length2) break;
9582 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9583 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9584
9585 } else if (s1 < s2) { // s1 < s2
9586 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
9587 &container_type_1);
9588 c1 =
9589 get_copy_of_container(c1, &container_type_1, x1->copy_on_write);
9590 if (x1->copy_on_write) {
9591 ra_set_container_at_index(&x1->high_low_container, pos1, c1,
9592 container_type_1);
9593 }
9594 ra_append(&answer->high_low_container, s1, c1, container_type_1);
9595 pos1++;
9596 if (pos1 == length1) break;
9597 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9598
9599 } else { // s1 > s2
9600 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9601 &container_type_2);
9602 c2 =
9603 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
9604 if (x2->copy_on_write) {
9605 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
9606 container_type_2);
9607 }
9608 ra_append(&answer->high_low_container, s2, c2, container_type_2);
9609 pos2++;
9610 if (pos2 == length2) break;
9611 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9612 }
9613 }
9614 if (pos1 == length1) {
9615 ra_append_copy_range(&answer->high_low_container,
9616 &x2->high_low_container, pos2, length2,
9617 x2->copy_on_write);
9618 } else if (pos2 == length2) {
9619 ra_append_copy_range(&answer->high_low_container,
9620 &x1->high_low_container, pos1, length1,
9621 x1->copy_on_write);
9622 }
9623 return answer;
9624}
9625
9626void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
9627 const roaring_bitmap_t *x2) {
9628 assert(x1 != x2);
9629 uint8_t container_result_type = 0;
9630 int length1 = x1->high_low_container.size;
9631 const int length2 = x2->high_low_container.size;
9632
9633 if (0 == length2) return;
9634
9635 if (0 == length1) {
9636 roaring_bitmap_overwrite(x1, x2);
9637 return;
9638 }
9639 int pos1 = 0, pos2 = 0;
9640 uint8_t container_type_1, container_type_2;
9641 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9642 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9643 while (true) {
9644 if (s1 == s2) {
9645 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
9646 &container_type_1);
9647 c1 = get_writable_copy_if_shared(c1, &container_type_1);
9648 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9649 &container_type_2);
9650 void *c =
9651 container_lazy_ixor(c1, container_type_1, c2, container_type_2,
9652 &container_result_type);
9653 if (container_nonzero_cardinality(c, container_result_type)) {
9654 ra_set_container_at_index(&x1->high_low_container, pos1, c,
9655 container_result_type);
9656 ++pos1;
9657 } else {
9658 container_free(c, container_result_type);
9659 ra_remove_at_index(&x1->high_low_container, pos1);
9660 --length1;
9661 }
9662 ++pos2;
9663 if (pos1 == length1) break;
9664 if (pos2 == length2) break;
9665 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9666 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9667
9668 } else if (s1 < s2) { // s1 < s2
9669 pos1++;
9670 if (pos1 == length1) break;
9671 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9672
9673 } else { // s1 > s2
9674 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9675 &container_type_2);
9676 // void *c2_clone = container_clone(c2, container_type_2);
9677 c2 =
9678 get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
9679 if (x2->copy_on_write) {
9680 ra_set_container_at_index(&x2->high_low_container, pos2, c2,
9681 container_type_2);
9682 }
9683 ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
9684 container_type_2);
9685 pos1++;
9686 length1++;
9687 pos2++;
9688 if (pos2 == length2) break;
9689 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9690 }
9691 }
9692 if (pos1 == length1) {
9693 ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
9694 pos2, length2, x2->copy_on_write);
9695 }
9696}
9697
9698void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) {
9699 for (int i = 0; i < ra->high_low_container.size; ++i) {
9700 const uint8_t original_typecode = ra->high_low_container.typecodes[i];
9701 void *container = ra->high_low_container.containers[i];
9702 uint8_t new_typecode = original_typecode;
9703 void *newcontainer =
9704 container_repair_after_lazy(container, &new_typecode);
9705 ra->high_low_container.containers[i] = newcontainer;
9706 ra->high_low_container.typecodes[i] = new_typecode;
9707 }
9708}
9709
9710
9711
9712/**
9713* roaring_bitmap_rank returns the number of integers that are smaller or equal
9714* to x.
9715*/
9716uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
9717 uint64_t size = 0;
9718 uint32_t xhigh = x >> 16;
9719 for (int i = 0; i < bm->high_low_container.size; i++) {
9720 uint32_t key = bm->high_low_container.keys[i];
9721 if (xhigh > key) {
9722 size +=
9723 container_get_cardinality(bm->high_low_container.containers[i],
9724 bm->high_low_container.typecodes[i]);
9725 } else if (xhigh == key) {
9726 return size + container_rank(bm->high_low_container.containers[i],
9727 bm->high_low_container.typecodes[i],
9728 x & 0xFFFF);
9729 } else {
9730 return size;
9731 }
9732 }
9733 return size;
9734}
9735
9736/**
9737* roaring_bitmap_smallest returns the smallest value in the set.
9738* Returns UINT32_MAX if the set is empty.
9739*/
9740uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
9741 if (bm->high_low_container.size > 0) {
9742 void *container = bm->high_low_container.containers[0];
9743 uint8_t typecode = bm->high_low_container.typecodes[0];
9744 uint32_t key = bm->high_low_container.keys[0];
9745 uint32_t lowvalue = container_minimum(container, typecode);
9746 return lowvalue | (key << 16);
9747 }
9748 return UINT32_MAX;
9749}
9750
9751/**
9752* roaring_bitmap_smallest returns the greatest value in the set.
9753* Returns 0 if the set is empty.
9754*/
9755uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
9756 if (bm->high_low_container.size > 0) {
9757 void *container =
9758 bm->high_low_container.containers[bm->high_low_container.size - 1];
9759 uint8_t typecode =
9760 bm->high_low_container.typecodes[bm->high_low_container.size - 1];
9761 uint32_t key =
9762 bm->high_low_container.keys[bm->high_low_container.size - 1];
9763 uint32_t lowvalue = container_maximum(container, typecode);
9764 return lowvalue | (key << 16);
9765 }
9766 return 0;
9767}
9768
9769bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
9770 uint32_t *element) {
9771 void *container;
9772 uint8_t typecode;
9773 uint16_t key;
9774 uint32_t start_rank = 0;
9775 int i = 0;
9776 bool valid = false;
9777 while (!valid && i < bm->high_low_container.size) {
9778 container = bm->high_low_container.containers[i];
9779 typecode = bm->high_low_container.typecodes[i];
9780 valid =
9781 container_select(container, typecode, &start_rank, rank, element);
9782 i++;
9783 }
9784
9785 if (valid) {
9786 key = bm->high_low_container.keys[i - 1];
9787 *element |= (key << 16);
9788 return true;
9789 } else
9790 return false;
9791}
9792
9793bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
9794 const roaring_bitmap_t *x2) {
9795 const int length1 = x1->high_low_container.size,
9796 length2 = x2->high_low_container.size;
9797 uint64_t answer = 0;
9798 int pos1 = 0, pos2 = 0;
9799
9800 while (pos1 < length1 && pos2 < length2) {
9801 const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1);
9802 const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2);
9803
9804 if (s1 == s2) {
9805 uint8_t container_type_1, container_type_2;
9806 void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1,
9807 &container_type_1);
9808 void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2,
9809 &container_type_2);
9810 if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true;
9811 ++pos1;
9812 ++pos2;
9813 } else if (s1 < s2) { // s1 < s2
9814 pos1 = ra_advance_until(& x1->high_low_container, s2, pos1);
9815 } else { // s1 > s2
9816 pos2 = ra_advance_until(& x2->high_low_container, s1, pos2);
9817 }
9818 }
9819 return answer;
9820}
9821
9822
9823uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
9824 const roaring_bitmap_t *x2) {
9825 const int length1 = x1->high_low_container.size,
9826 length2 = x2->high_low_container.size;
9827 uint64_t answer = 0;
9828 int pos1 = 0, pos2 = 0;
9829
9830 while (pos1 < length1 && pos2 < length2) {
9831 const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
9832 const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
9833
9834 if (s1 == s2) {
9835 uint8_t container_type_1, container_type_2;
9836 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
9837 &container_type_1);
9838 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
9839 &container_type_2);
9840 answer += container_and_cardinality(c1, container_type_1, c2,
9841 container_type_2);
9842 ++pos1;
9843 ++pos2;
9844 } else if (s1 < s2) { // s1 < s2
9845 pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
9846 } else { // s1 > s2
9847 pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
9848 }
9849 }
9850 return answer;
9851}
9852
9853double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
9854 const roaring_bitmap_t *x2) {
9855 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
9856 const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
9857 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
9858 return (double)inter / (double)(c1 + c2 - inter);
9859}
9860
9861uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
9862 const roaring_bitmap_t *x2) {
9863 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
9864 const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
9865 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
9866 return c1 + c2 - inter;
9867}
9868
9869uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
9870 const roaring_bitmap_t *x2) {
9871 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
9872 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
9873 return c1 - inter;
9874}
9875
9876uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
9877 const roaring_bitmap_t *x2) {
9878 const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
9879 const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
9880 const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
9881 return c1 + c2 - 2 * inter;
9882}
9883
9884
9885/**
9886 * Check whether a range of values from range_start (included) to range_end (excluded) is present
9887 */
9888bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
9889 if(range_end >= UINT64_C(0x100000000)) {
9890 range_end = UINT64_C(0x100000000);
9891 }
9892 if (range_start >= range_end) return true; // empty range are always contained!
9893 if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
9894 uint16_t hb_rs = (uint16_t)(range_start >> 16);
9895 uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
9896 const int32_t span = hb_re - hb_rs;
9897 const int32_t hlc_sz = ra_get_size(&r->high_low_container);
9898 if (hlc_sz < span + 1) {
9899 return false;
9900 }
9901 int32_t is = ra_get_index(&r->high_low_container, hb_rs);
9902 int32_t ie = ra_get_index(&r->high_low_container, hb_re);
9903 ie = (ie < 0 ? -ie - 1 : ie);
9904 if ((is < 0) || ((ie - is) != span)) {
9905 return false;
9906 }
9907 const uint32_t lb_rs = range_start & 0xFFFF;
9908 const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
9909 uint8_t typecode;
9910 void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode);
9911 if (hb_rs == hb_re) {
9912 return container_contains_range(container, lb_rs, lb_re, typecode);
9913 }
9914 if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) {
9915 return false;
9916 }
9917 assert(ie < hlc_sz); // would indicate an algorithmic bug
9918 container = ra_get_container_at_index(&r->high_low_container, ie, &typecode);
9919 if (!container_contains_range(container, 0, lb_re, typecode)) {
9920 return false;
9921 }
9922 for (int32_t i = is + 1; i < ie; ++i) {
9923 container = ra_get_container_at_index(&r->high_low_container, i, &typecode);
9924 if (!container_is_full(container, typecode) ) {
9925 return false;
9926 }
9927 }
9928 return true;
9929}
9930
9931
9932bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1,
9933 const roaring_bitmap_t *ra2) {
9934 return (roaring_bitmap_get_cardinality(ra2) >
9935 roaring_bitmap_get_cardinality(ra1) &&
9936 roaring_bitmap_is_subset(ra1, ra2));
9937}
9938/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */
9939/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */
9940#include <assert.h>
9941#include <stdbool.h>
9942#include <stdio.h>
9943#include <stdlib.h>
9944#include <string.h>
9945#include <inttypes.h>
9946
9947
9948// Convention: [0,ra->size) all elements are initialized
9949// [ra->size, ra->allocation_size) is junk and contains nothing needing freeing
9950
9951extern inline int32_t ra_get_size(const roaring_array_t *ra);
9952extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
9953extern inline void *ra_get_container_at_index(const roaring_array_t *ra,
9954 uint16_t i, uint8_t *typecode);
9955extern inline void ra_unshare_container_at_index(roaring_array_t *ra,
9956 uint16_t i);
9957extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra,
9958 int32_t i,
9959 uint16_t key, void *c,
9960 uint8_t typecode);
9961extern inline void ra_set_container_at_index(const roaring_array_t *ra,
9962 int32_t i, void *c,
9963 uint8_t typecode);
9964
9965#define INITIAL_CAPACITY 4
9966
9967static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) {
9968 // because we combine the allocations, it is not possible to use realloc
9969 /*ra->keys =
9970 (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity);
9971ra->containers =
9972 (void **)realloc(ra->containers, sizeof(void *) * new_capacity);
9973ra->typecodes =
9974 (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity);
9975if (!ra->keys || !ra->containers || !ra->typecodes) {
9976 free(ra->keys);
9977 free(ra->containers);
9978 free(ra->typecodes);
9979 return false;
9980}*/
9981
9982 if ( new_capacity == 0 ) {
9983 free(ra->containers);
9984 ra->containers = NULL;
9985 ra->keys = NULL;
9986 ra->typecodes = NULL;
9987 ra->allocation_size = 0;
9988 return true;
9989 }
9990 const size_t memoryneeded =
9991 new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
9992 void *bigalloc = malloc(memoryneeded);
9993 if (!bigalloc) return false;
9994 void *oldbigalloc = ra->containers;
9995 void **newcontainers = (void **)bigalloc;
9996 uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity);
9997 uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity);
9998 assert((char *)(newtypecodes + new_capacity) ==
9999 (char *)bigalloc + memoryneeded);
10000 if(ra->size > 0) {
10001 memcpy(newcontainers, ra->containers, sizeof(void *) * ra->size);
10002 memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size);
10003 memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size);
10004 }
10005 ra->containers = newcontainers;
10006 ra->keys = newkeys;
10007 ra->typecodes = newtypecodes;
10008 ra->allocation_size = new_capacity;
10009 free(oldbigalloc);
10010 return true;
10011}
10012
10013bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) {
10014 if (!new_ra) return false;
10015 new_ra->keys = NULL;
10016 new_ra->containers = NULL;
10017 new_ra->typecodes = NULL;
10018
10019 new_ra->allocation_size = cap;
10020 new_ra->size = 0;
10021 if(cap > 0) {
10022 void *bigalloc =
10023 malloc(cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)));
10024 if( bigalloc == NULL ) return false;
10025 new_ra->containers = (void **)bigalloc;
10026 new_ra->keys = (uint16_t *)(new_ra->containers + cap);
10027 new_ra->typecodes = (uint8_t *)(new_ra->keys + cap);
10028 }
10029 return true;
10030}
10031
10032int ra_shrink_to_fit(roaring_array_t *ra) {
10033 int savings = (ra->allocation_size - ra->size) *
10034 (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
10035 if (!realloc_array(ra, ra->size)) {
10036 return 0;
10037 }
10038 ra->allocation_size = ra->size;
10039 return savings;
10040}
10041
10042bool ra_init(roaring_array_t *t) {
10043 return ra_init_with_capacity(t, INITIAL_CAPACITY);
10044}
10045
10046bool ra_copy(const roaring_array_t *source, roaring_array_t *dest,
10047 bool copy_on_write) {
10048 if (!ra_init_with_capacity(dest, source->size)) return false;
10049 dest->size = source->size;
10050 dest->allocation_size = source->size;
10051 if(dest->size > 0) {
10052 memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
10053 }
10054 // we go through the containers, turning them into shared containers...
10055 if (copy_on_write) {
10056 for (int32_t i = 0; i < dest->size; ++i) {
10057 source->containers[i] = get_copy_of_container(
10058 source->containers[i], &source->typecodes[i], copy_on_write);
10059 }
10060 // we do a shallow copy to the other bitmap
10061 if(dest->size > 0) {
10062 memcpy(dest->containers, source->containers,
10063 dest->size * sizeof(void *));
10064 memcpy(dest->typecodes, source->typecodes,
10065 dest->size * sizeof(uint8_t));
10066 }
10067 } else {
10068 if(dest->size > 0) {
10069 memcpy(dest->typecodes, source->typecodes,
10070 dest->size * sizeof(uint8_t));
10071 }
10072 for (int32_t i = 0; i < dest->size; i++) {
10073 dest->containers[i] =
10074 container_clone(source->containers[i], source->typecodes[i]);
10075 if (dest->containers[i] == NULL) {
10076 for (int32_t j = 0; j < i; j++) {
10077 container_free(dest->containers[j], dest->typecodes[j]);
10078 }
10079 ra_clear_without_containers(dest);
10080 return false;
10081 }
10082 }
10083 }
10084 return true;
10085}
10086
10087bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest,
10088 bool copy_on_write) {
10089 ra_clear_containers(dest); // we are going to overwrite them
10090 if (dest->allocation_size < source->size) {
10091 if (!realloc_array(dest, source->size)) {
10092 return false;
10093 }
10094 }
10095 dest->size = source->size;
10096 memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t));
10097 // we go through the containers, turning them into shared containers...
10098 if (copy_on_write) {
10099 for (int32_t i = 0; i < dest->size; ++i) {
10100 source->containers[i] = get_copy_of_container(
10101 source->containers[i], &source->typecodes[i], copy_on_write);
10102 }
10103 // we do a shallow copy to the other bitmap
10104 memcpy(dest->containers, source->containers,
10105 dest->size * sizeof(void *));
10106 memcpy(dest->typecodes, source->typecodes,
10107 dest->size * sizeof(uint8_t));
10108 } else {
10109 memcpy(dest->typecodes, source->typecodes,
10110 dest->size * sizeof(uint8_t));
10111 for (int32_t i = 0; i < dest->size; i++) {
10112 dest->containers[i] =
10113 container_clone(source->containers[i], source->typecodes[i]);
10114 if (dest->containers[i] == NULL) {
10115 for (int32_t j = 0; j < i; j++) {
10116 container_free(dest->containers[j], dest->typecodes[j]);
10117 }
10118 ra_clear_without_containers(dest);
10119 return false;
10120 }
10121 }
10122 }
10123 return true;
10124}
10125
10126void ra_clear_containers(roaring_array_t *ra) {
10127 for (int32_t i = 0; i < ra->size; ++i) {
10128 container_free(ra->containers[i], ra->typecodes[i]);
10129 }
10130}
10131
10132void ra_reset(roaring_array_t *ra) {
10133 ra_clear_containers(ra);
10134 ra->size = 0;
10135 ra_shrink_to_fit(ra);
10136}
10137
10138void ra_clear_without_containers(roaring_array_t *ra) {
10139 free(ra->containers); // keys and typecodes are allocated with containers
10140 ra->size = 0;
10141 ra->allocation_size = 0;
10142 ra->containers = NULL;
10143 ra->keys = NULL;
10144 ra->typecodes = NULL;
10145}
10146
10147void ra_clear(roaring_array_t *ra) {
10148 ra_clear_containers(ra);
10149 ra_clear_without_containers(ra);
10150}
10151
10152bool extend_array(roaring_array_t *ra, int32_t k) {
10153 int32_t desired_size = ra->size + k;
10154 assert(desired_size <= MAX_CONTAINERS);
10155 if (desired_size > ra->allocation_size) {
10156 int32_t new_capacity =
10157 (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4;
10158 if (new_capacity > MAX_CONTAINERS) {
10159 new_capacity = MAX_CONTAINERS;
10160 }
10161
10162 return realloc_array(ra, new_capacity);
10163 }
10164 return true;
10165}
10166
10167void ra_append(roaring_array_t *ra, uint16_t key, void *container,
10168 uint8_t typecode) {
10169 extend_array(ra, 1);
10170 const int32_t pos = ra->size;
10171
10172 ra->keys[pos] = key;
10173 ra->containers[pos] = container;
10174 ra->typecodes[pos] = typecode;
10175 ra->size++;
10176}
10177
10178void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa,
10179 uint16_t index, bool copy_on_write) {
10180 extend_array(ra, 1);
10181 const int32_t pos = ra->size;
10182
10183 // old contents is junk not needing freeing
10184 ra->keys[pos] = sa->keys[index];
10185 // the shared container will be in two bitmaps
10186 if (copy_on_write) {
10187 sa->containers[index] = get_copy_of_container(
10188 sa->containers[index], &sa->typecodes[index], copy_on_write);
10189 ra->containers[pos] = sa->containers[index];
10190 ra->typecodes[pos] = sa->typecodes[index];
10191 } else {
10192 ra->containers[pos] =
10193 container_clone(sa->containers[index], sa->typecodes[index]);
10194 ra->typecodes[pos] = sa->typecodes[index];
10195 }
10196 ra->size++;
10197}
10198
10199void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa,
10200 uint16_t stopping_key, bool copy_on_write) {
10201 for (int32_t i = 0; i < sa->size; ++i) {
10202 if (sa->keys[i] >= stopping_key) break;
10203 ra_append_copy(ra, sa, i, copy_on_write);
10204 }
10205}
10206
10207void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa,
10208 int32_t start_index, int32_t end_index,
10209 bool copy_on_write) {
10210 extend_array(ra, end_index - start_index);
10211 for (int32_t i = start_index; i < end_index; ++i) {
10212 const int32_t pos = ra->size;
10213 ra->keys[pos] = sa->keys[i];
10214 if (copy_on_write) {
10215 sa->containers[i] = get_copy_of_container(
10216 sa->containers[i], &sa->typecodes[i], copy_on_write);
10217 ra->containers[pos] = sa->containers[i];
10218 ra->typecodes[pos] = sa->typecodes[i];
10219 } else {
10220 ra->containers[pos] =
10221 container_clone(sa->containers[i], sa->typecodes[i]);
10222 ra->typecodes[pos] = sa->typecodes[i];
10223 }
10224 ra->size++;
10225 }
10226}
10227
10228void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa,
10229 uint16_t before_start, bool copy_on_write) {
10230 int start_location = ra_get_index(sa, before_start);
10231 if (start_location >= 0)
10232 ++start_location;
10233 else
10234 start_location = -start_location - 1;
10235 ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write);
10236}
10237
10238void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa,
10239 int32_t start_index, int32_t end_index) {
10240 extend_array(ra, end_index - start_index);
10241
10242 for (int32_t i = start_index; i < end_index; ++i) {
10243 const int32_t pos = ra->size;
10244
10245 ra->keys[pos] = sa->keys[i];
10246 ra->containers[pos] = sa->containers[i];
10247 ra->typecodes[pos] = sa->typecodes[i];
10248 ra->size++;
10249 }
10250}
10251
10252void ra_append_range(roaring_array_t *ra, roaring_array_t *sa,
10253 int32_t start_index, int32_t end_index,
10254 bool copy_on_write) {
10255 extend_array(ra, end_index - start_index);
10256
10257 for (int32_t i = start_index; i < end_index; ++i) {
10258 const int32_t pos = ra->size;
10259 ra->keys[pos] = sa->keys[i];
10260 if (copy_on_write) {
10261 sa->containers[i] = get_copy_of_container(
10262 sa->containers[i], &sa->typecodes[i], copy_on_write);
10263 ra->containers[pos] = sa->containers[i];
10264 ra->typecodes[pos] = sa->typecodes[i];
10265 } else {
10266 ra->containers[pos] =
10267 container_clone(sa->containers[i], sa->typecodes[i]);
10268 ra->typecodes[pos] = sa->typecodes[i];
10269 }
10270 ra->size++;
10271 }
10272}
10273
10274void *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode) {
10275 int i = binarySearch(ra->keys, (int32_t)ra->size, x);
10276 if (i < 0) return NULL;
10277 *typecode = ra->typecodes[i];
10278 return ra->containers[i];
10279}
10280
10281extern void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i,
10282 uint8_t *typecode);
10283
10284void *ra_get_writable_container(roaring_array_t *ra, uint16_t x,
10285 uint8_t *typecode) {
10286 int i = binarySearch(ra->keys, (int32_t)ra->size, x);
10287 if (i < 0) return NULL;
10288 *typecode = ra->typecodes[i];
10289 return get_writable_copy_if_shared(ra->containers[i], typecode);
10290}
10291
10292void *ra_get_writable_container_at_index(roaring_array_t *ra, uint16_t i,
10293 uint8_t *typecode) {
10294 assert(i < ra->size);
10295 *typecode = ra->typecodes[i];
10296 return get_writable_copy_if_shared(ra->containers[i], typecode);
10297}
10298
10299uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) {
10300 return ra->keys[i];
10301}
10302
10303extern int32_t ra_get_index(const roaring_array_t *ra, uint16_t x);
10304
10305extern int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x,
10306 int32_t pos);
10307
10308// everything skipped over is freed
10309int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) {
10310 while (pos < ra->size && ra->keys[pos] < x) {
10311 container_free(ra->containers[pos], ra->typecodes[pos]);
10312 ++pos;
10313 }
10314 return pos;
10315}
10316
10317void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key,
10318 void *container, uint8_t typecode) {
10319 extend_array(ra, 1);
10320 // May be an optimization opportunity with DIY memmove
10321 memmove(&(ra->keys[i + 1]), &(ra->keys[i]),
10322 sizeof(uint16_t) * (ra->size - i));
10323 memmove(&(ra->containers[i + 1]), &(ra->containers[i]),
10324 sizeof(void *) * (ra->size - i));
10325 memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]),
10326 sizeof(uint8_t) * (ra->size - i));
10327 ra->keys[i] = key;
10328 ra->containers[i] = container;
10329 ra->typecodes[i] = typecode;
10330 ra->size++;
10331}
10332
10333// note: Java routine set things to 0, enabling GC.
10334// Java called it "resize" but it was always used to downsize.
10335// Allowing upsize would break the conventions about
10336// valid containers below ra->size.
10337
10338void ra_downsize(roaring_array_t *ra, int32_t new_length) {
10339 assert(new_length <= ra->size);
10340 ra->size = new_length;
10341}
10342
10343void ra_remove_at_index(roaring_array_t *ra, int32_t i) {
10344 memmove(&(ra->containers[i]), &(ra->containers[i + 1]),
10345 sizeof(void *) * (ra->size - i - 1));
10346 memmove(&(ra->keys[i]), &(ra->keys[i + 1]),
10347 sizeof(uint16_t) * (ra->size - i - 1));
10348 memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]),
10349 sizeof(uint8_t) * (ra->size - i - 1));
10350 ra->size--;
10351}
10352
10353void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) {
10354 container_free(ra->containers[i], ra->typecodes[i]);
10355 ra_remove_at_index(ra, i);
10356}
10357
10358// used in inplace andNot only, to slide left the containers from
10359// the mutated RoaringBitmap that are after the largest container of
10360// the argument RoaringBitmap. In use it should be followed by a call to
10361// downsize.
10362//
10363void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end,
10364 uint32_t new_begin) {
10365 assert(begin <= end);
10366 assert(new_begin < begin);
10367
10368 const int range = end - begin;
10369
10370 // We ensure to previously have freed overwritten containers
10371 // that are not copied elsewhere
10372
10373 memmove(&(ra->containers[new_begin]), &(ra->containers[begin]),
10374 sizeof(void *) * range);
10375 memmove(&(ra->keys[new_begin]), &(ra->keys[begin]),
10376 sizeof(uint16_t) * range);
10377 memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]),
10378 sizeof(uint8_t) * range);
10379}
10380
10381void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) {
10382 if (distance > 0) {
10383 extend_array(ra, distance);
10384 }
10385 int32_t srcpos = ra->size - count;
10386 int32_t dstpos = srcpos + distance;
10387 memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]),
10388 sizeof(uint16_t) * count);
10389 memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]),
10390 sizeof(void *) * count);
10391 memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]),
10392 sizeof(uint8_t) * count);
10393 ra->size += distance;
10394}
10395
10396
10397size_t ra_size_in_bytes(roaring_array_t *ra) {
10398 size_t cardinality = 0;
10399 size_t tot_len =
10400 1 /* initial byte type */ + 4 /* tot_len */ + sizeof(roaring_array_t) +
10401 ra->size * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t));
10402 for (int32_t i = 0; i < ra->size; i++) {
10403 tot_len +=
10404 (container_serialization_len(ra->containers[i], ra->typecodes[i]) +
10405 sizeof(uint16_t));
10406 cardinality +=
10407 container_get_cardinality(ra->containers[i], ra->typecodes[i]);
10408 }
10409
10410 if ((cardinality * sizeof(uint32_t) + sizeof(uint32_t)) < tot_len) {
10411 return cardinality * sizeof(uint32_t) + 1 + sizeof(uint32_t);
10412 }
10413 return tot_len;
10414}
10415
10416void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) {
10417 size_t ctr = 0;
10418 for (int32_t i = 0; i < ra->size; ++i) {
10419 int num_added = container_to_uint32_array(
10420 ans + ctr, ra->containers[i], ra->typecodes[i],
10421 ((uint32_t)ra->keys[i]) << 16);
10422 ctr += num_added;
10423 }
10424}
10425
10426bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) {
10427 size_t ctr = 0;
10428 size_t dtr = 0;
10429
10430 size_t t_limit = 0;
10431
10432 bool first = false;
10433 size_t first_skip = 0;
10434
10435 uint32_t *t_ans = NULL;
10436 size_t cur_len = 0;
10437
10438 for (int i = 0; i < ra->size; ++i) {
10439
10440 const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]);
10441 switch (ra->typecodes[i]) {
10442 case BITSET_CONTAINER_TYPE_CODE:
10443 t_limit = ((const bitset_container_t *)container)->cardinality;
10444 break;
10445 case ARRAY_CONTAINER_TYPE_CODE:
10446 t_limit = ((const array_container_t *)container)->cardinality;
10447 break;
10448 case RUN_CONTAINER_TYPE_CODE:
10449 t_limit = run_container_cardinality((const run_container_t *)container);
10450 break;
10451 }
10452 if (ctr + t_limit - 1 >= offset && ctr < offset + limit){
10453 if (!first){
10454 //first_skip = t_limit - (ctr + t_limit - offset);
10455 first_skip = offset - ctr;
10456 first = true;
10457 t_ans = (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit));
10458 if(t_ans == NULL) {
10459 return false;
10460 }
10461 memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ;
10462 cur_len = first_skip + limit;
10463 }
10464 if (dtr + t_limit > cur_len){
10465 uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit));
10466 if(append_ans == NULL) {
10467 if(t_ans != NULL) free(t_ans);
10468 return false;
10469 }
10470 memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit));
10471 cur_len = cur_len + t_limit;
10472 memcpy(append_ans, t_ans, dtr * sizeof(uint32_t));
10473 free(t_ans);
10474 t_ans = append_ans;
10475 }
10476 switch (ra->typecodes[i]) {
10477 case BITSET_CONTAINER_TYPE_CODE:
10478 container_to_uint32_array(
10479 t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i],
10480 ((uint32_t)ra->keys[i]) << 16);
10481 break;
10482 case ARRAY_CONTAINER_TYPE_CODE:
10483 container_to_uint32_array(
10484 t_ans + dtr, (const array_container_t *)container, ra->typecodes[i],
10485 ((uint32_t)ra->keys[i]) << 16);
10486 break;
10487 case RUN_CONTAINER_TYPE_CODE:
10488 container_to_uint32_array(
10489 t_ans + dtr, (const run_container_t *)container, ra->typecodes[i],
10490 ((uint32_t)ra->keys[i]) << 16);
10491 break;
10492 }
10493 dtr += t_limit;
10494 }
10495 ctr += t_limit;
10496 if (dtr-first_skip >= limit) break;
10497 }
10498 if(t_ans != NULL) {
10499 memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t));
10500 free(t_ans);
10501 }
10502 return true;
10503}
10504
10505bool ra_has_run_container(const roaring_array_t *ra) {
10506 for (int32_t k = 0; k < ra->size; ++k) {
10507 if (get_container_type(ra->containers[k], ra->typecodes[k]) ==
10508 RUN_CONTAINER_TYPE_CODE)
10509 return true;
10510 }
10511 return false;
10512}
10513
10514uint32_t ra_portable_header_size(const roaring_array_t *ra) {
10515 if (ra_has_run_container(ra)) {
10516 if (ra->size <
10517 NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets
10518 return 4 + (ra->size + 7) / 8 + 4 * ra->size;
10519 }
10520 return 4 + (ra->size + 7) / 8 +
10521 8 * ra->size; // - 4 because we pack the size with the cookie
10522 } else {
10523 return 4 + 4 + 8 * ra->size;
10524 }
10525}
10526
10527size_t ra_portable_size_in_bytes(const roaring_array_t *ra) {
10528 size_t count = ra_portable_header_size(ra);
10529
10530 for (int32_t k = 0; k < ra->size; ++k) {
10531 count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
10532 }
10533 return count;
10534}
10535
10536size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) {
10537 char *initbuf = buf;
10538 uint32_t startOffset = 0;
10539 bool hasrun = ra_has_run_container(ra);
10540 if (hasrun) {
10541 uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16);
10542 memcpy(buf, &cookie, sizeof(cookie));
10543 buf += sizeof(cookie);
10544 uint32_t s = (ra->size + 7) / 8;
10545 uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1);
10546 assert(bitmapOfRunContainers != NULL); // todo: handle
10547 for (int32_t i = 0; i < ra->size; ++i) {
10548 if (get_container_type(ra->containers[i], ra->typecodes[i]) ==
10549 RUN_CONTAINER_TYPE_CODE) {
10550 bitmapOfRunContainers[i / 8] |= (1 << (i % 8));
10551 }
10552 }
10553 memcpy(buf, bitmapOfRunContainers, s);
10554 buf += s;
10555 free(bitmapOfRunContainers);
10556 if (ra->size < NO_OFFSET_THRESHOLD) {
10557 startOffset = 4 + 4 * ra->size + s;
10558 } else {
10559 startOffset = 4 + 8 * ra->size + s;
10560 }
10561 } else { // backwards compatibility
10562 uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER;
10563
10564 memcpy(buf, &cookie, sizeof(cookie));
10565 buf += sizeof(cookie);
10566 memcpy(buf, &ra->size, sizeof(ra->size));
10567 buf += sizeof(ra->size);
10568
10569 startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size;
10570 }
10571 for (int32_t k = 0; k < ra->size; ++k) {
10572 memcpy(buf, &ra->keys[k], sizeof(ra->keys[k]));
10573 buf += sizeof(ra->keys[k]);
10574 // get_cardinality returns a value in [1,1<<16], subtracting one
10575 // we get [0,1<<16 - 1] which fits in 16 bits
10576 uint16_t card = (uint16_t)(
10577 container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1);
10578 memcpy(buf, &card, sizeof(card));
10579 buf += sizeof(card);
10580 }
10581 if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) {
10582 // writing the containers offsets
10583 for (int32_t k = 0; k < ra->size; k++) {
10584 memcpy(buf, &startOffset, sizeof(startOffset));
10585 buf += sizeof(startOffset);
10586 startOffset =
10587 startOffset +
10588 container_size_in_bytes(ra->containers[k], ra->typecodes[k]);
10589 }
10590 }
10591 for (int32_t k = 0; k < ra->size; ++k) {
10592 buf += container_write(ra->containers[k], ra->typecodes[k], buf);
10593 }
10594 return buf - initbuf;
10595}
10596
10597// Quickly checks whether there is a serialized bitmap at the pointer,
10598// not exceeding size "maxbytes" in bytes. This function does not allocate
10599// memory dynamically.
10600//
10601// This function returns 0 if and only if no valid bitmap is found.
10602// Otherwise, it returns how many bytes are occupied.
10603//
10604size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) {
10605 size_t bytestotal = sizeof(int32_t);// for cookie
10606 if(bytestotal > maxbytes) return 0;
10607 uint32_t cookie;
10608 memcpy(&cookie, buf, sizeof(int32_t));
10609 buf += sizeof(uint32_t);
10610 if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
10611 cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
10612 return 0;
10613 }
10614 int32_t size;
10615
10616 if ((cookie & 0xFFFF) == SERIAL_COOKIE)
10617 size = (cookie >> 16) + 1;
10618 else {
10619 bytestotal += sizeof(int32_t);
10620 if(bytestotal > maxbytes) return 0;
10621 memcpy(&size, buf, sizeof(int32_t));
10622 buf += sizeof(uint32_t);
10623 }
10624 if (size > (1<<16)) {
10625 return 0; // logically impossible
10626 }
10627 char *bitmapOfRunContainers = NULL;
10628 bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
10629 if (hasrun) {
10630 int32_t s = (size + 7) / 8;
10631 bytestotal += s;
10632 if(bytestotal > maxbytes) return 0;
10633 bitmapOfRunContainers = (char *)buf;
10634 buf += s;
10635 }
10636 bytestotal += size * 2 * sizeof(uint16_t);
10637 if(bytestotal > maxbytes) return 0;
10638 uint16_t *keyscards = (uint16_t *)buf;
10639 buf += size * 2 * sizeof(uint16_t);
10640 if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
10641 // skipping the offsets
10642 bytestotal += size * 4;
10643 if(bytestotal > maxbytes) return 0;
10644 buf += size * 4;
10645 }
10646 // Reading the containers
10647 for (int32_t k = 0; k < size; ++k) {
10648 uint16_t tmp;
10649 memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
10650 uint32_t thiscard = tmp + 1;
10651 bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
10652 bool isrun = false;
10653 if(hasrun) {
10654 if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
10655 isbitmap = false;
10656 isrun = true;
10657 }
10658 }
10659 if (isbitmap) {
10660 size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
10661 bytestotal += containersize;
10662 if(bytestotal > maxbytes) return 0;
10663 buf += containersize;
10664 } else if (isrun) {
10665 bytestotal += sizeof(uint16_t);
10666 if(bytestotal > maxbytes) return 0;
10667 uint16_t n_runs;
10668 memcpy(&n_runs, buf, sizeof(uint16_t));
10669 buf += sizeof(uint16_t);
10670 size_t containersize = n_runs * sizeof(rle16_t);
10671 bytestotal += containersize;
10672 if(bytestotal > maxbytes) return 0;
10673 buf += containersize;
10674 } else {
10675 size_t containersize = thiscard * sizeof(uint16_t);
10676 bytestotal += containersize;
10677 if(bytestotal > maxbytes) return 0;
10678 buf += containersize;
10679 }
10680 }
10681 return bytestotal;
10682}
10683
10684
10685// this function populates answer from the content of buf (reading up to maxbytes bytes).
10686// The function returns false if a properly serialized bitmap cannot be found.
10687// if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes.
10688bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) {
10689 *readbytes = sizeof(int32_t);// for cookie
10690 if(*readbytes > maxbytes) {
10691 fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n");
10692 return false;
10693 }
10694 uint32_t cookie;
10695 memcpy(&cookie, buf, sizeof(int32_t));
10696 buf += sizeof(uint32_t);
10697 if ((cookie & 0xFFFF) != SERIAL_COOKIE &&
10698 cookie != SERIAL_COOKIE_NO_RUNCONTAINER) {
10699 fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n",
10700 cookie);
10701 return false;
10702 }
10703 int32_t size;
10704
10705 if ((cookie & 0xFFFF) == SERIAL_COOKIE)
10706 size = (cookie >> 16) + 1;
10707 else {
10708 *readbytes += sizeof(int32_t);
10709 if(*readbytes > maxbytes) {
10710 fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n");
10711 return false;
10712 }
10713 memcpy(&size, buf, sizeof(int32_t));
10714 buf += sizeof(uint32_t);
10715 }
10716 if (size > (1<<16)) {
10717 fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n",
10718 size);
10719 return false; // logically impossible
10720 }
10721 const char *bitmapOfRunContainers = NULL;
10722 bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE;
10723 if (hasrun) {
10724 int32_t s = (size + 7) / 8;
10725 *readbytes += s;
10726 if(*readbytes > maxbytes) {// data is corrupted?
10727 fprintf(stderr, "Ran out of bytes while reading run bitmap.\n");
10728 return false;
10729 }
10730 bitmapOfRunContainers = buf;
10731 buf += s;
10732 }
10733 uint16_t *keyscards = (uint16_t *)buf;
10734
10735 *readbytes += size * 2 * sizeof(uint16_t);
10736 if(*readbytes > maxbytes) {
10737 fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n");
10738 return false;
10739 }
10740 buf += size * 2 * sizeof(uint16_t);
10741
10742 bool is_ok = ra_init_with_capacity(answer, size);
10743 if (!is_ok) {
10744 fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n");
10745 return false;
10746 }
10747
10748 for (int32_t k = 0; k < size; ++k) {
10749 uint16_t tmp;
10750 memcpy(&tmp, keyscards + 2*k, sizeof(tmp));
10751 answer->keys[k] = tmp;
10752 }
10753 if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) {
10754 *readbytes += size * 4;
10755 if(*readbytes > maxbytes) {// data is corrupted?
10756 fprintf(stderr, "Ran out of bytes while reading offsets.\n");
10757 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10758 return false;
10759 }
10760
10761 // skipping the offsets
10762 buf += size * 4;
10763 }
10764 // Reading the containers
10765 for (int32_t k = 0; k < size; ++k) {
10766 uint16_t tmp;
10767 memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp));
10768 uint32_t thiscard = tmp + 1;
10769 bool isbitmap = (thiscard > DEFAULT_MAX_SIZE);
10770 bool isrun = false;
10771 if(hasrun) {
10772 if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) {
10773 isbitmap = false;
10774 isrun = true;
10775 }
10776 }
10777 if (isbitmap) {
10778 // we check that the read is allowed
10779 size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
10780 *readbytes += containersize;
10781 if(*readbytes > maxbytes) {
10782 fprintf(stderr, "Running out of bytes while reading a bitset container.\n");
10783 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10784 return false;
10785 }
10786 // it is now safe to read
10787 bitset_container_t *c = bitset_container_create();
10788 if(c == NULL) {// memory allocation failure
10789 fprintf(stderr, "Failed to allocate memory for a bitset container.\n");
10790 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10791 return false;
10792 }
10793 answer->size++;
10794 buf += bitset_container_read(thiscard, c, buf);
10795 answer->containers[k] = c;
10796 answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE;
10797 } else if (isrun) {
10798 // we check that the read is allowed
10799 *readbytes += sizeof(uint16_t);
10800 if(*readbytes > maxbytes) {
10801 fprintf(stderr, "Running out of bytes while reading a run container (header).\n");
10802 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10803 return false;
10804 }
10805 uint16_t n_runs;
10806 memcpy(&n_runs, buf, sizeof(uint16_t));
10807 size_t containersize = n_runs * sizeof(rle16_t);
10808 *readbytes += containersize;
10809 if(*readbytes > maxbytes) {// data is corrupted?
10810 fprintf(stderr, "Running out of bytes while reading a run container.\n");
10811 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10812 return false;
10813 }
10814 // it is now safe to read
10815
10816 run_container_t *c = run_container_create();
10817 if(c == NULL) {// memory allocation failure
10818 fprintf(stderr, "Failed to allocate memory for a run container.\n");
10819 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10820 return false;
10821 }
10822 answer->size++;
10823 buf += run_container_read(thiscard, c, buf);
10824 answer->containers[k] = c;
10825 answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE;
10826 } else {
10827 // we check that the read is allowed
10828 size_t containersize = thiscard * sizeof(uint16_t);
10829 *readbytes += containersize;
10830 if(*readbytes > maxbytes) {// data is corrupted?
10831 fprintf(stderr, "Running out of bytes while reading an array container.\n");
10832 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10833 return false;
10834 }
10835 // it is now safe to read
10836 array_container_t *c =
10837 array_container_create_given_capacity(thiscard);
10838 if(c == NULL) {// memory allocation failure
10839 fprintf(stderr, "Failed to allocate memory for an array container.\n");
10840 ra_clear(answer);// we need to clear the containers already allocated, and the roaring array
10841 return false;
10842 }
10843 answer->size++;
10844 buf += array_container_read(thiscard, c, buf);
10845 answer->containers[k] = c;
10846 answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE;
10847 }
10848 }
10849 return true;
10850}
10851/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */
10852/* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */
10853
10854struct roaring_pq_element_s {
10855 uint64_t size;
10856 bool is_temporary;
10857 roaring_bitmap_t *bitmap;
10858};
10859
10860typedef struct roaring_pq_element_s roaring_pq_element_t;
10861
10862struct roaring_pq_s {
10863 roaring_pq_element_t *elements;
10864 uint64_t size;
10865};
10866
10867typedef struct roaring_pq_s roaring_pq_t;
10868
10869static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) {
10870 return t1->size < t2->size;
10871}
10872
10873static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) {
10874 uint64_t i = pq->size;
10875 pq->elements[pq->size++] = *t;
10876 while (i > 0) {
10877 uint64_t p = (i - 1) >> 1;
10878 roaring_pq_element_t ap = pq->elements[p];
10879 if (!compare(t, &ap)) break;
10880 pq->elements[i] = ap;
10881 i = p;
10882 }
10883 pq->elements[i] = *t;
10884}
10885
10886static void pq_free(roaring_pq_t *pq) {
10887 free(pq->elements);
10888 pq->elements = NULL; // paranoid
10889 free(pq);
10890}
10891
10892static void percolate_down(roaring_pq_t *pq, uint32_t i) {
10893 uint32_t size = (uint32_t)pq->size;
10894 uint32_t hsize = size >> 1;
10895 roaring_pq_element_t ai = pq->elements[i];
10896 while (i < hsize) {
10897 uint32_t l = (i << 1) + 1;
10898 uint32_t r = l + 1;
10899 roaring_pq_element_t bestc = pq->elements[l];
10900 if (r < size) {
10901 if (compare(pq->elements + r, &bestc)) {
10902 l = r;
10903 bestc = pq->elements[r];
10904 }
10905 }
10906 if (!compare(&bestc, &ai)) {
10907 break;
10908 }
10909 pq->elements[i] = bestc;
10910 i = l;
10911 }
10912 pq->elements[i] = ai;
10913}
10914
10915static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) {
10916 roaring_pq_t *answer = (roaring_pq_t *)malloc(sizeof(roaring_pq_t));
10917 answer->elements =
10918 (roaring_pq_element_t *)malloc(sizeof(roaring_pq_element_t) * length);
10919 answer->size = length;
10920 for (uint32_t i = 0; i < length; i++) {
10921 answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i];
10922 answer->elements[i].is_temporary = false;
10923 answer->elements[i].size =
10924 roaring_bitmap_portable_size_in_bytes(arr[i]);
10925 }
10926 for (int32_t i = (length >> 1); i >= 0; i--) {
10927 percolate_down(answer, i);
10928 }
10929 return answer;
10930}
10931
10932static roaring_pq_element_t pq_poll(roaring_pq_t *pq) {
10933 roaring_pq_element_t ans = *pq->elements;
10934 if (pq->size > 1) {
10935 pq->elements[0] = pq->elements[--pq->size];
10936 percolate_down(pq, 0);
10937 } else
10938 --pq->size;
10939 // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size;
10940 return ans;
10941}
10942
10943// this function consumes and frees the inputs
10944static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1,
10945 roaring_bitmap_t *x2) {
10946 uint8_t container_result_type = 0;
10947 const int length1 = ra_get_size(&x1->high_low_container),
10948 length2 = ra_get_size(&x2->high_low_container);
10949 if (0 == length1) {
10950 roaring_bitmap_free(x1);
10951 return x2;
10952 }
10953 if (0 == length2) {
10954 roaring_bitmap_free(x2);
10955 return x1;
10956 }
10957 uint32_t neededcap = length1 > length2 ? length2 : length1;
10958 roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
10959 int pos1 = 0, pos2 = 0;
10960 uint8_t container_type_1, container_type_2;
10961 uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
10962 uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
10963 while (true) {
10964 if (s1 == s2) {
10965 // todo: unsharing can be inefficient as it may create a clone where
10966 // none
10967 // is needed, but it has the benefit of being easy to reason about.
10968 ra_unshare_container_at_index(&x1->high_low_container, pos1);
10969 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
10970 &container_type_1);
10971 assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE);
10972 ra_unshare_container_at_index(&x2->high_low_container, pos2);
10973 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
10974 &container_type_2);
10975 assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE);
10976 void *c;
10977
10978 if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) &&
10979 (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) {
10980 c = container_lazy_ior(c2, container_type_2, c1,
10981 container_type_1,
10982 &container_result_type);
10983 container_free(c1, container_type_1);
10984 if (c != c2) {
10985 container_free(c2, container_type_2);
10986 }
10987 } else {
10988 c = container_lazy_ior(c1, container_type_1, c2,
10989 container_type_2,
10990 &container_result_type);
10991 container_free(c2, container_type_2);
10992 if (c != c1) {
10993 container_free(c1, container_type_1);
10994 }
10995 }
10996 // since we assume that the initial containers are non-empty, the
10997 // result here
10998 // can only be non-empty
10999 ra_append(&answer->high_low_container, s1, c,
11000 container_result_type);
11001 ++pos1;
11002 ++pos2;
11003 if (pos1 == length1) break;
11004 if (pos2 == length2) break;
11005 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
11006 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
11007
11008 } else if (s1 < s2) { // s1 < s2
11009 void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
11010 &container_type_1);
11011 ra_append(&answer->high_low_container, s1, c1, container_type_1);
11012 pos1++;
11013 if (pos1 == length1) break;
11014 s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
11015
11016 } else { // s1 > s2
11017 void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
11018 &container_type_2);
11019 ra_append(&answer->high_low_container, s2, c2, container_type_2);
11020 pos2++;
11021 if (pos2 == length2) break;
11022 s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
11023 }
11024 }
11025 if (pos1 == length1) {
11026 ra_append_move_range(&answer->high_low_container,
11027 &x2->high_low_container, pos2, length2);
11028 } else if (pos2 == length2) {
11029 ra_append_move_range(&answer->high_low_container,
11030 &x1->high_low_container, pos1, length1);
11031 }
11032 ra_clear_without_containers(&x1->high_low_container);
11033 ra_clear_without_containers(&x2->high_low_container);
11034 free(x1);
11035 free(x2);
11036 return answer;
11037}
11038
11039/**
11040 * Compute the union of 'number' bitmaps using a heap. This can
11041 * sometimes be faster than roaring_bitmap_or_many which uses
11042 * a naive algorithm. Caller is responsible for freeing the
11043 * result.
11044 */
11045roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number,
11046 const roaring_bitmap_t **x) {
11047 if (number == 0) {
11048 return roaring_bitmap_create();
11049 }
11050 if (number == 1) {
11051 return roaring_bitmap_copy(x[0]);
11052 }
11053 roaring_pq_t *pq = create_pq(x, number);
11054 while (pq->size > 1) {
11055 roaring_pq_element_t x1 = pq_poll(pq);
11056 roaring_pq_element_t x2 = pq_poll(pq);
11057
11058 if (x1.is_temporary && x2.is_temporary) {
11059 roaring_bitmap_t *newb =
11060 lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap);
11061 // should normally return a fresh new bitmap *except* that
11062 // it can return x1.bitmap or x2.bitmap in degenerate cases
11063 bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap));
11064 uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
11065 roaring_pq_element_t newelement = {
11066 .size = bsize, .is_temporary = temporary, .bitmap = newb};
11067 pq_add(pq, &newelement);
11068 } else if (x2.is_temporary) {
11069 roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false);
11070 x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap);
11071 pq_add(pq, &x2);
11072 } else if (x1.is_temporary) {
11073 roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false);
11074 x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap);
11075
11076 pq_add(pq, &x1);
11077 } else {
11078 roaring_bitmap_t *newb =
11079 roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false);
11080 uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb);
11081 roaring_pq_element_t newelement = {
11082 .size = bsize, .is_temporary = true, .bitmap = newb};
11083
11084 pq_add(pq, &newelement);
11085 }
11086 }
11087 roaring_pq_element_t X = pq_poll(pq);
11088 roaring_bitmap_t *answer = X.bitmap;
11089 roaring_bitmap_repair_after_lazy(answer);
11090 pq_free(pq);
11091 return answer;
11092}
11093/* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */
11094