1 | /* auto-generated on Tue Dec 18 09:42:59 CST 2018. Do not edit! */ |
2 | #include "roaring/roaring.h" |
3 | |
4 | /* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */ |
5 | #ifdef DMALLOC |
6 | #include "dmalloc.h" |
7 | #endif |
8 | |
9 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */ |
10 | #include <assert.h> |
11 | #include <stdbool.h> |
12 | #include <stdint.h> |
13 | #include <stdio.h> |
14 | #include <stdlib.h> |
15 | #include <string.h> |
16 | |
/* External declaration of binarySearch for this translation unit.
 * NOTE(review): presumably the inline definition comes from
 * roaring/roaring.h and this `extern inline` declaration is what makes the
 * compiler emit the out-of-line copy here (C99 inline rules) -- confirm
 * against the header. */
extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
                                   uint16_t ikey);
19 | |
20 | #ifdef USESSE4 |
21 | // used by intersect_vector16 |
22 | ALIGNED(0x1000) |
23 | static const uint8_t shuffle_mask16[] = { |
24 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
25 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
26 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF, |
27 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
28 | 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
29 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
30 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
31 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
32 | 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
33 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF, |
34 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF, |
35 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
36 | 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
37 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, |
38 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
39 | 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
40 | 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
41 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF, |
42 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
43 | 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
44 | 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, |
45 | 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
46 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, |
47 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
48 | 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
49 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF, |
50 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, |
51 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
52 | 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
53 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF, |
54 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
55 | 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
56 | 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
57 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF, |
58 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
59 | 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
60 | 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, |
61 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, |
62 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
63 | 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
64 | 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, |
65 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
66 | 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF, |
67 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
68 | 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
69 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
70 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
71 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
72 | 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
73 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF, |
74 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
75 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
76 | 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
77 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
78 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
79 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
80 | 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
81 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, |
82 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
83 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
84 | 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
85 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, |
86 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
87 | 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
88 | 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
89 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF, |
90 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, |
91 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
92 | 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
93 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, |
94 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
95 | 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
96 | 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
97 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, |
98 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, |
99 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
100 | 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
101 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, |
102 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
103 | 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
104 | 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, |
105 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, |
106 | 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
107 | 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
108 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, |
109 | 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
110 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, |
111 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
112 | 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
113 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF, |
114 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, |
115 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
116 | 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
117 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF, |
118 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
119 | 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
120 | 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
121 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF, |
122 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
123 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
124 | 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
125 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, |
126 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
127 | 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
128 | 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
129 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
130 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, |
131 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
132 | 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
133 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF, |
134 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
135 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
136 | 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
137 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13, |
138 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
139 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
140 | 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, |
141 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, |
142 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
143 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
144 | 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
145 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, |
146 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
147 | 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
148 | 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, |
149 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, |
150 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
151 | 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
152 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
153 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF, |
154 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, |
155 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
156 | 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
157 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, |
158 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
159 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
160 | 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
161 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11, |
162 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, |
163 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
164 | 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
165 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13, |
166 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
167 | 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
168 | 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
169 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11, |
170 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
171 | 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
172 | 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, |
173 | 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, |
174 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, |
175 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
176 | 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
177 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11, |
178 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, |
179 | 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
180 | 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, |
181 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11, |
182 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
183 | 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
184 | 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
185 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11, |
186 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
187 | 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
188 | 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13, |
189 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11, |
190 | 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
191 | 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, |
192 | 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, |
193 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
194 | 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF, |
195 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
196 | 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
197 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
198 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
199 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
200 | 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
201 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF, |
202 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
203 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
204 | 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
205 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
206 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
207 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
208 | 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
209 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15, |
210 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
211 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
212 | 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
213 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15, |
214 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
215 | 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
216 | 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
217 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF, |
218 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, |
219 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
220 | 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
221 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, |
222 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
223 | 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
224 | 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
225 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, |
226 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, |
227 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
228 | 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
229 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15, |
230 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
231 | 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
232 | 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
233 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, |
234 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
235 | 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
236 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15, |
237 | 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
238 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, |
239 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
240 | 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
241 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15, |
242 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, |
243 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
244 | 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
245 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15, |
246 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
247 | 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
248 | 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
249 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15, |
250 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
251 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
252 | 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, |
253 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15, |
254 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
255 | 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
256 | 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, |
257 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
258 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, |
259 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
260 | 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
261 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15, |
262 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
263 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
264 | 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
265 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11, |
266 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
267 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
268 | 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15, |
269 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15, |
270 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
271 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
272 | 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, |
273 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9, |
274 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
275 | 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
276 | 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, |
277 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9, |
278 | 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
279 | 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, |
280 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
281 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF, |
282 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13, |
283 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
284 | 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
285 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, |
286 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
287 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
288 | 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
289 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13, |
290 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13, |
291 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
292 | 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
293 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15, |
294 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
295 | 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
296 | 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
297 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13, |
298 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
299 | 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
300 | 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15, |
301 | 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, |
302 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, |
303 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
304 | 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
305 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13, |
306 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, |
307 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
308 | 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, |
309 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13, |
310 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
311 | 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
312 | 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
313 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13, |
314 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, |
315 | 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
316 | 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15, |
317 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13, |
318 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
319 | 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
320 | 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15, |
321 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7, |
322 | 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13, |
323 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
324 | 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
325 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15, |
326 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
327 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
328 | 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
329 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13, |
330 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, |
331 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
332 | 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15, |
333 | 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15, |
334 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, |
335 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
336 | 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
337 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11, |
338 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, |
339 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
340 | 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, |
341 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11, |
342 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
343 | 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
344 | 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
345 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13, |
346 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, |
347 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
348 | 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15, |
349 | 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13, |
350 | 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, |
351 | 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, |
352 | 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15, |
353 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9, |
354 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9, |
355 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
356 | 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
357 | 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11, |
358 | 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, |
359 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
360 | 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
361 | 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9, |
362 | 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5, |
363 | 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, |
364 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, |
365 | 12, 13, 14, 15}; |
366 | |
367 | /** |
368 | * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions |
369 | * Optimized by D. Lemire on May 3rd 2013 |
370 | */ |
371 | int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, |
372 | const uint16_t *__restrict__ B, size_t s_b, |
373 | uint16_t *C) { |
374 | size_t count = 0; |
375 | size_t i_a = 0, i_b = 0; |
376 | const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); |
377 | const size_t st_a = (s_a / vectorlength) * vectorlength; |
378 | const size_t st_b = (s_b / vectorlength) * vectorlength; |
379 | __m128i v_a, v_b; |
380 | if ((i_a < st_a) && (i_b < st_b)) { |
381 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
382 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
383 | while ((A[i_a] == 0) || (B[i_b] == 0)) { |
384 | const __m128i res_v = _mm_cmpestrm( |
385 | v_b, vectorlength, v_a, vectorlength, |
386 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
387 | const int r = _mm_extract_epi32(res_v, 0); |
388 | __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + r); |
389 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
390 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
391 | count += _mm_popcnt_u32(r); |
392 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
393 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
394 | if (a_max <= b_max) { |
395 | i_a += vectorlength; |
396 | if (i_a == st_a) break; |
397 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
398 | } |
399 | if (b_max <= a_max) { |
400 | i_b += vectorlength; |
401 | if (i_b == st_b) break; |
402 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
403 | } |
404 | } |
405 | if ((i_a < st_a) && (i_b < st_b)) |
406 | while (true) { |
407 | const __m128i res_v = _mm_cmpistrm( |
408 | v_b, v_a, |
409 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
410 | const int r = _mm_extract_epi32(res_v, 0); |
411 | __m128i sm16 = |
412 | _mm_load_si128((const __m128i *)shuffle_mask16 + r); |
413 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
414 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
415 | count += _mm_popcnt_u32(r); |
416 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
417 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
418 | if (a_max <= b_max) { |
419 | i_a += vectorlength; |
420 | if (i_a == st_a) break; |
421 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
422 | } |
423 | if (b_max <= a_max) { |
424 | i_b += vectorlength; |
425 | if (i_b == st_b) break; |
426 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
427 | } |
428 | } |
429 | } |
430 | // intersect the tail using scalar intersection |
431 | while (i_a < s_a && i_b < s_b) { |
432 | uint16_t a = A[i_a]; |
433 | uint16_t b = B[i_b]; |
434 | if (a < b) { |
435 | i_a++; |
436 | } else if (b < a) { |
437 | i_b++; |
438 | } else { |
439 | C[count] = a; //==b; |
440 | count++; |
441 | i_a++; |
442 | i_b++; |
443 | } |
444 | } |
445 | return (int32_t)count; |
446 | } |
447 | |
448 | int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, |
449 | size_t s_a, |
450 | const uint16_t *__restrict__ B, |
451 | size_t s_b) { |
452 | size_t count = 0; |
453 | size_t i_a = 0, i_b = 0; |
454 | const int vectorlength = sizeof(__m128i) / sizeof(uint16_t); |
455 | const size_t st_a = (s_a / vectorlength) * vectorlength; |
456 | const size_t st_b = (s_b / vectorlength) * vectorlength; |
457 | __m128i v_a, v_b; |
458 | if ((i_a < st_a) && (i_b < st_b)) { |
459 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
460 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
461 | while ((A[i_a] == 0) || (B[i_b] == 0)) { |
462 | const __m128i res_v = _mm_cmpestrm( |
463 | v_b, vectorlength, v_a, vectorlength, |
464 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
465 | const int r = _mm_extract_epi32(res_v, 0); |
466 | count += _mm_popcnt_u32(r); |
467 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
468 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
469 | if (a_max <= b_max) { |
470 | i_a += vectorlength; |
471 | if (i_a == st_a) break; |
472 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
473 | } |
474 | if (b_max <= a_max) { |
475 | i_b += vectorlength; |
476 | if (i_b == st_b) break; |
477 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
478 | } |
479 | } |
480 | if ((i_a < st_a) && (i_b < st_b)) |
481 | while (true) { |
482 | const __m128i res_v = _mm_cmpistrm( |
483 | v_b, v_a, |
484 | _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK); |
485 | const int r = _mm_extract_epi32(res_v, 0); |
486 | count += _mm_popcnt_u32(r); |
487 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
488 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
489 | if (a_max <= b_max) { |
490 | i_a += vectorlength; |
491 | if (i_a == st_a) break; |
492 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
493 | } |
494 | if (b_max <= a_max) { |
495 | i_b += vectorlength; |
496 | if (i_b == st_b) break; |
497 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
498 | } |
499 | } |
500 | } |
501 | // intersect the tail using scalar intersection |
502 | while (i_a < s_a && i_b < s_b) { |
503 | uint16_t a = A[i_a]; |
504 | uint16_t b = B[i_b]; |
505 | if (a < b) { |
506 | i_a++; |
507 | } else if (b < a) { |
508 | i_b++; |
509 | } else { |
510 | count++; |
511 | i_a++; |
512 | i_b++; |
513 | } |
514 | } |
515 | return (int32_t)count; |
516 | } |
517 | |
518 | int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, |
519 | const uint16_t *__restrict__ B, size_t s_b, |
520 | uint16_t *C) { |
521 | // we handle the degenerate case |
522 | if (s_a == 0) return 0; |
523 | if (s_b == 0) { |
524 | if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a); |
525 | return (int32_t)s_a; |
526 | } |
527 | // handle the leading zeroes, it is messy but it allows us to use the fast |
528 | // _mm_cmpistrm instrinsic safely |
529 | int32_t count = 0; |
530 | if ((A[0] == 0) || (B[0] == 0)) { |
531 | if ((A[0] == 0) && (B[0] == 0)) { |
532 | A++; |
533 | s_a--; |
534 | B++; |
535 | s_b--; |
536 | } else if (A[0] == 0) { |
537 | C[count++] = 0; |
538 | A++; |
539 | s_a--; |
540 | } else { |
541 | B++; |
542 | s_b--; |
543 | } |
544 | } |
545 | // at this point, we have two non-empty arrays, made of non-zero |
546 | // increasing values. |
547 | size_t i_a = 0, i_b = 0; |
548 | const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t); |
549 | const size_t st_a = (s_a / vectorlength) * vectorlength; |
550 | const size_t st_b = (s_b / vectorlength) * vectorlength; |
551 | if ((i_a < st_a) && (i_b < st_b)) { // this is the vectorized code path |
552 | __m128i v_a, v_b; //, v_bmax; |
553 | // we load a vector from A and a vector from B |
554 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
555 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
556 | // we have a runningmask which indicates which values from A have been |
557 | // spotted in B, these don't get written out. |
558 | __m128i runningmask_a_found_in_b = _mm_setzero_si128(); |
559 | /**** |
560 | * start of the main vectorized loop |
561 | *****/ |
562 | while (true) { |
563 | // afoundinb will contain a mask indicate for each entry in A |
564 | // whether it is seen |
565 | // in B |
566 | const __m128i a_found_in_b = |
567 | _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | |
568 | _SIDD_BIT_MASK); |
569 | runningmask_a_found_in_b = |
570 | _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); |
571 | // we always compare the last values of A and B |
572 | const uint16_t a_max = A[i_a + vectorlength - 1]; |
573 | const uint16_t b_max = B[i_b + vectorlength - 1]; |
574 | if (a_max <= b_max) { |
575 | // Ok. In this code path, we are ready to write our v_a |
576 | // because there is no need to read more from B, they will |
577 | // all be large values. |
578 | const int bitmask_belongs_to_difference = |
579 | _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; |
580 | /*** next few lines are probably expensive *****/ |
581 | __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + |
582 | bitmask_belongs_to_difference); |
583 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
584 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
585 | count += _mm_popcnt_u32(bitmask_belongs_to_difference); |
586 | // we advance a |
587 | i_a += vectorlength; |
588 | if (i_a == st_a) // no more |
589 | break; |
590 | runningmask_a_found_in_b = _mm_setzero_si128(); |
591 | v_a = _mm_lddqu_si128((__m128i *)&A[i_a]); |
592 | } |
593 | if (b_max <= a_max) { |
594 | // in this code path, the current v_b has become useless |
595 | i_b += vectorlength; |
596 | if (i_b == st_b) break; |
597 | v_b = _mm_lddqu_si128((__m128i *)&B[i_b]); |
598 | } |
599 | } |
600 | // at this point, either we have i_a == st_a, which is the end of the |
601 | // vectorized processing, |
602 | // or we have i_b == st_b, and we are not done processing the vector... |
603 | // so we need to finish it off. |
604 | if (i_a < st_a) { // we have unfinished business... |
605 | uint16_t buffer[8]; // buffer to do a masked load |
606 | memset(buffer, 0, 8 * sizeof(uint16_t)); |
607 | memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t)); |
608 | v_b = _mm_lddqu_si128((__m128i *)buffer); |
609 | const __m128i a_found_in_b = |
610 | _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | |
611 | _SIDD_BIT_MASK); |
612 | runningmask_a_found_in_b = |
613 | _mm_or_si128(runningmask_a_found_in_b, a_found_in_b); |
614 | const int bitmask_belongs_to_difference = |
615 | _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF; |
616 | __m128i sm16 = _mm_load_si128((const __m128i *)shuffle_mask16 + |
617 | bitmask_belongs_to_difference); |
618 | __m128i p = _mm_shuffle_epi8(v_a, sm16); |
619 | _mm_storeu_si128((__m128i *)&C[count], p); // can overflow |
620 | count += _mm_popcnt_u32(bitmask_belongs_to_difference); |
621 | i_a += vectorlength; |
622 | } |
623 | // at this point we should have i_a == st_a and i_b == st_b |
624 | } |
625 | // do the tail using scalar code |
626 | while (i_a < s_a && i_b < s_b) { |
627 | uint16_t a = A[i_a]; |
628 | uint16_t b = B[i_b]; |
629 | if (b < a) { |
630 | i_b++; |
631 | } else if (a < b) { |
632 | C[count] = a; |
633 | count++; |
634 | i_a++; |
635 | } else { //== |
636 | i_a++; |
637 | i_b++; |
638 | } |
639 | } |
640 | if (i_a < s_a) { |
641 | memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a)); |
642 | count += (int32_t)(s_a - i_a); |
643 | } |
644 | return count; |
645 | } |
646 | |
647 | #endif // USESSE4 |
648 | |
649 | |
650 | |
651 | #ifdef USE_OLD_SKEW_INTERSECT |
652 | // TODO: given enough experience with the new skew intersect, drop the old one from the code base. |
653 | |
654 | |
/* Computes the intersection between one small and one large set of uint16_t.
 * Stores the result into buffer and returns the number of elements written.
 *
 * The small set is advanced one value at a time, while the large set is
 * advanced with advanceUntil().
 * NOTE(review): advanceUntil() is defined outside this block; the loop relies
 * on it returning size_l once the large set holds no further candidate for
 * val_s -- confirm against its definition.
 *
 * Returns 0 immediately when either input is empty. */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    // The first element of both inputs is read unconditionally right below,
    // so an empty array on either side has to be rejected here.
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // large is behind: jump forward to the next candidate for val_s
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            // small is behind: step to its next value
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // common value: record it, then move both sides forward
            buffer[pos++] = val_s;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
690 | #else // USE_OLD_SKEW_INTERSECT |
691 | |
692 | |
/**
 * Branchless binary search going after 4 values at once.
 * Assumes that array is sorted.
 * On return, array[*index1] >= target1, array[*index2] >= target2, ...
 * except when *index1 == n, in which case all values in array are
 * smaller than target1, and so forth.
 * When n <= 0 there is nothing to search and every index is set to 0.
 * It has logarithmic complexity.
 */
static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, uint16_t target3, uint16_t target4,
                          int32_t *index1, int32_t *index2, int32_t *index3,
                          int32_t *index4) {
    const uint16_t *base1 = array;
    const uint16_t *base2 = array;
    const uint16_t *base3 = array;
    const uint16_t *base4 = array;
    if (n <= 0) {
        // Never leave the outputs unwritten and never dereference an empty
        // array: report the insertion point 0 for every target.
        *index1 = 0;
        *index2 = 0;
        *index3 = 0;
        *index4 = 0;
        return;
    }
    while (n > 1) {
        // Each base pointer keeps or skips the lower half of its window; the
        // four searches share the same window length.
        int32_t half = n >> 1;
        base1 = (base1[half] < target1) ? &base1[half] : base1;
        base2 = (base2[half] < target2) ? &base2[half] : base2;
        base3 = (base3[half] < target3) ? &base3[half] : base3;
        base4 = (base4[half] < target4) ? &base4[half] : base4;
        n -= half;
    }
    // The remaining element is either the answer or the last value that is
    // still smaller than the target; in the latter case step one past it.
    *index1 = (int32_t)((*base1 < target1) + base1 - array);
    *index2 = (int32_t)((*base2 < target2) + base2 - array);
    *index3 = (int32_t)((*base3 < target3) + base3 - array);
    *index4 = (int32_t)((*base4 < target4) + base4 - array);
}
724 | |
/**
 * Branchless binary search going after 2 values at once.
 * Assumes that array is sorted.
 * On return, array[*index1] >= target1 and array[*index2] >= target2,
 * except when *index1 == n, in which case all values in array are
 * smaller than target1, and so forth.
 * When n <= 0 there is nothing to search and both indexes are set to 0.
 * It has logarithmic complexity.
 */
static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, int32_t *index1, int32_t *index2) {
    const uint16_t *base1 = array;
    const uint16_t *base2 = array;
    if (n <= 0) {
        // Never leave the outputs unwritten and never dereference an empty
        // array: report the insertion point 0 for both targets.
        *index1 = 0;
        *index2 = 0;
        return;
    }
    while (n > 1) {
        // Each base pointer keeps or skips the lower half of its window; both
        // searches share the same window length.
        int32_t half = n >> 1;
        base1 = (base1[half] < target1) ? &base1[half] : base1;
        base2 = (base2[half] < target2) ? &base2[half] : base2;
        n -= half;
    }
    // Step one past the remaining element when it is still too small.
    *index1 = (int32_t)((*base1 < target1) + base1 - array);
    *index2 = (int32_t)((*base2 < target2) + base2 - array);
}
748 | |
/* Computes the intersection between one small and one large set of uint16_t.
 * Stores the result into buffer and returns the number of elements written.
 *
 * The small set is consumed in blocks of 4 values (binarySearch4), followed
 * by at most one block of 2 values (binarySearch2) and at most one single
 * value (binarySearch). This approach can be slightly superior to a
 * conventional galloping search in some instances.
 *
 * Every search only covers the part of the large set that has not been ruled
 * out yet: after a block, the window starts at the position found for the
 * block's largest target. That is valid because both inputs are expected to
 * be sorted in increasing order, so everything in front of that position is
 * smaller than all targets that are still to come.
 */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    if (0 == size_s) {
        return 0;
    }
    int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
    while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        uint16_t target3 = small[idx_s + 2];
        uint16_t target4 = small[idx_s + 3];
        binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1,
                      target2, target3, target4, &index1, &index2, &index3,
                      &index4);
        // An index equal to the window length means "not present": guard the
        // read of large[] before comparing.
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) {
            buffer[pos++] = target3;
        }
        if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) {
            buffer[pos++] = target4;
        }
        idx_s += 4;
        // Skip everything below the largest target of this block.
        idx_l += index4;
    }
    if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1,
                      target2, &index1, &index2);
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        idx_s += 2;
        // Skip everything below the largest target of this block.
        idx_l += index2;
    }
    if ((idx_s < size_s) && (idx_l < size_l)) {
        // At most one value of the small set is left at this point.
        // NOTE(review): binarySearch() is declared outside this block; a
        // non-negative result is treated as "found".
        uint16_t val_s = small[idx_s];
        int32_t index =
            binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
        if (index >= 0) buffer[pos++] = val_s;
    }
    return (int32_t)pos;
}
808 | |
809 | |
810 | #endif //USE_OLD_SKEW_INTERSECT |
811 | |
812 | |
/* Counts the values shared by one small and one large set of uint16_t
 * without materializing the intersection.
 *
 * The small set is advanced one value at a time, while the large set is
 * advanced with advanceUntil().
 * NOTE(review): advanceUntil() is defined outside this block; the loop relies
 * on it returning size_l once the large set holds no further candidate for
 * val_s -- confirm against its definition.
 *
 * Returns 0 immediately when either input is empty.
 *
 * TODO: this could be accelerated, possibly, by using binarySearch4. */
int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
                                            size_t size_s,
                                            const uint16_t *large,
                                            size_t size_l) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    // The first element of both inputs is read unconditionally right below,
    // so an empty array on either side has to be rejected here.
    if (0 == size_s || 0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // large is behind: jump forward to the next candidate for val_s
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            // small is behind: step to its next value
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // common value: count it, then move both sides forward
            pos++;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
848 | |
/* Tells whether one small and one large set of uint16_t share at least one
 * value, stopping at the first match.
 *
 * The small set is advanced one value at a time, while the large set is
 * advanced with advanceUntil().
 * NOTE(review): advanceUntil() is defined outside this block; the loop relies
 * on it returning size_l once the large set holds no further candidate for
 * val_s -- confirm against its definition.
 *
 * Returns false immediately when either input is empty. */
bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
                                      const uint16_t *large, size_t size_l) {
    size_t idx_l = 0, idx_s = 0;

    // The first element of both inputs is read unconditionally right below,
    // so an empty array on either side has to be rejected here.
    if (0 == size_s || 0 == size_l) {
        return false;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // large is behind: jump forward to the next candidate for val_s
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            // small is behind: step to its next value
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            return true;
        }
    }

    return false;
}
875 | |
/**
 * Generic intersection function (linear merge).
 *
 * A and B are scanned from the front: the cursor that points at the smaller
 * value moves forward, and a value found under both cursors is appended to
 * out before both cursors move on. The scan ends as soon as either array is
 * exhausted. The merge only finds every common value when both inputs are
 * sorted in increasing order.
 *
 * Returns the number of values written to out (0 if either input is empty).
 */
int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
                         const uint16_t *B, const size_t lenB, uint16_t *out) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;
    uint16_t *dst = out;

    while (A != stopA && B != stopB) {
        const uint16_t left = *A;
        const uint16_t right = *B;
        if (left < right) {
            ++A;
        } else if (right < left) {
            ++B;
        } else {
            *dst++ = left;
            ++A;
            ++B;
        }
    }
    return (int32_t)(dst - out);
}
903 | |
/**
 * Counts the values common to A and B with a linear merge, without writing
 * the intersection anywhere.
 *
 * The cursor that points at the smaller value moves forward; equal values
 * are counted and both cursors move on. The merge only finds every common
 * value when both inputs are sorted in increasing order.
 *
 * Returns the number of common values (0 if either input is empty).
 */
int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
                                     const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;
    int32_t matches = 0;

    while (A != stopA && B != stopB) {
        const uint16_t left = *A;
        const uint16_t right = *B;
        if (left < right) {
            ++A;
        } else if (right < left) {
            ++B;
        } else {
            ++matches;
            ++A;
            ++B;
        }
    }
    return matches;
}
928 | |
929 | |
/**
 * Tells whether A and B have at least one value in common, using a linear
 * merge that stops at the first match.
 *
 * The merge only detects every common value when both inputs are sorted in
 * increasing order. Returns false if either input is empty.
 */
bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
                               const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) return false;

    const uint16_t *const stopA = A + lenA;
    const uint16_t *const stopB = B + lenB;

    while (A != stopA && B != stopB) {
        const uint16_t left = *A;
        const uint16_t right = *B;
        if (left == right) {
            return true;
        }
        // advance whichever side is behind
        if (left < right) {
            ++A;
        } else {
            ++B;
        }
    }
    return false;
}
952 | |
953 | |
954 | |
/**
 * Generic intersection function for uint32_t arrays (linear merge).
 *
 * The cursor that points at the smaller value moves forward, and a value
 * found under both cursors is appended to out before both cursors move on.
 * The merge only finds every common value when both inputs are sorted in
 * increasing order.
 *
 * Returns the number of values written to out (0 if either input is empty).
 */
size_t intersection_uint32(const uint32_t *A, const size_t lenA,
                           const uint32_t *B, const size_t lenB,
                           uint32_t *out) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint32_t *const stopA = A + lenA;
    const uint32_t *const stopB = B + lenB;
    uint32_t *dst = out;

    while (A != stopA && B != stopB) {
        const uint32_t left = *A;
        const uint32_t right = *B;
        if (left < right) {
            ++A;
        } else if (right < left) {
            ++B;
        } else {
            *dst++ = left;
            ++A;
            ++B;
        }
    }
    return (size_t)(dst - out);
}
983 | |
/**
 * Counts the values common to two uint32_t arrays with a linear merge,
 * without writing the intersection anywhere.
 *
 * The merge only finds every common value when both inputs are sorted in
 * increasing order. Returns 0 if either input is empty.
 */
size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
                                const uint32_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) return 0;

    const uint32_t *const stopA = A + lenA;
    const uint32_t *const stopB = B + lenB;
    size_t matches = 0;

    while (A != stopA && B != stopB) {
        const uint32_t left = *A;
        const uint32_t right = *B;
        if (left < right) {
            ++A;
        } else if (right < left) {
            ++B;
        } else {
            matches++;
            ++A;
            ++B;
        }
    }
    return matches;
}
1008 | |
1009 | // can one vectorize the computation of the union? (Update: Yes! See |
1010 | // union_vector16). |
1011 | |
/* Merges two uint16_t sets into buffer, writing a value that is at the head
 * of both inputs only once, and returns the number of values written.
 *
 * If one input is empty the other one is copied to buffer unchanged.
 * Otherwise the heads of both inputs are compared repeatedly, the smaller
 * one is emitted, and whatever remains of the non-exhausted input is copied
 * in one go at the end. The output is a sorted union when both inputs are
 * sorted in increasing order. */
size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                    size_t size_2, uint16_t *buffer) {
    if (size_2 == 0) {
        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
        return size_1;
    }
    if (size_1 == 0) {
        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
        return size_2;
    }

    const uint16_t *cur_1 = set_1;
    const uint16_t *cur_2 = set_2;
    const uint16_t *const stop_1 = set_1 + size_1;
    const uint16_t *const stop_2 = set_2 + size_2;
    uint16_t *dst = buffer;

    // Heads are cached and only reloaded for the side that moved.
    uint16_t head_1 = *cur_1;
    uint16_t head_2 = *cur_2;

    for (;;) {
        if (head_1 < head_2) {
            *dst++ = head_1;
            if (++cur_1 >= stop_1) break;
            head_1 = *cur_1;
        } else if (head_2 < head_1) {
            *dst++ = head_2;
            if (++cur_2 >= stop_2) break;
            head_2 = *cur_2;
        } else {
            // same value on both sides: emit once, advance both
            *dst++ = head_1;
            ++cur_1;
            ++cur_2;
            if (cur_1 >= stop_1 || cur_2 >= stop_2) break;
            head_1 = *cur_1;
            head_2 = *cur_2;
        }
    }

    // At most one of the inputs still has unread values.
    if (cur_1 < stop_1) {
        const size_t rest = (size_t)(stop_1 - cur_1);
        memmove(dst, cur_1, rest * sizeof(uint16_t));
        dst += rest;
    } else if (cur_2 < stop_2) {
        const size_t rest = (size_t)(stop_2 - cur_2);
        memmove(dst, cur_2, rest * sizeof(uint16_t));
        dst += rest;
    }

    return (size_t)(dst - buffer);
}
1060 | |
/* Writes to a_out the values of a1 that do not occur in a2 and returns how
 * many were written.
 *
 * An empty a1 yields 0. An empty a2 yields a copy of a1 (the copy is skipped
 * when a_out is a1 itself). Otherwise both arrays are merged from the front;
 * once a2 is exhausted the unread remainder of a1 is moved to the output in
 * one go. The result is the set difference when both inputs are sorted in
 * increasing order. */
int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
                      int length2, uint16_t *a_out) {
    if (length1 == 0) return 0;
    if (length2 == 0) {
        if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1);
        return length1;
    }

    int written = 0;
    int i = 0, j = 0;
    // Heads are cached and only reloaded for the side that moved.
    uint16_t lhs = a1[i];
    uint16_t rhs = a2[j];

    while (true) {
        if (rhs < lhs) {
            // a2 is behind: skip its head
            ++j;
            if (j >= length2) break;
            rhs = a2[j];
        } else if (lhs < rhs) {
            // lhs cannot occur in a2 any more: keep it
            a_out[written++] = lhs;
            ++i;
            if (i >= length1) return written;
            lhs = a1[i];
        } else {
            // present in both: drop it
            ++i;
            ++j;
            if (i >= length1) return written;
            if (j >= length2) break;
            lhs = a1[i];
            rhs = a2[j];
        }
    }

    // a2 is exhausted while a1 still has values: all of them survive.
    memmove(a_out + written, a1 + i, sizeof(uint16_t) * (length1 - i));
    return written + length1 - i;
}
1105 | |
/* Writes to out the values that sit in exactly one of the two arrays
 * (symmetric difference) and returns how many were written.
 *
 * Both arrays are merged from the front: equal heads are dropped, otherwise
 * the smaller head is emitted. When one array runs out, the unread remainder
 * of the other one is copied to the output. The result is the symmetric
 * difference when both inputs are sorted in increasing order. */
int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
                   const uint16_t *array_2, int32_t card_2, uint16_t *out) {
    int32_t i = 0, j = 0, written = 0;

    while (i < card_1 && j < card_2) {
        const uint16_t left = array_1[i];
        const uint16_t right = array_2[j];
        if (left < right) {
            out[written++] = left;
            ++i;
        } else if (left != right) {
            out[written++] = right;
            ++j;
        } else {
            // present on both sides: cancels out
            ++i;
            ++j;
        }
    }

    // At most one of the inputs still has unread values.
    if (i < card_1) {
        const size_t rest = card_1 - i;
        memcpy(out + written, array_1 + i, rest * sizeof(uint16_t));
        written += (int32_t)rest;
    } else if (j < card_2) {
        const size_t rest = card_2 - j;
        memcpy(out + written, array_2 + j, rest * sizeof(uint16_t));
        written += (int32_t)rest;
    }
    return written;
}
1136 | |
1137 | #ifdef USESSE4 |
1138 | |
1139 | /*** |
1140 | * start of the SIMD 16-bit union code |
1141 | * |
1142 | */ |
1143 | |
// Standard SIMD merge of two vectors of 16-bit unsigned lanes, developed
// originally for merge sort using SIMD instructions. See, e.g., Inoue and
// Taura, SIMD- and Cache-Friendly Algorithm for Sorting an Array of
// Structures.
// Assuming that vInput1 and vInput2 are sorted, produces a sorted output
// going from vecMin all the way to vecMax.
//
// The merge is a fixed sequence of lane-wise min/max steps; between steps the
// current minimum vector is rotated by one 16-bit lane (2 bytes), and it is
// rotated one final time before being stored to *vecMin.
static inline void sse_merge(const __m128i *vInput1,
                             const __m128i *vInput2,              // input 1 & 2
                             __m128i *vecMin, __m128i *vecMax) {  // output
    const __m128i in1 = *vInput1;
    const __m128i in2 = *vInput2;

    // First step works directly on the two inputs.
    __m128i rotated = _mm_min_epu16(in1, in2);
    __m128i upper = _mm_max_epu16(in1, in2);
    rotated = _mm_alignr_epi8(rotated, rotated, 2);

    // Seven further min/max steps, each followed by a one-lane rotation of
    // the freshly computed minimum.
    for (int step = 0; step < 7; step++) {
        const __m128i lower = _mm_min_epu16(rotated, upper);
        upper = _mm_max_epu16(rotated, upper);
        rotated = _mm_alignr_epi8(lower, lower, 2);
    }

    *vecMin = rotated;
    *vecMax = upper;
}
1178 | |
1179 | // used by store_unique, generated by simdunion.py |
1180 | static uint8_t uniqshuf[] = { |
1181 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1182 | 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1183 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1184 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1185 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1186 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, |
1187 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1188 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1189 | 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1190 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1191 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1192 | 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1193 | 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1194 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, |
1195 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, |
1196 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1197 | 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1198 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1199 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, |
1200 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1201 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1202 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1203 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1204 | 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1205 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1206 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, |
1207 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1208 | 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1209 | 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1210 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, |
1211 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, |
1212 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1213 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, |
1214 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, |
1215 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1216 | 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1217 | 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1218 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, |
1219 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, |
1220 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1221 | 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1222 | 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1223 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1224 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1225 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1226 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1227 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1228 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1229 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1230 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, |
1231 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1232 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1233 | 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1234 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, |
1235 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1236 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1237 | 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1238 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1239 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1240 | 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1241 | 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1242 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, |
1243 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, |
1244 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1245 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, |
1246 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, |
1247 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1248 | 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1249 | 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1250 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, |
1251 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1252 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1253 | 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1254 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1255 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1256 | 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1257 | 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1258 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, |
1259 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, |
1260 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1261 | 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1262 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, |
1263 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, |
1264 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1265 | 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1266 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1267 | 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1268 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1269 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, |
1270 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1271 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1272 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1273 | 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1274 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1275 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, |
1276 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1277 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, |
1278 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, |
1279 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1280 | 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1281 | 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1282 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, |
1283 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, |
1284 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1285 | 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1286 | 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1287 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1288 | 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1289 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1290 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, |
1291 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1292 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1293 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1294 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, |
1295 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1296 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1297 | 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1298 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, |
1299 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1300 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1301 | 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1302 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1303 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1304 | 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1305 | 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1306 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, |
1307 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf, |
1308 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1309 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, |
1310 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1311 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1312 | 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1313 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1314 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, |
1315 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1316 | 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1317 | 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1318 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, |
1319 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1320 | 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1321 | 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1322 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, |
1323 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, |
1324 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1325 | 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1326 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, |
1327 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, |
1328 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1329 | 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1330 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1331 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1332 | 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1333 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1334 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, |
1335 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1336 | 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1337 | 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1338 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, |
1339 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf, |
1340 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1341 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, |
1342 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, |
1343 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1344 | 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1345 | 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1346 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF, |
1347 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf, |
1348 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1349 | 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1350 | 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1351 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1352 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1353 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1354 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1355 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1356 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1357 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1358 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1359 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1360 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1361 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1362 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, |
1363 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1364 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1365 | 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1366 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1367 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1368 | 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1369 | 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1370 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, |
1371 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, |
1372 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1373 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, |
1374 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, |
1375 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1376 | 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1377 | 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1378 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, |
1379 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1380 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1381 | 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1382 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1383 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1384 | 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1385 | 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1386 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, |
1387 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, |
1388 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1389 | 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1390 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, |
1391 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, |
1392 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1393 | 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1394 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1395 | 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1396 | 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1397 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, |
1398 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, |
1399 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1400 | 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1401 | 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1402 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, |
1403 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, |
1404 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1405 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, |
1406 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, |
1407 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1408 | 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1409 | 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1410 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, |
1411 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, |
1412 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1413 | 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1414 | 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1415 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1416 | 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1417 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1418 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, |
1419 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1420 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1421 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1422 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, |
1423 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1424 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1425 | 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1426 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, |
1427 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1428 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1429 | 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1430 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1431 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1432 | 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1433 | 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1434 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, |
1435 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF, |
1436 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1437 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, |
1438 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1439 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1440 | 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1441 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1442 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, |
1443 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1444 | 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1445 | 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1446 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, |
1447 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1448 | 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1449 | 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1450 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, |
1451 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, |
1452 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1453 | 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1454 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, |
1455 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, |
1456 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1457 | 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1458 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, |
1459 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1460 | 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1461 | 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1462 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, |
1463 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1464 | 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1465 | 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1466 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, |
1467 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, |
1468 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1469 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, |
1470 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, |
1471 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1472 | 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1473 | 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1474 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF, |
1475 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, |
1476 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1477 | 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1478 | 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1479 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1480 | 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1481 | 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1482 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, |
1483 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, |
1484 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1485 | 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1486 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, |
1487 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, |
1488 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1489 | 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1490 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, |
1491 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, |
1492 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1493 | 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1494 | 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1495 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1496 | 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1497 | 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1498 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, |
1499 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF, |
1500 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1501 | 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, |
1502 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, |
1503 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, |
1504 | 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1505 | 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1506 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF, |
1507 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, |
1508 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1509 | 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1510 | 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1511 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, |
1512 | 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1513 | 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1514 | 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, |
1515 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF, |
1516 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1517 | 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1518 | 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1519 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF, |
1520 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1521 | 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, |
1522 | 0xFF, 0xFF, 0xFF, 0xFF}; |
1523 | |
1524 | // write vector new, while omitting repeated values assuming that previously |
1525 | // written vector was "old" |
1526 | static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) { |
1527 | __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2); |
1528 | // lots of high latency instructions follow (optimize?) |
1529 | int M = _mm_movemask_epi8( |
1530 | _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128())); |
1531 | int numberofnewvalues = 8 - _mm_popcnt_u32(M); |
1532 | __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); |
1533 | __m128i val = _mm_shuffle_epi8(newval, key); |
1534 | _mm_storeu_si128((__m128i *)output, val); |
1535 | return numberofnewvalues; |
1536 | } |
1537 | |
// Removes adjacent duplicates from out[0..len) in place (the repeated values
// are overwritten) and returns the number of values kept at the front.
// For input that is sorted this leaves each distinct value exactly once.
// An empty input yields 0.
static inline uint32_t unique(uint16_t *out, uint32_t len) {
    if (len == 0) {
        // Without this guard the function would report one value for an
        // empty range, because the first slot is kept unconditionally.
        return 0;
    }
    uint32_t pos = 1;  // out[0] is always kept
    for (uint32_t i = 1; i < len; ++i) {
        if (out[i] != out[i - 1]) {
            out[pos++] = out[i];
        }
    }
    return pos;
}
1549 | |
// qsort comparator for uint16_t elements: negative, zero or positive when the
// first value is respectively below, equal to or above the second one.
static int uint16_compare(const void *a, const void *b) {
    const uint16_t lhs = *(const uint16_t *)a;
    const uint16_t rhs = *(const uint16_t *)b;
    // Both operands are widened to int, so the difference cannot wrap.
    return (int)lhs - (int)rhs;
}
1554 | |
1555 | // a one-pass SSE union algorithm |
1556 | uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1, |
1557 | const uint16_t *__restrict__ array2, uint32_t length2, |
1558 | uint16_t *__restrict__ output) { |
1559 | if ((length1 < 8) || (length2 < 8)) { |
1560 | return (uint32_t)union_uint16(array1, length1, array2, length2, output); |
1561 | } |
1562 | __m128i vA, vB, V, vecMin, vecMax; |
1563 | __m128i laststore; |
1564 | uint16_t *initoutput = output; |
1565 | uint32_t len1 = length1 / 8; |
1566 | uint32_t len2 = length2 / 8; |
1567 | uint32_t pos1 = 0; |
1568 | uint32_t pos2 = 0; |
1569 | // we start the machine |
1570 | vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); |
1571 | pos1++; |
1572 | vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); |
1573 | pos2++; |
1574 | sse_merge(&vA, &vB, &vecMin, &vecMax); |
1575 | laststore = _mm_set1_epi16(-1); |
1576 | output += store_unique(laststore, vecMin, output); |
1577 | laststore = vecMin; |
1578 | if ((pos1 < len1) && (pos2 < len2)) { |
1579 | uint16_t curA, curB; |
1580 | curA = array1[8 * pos1]; |
1581 | curB = array2[8 * pos2]; |
1582 | while (true) { |
1583 | if (curA <= curB) { |
1584 | V = _mm_lddqu_si128((const __m128i *)array1 + pos1); |
1585 | pos1++; |
1586 | if (pos1 < len1) { |
1587 | curA = array1[8 * pos1]; |
1588 | } else { |
1589 | break; |
1590 | } |
1591 | } else { |
1592 | V = _mm_lddqu_si128((const __m128i *)array2 + pos2); |
1593 | pos2++; |
1594 | if (pos2 < len2) { |
1595 | curB = array2[8 * pos2]; |
1596 | } else { |
1597 | break; |
1598 | } |
1599 | } |
1600 | sse_merge(&V, &vecMax, &vecMin, &vecMax); |
1601 | output += store_unique(laststore, vecMin, output); |
1602 | laststore = vecMin; |
1603 | } |
1604 | sse_merge(&V, &vecMax, &vecMin, &vecMax); |
1605 | output += store_unique(laststore, vecMin, output); |
1606 | laststore = vecMin; |
1607 | } |
1608 | // we finish the rest off using a scalar algorithm |
1609 | // could be improved? |
1610 | // |
1611 | // copy the small end on a tmp buffer |
1612 | uint32_t len = (uint32_t)(output - initoutput); |
1613 | uint16_t buffer[16]; |
1614 | uint32_t leftoversize = store_unique(laststore, vecMax, buffer); |
1615 | if (pos1 == len1) { |
1616 | memcpy(buffer + leftoversize, array1 + 8 * pos1, |
1617 | (length1 - 8 * len1) * sizeof(uint16_t)); |
1618 | leftoversize += length1 - 8 * len1; |
1619 | qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); |
1620 | |
1621 | leftoversize = unique(buffer, leftoversize); |
1622 | len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2, |
1623 | length2 - 8 * pos2, output); |
1624 | } else { |
1625 | memcpy(buffer + leftoversize, array2 + 8 * pos2, |
1626 | (length2 - 8 * len2) * sizeof(uint16_t)); |
1627 | leftoversize += length2 - 8 * len2; |
1628 | qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); |
1629 | leftoversize = unique(buffer, leftoversize); |
1630 | len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1, |
1631 | length1 - 8 * pos1, output); |
1632 | } |
1633 | return len; |
1634 | } |
1635 | |
1636 | /** |
1637 | * End of the SIMD 16-bit union code |
1638 | * |
1639 | */ |
1640 | |
1641 | /** |
1642 | * Start of SIMD 16-bit XOR code |
1643 | */ |
1644 | |
1645 | // write vector new, while omitting repeated values assuming that previously |
1646 | // written vector was "old" |
1647 | static inline int store_unique_xor(__m128i old, __m128i newval, |
1648 | uint16_t *output) { |
1649 | __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4); |
1650 | __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2); |
1651 | __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1); |
1652 | __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval); |
1653 | __m128i equalleftoright = _mm_or_si128(equalleft, equalright); |
1654 | int M = _mm_movemask_epi8( |
1655 | _mm_packs_epi16(equalleftoright, _mm_setzero_si128())); |
1656 | int numberofnewvalues = 8 - _mm_popcnt_u32(M); |
1657 | __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M); |
1658 | __m128i val = _mm_shuffle_epi8(vecTmp2, key); |
1659 | _mm_storeu_si128((__m128i *)output, val); |
1660 | return numberofnewvalues; |
1661 | } |
1662 | |
// In-place XOR-style compaction of out[0..len): whenever a value equals the
// one right before it, that pair cancels out (the copy already kept is
// dropped and the new one is not stored). Returns the number of values left
// at the front of `out`. An empty input yields 0.
// NOTE(review): a value repeated three or more times in a row is not handled
// as a parity count -- callers appear to supply each value at most twice.
static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
    if (len == 0) {
        // The first slot is kept unconditionally below, which would report
        // one value for an empty range.
        return 0;
    }
    uint32_t pos = 1;
    for (uint32_t i = 1; i < len; ++i) {
        if (out[i] != out[i - 1]) {
            out[pos++] = out[i];
        } else {
            pos--;  // if it is identical to previous, delete it
        }
    }
    return pos;
}
1675 | |
1676 | // a one-pass SSE xor algorithm |
1677 | uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, |
1678 | const uint16_t *__restrict__ array2, uint32_t length2, |
1679 | uint16_t *__restrict__ output) { |
1680 | if ((length1 < 8) || (length2 < 8)) { |
1681 | return xor_uint16(array1, length1, array2, length2, output); |
1682 | } |
1683 | __m128i vA, vB, V, vecMin, vecMax; |
1684 | __m128i laststore; |
1685 | uint16_t *initoutput = output; |
1686 | uint32_t len1 = length1 / 8; |
1687 | uint32_t len2 = length2 / 8; |
1688 | uint32_t pos1 = 0; |
1689 | uint32_t pos2 = 0; |
1690 | // we start the machine |
1691 | vA = _mm_lddqu_si128((const __m128i *)array1 + pos1); |
1692 | pos1++; |
1693 | vB = _mm_lddqu_si128((const __m128i *)array2 + pos2); |
1694 | pos2++; |
1695 | sse_merge(&vA, &vB, &vecMin, &vecMax); |
1696 | laststore = _mm_set1_epi16(-1); |
1697 | uint16_t buffer[17]; |
1698 | output += store_unique_xor(laststore, vecMin, output); |
1699 | |
1700 | laststore = vecMin; |
1701 | if ((pos1 < len1) && (pos2 < len2)) { |
1702 | uint16_t curA, curB; |
1703 | curA = array1[8 * pos1]; |
1704 | curB = array2[8 * pos2]; |
1705 | while (true) { |
1706 | if (curA <= curB) { |
1707 | V = _mm_lddqu_si128((const __m128i *)array1 + pos1); |
1708 | pos1++; |
1709 | if (pos1 < len1) { |
1710 | curA = array1[8 * pos1]; |
1711 | } else { |
1712 | break; |
1713 | } |
1714 | } else { |
1715 | V = _mm_lddqu_si128((const __m128i *)array2 + pos2); |
1716 | pos2++; |
1717 | if (pos2 < len2) { |
1718 | curB = array2[8 * pos2]; |
1719 | } else { |
1720 | break; |
1721 | } |
1722 | } |
1723 | sse_merge(&V, &vecMax, &vecMin, &vecMax); |
1724 | // conditionally stores the last value of laststore as well as all |
1725 | // but the |
1726 | // last value of vecMin |
1727 | output += store_unique_xor(laststore, vecMin, output); |
1728 | laststore = vecMin; |
1729 | } |
1730 | sse_merge(&V, &vecMax, &vecMin, &vecMax); |
1731 | // conditionally stores the last value of laststore as well as all but |
1732 | // the |
1733 | // last value of vecMin |
1734 | output += store_unique_xor(laststore, vecMin, output); |
1735 | laststore = vecMin; |
1736 | } |
1737 | uint32_t len = (uint32_t)(output - initoutput); |
1738 | |
1739 | // we finish the rest off using a scalar algorithm |
1740 | // could be improved? |
1741 | // conditionally stores the last value of laststore as well as all but the |
1742 | // last value of vecMax, |
1743 | // we store to "buffer" |
1744 | int leftoversize = store_unique_xor(laststore, vecMax, buffer); |
1745 | uint16_t vec7 = _mm_extract_epi16(vecMax, 7); |
1746 | uint16_t vec6 = _mm_extract_epi16(vecMax, 6); |
1747 | if (vec7 != vec6) buffer[leftoversize++] = vec7; |
1748 | if (pos1 == len1) { |
1749 | memcpy(buffer + leftoversize, array1 + 8 * pos1, |
1750 | (length1 - 8 * len1) * sizeof(uint16_t)); |
1751 | leftoversize += length1 - 8 * len1; |
1752 | if (leftoversize == 0) { // trivial case |
1753 | memcpy(output, array2 + 8 * pos2, |
1754 | (length2 - 8 * pos2) * sizeof(uint16_t)); |
1755 | len += (length2 - 8 * pos2); |
1756 | } else { |
1757 | qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); |
1758 | leftoversize = unique_xor(buffer, leftoversize); |
1759 | len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2, |
1760 | length2 - 8 * pos2, output); |
1761 | } |
1762 | } else { |
1763 | memcpy(buffer + leftoversize, array2 + 8 * pos2, |
1764 | (length2 - 8 * len2) * sizeof(uint16_t)); |
1765 | leftoversize += length2 - 8 * len2; |
1766 | if (leftoversize == 0) { // trivial case |
1767 | memcpy(output, array1 + 8 * pos1, |
1768 | (length1 - 8 * pos1) * sizeof(uint16_t)); |
1769 | len += (length1 - 8 * pos1); |
1770 | } else { |
1771 | qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare); |
1772 | leftoversize = unique_xor(buffer, leftoversize); |
1773 | len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1, |
1774 | length1 - 8 * pos1, output); |
1775 | } |
1776 | } |
1777 | return len; |
1778 | } |
1779 | |
1780 | /** |
1781 | * End of SIMD 16-bit XOR code |
1782 | */ |
1783 | |
1784 | #endif // USESSE4 |
1785 | |
/**
 * Merges two ascending uint32_t sequences into `buffer`, writing a value that
 * is at the head of both inputs only once, and returns the number of values
 * written. When one input is empty the other is copied verbatim.
 * `buffer` must be able to hold up to size_1 + size_2 values.
 */
size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
                    size_t size_2, uint32_t *buffer) {
    if (size_2 == 0) {
        memmove(buffer, set_1, size_1 * sizeof(uint32_t));
        return size_1;
    }
    if (size_1 == 0) {
        memmove(buffer, set_2, size_2 * sizeof(uint32_t));
        return size_2;
    }

    size_t written = 0;
    size_t i = 0;
    size_t j = 0;
    uint32_t head_1 = set_1[0];
    uint32_t head_2 = set_2[0];

    // Emit the smaller head and advance its side; on a tie emit once and
    // advance both. The loop ends as soon as either side is exhausted.
    for (;;) {
        if (head_1 == head_2) {
            buffer[written++] = head_1;
            ++i;
            ++j;
            if (i >= size_1 || j >= size_2) {
                break;
            }
            head_1 = set_1[i];
            head_2 = set_2[j];
        } else if (head_1 < head_2) {
            buffer[written++] = head_1;
            if (++i >= size_1) {
                break;
            }
            head_1 = set_1[i];
        } else {
            buffer[written++] = head_2;
            if (++j >= size_2) {
                break;
            }
            head_2 = set_2[j];
        }
    }

    // At most one side still has values; append them unchanged.
    if (i < size_1) {
        const size_t remaining = size_1 - i;
        memmove(buffer + written, set_1 + i, remaining * sizeof(uint32_t));
        written += remaining;
    } else if (j < size_2) {
        const size_t remaining = size_2 - j;
        memmove(buffer + written, set_2 + j, remaining * sizeof(uint32_t));
        written += remaining;
    }

    return written;
}
1834 | |
/**
 * Returns how many values a merge of the two ascending uint32_t sequences
 * would produce when a value at the head of both inputs is counted once.
 * Nothing is written; the inputs are only read.
 */
size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
                         const uint32_t *set_2, size_t size_2) {
    size_t i = 0;
    size_t j = 0;
    size_t count = 0;

    // Each step accounts for one output value: the smaller head, or the
    // shared head when both sides agree (then both sides advance).
    while (i < size_1 && j < size_2) {
        const uint32_t head_1 = set_1[i];
        const uint32_t head_2 = set_2[j];
        ++count;
        if (head_1 <= head_2) {
            ++i;
        }
        if (head_2 <= head_1) {
            ++j;
        }
    }

    // At least one side is exhausted here, so at most one of the two
    // remainders below is non-zero.
    return count + (size_1 - i) + (size_2 - j);
}
1878 | |
1879 | |
1880 | |
/**
 * Union of two uint16_t arrays into `buffer`; returns the number of values
 * written. The shorter array is always handed to the underlying routine as
 * its first operand (set_2 goes first when both sizes are equal). The
 * vectorized union_vector16 is used when ROARING_VECTOR_OPERATIONS_ENABLED is
 * defined, the scalar union_uint16 otherwise.
 */
size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                         size_t size_2, uint16_t *buffer) {
    // compute union with smallest array first
    const uint16_t *shorter = set_2;
    size_t shorter_size = size_2;
    const uint16_t *longer = set_1;
    size_t longer_size = size_1;
    if (size_1 < size_2) {
        shorter = set_1;
        shorter_size = size_1;
        longer = set_2;
        longer_size = size_2;
    }
#ifdef ROARING_VECTOR_OPERATIONS_ENABLED
    return union_vector16(shorter, (uint32_t)shorter_size,
                          longer, (uint32_t)longer_size, buffer);
#else
    return union_uint16(shorter, shorter_size, longer, longer_size, buffer);
#endif
}
1903 | /* end file /opt/bitmap/CRoaring-0.2.57/src/array_util.c */ |
1904 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */ |
1905 | #include <assert.h> |
1906 | #include <stdint.h> |
1907 | #include <stdio.h> |
1908 | #include <stdlib.h> |
1909 | #include <string.h> |
1910 | |
1911 | |
1912 | #ifdef IS_X64 |
/* lengthTable[b] is the number of set bits in the byte value b.
 * Laid out as one row per high nibble, one column per low nibble. */
static uint8_t lengthTable[256] = {
    /* 0x0_ */ 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    /* 0x1_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    /* 0x2_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    /* 0x3_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    /* 0x4_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    /* 0x5_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    /* 0x6_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    /* 0x7_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    /* 0x8_ */ 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    /* 0x9_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    /* 0xA_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    /* 0xB_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    /* 0xC_ */ 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    /* 0xD_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    /* 0xE_ */ 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    /* 0xF_ */ 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
1925 | #endif |
1926 | |
1927 | #ifdef USEAVX |
1928 | ALIGNED(32) |
1929 | static uint32_t vecDecodeTable[256][8] = { |
1930 | {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ |
1931 | {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ |
1932 | {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ |
1933 | {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ |
1934 | {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ |
1935 | {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ |
1936 | {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ |
1937 | {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ |
1938 | {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ |
1939 | {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ |
1940 | {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ |
1941 | {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ |
1942 | {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ |
1943 | {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ |
1944 | {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ |
1945 | {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ |
1946 | {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ |
1947 | {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ |
1948 | {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ |
1949 | {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ |
1950 | {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ |
1951 | {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ |
1952 | {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ |
1953 | {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ |
1954 | {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ |
1955 | {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ |
1956 | {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ |
1957 | {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ |
1958 | {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ |
1959 | {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ |
1960 | {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ |
1961 | {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ |
1962 | {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ |
1963 | {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ |
1964 | {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ |
1965 | {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ |
1966 | {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ |
1967 | {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ |
1968 | {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ |
1969 | {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ |
1970 | {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ |
1971 | {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ |
1972 | {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ |
1973 | {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ |
1974 | {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ |
1975 | {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ |
1976 | {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ |
1977 | {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ |
1978 | {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ |
1979 | {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ |
1980 | {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ |
1981 | {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ |
1982 | {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ |
1983 | {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ |
1984 | {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ |
1985 | {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ |
1986 | {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ |
1987 | {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ |
1988 | {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ |
1989 | {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ |
1990 | {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ |
1991 | {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ |
1992 | {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ |
1993 | {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ |
1994 | {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ |
1995 | {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ |
1996 | {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ |
1997 | {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ |
1998 | {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ |
1999 | {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ |
2000 | {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ |
2001 | {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ |
2002 | {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ |
2003 | {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ |
2004 | {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ |
2005 | {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ |
2006 | {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ |
2007 | {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ |
2008 | {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ |
2009 | {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ |
2010 | {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ |
2011 | {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ |
2012 | {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ |
2013 | {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ |
2014 | {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ |
2015 | {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ |
2016 | {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ |
2017 | {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ |
2018 | {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ |
2019 | {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ |
2020 | {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ |
2021 | {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ |
2022 | {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ |
2023 | {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ |
2024 | {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ |
2025 | {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ |
2026 | {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ |
2027 | {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ |
2028 | {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ |
2029 | {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ |
2030 | {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ |
2031 | {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ |
2032 | {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ |
2033 | {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ |
2034 | {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ |
2035 | {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ |
2036 | {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ |
2037 | {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ |
2038 | {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ |
2039 | {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ |
2040 | {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ |
2041 | {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ |
2042 | {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ |
2043 | {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ |
2044 | {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ |
2045 | {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ |
2046 | {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ |
2047 | {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ |
2048 | {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ |
2049 | {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ |
2050 | {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ |
2051 | {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ |
2052 | {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ |
2053 | {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ |
2054 | {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ |
2055 | {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ |
2056 | {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ |
2057 | {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ |
2058 | {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ |
2059 | {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ |
2060 | {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ |
2061 | {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ |
2062 | {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ |
2063 | {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ |
2064 | {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ |
2065 | {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ |
2066 | {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ |
2067 | {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ |
2068 | {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ |
2069 | {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ |
2070 | {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ |
2071 | {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ |
2072 | {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ |
2073 | {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ |
2074 | {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ |
2075 | {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ |
2076 | {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ |
2077 | {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ |
2078 | {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ |
2079 | {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ |
2080 | {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ |
2081 | {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ |
2082 | {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ |
2083 | {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ |
2084 | {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ |
2085 | {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ |
2086 | {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ |
2087 | {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ |
2088 | {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ |
2089 | {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ |
2090 | {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ |
2091 | {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ |
2092 | {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ |
2093 | {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ |
2094 | {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ |
2095 | {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ |
2096 | {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ |
2097 | {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ |
2098 | {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ |
2099 | {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ |
2100 | {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ |
2101 | {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ |
2102 | {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ |
2103 | {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ |
2104 | {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ |
2105 | {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ |
2106 | {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ |
2107 | {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ |
2108 | {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ |
2109 | {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ |
2110 | {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ |
2111 | {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ |
2112 | {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ |
2113 | {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ |
2114 | {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ |
2115 | {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ |
2116 | {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ |
2117 | {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ |
2118 | {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ |
2119 | {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ |
2120 | {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ |
2121 | {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ |
2122 | {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ |
2123 | {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ |
2124 | {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ |
2125 | {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ |
2126 | {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ |
2127 | {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ |
2128 | {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ |
2129 | {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ |
2130 | {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ |
2131 | {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ |
2132 | {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ |
2133 | {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ |
2134 | {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ |
2135 | {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ |
2136 | {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ |
2137 | {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ |
2138 | {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ |
2139 | {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ |
2140 | {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ |
2141 | {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ |
2142 | {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ |
2143 | {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ |
2144 | {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ |
2145 | {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ |
2146 | {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ |
2147 | {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ |
2148 | {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ |
2149 | {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ |
2150 | {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ |
2151 | {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ |
2152 | {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ |
2153 | {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ |
2154 | {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ |
2155 | {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ |
2156 | {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ |
2157 | {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ |
2158 | {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ |
2159 | {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ |
2160 | {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ |
2161 | {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ |
2162 | {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ |
2163 | {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ |
2164 | {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ |
2165 | {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ |
2166 | {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ |
2167 | {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ |
2168 | {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ |
2169 | {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ |
2170 | {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ |
2171 | {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ |
2172 | {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ |
2173 | {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ |
2174 | {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ |
2175 | {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ |
2176 | {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ |
2177 | {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ |
2178 | {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ |
2179 | {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ |
2180 | {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ |
2181 | {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ |
2182 | {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ |
2183 | {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ |
2184 | {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ |
2185 | {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ |
2186 | }; |
2187 | |
2188 | #endif // #ifdef USEAVX |
2189 | |
2190 | #ifdef IS_X64 |
2191 | // same as vecDecodeTable but in 16 bits |
2192 | ALIGNED(32) |
2193 | static uint16_t vecDecodeTable_uint16[256][8] = { |
2194 | {0, 0, 0, 0, 0, 0, 0, 0}, /* 0x00 (00000000) */ |
2195 | {1, 0, 0, 0, 0, 0, 0, 0}, /* 0x01 (00000001) */ |
2196 | {2, 0, 0, 0, 0, 0, 0, 0}, /* 0x02 (00000010) */ |
2197 | {1, 2, 0, 0, 0, 0, 0, 0}, /* 0x03 (00000011) */ |
2198 | {3, 0, 0, 0, 0, 0, 0, 0}, /* 0x04 (00000100) */ |
2199 | {1, 3, 0, 0, 0, 0, 0, 0}, /* 0x05 (00000101) */ |
2200 | {2, 3, 0, 0, 0, 0, 0, 0}, /* 0x06 (00000110) */ |
2201 | {1, 2, 3, 0, 0, 0, 0, 0}, /* 0x07 (00000111) */ |
2202 | {4, 0, 0, 0, 0, 0, 0, 0}, /* 0x08 (00001000) */ |
2203 | {1, 4, 0, 0, 0, 0, 0, 0}, /* 0x09 (00001001) */ |
2204 | {2, 4, 0, 0, 0, 0, 0, 0}, /* 0x0A (00001010) */ |
2205 | {1, 2, 4, 0, 0, 0, 0, 0}, /* 0x0B (00001011) */ |
2206 | {3, 4, 0, 0, 0, 0, 0, 0}, /* 0x0C (00001100) */ |
2207 | {1, 3, 4, 0, 0, 0, 0, 0}, /* 0x0D (00001101) */ |
2208 | {2, 3, 4, 0, 0, 0, 0, 0}, /* 0x0E (00001110) */ |
2209 | {1, 2, 3, 4, 0, 0, 0, 0}, /* 0x0F (00001111) */ |
2210 | {5, 0, 0, 0, 0, 0, 0, 0}, /* 0x10 (00010000) */ |
2211 | {1, 5, 0, 0, 0, 0, 0, 0}, /* 0x11 (00010001) */ |
2212 | {2, 5, 0, 0, 0, 0, 0, 0}, /* 0x12 (00010010) */ |
2213 | {1, 2, 5, 0, 0, 0, 0, 0}, /* 0x13 (00010011) */ |
2214 | {3, 5, 0, 0, 0, 0, 0, 0}, /* 0x14 (00010100) */ |
2215 | {1, 3, 5, 0, 0, 0, 0, 0}, /* 0x15 (00010101) */ |
2216 | {2, 3, 5, 0, 0, 0, 0, 0}, /* 0x16 (00010110) */ |
2217 | {1, 2, 3, 5, 0, 0, 0, 0}, /* 0x17 (00010111) */ |
2218 | {4, 5, 0, 0, 0, 0, 0, 0}, /* 0x18 (00011000) */ |
2219 | {1, 4, 5, 0, 0, 0, 0, 0}, /* 0x19 (00011001) */ |
2220 | {2, 4, 5, 0, 0, 0, 0, 0}, /* 0x1A (00011010) */ |
2221 | {1, 2, 4, 5, 0, 0, 0, 0}, /* 0x1B (00011011) */ |
2222 | {3, 4, 5, 0, 0, 0, 0, 0}, /* 0x1C (00011100) */ |
2223 | {1, 3, 4, 5, 0, 0, 0, 0}, /* 0x1D (00011101) */ |
2224 | {2, 3, 4, 5, 0, 0, 0, 0}, /* 0x1E (00011110) */ |
2225 | {1, 2, 3, 4, 5, 0, 0, 0}, /* 0x1F (00011111) */ |
2226 | {6, 0, 0, 0, 0, 0, 0, 0}, /* 0x20 (00100000) */ |
2227 | {1, 6, 0, 0, 0, 0, 0, 0}, /* 0x21 (00100001) */ |
2228 | {2, 6, 0, 0, 0, 0, 0, 0}, /* 0x22 (00100010) */ |
2229 | {1, 2, 6, 0, 0, 0, 0, 0}, /* 0x23 (00100011) */ |
2230 | {3, 6, 0, 0, 0, 0, 0, 0}, /* 0x24 (00100100) */ |
2231 | {1, 3, 6, 0, 0, 0, 0, 0}, /* 0x25 (00100101) */ |
2232 | {2, 3, 6, 0, 0, 0, 0, 0}, /* 0x26 (00100110) */ |
2233 | {1, 2, 3, 6, 0, 0, 0, 0}, /* 0x27 (00100111) */ |
2234 | {4, 6, 0, 0, 0, 0, 0, 0}, /* 0x28 (00101000) */ |
2235 | {1, 4, 6, 0, 0, 0, 0, 0}, /* 0x29 (00101001) */ |
2236 | {2, 4, 6, 0, 0, 0, 0, 0}, /* 0x2A (00101010) */ |
2237 | {1, 2, 4, 6, 0, 0, 0, 0}, /* 0x2B (00101011) */ |
2238 | {3, 4, 6, 0, 0, 0, 0, 0}, /* 0x2C (00101100) */ |
2239 | {1, 3, 4, 6, 0, 0, 0, 0}, /* 0x2D (00101101) */ |
2240 | {2, 3, 4, 6, 0, 0, 0, 0}, /* 0x2E (00101110) */ |
2241 | {1, 2, 3, 4, 6, 0, 0, 0}, /* 0x2F (00101111) */ |
2242 | {5, 6, 0, 0, 0, 0, 0, 0}, /* 0x30 (00110000) */ |
2243 | {1, 5, 6, 0, 0, 0, 0, 0}, /* 0x31 (00110001) */ |
2244 | {2, 5, 6, 0, 0, 0, 0, 0}, /* 0x32 (00110010) */ |
2245 | {1, 2, 5, 6, 0, 0, 0, 0}, /* 0x33 (00110011) */ |
2246 | {3, 5, 6, 0, 0, 0, 0, 0}, /* 0x34 (00110100) */ |
2247 | {1, 3, 5, 6, 0, 0, 0, 0}, /* 0x35 (00110101) */ |
2248 | {2, 3, 5, 6, 0, 0, 0, 0}, /* 0x36 (00110110) */ |
2249 | {1, 2, 3, 5, 6, 0, 0, 0}, /* 0x37 (00110111) */ |
2250 | {4, 5, 6, 0, 0, 0, 0, 0}, /* 0x38 (00111000) */ |
2251 | {1, 4, 5, 6, 0, 0, 0, 0}, /* 0x39 (00111001) */ |
2252 | {2, 4, 5, 6, 0, 0, 0, 0}, /* 0x3A (00111010) */ |
2253 | {1, 2, 4, 5, 6, 0, 0, 0}, /* 0x3B (00111011) */ |
2254 | {3, 4, 5, 6, 0, 0, 0, 0}, /* 0x3C (00111100) */ |
2255 | {1, 3, 4, 5, 6, 0, 0, 0}, /* 0x3D (00111101) */ |
2256 | {2, 3, 4, 5, 6, 0, 0, 0}, /* 0x3E (00111110) */ |
2257 | {1, 2, 3, 4, 5, 6, 0, 0}, /* 0x3F (00111111) */ |
2258 | {7, 0, 0, 0, 0, 0, 0, 0}, /* 0x40 (01000000) */ |
2259 | {1, 7, 0, 0, 0, 0, 0, 0}, /* 0x41 (01000001) */ |
2260 | {2, 7, 0, 0, 0, 0, 0, 0}, /* 0x42 (01000010) */ |
2261 | {1, 2, 7, 0, 0, 0, 0, 0}, /* 0x43 (01000011) */ |
2262 | {3, 7, 0, 0, 0, 0, 0, 0}, /* 0x44 (01000100) */ |
2263 | {1, 3, 7, 0, 0, 0, 0, 0}, /* 0x45 (01000101) */ |
2264 | {2, 3, 7, 0, 0, 0, 0, 0}, /* 0x46 (01000110) */ |
2265 | {1, 2, 3, 7, 0, 0, 0, 0}, /* 0x47 (01000111) */ |
2266 | {4, 7, 0, 0, 0, 0, 0, 0}, /* 0x48 (01001000) */ |
2267 | {1, 4, 7, 0, 0, 0, 0, 0}, /* 0x49 (01001001) */ |
2268 | {2, 4, 7, 0, 0, 0, 0, 0}, /* 0x4A (01001010) */ |
2269 | {1, 2, 4, 7, 0, 0, 0, 0}, /* 0x4B (01001011) */ |
2270 | {3, 4, 7, 0, 0, 0, 0, 0}, /* 0x4C (01001100) */ |
2271 | {1, 3, 4, 7, 0, 0, 0, 0}, /* 0x4D (01001101) */ |
2272 | {2, 3, 4, 7, 0, 0, 0, 0}, /* 0x4E (01001110) */ |
2273 | {1, 2, 3, 4, 7, 0, 0, 0}, /* 0x4F (01001111) */ |
2274 | {5, 7, 0, 0, 0, 0, 0, 0}, /* 0x50 (01010000) */ |
2275 | {1, 5, 7, 0, 0, 0, 0, 0}, /* 0x51 (01010001) */ |
2276 | {2, 5, 7, 0, 0, 0, 0, 0}, /* 0x52 (01010010) */ |
2277 | {1, 2, 5, 7, 0, 0, 0, 0}, /* 0x53 (01010011) */ |
2278 | {3, 5, 7, 0, 0, 0, 0, 0}, /* 0x54 (01010100) */ |
2279 | {1, 3, 5, 7, 0, 0, 0, 0}, /* 0x55 (01010101) */ |
2280 | {2, 3, 5, 7, 0, 0, 0, 0}, /* 0x56 (01010110) */ |
2281 | {1, 2, 3, 5, 7, 0, 0, 0}, /* 0x57 (01010111) */ |
2282 | {4, 5, 7, 0, 0, 0, 0, 0}, /* 0x58 (01011000) */ |
2283 | {1, 4, 5, 7, 0, 0, 0, 0}, /* 0x59 (01011001) */ |
2284 | {2, 4, 5, 7, 0, 0, 0, 0}, /* 0x5A (01011010) */ |
2285 | {1, 2, 4, 5, 7, 0, 0, 0}, /* 0x5B (01011011) */ |
2286 | {3, 4, 5, 7, 0, 0, 0, 0}, /* 0x5C (01011100) */ |
2287 | {1, 3, 4, 5, 7, 0, 0, 0}, /* 0x5D (01011101) */ |
2288 | {2, 3, 4, 5, 7, 0, 0, 0}, /* 0x5E (01011110) */ |
2289 | {1, 2, 3, 4, 5, 7, 0, 0}, /* 0x5F (01011111) */ |
2290 | {6, 7, 0, 0, 0, 0, 0, 0}, /* 0x60 (01100000) */ |
2291 | {1, 6, 7, 0, 0, 0, 0, 0}, /* 0x61 (01100001) */ |
2292 | {2, 6, 7, 0, 0, 0, 0, 0}, /* 0x62 (01100010) */ |
2293 | {1, 2, 6, 7, 0, 0, 0, 0}, /* 0x63 (01100011) */ |
2294 | {3, 6, 7, 0, 0, 0, 0, 0}, /* 0x64 (01100100) */ |
2295 | {1, 3, 6, 7, 0, 0, 0, 0}, /* 0x65 (01100101) */ |
2296 | {2, 3, 6, 7, 0, 0, 0, 0}, /* 0x66 (01100110) */ |
2297 | {1, 2, 3, 6, 7, 0, 0, 0}, /* 0x67 (01100111) */ |
2298 | {4, 6, 7, 0, 0, 0, 0, 0}, /* 0x68 (01101000) */ |
2299 | {1, 4, 6, 7, 0, 0, 0, 0}, /* 0x69 (01101001) */ |
2300 | {2, 4, 6, 7, 0, 0, 0, 0}, /* 0x6A (01101010) */ |
2301 | {1, 2, 4, 6, 7, 0, 0, 0}, /* 0x6B (01101011) */ |
2302 | {3, 4, 6, 7, 0, 0, 0, 0}, /* 0x6C (01101100) */ |
2303 | {1, 3, 4, 6, 7, 0, 0, 0}, /* 0x6D (01101101) */ |
2304 | {2, 3, 4, 6, 7, 0, 0, 0}, /* 0x6E (01101110) */ |
2305 | {1, 2, 3, 4, 6, 7, 0, 0}, /* 0x6F (01101111) */ |
2306 | {5, 6, 7, 0, 0, 0, 0, 0}, /* 0x70 (01110000) */ |
2307 | {1, 5, 6, 7, 0, 0, 0, 0}, /* 0x71 (01110001) */ |
2308 | {2, 5, 6, 7, 0, 0, 0, 0}, /* 0x72 (01110010) */ |
2309 | {1, 2, 5, 6, 7, 0, 0, 0}, /* 0x73 (01110011) */ |
2310 | {3, 5, 6, 7, 0, 0, 0, 0}, /* 0x74 (01110100) */ |
2311 | {1, 3, 5, 6, 7, 0, 0, 0}, /* 0x75 (01110101) */ |
2312 | {2, 3, 5, 6, 7, 0, 0, 0}, /* 0x76 (01110110) */ |
2313 | {1, 2, 3, 5, 6, 7, 0, 0}, /* 0x77 (01110111) */ |
2314 | {4, 5, 6, 7, 0, 0, 0, 0}, /* 0x78 (01111000) */ |
2315 | {1, 4, 5, 6, 7, 0, 0, 0}, /* 0x79 (01111001) */ |
2316 | {2, 4, 5, 6, 7, 0, 0, 0}, /* 0x7A (01111010) */ |
2317 | {1, 2, 4, 5, 6, 7, 0, 0}, /* 0x7B (01111011) */ |
2318 | {3, 4, 5, 6, 7, 0, 0, 0}, /* 0x7C (01111100) */ |
2319 | {1, 3, 4, 5, 6, 7, 0, 0}, /* 0x7D (01111101) */ |
2320 | {2, 3, 4, 5, 6, 7, 0, 0}, /* 0x7E (01111110) */ |
2321 | {1, 2, 3, 4, 5, 6, 7, 0}, /* 0x7F (01111111) */ |
2322 | {8, 0, 0, 0, 0, 0, 0, 0}, /* 0x80 (10000000) */ |
2323 | {1, 8, 0, 0, 0, 0, 0, 0}, /* 0x81 (10000001) */ |
2324 | {2, 8, 0, 0, 0, 0, 0, 0}, /* 0x82 (10000010) */ |
2325 | {1, 2, 8, 0, 0, 0, 0, 0}, /* 0x83 (10000011) */ |
2326 | {3, 8, 0, 0, 0, 0, 0, 0}, /* 0x84 (10000100) */ |
2327 | {1, 3, 8, 0, 0, 0, 0, 0}, /* 0x85 (10000101) */ |
2328 | {2, 3, 8, 0, 0, 0, 0, 0}, /* 0x86 (10000110) */ |
2329 | {1, 2, 3, 8, 0, 0, 0, 0}, /* 0x87 (10000111) */ |
2330 | {4, 8, 0, 0, 0, 0, 0, 0}, /* 0x88 (10001000) */ |
2331 | {1, 4, 8, 0, 0, 0, 0, 0}, /* 0x89 (10001001) */ |
2332 | {2, 4, 8, 0, 0, 0, 0, 0}, /* 0x8A (10001010) */ |
2333 | {1, 2, 4, 8, 0, 0, 0, 0}, /* 0x8B (10001011) */ |
2334 | {3, 4, 8, 0, 0, 0, 0, 0}, /* 0x8C (10001100) */ |
2335 | {1, 3, 4, 8, 0, 0, 0, 0}, /* 0x8D (10001101) */ |
2336 | {2, 3, 4, 8, 0, 0, 0, 0}, /* 0x8E (10001110) */ |
2337 | {1, 2, 3, 4, 8, 0, 0, 0}, /* 0x8F (10001111) */ |
2338 | {5, 8, 0, 0, 0, 0, 0, 0}, /* 0x90 (10010000) */ |
2339 | {1, 5, 8, 0, 0, 0, 0, 0}, /* 0x91 (10010001) */ |
2340 | {2, 5, 8, 0, 0, 0, 0, 0}, /* 0x92 (10010010) */ |
2341 | {1, 2, 5, 8, 0, 0, 0, 0}, /* 0x93 (10010011) */ |
2342 | {3, 5, 8, 0, 0, 0, 0, 0}, /* 0x94 (10010100) */ |
2343 | {1, 3, 5, 8, 0, 0, 0, 0}, /* 0x95 (10010101) */ |
2344 | {2, 3, 5, 8, 0, 0, 0, 0}, /* 0x96 (10010110) */ |
2345 | {1, 2, 3, 5, 8, 0, 0, 0}, /* 0x97 (10010111) */ |
2346 | {4, 5, 8, 0, 0, 0, 0, 0}, /* 0x98 (10011000) */ |
2347 | {1, 4, 5, 8, 0, 0, 0, 0}, /* 0x99 (10011001) */ |
2348 | {2, 4, 5, 8, 0, 0, 0, 0}, /* 0x9A (10011010) */ |
2349 | {1, 2, 4, 5, 8, 0, 0, 0}, /* 0x9B (10011011) */ |
2350 | {3, 4, 5, 8, 0, 0, 0, 0}, /* 0x9C (10011100) */ |
2351 | {1, 3, 4, 5, 8, 0, 0, 0}, /* 0x9D (10011101) */ |
2352 | {2, 3, 4, 5, 8, 0, 0, 0}, /* 0x9E (10011110) */ |
2353 | {1, 2, 3, 4, 5, 8, 0, 0}, /* 0x9F (10011111) */ |
2354 | {6, 8, 0, 0, 0, 0, 0, 0}, /* 0xA0 (10100000) */ |
2355 | {1, 6, 8, 0, 0, 0, 0, 0}, /* 0xA1 (10100001) */ |
2356 | {2, 6, 8, 0, 0, 0, 0, 0}, /* 0xA2 (10100010) */ |
2357 | {1, 2, 6, 8, 0, 0, 0, 0}, /* 0xA3 (10100011) */ |
2358 | {3, 6, 8, 0, 0, 0, 0, 0}, /* 0xA4 (10100100) */ |
2359 | {1, 3, 6, 8, 0, 0, 0, 0}, /* 0xA5 (10100101) */ |
2360 | {2, 3, 6, 8, 0, 0, 0, 0}, /* 0xA6 (10100110) */ |
2361 | {1, 2, 3, 6, 8, 0, 0, 0}, /* 0xA7 (10100111) */ |
2362 | {4, 6, 8, 0, 0, 0, 0, 0}, /* 0xA8 (10101000) */ |
2363 | {1, 4, 6, 8, 0, 0, 0, 0}, /* 0xA9 (10101001) */ |
2364 | {2, 4, 6, 8, 0, 0, 0, 0}, /* 0xAA (10101010) */ |
2365 | {1, 2, 4, 6, 8, 0, 0, 0}, /* 0xAB (10101011) */ |
2366 | {3, 4, 6, 8, 0, 0, 0, 0}, /* 0xAC (10101100) */ |
2367 | {1, 3, 4, 6, 8, 0, 0, 0}, /* 0xAD (10101101) */ |
2368 | {2, 3, 4, 6, 8, 0, 0, 0}, /* 0xAE (10101110) */ |
2369 | {1, 2, 3, 4, 6, 8, 0, 0}, /* 0xAF (10101111) */ |
2370 | {5, 6, 8, 0, 0, 0, 0, 0}, /* 0xB0 (10110000) */ |
2371 | {1, 5, 6, 8, 0, 0, 0, 0}, /* 0xB1 (10110001) */ |
2372 | {2, 5, 6, 8, 0, 0, 0, 0}, /* 0xB2 (10110010) */ |
2373 | {1, 2, 5, 6, 8, 0, 0, 0}, /* 0xB3 (10110011) */ |
2374 | {3, 5, 6, 8, 0, 0, 0, 0}, /* 0xB4 (10110100) */ |
2375 | {1, 3, 5, 6, 8, 0, 0, 0}, /* 0xB5 (10110101) */ |
2376 | {2, 3, 5, 6, 8, 0, 0, 0}, /* 0xB6 (10110110) */ |
2377 | {1, 2, 3, 5, 6, 8, 0, 0}, /* 0xB7 (10110111) */ |
2378 | {4, 5, 6, 8, 0, 0, 0, 0}, /* 0xB8 (10111000) */ |
2379 | {1, 4, 5, 6, 8, 0, 0, 0}, /* 0xB9 (10111001) */ |
2380 | {2, 4, 5, 6, 8, 0, 0, 0}, /* 0xBA (10111010) */ |
2381 | {1, 2, 4, 5, 6, 8, 0, 0}, /* 0xBB (10111011) */ |
2382 | {3, 4, 5, 6, 8, 0, 0, 0}, /* 0xBC (10111100) */ |
2383 | {1, 3, 4, 5, 6, 8, 0, 0}, /* 0xBD (10111101) */ |
2384 | {2, 3, 4, 5, 6, 8, 0, 0}, /* 0xBE (10111110) */ |
2385 | {1, 2, 3, 4, 5, 6, 8, 0}, /* 0xBF (10111111) */ |
2386 | {7, 8, 0, 0, 0, 0, 0, 0}, /* 0xC0 (11000000) */ |
2387 | {1, 7, 8, 0, 0, 0, 0, 0}, /* 0xC1 (11000001) */ |
2388 | {2, 7, 8, 0, 0, 0, 0, 0}, /* 0xC2 (11000010) */ |
2389 | {1, 2, 7, 8, 0, 0, 0, 0}, /* 0xC3 (11000011) */ |
2390 | {3, 7, 8, 0, 0, 0, 0, 0}, /* 0xC4 (11000100) */ |
2391 | {1, 3, 7, 8, 0, 0, 0, 0}, /* 0xC5 (11000101) */ |
2392 | {2, 3, 7, 8, 0, 0, 0, 0}, /* 0xC6 (11000110) */ |
2393 | {1, 2, 3, 7, 8, 0, 0, 0}, /* 0xC7 (11000111) */ |
2394 | {4, 7, 8, 0, 0, 0, 0, 0}, /* 0xC8 (11001000) */ |
2395 | {1, 4, 7, 8, 0, 0, 0, 0}, /* 0xC9 (11001001) */ |
2396 | {2, 4, 7, 8, 0, 0, 0, 0}, /* 0xCA (11001010) */ |
2397 | {1, 2, 4, 7, 8, 0, 0, 0}, /* 0xCB (11001011) */ |
2398 | {3, 4, 7, 8, 0, 0, 0, 0}, /* 0xCC (11001100) */ |
2399 | {1, 3, 4, 7, 8, 0, 0, 0}, /* 0xCD (11001101) */ |
2400 | {2, 3, 4, 7, 8, 0, 0, 0}, /* 0xCE (11001110) */ |
2401 | {1, 2, 3, 4, 7, 8, 0, 0}, /* 0xCF (11001111) */ |
2402 | {5, 7, 8, 0, 0, 0, 0, 0}, /* 0xD0 (11010000) */ |
2403 | {1, 5, 7, 8, 0, 0, 0, 0}, /* 0xD1 (11010001) */ |
2404 | {2, 5, 7, 8, 0, 0, 0, 0}, /* 0xD2 (11010010) */ |
2405 | {1, 2, 5, 7, 8, 0, 0, 0}, /* 0xD3 (11010011) */ |
2406 | {3, 5, 7, 8, 0, 0, 0, 0}, /* 0xD4 (11010100) */ |
2407 | {1, 3, 5, 7, 8, 0, 0, 0}, /* 0xD5 (11010101) */ |
2408 | {2, 3, 5, 7, 8, 0, 0, 0}, /* 0xD6 (11010110) */ |
2409 | {1, 2, 3, 5, 7, 8, 0, 0}, /* 0xD7 (11010111) */ |
2410 | {4, 5, 7, 8, 0, 0, 0, 0}, /* 0xD8 (11011000) */ |
2411 | {1, 4, 5, 7, 8, 0, 0, 0}, /* 0xD9 (11011001) */ |
2412 | {2, 4, 5, 7, 8, 0, 0, 0}, /* 0xDA (11011010) */ |
2413 | {1, 2, 4, 5, 7, 8, 0, 0}, /* 0xDB (11011011) */ |
2414 | {3, 4, 5, 7, 8, 0, 0, 0}, /* 0xDC (11011100) */ |
2415 | {1, 3, 4, 5, 7, 8, 0, 0}, /* 0xDD (11011101) */ |
2416 | {2, 3, 4, 5, 7, 8, 0, 0}, /* 0xDE (11011110) */ |
2417 | {1, 2, 3, 4, 5, 7, 8, 0}, /* 0xDF (11011111) */ |
2418 | {6, 7, 8, 0, 0, 0, 0, 0}, /* 0xE0 (11100000) */ |
2419 | {1, 6, 7, 8, 0, 0, 0, 0}, /* 0xE1 (11100001) */ |
2420 | {2, 6, 7, 8, 0, 0, 0, 0}, /* 0xE2 (11100010) */ |
2421 | {1, 2, 6, 7, 8, 0, 0, 0}, /* 0xE3 (11100011) */ |
2422 | {3, 6, 7, 8, 0, 0, 0, 0}, /* 0xE4 (11100100) */ |
2423 | {1, 3, 6, 7, 8, 0, 0, 0}, /* 0xE5 (11100101) */ |
2424 | {2, 3, 6, 7, 8, 0, 0, 0}, /* 0xE6 (11100110) */ |
2425 | {1, 2, 3, 6, 7, 8, 0, 0}, /* 0xE7 (11100111) */ |
2426 | {4, 6, 7, 8, 0, 0, 0, 0}, /* 0xE8 (11101000) */ |
2427 | {1, 4, 6, 7, 8, 0, 0, 0}, /* 0xE9 (11101001) */ |
2428 | {2, 4, 6, 7, 8, 0, 0, 0}, /* 0xEA (11101010) */ |
2429 | {1, 2, 4, 6, 7, 8, 0, 0}, /* 0xEB (11101011) */ |
2430 | {3, 4, 6, 7, 8, 0, 0, 0}, /* 0xEC (11101100) */ |
2431 | {1, 3, 4, 6, 7, 8, 0, 0}, /* 0xED (11101101) */ |
2432 | {2, 3, 4, 6, 7, 8, 0, 0}, /* 0xEE (11101110) */ |
2433 | {1, 2, 3, 4, 6, 7, 8, 0}, /* 0xEF (11101111) */ |
2434 | {5, 6, 7, 8, 0, 0, 0, 0}, /* 0xF0 (11110000) */ |
2435 | {1, 5, 6, 7, 8, 0, 0, 0}, /* 0xF1 (11110001) */ |
2436 | {2, 5, 6, 7, 8, 0, 0, 0}, /* 0xF2 (11110010) */ |
2437 | {1, 2, 5, 6, 7, 8, 0, 0}, /* 0xF3 (11110011) */ |
2438 | {3, 5, 6, 7, 8, 0, 0, 0}, /* 0xF4 (11110100) */ |
2439 | {1, 3, 5, 6, 7, 8, 0, 0}, /* 0xF5 (11110101) */ |
2440 | {2, 3, 5, 6, 7, 8, 0, 0}, /* 0xF6 (11110110) */ |
2441 | {1, 2, 3, 5, 6, 7, 8, 0}, /* 0xF7 (11110111) */ |
2442 | {4, 5, 6, 7, 8, 0, 0, 0}, /* 0xF8 (11111000) */ |
2443 | {1, 4, 5, 6, 7, 8, 0, 0}, /* 0xF9 (11111001) */ |
2444 | {2, 4, 5, 6, 7, 8, 0, 0}, /* 0xFA (11111010) */ |
2445 | {1, 2, 4, 5, 6, 7, 8, 0}, /* 0xFB (11111011) */ |
2446 | {3, 4, 5, 6, 7, 8, 0, 0}, /* 0xFC (11111100) */ |
2447 | {1, 3, 4, 5, 6, 7, 8, 0}, /* 0xFD (11111101) */ |
2448 | {2, 3, 4, 5, 6, 7, 8, 0}, /* 0xFE (11111110) */ |
2449 | {1, 2, 3, 4, 5, 6, 7, 8} /* 0xFF (11111111) */ |
2450 | }; |
2451 | |
2452 | #endif |
2453 | |
2454 | #ifdef USEAVX |
2455 | |
2456 | size_t bitset_extract_setbits_avx2(uint64_t *array, size_t length, void *vout, |
2457 | size_t outcapacity, uint32_t base) { |
2458 | uint32_t *out = (uint32_t *)vout; |
2459 | uint32_t *initout = out; |
2460 | __m256i baseVec = _mm256_set1_epi32(base - 1); |
2461 | __m256i incVec = _mm256_set1_epi32(64); |
2462 | __m256i add8 = _mm256_set1_epi32(8); |
2463 | uint32_t *safeout = out + outcapacity; |
2464 | size_t i = 0; |
2465 | for (; (i < length) && (out + 64 <= safeout); ++i) { |
2466 | uint64_t w = array[i]; |
2467 | if (w == 0) { |
2468 | baseVec = _mm256_add_epi32(baseVec, incVec); |
2469 | } else { |
2470 | for (int k = 0; k < 4; ++k) { |
2471 | uint8_t byteA = (uint8_t)w; |
2472 | uint8_t byteB = (uint8_t)(w >> 8); |
2473 | w >>= 16; |
2474 | __m256i vecA = |
2475 | _mm256_load_si256((const __m256i *)vecDecodeTable[byteA]); |
2476 | __m256i vecB = |
2477 | _mm256_load_si256((const __m256i *)vecDecodeTable[byteB]); |
2478 | uint8_t advanceA = lengthTable[byteA]; |
2479 | uint8_t advanceB = lengthTable[byteB]; |
2480 | vecA = _mm256_add_epi32(baseVec, vecA); |
2481 | baseVec = _mm256_add_epi32(baseVec, add8); |
2482 | vecB = _mm256_add_epi32(baseVec, vecB); |
2483 | baseVec = _mm256_add_epi32(baseVec, add8); |
2484 | _mm256_storeu_si256((__m256i *)out, vecA); |
2485 | out += advanceA; |
2486 | _mm256_storeu_si256((__m256i *)out, vecB); |
2487 | out += advanceB; |
2488 | } |
2489 | } |
2490 | } |
2491 | base += i * 64; |
2492 | for (; (i < length) && (out < safeout); ++i) { |
2493 | uint64_t w = array[i]; |
2494 | while ((w != 0) && (out < safeout)) { |
2495 | uint64_t t = w & (~w + 1); // on x64, should compile to BLSI (careful: the Intel compiler seems to fail) |
2496 | int r = __builtin_ctzll(w); // on x64, should compile to TZCNT |
2497 | uint32_t val = r + base; |
2498 | memcpy(out, &val, |
2499 | sizeof(uint32_t)); // should be compiled as a MOV on x64 |
2500 | out++; |
2501 | w ^= t; |
2502 | } |
2503 | base += 64; |
2504 | } |
2505 | return out - initout; |
2506 | } |
2507 | #endif // USEAVX |
2508 | |
/*
 * Given a bitset containing "length" 64-bit words, write the positions of the
 * set bits to "vout" as 32-bit integers; bit j of word i is reported as
 * base + 64 * i + j, in increasing order.
 *
 * No capacity is checked: "vout" must have room for one uint32_t per set bit.
 *
 * Returns how many values were written.
 */
size_t bitset_extract_setbits(uint64_t *bitset, size_t length, void *vout,
                              uint32_t base) {
    // size_t, not int: the count is returned as size_t and can exceed
    // INT_MAX for very large inputs.
    size_t outpos = 0;
    uint32_t *out = (uint32_t *)vout;
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset[i];
        while (w != 0) {
            uint64_t t = w & (~w + 1);   // isolate the lowest set bit
            int r = __builtin_ctzll(w);  // index of the lowest set bit
            uint32_t val = (uint32_t)r + base;
            // Stored through memcpy rather than a direct assignment.
            memcpy(out + outpos, &val, sizeof(uint32_t));
            outpos++;
            w ^= t;  // clear the bit just reported
        }
        base += 64;
    }
    return outpos;
}
2528 | |
/*
 * Given two bitsets of "length" 64-bit words each, write the positions of the
 * bits set in both (bitwise AND, word by word) to "out" as 16-bit integers;
 * bit j of word i is reported as base + 64 * i + j (truncated to 16 bits).
 *
 * No capacity is checked: "out" must have room for every common bit.
 *
 * Returns how many values were written.
 */
size_t bitset_extract_intersection_setbits_uint16(
    const uint64_t * __restrict__ bitset1,
    const uint64_t * __restrict__ bitset2,
    size_t length, uint16_t *out,
    uint16_t base) {
    // size_t, not int: the count doubles as an index and as the return value.
    size_t outpos = 0;
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset1[i] & bitset2[i];
        while (w != 0) {
            uint64_t t = w & (~w + 1);   // isolate the lowest set bit
            int r = __builtin_ctzll(w);  // index of the lowest set bit
            out[outpos++] = (uint16_t)(r + base);
            w ^= t;  // clear the bit just reported
        }
        base += 64;
    }
    return outpos;
}
2546 | |
2547 | #ifdef IS_X64 |
2548 | /* |
2549 | * Given a bitset containing "length" 64-bit words, write out the position |
2550 | * of all the set bits to "out" as 16-bit integers, values start at "base" (can |
2551 | *be set to zero). |
2552 | * |
2553 | * The "out" pointer should be sufficient to store the actual number of bits |
2554 | *set. |
2555 | * |
2556 | * Returns how many values were actually decoded. |
2557 | * |
2558 | * This function uses SSE decoding. |
2559 | */ |
2560 | size_t (const uint64_t *bitset, size_t length, |
2561 | uint16_t *out, size_t outcapacity, |
2562 | uint16_t base) { |
2563 | uint16_t *initout = out; |
2564 | __m128i baseVec = _mm_set1_epi16(base - 1); |
2565 | __m128i incVec = _mm_set1_epi16(64); |
2566 | __m128i add8 = _mm_set1_epi16(8); |
2567 | uint16_t *safeout = out + outcapacity; |
2568 | const int numberofbytes = 2; // process two bytes at a time |
2569 | size_t i = 0; |
2570 | for (; (i < length) && (out + numberofbytes * 8 <= safeout); ++i) { |
2571 | uint64_t w = bitset[i]; |
2572 | if (w == 0) { |
2573 | baseVec = _mm_add_epi16(baseVec, incVec); |
2574 | } else { |
2575 | for (int k = 0; k < 4; ++k) { |
2576 | uint8_t byteA = (uint8_t)w; |
2577 | uint8_t byteB = (uint8_t)(w >> 8); |
2578 | w >>= 16; |
2579 | __m128i vecA = _mm_load_si128( |
2580 | (const __m128i *)vecDecodeTable_uint16[byteA]); |
2581 | __m128i vecB = _mm_load_si128( |
2582 | (const __m128i *)vecDecodeTable_uint16[byteB]); |
2583 | uint8_t advanceA = lengthTable[byteA]; |
2584 | uint8_t advanceB = lengthTable[byteB]; |
2585 | vecA = _mm_add_epi16(baseVec, vecA); |
2586 | baseVec = _mm_add_epi16(baseVec, add8); |
2587 | vecB = _mm_add_epi16(baseVec, vecB); |
2588 | baseVec = _mm_add_epi16(baseVec, add8); |
2589 | _mm_storeu_si128((__m128i *)out, vecA); |
2590 | out += advanceA; |
2591 | _mm_storeu_si128((__m128i *)out, vecB); |
2592 | out += advanceB; |
2593 | } |
2594 | } |
2595 | } |
2596 | base += (uint16_t)(i * 64); |
2597 | for (; (i < length) && (out < safeout); ++i) { |
2598 | uint64_t w = bitset[i]; |
2599 | while ((w != 0) && (out < safeout)) { |
2600 | uint64_t t = w & (~w + 1); |
2601 | int r = __builtin_ctzll(w); |
2602 | *out = r + base; |
2603 | out++; |
2604 | w ^= t; |
2605 | } |
2606 | base += 64; |
2607 | } |
2608 | return out - initout; |
2609 | } |
2610 | #endif |
2611 | |
/*
 * Given a bitset containing "length" 64-bit words, write out the position
 * of all the set bits to "out" as 16-bit integers, values start at "base"
 * (can be set to zero); bit j of word i is reported as base + 64 * i + j
 * (truncated to 16 bits).
 *
 * The "out" pointer should be sufficient to store the actual number of bits
 * set; no capacity is checked here.
 *
 * Returns how many values were actually decoded.
 */
size_t bitset_extract_setbits_uint16(const uint64_t *bitset, size_t length,
                                     uint16_t *out, uint16_t base) {
    // size_t, not int: the count doubles as an index and as the return value.
    size_t outpos = 0;
    for (size_t i = 0; i < length; ++i) {
        uint64_t w = bitset[i];
        while (w != 0) {
            uint64_t t = w & (~w + 1);   // isolate the lowest set bit
            int r = __builtin_ctzll(w);  // index of the lowest set bit
            out[outpos++] = (uint16_t)(r + base);
            w ^= t;  // clear the bit just reported
        }
        base += 64;
    }
    return outpos;
}
2636 | |
2637 | #if defined(ASMBITMANIPOPTIMIZATION) |
2638 | |
/*
 * Set, in "bitset" (addressed as an array of 64-bit words), the bit at every
 * position found in "list" ("length" 16-bit values), keeping a running
 * cardinality: "card" is incremented each time a bit goes from 0 to 1.
 *
 * Returns the updated cardinality; "card" is returned unchanged when "length"
 * is zero.
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t offset, load, pos;
    uint64_t shift = 6;
    const uint16_t *end = list + length;
    // The asm loop tests its exit condition after the body, so an empty list
    // must be filtered out here.
    if (!length) return card;
    // TODO: could unroll for performance, see bitset_set_list
    // bts is not available as an intrinsic in GCC
    //
    // Per element: pos = *list; offset = pos >> 6 (word index);
    // bts sets bit (pos mod 64) of the loaded word and leaves the previous
    // bit value in CF; "sbb $-1" then computes card + 1 - CF.
    //
    // The asm reads *list and writes the bitset through pointers that are
    // passed as plain register operands, and it changes the flags, so both
    // "memory" and "cc" are declared as clobbers.
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"
        "shrx %[shift], %[pos], %[offset]\n"
        "mov (%[bitset],%[offset],8), %[load]\n"
        "bts %[pos], %[load]\n"
        "mov %[load], (%[bitset],%[offset],8)\n"
        "sbb $-1, %[card]\n"
        "add $2, %[list]\n"
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r" (card), [list] "+&r" (list), [load] "=&r" (load),
          [pos] "=&r" (pos), [offset] "=&r" (offset)
        : [end] "r" (end), [bitset] "r" (bitset), [shift] "r" (shift)
        : "cc", "memory");
    return card;
}
2663 | |
/*
 * Set every bit whose position appears in "list" ("length" entries) inside
 * "bitset". x64 inline-assembly version, unrolled four entries at a time.
 *
 * Each asm statement loads the 64-bit word holding bit "pos" (word index
 * pos >> 6), sets the bit with bts and stores the word back. Because the
 * stores target memory that is not an asm operand, every statement declares
 * a "memory" clobber so the compiler does not cache or reorder accesses to
 * the bitset around it.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t pos;
    const uint16_t *end = list + length;

    uint64_t shift = 6;
    uint64_t offset;
    uint64_t load;
    // main loop: four list entries per iteration
    for (; list + 3 < end; list += 4) {
        pos = list[0];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "cc", "memory");
        pos = list[1];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "cc", "memory");
        pos = list[2];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "cc", "memory");
        pos = list[3];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "cc", "memory");
    }

    // tail: the remaining (at most three) entries, one at a time
    while (list != end) {
        pos = list[0];
        __asm volatile(
            "shrx %[shift], %[pos], %[offset]\n"
            "mov (%[bitset],%[offset],8), %[load]\n"
            "bts %[pos], %[load]\n"
            "mov %[load], (%[bitset],%[offset],8)"
            : [load] "=&r"(load), [offset] "=&r"(offset)
            : [bitset] "r"(bitset), [shift] "r"(shift), [pos] "r"(pos)
            : "cc", "memory");
        list++;
    }
}
2718 | |
/*
 * Clear every bit whose position appears in "list" ("length" entries) inside
 * "bitset", and return "card" decremented once for each bit that was set
 * beforehand. x64 inline-assembly version (uses shrx and btr).
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    uint64_t offset, load, pos;
    uint64_t shift = 6;  // pos >> 6 is the index of the 64-bit word
    const uint16_t *end = list + length;
    // the asm loop checks its exit condition only after handling an entry,
    // so an empty list has to be filtered out here
    if (!length) return card;
    // btr is not available as an intrinsic in GCC
    //
    // Per entry: load the 16-bit position, load the word that holds it,
    // clear the bit (btr leaves the previous bit value in CF), store the
    // word back and subtract CF from card through "sbb $0".
    __asm volatile(
        "1:\n"
        "movzwq (%[list]), %[pos]\n"
        "shrx %[shift], %[pos], %[offset]\n"
        "mov (%[bitset],%[offset],8), %[load]\n"
        "btr %[pos], %[load]\n"
        "mov %[load], (%[bitset],%[offset],8)\n"
        "sbb $0, %[card]\n"
        "add $2, %[list]\n"
        "cmp %[list], %[end]\n"
        "jnz 1b"
        : [card] "+&r" (card), [list] "+&r" (list), [load] "=&r" (load),
          [pos] "=&r" (pos), [offset] "=&r" (offset)
        : [end] "r" (end), [bitset] "r" (bitset), [shift] "r" (shift)
        :
        /* clobbers: the asm writes into the bitset */ "memory" );
    return card;
}
2744 | |
2745 | #else |
/*
 * Portable version: clear every bit whose position appears in "list"
 * ("length" entries) inside "bitset". Returns "card" decremented once for
 * each bit that was set before being cleared.
 */
uint64_t bitset_clear_list(void *bitset, uint64_t card, const uint16_t *list,
                           uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (const uint16_t *cursor = list, *stop = list + length; cursor != stop;
         ++cursor) {
        const uint64_t value = *cursor;
        const uint64_t word_index = value >> 6;
        const uint64_t bit = value % 64;
        const uint64_t before = words[word_index];
        const uint64_t after = before & ~(UINT64_C(1) << bit);
        // (before ^ after) is either zero or the single bit just cleared
        card -= (before ^ after) >> bit;
        words[word_index] = after;
    }
    return card;
}
2762 | |
/*
 * Portable version: set every bit whose position appears in "list"
 * ("length" entries) inside "bitset". Returns "card" incremented once for
 * each bit that was not already set.
 */
uint64_t bitset_set_list_withcard(void *bitset, uint64_t card,
                                  const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (const uint16_t *cursor = list, *stop = list + length; cursor != stop;
         ++cursor) {
        const uint64_t value = *cursor;
        const uint64_t word_index = value >> 6;
        const uint64_t bit = value % 64;
        const uint64_t before = words[word_index];
        const uint64_t after = before | (UINT64_C(1) << bit);
        // (before ^ after) is either zero or the single bit just set
        card += (before ^ after) >> bit;
        words[word_index] = after;
    }
    return card;
}
2779 | |
/*
 * Portable version: set every bit whose position appears in "list"
 * ("length" entries) inside "bitset". No cardinality is tracked.
 */
void bitset_set_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (const uint16_t *cursor = list, *stop = list + length; cursor != stop;
         ++cursor) {
        const uint64_t value = *cursor;
        const uint64_t word_index = value >> 6;
        const uint64_t bit = value % 64;
        words[word_index] = words[word_index] | (UINT64_C(1) << bit);
    }
}
2793 | |
2794 | #endif |
2795 | |
2796 | /* flip specified bits */ |
2797 | /* TODO: consider whether worthwhile to make an asm version */ |
2798 | |
/*
 * Toggle every bit whose position appears in "list" ("length" entries)
 * inside "bitset". Returns "card" adjusted by +1 for each bit that became
 * set and by -1 for each bit that became clear.
 */
uint64_t bitset_flip_list_withcard(void *bitset, uint64_t card,
                                   const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (const uint16_t *cursor = list, *stop = list + length; cursor != stop;
         ++cursor) {
        const uint64_t value = *cursor;
        const uint64_t word_index = value >> 6;
        const uint64_t bit = value % 64;
        const uint64_t mask = UINT64_C(1) << bit;
        const uint64_t before = words[word_index];
        const uint64_t was_set = (before & mask) >> bit;  // 0 or 1
        // todo: is a branch here all that bad?
        // branch-free +1 or -1 (the -1 is applied modulo 2^64)
        card += (1 - 2 * was_set);
        words[word_index] = before ^ mask;
    }
    return card;
}
2817 | |
/*
 * Toggle every bit whose position appears in "list" ("length" entries)
 * inside "bitset". No cardinality is tracked.
 */
void bitset_flip_list(void *bitset, const uint16_t *list, uint64_t length) {
    uint64_t *words = (uint64_t *)bitset;
    for (const uint16_t *cursor = list, *stop = list + length; cursor != stop;
         ++cursor) {
        const uint64_t value = *cursor;
        const uint64_t word_index = value >> 6;
        const uint64_t bit = value % 64;
        words[word_index] = words[word_index] ^ (UINT64_C(1) << bit);
    }
}
2831 | /* end file /opt/bitmap/CRoaring-0.2.57/src/bitset_util.c */ |
2832 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */ |
2833 | /* |
2834 | * array.c |
2835 | * |
2836 | */ |
2837 | |
2838 | #include <assert.h> |
2839 | #include <stdio.h> |
2840 | #include <stdlib.h> |
2841 | |
/*
 * External declarations of array container helpers. No bodies appear here;
 * presumably these functions are defined `inline` in the roaring header
 * (TODO confirm). Under C99 inline semantics, redeclaring an inline function
 * with `extern` in one translation unit makes the compiler emit its single
 * external definition in this file.
 */
extern inline uint16_t array_container_minimum(const array_container_t *arr);
extern inline uint16_t array_container_maximum(const array_container_t *arr);
extern inline int array_container_index_equalorlarger(const array_container_t *arr, uint16_t x);

extern inline int array_container_rank(const array_container_t *arr,
                                       uint16_t x);
extern inline bool array_container_contains(const array_container_t *arr,
                                            uint16_t pos);
extern int array_container_cardinality(const array_container_t *array);
extern bool array_container_nonzero_cardinality(const array_container_t *array);
extern void array_container_clear(array_container_t *array);
extern int32_t array_container_serialized_size_in_bytes(int32_t card);
extern bool array_container_empty(const array_container_t *array);
extern bool array_container_full(const array_container_t *array);
2856 | |
2857 | /* Create a new array with capacity size. Return NULL in case of failure. */ |
2858 | array_container_t *array_container_create_given_capacity(int32_t size) { |
2859 | array_container_t *container; |
2860 | |
2861 | if ((container = (array_container_t *)malloc(sizeof(array_container_t))) == |
2862 | NULL) { |
2863 | return NULL; |
2864 | } |
2865 | |
2866 | if( size <= 0 ) { // we don't want to rely on malloc(0) |
2867 | container->array = NULL; |
2868 | } else if ((container->array = (uint16_t *)malloc(sizeof(uint16_t) * size)) == |
2869 | NULL) { |
2870 | free(container); |
2871 | return NULL; |
2872 | } |
2873 | |
2874 | container->capacity = size; |
2875 | container->cardinality = 0; |
2876 | |
2877 | return container; |
2878 | } |
2879 | |
2880 | /* Create a new array. Return NULL in case of failure. */ |
2881 | array_container_t *array_container_create() { |
2882 | return array_container_create_given_capacity(ARRAY_DEFAULT_INIT_SIZE); |
2883 | } |
2884 | |
2885 | /* Create a new array containing all values in [min,max). */ |
2886 | array_container_t * array_container_create_range(uint32_t min, uint32_t max) { |
2887 | array_container_t * answer = array_container_create_given_capacity(max - min + 1); |
2888 | if(answer == NULL) return answer; |
2889 | answer->cardinality = 0; |
2890 | for(uint32_t k = min; k < max; k++) { |
2891 | answer->array[answer->cardinality++] = k; |
2892 | } |
2893 | return answer; |
2894 | } |
2895 | |
2896 | /* Duplicate container */ |
2897 | array_container_t *array_container_clone(const array_container_t *src) { |
2898 | array_container_t *newcontainer = |
2899 | array_container_create_given_capacity(src->capacity); |
2900 | if (newcontainer == NULL) return NULL; |
2901 | |
2902 | newcontainer->cardinality = src->cardinality; |
2903 | |
2904 | memcpy(newcontainer->array, src->array, |
2905 | src->cardinality * sizeof(uint16_t)); |
2906 | |
2907 | return newcontainer; |
2908 | } |
2909 | |
2910 | int array_container_shrink_to_fit(array_container_t *src) { |
2911 | if (src->cardinality == src->capacity) return 0; // nothing to do |
2912 | int savings = src->capacity - src->cardinality; |
2913 | src->capacity = src->cardinality; |
2914 | if( src->capacity == 0) { // we do not want to rely on realloc for zero allocs |
2915 | free(src->array); |
2916 | src->array = NULL; |
2917 | } else { |
2918 | uint16_t *oldarray = src->array; |
2919 | src->array = |
2920 | (uint16_t *)realloc(oldarray, src->capacity * sizeof(uint16_t)); |
2921 | if (src->array == NULL) free(oldarray); // should never happen? |
2922 | } |
2923 | return savings; |
2924 | } |
2925 | |
2926 | /* Free memory. */ |
2927 | void array_container_free(array_container_t *arr) { |
2928 | if(arr->array != NULL) {// Jon Strabala reports that some tools complain otherwise |
2929 | free(arr->array); |
2930 | arr->array = NULL; // pedantic |
2931 | } |
2932 | free(arr); |
2933 | } |
2934 | |
2935 | static inline int32_t grow_capacity(int32_t capacity) { |
2936 | return (capacity <= 0) ? ARRAY_DEFAULT_INIT_SIZE |
2937 | : capacity < 64 ? capacity * 2 |
2938 | : capacity < 1024 ? capacity * 3 / 2 |
2939 | : capacity * 5 / 4; |
2940 | } |
2941 | |
/* Restrict "val" to [min, max]. The lower bound is tested first, so "min"
 * wins when val < min even if min > max. */
static inline int32_t clamp(int32_t val, int32_t min, int32_t max) {
    if (val < min) {
        return min;
    }
    if (val > max) {
        return max;
    }
    return val;
}
2945 | |
2946 | void array_container_grow(array_container_t *container, int32_t min, |
2947 | bool preserve) { |
2948 | |
2949 | int32_t max = (min <= DEFAULT_MAX_SIZE ? DEFAULT_MAX_SIZE : 65536); |
2950 | int32_t new_capacity = clamp(grow_capacity(container->capacity), min, max); |
2951 | |
2952 | container->capacity = new_capacity; |
2953 | uint16_t *array = container->array; |
2954 | |
2955 | if (preserve) { |
2956 | container->array = |
2957 | (uint16_t *)realloc(array, new_capacity * sizeof(uint16_t)); |
2958 | if (container->array == NULL) free(array); |
2959 | } else { |
2960 | // Jon Strabala reports that some tools complain otherwise |
2961 | if (array != NULL) { |
2962 | free(array); |
2963 | } |
2964 | container->array = (uint16_t *)malloc(new_capacity * sizeof(uint16_t)); |
2965 | } |
2966 | |
2967 | // handle the case where realloc fails |
2968 | if (container->array == NULL) { |
2969 | fprintf(stderr, "could not allocate memory\n" ); |
2970 | } |
2971 | assert(container->array != NULL); |
2972 | } |
2973 | |
2974 | /* Copy one container into another. We assume that they are distinct. */ |
2975 | void array_container_copy(const array_container_t *src, |
2976 | array_container_t *dst) { |
2977 | const int32_t cardinality = src->cardinality; |
2978 | if (cardinality > dst->capacity) { |
2979 | array_container_grow(dst, cardinality, false); |
2980 | } |
2981 | |
2982 | dst->cardinality = cardinality; |
2983 | memcpy(dst->array, src->array, cardinality * sizeof(uint16_t)); |
2984 | } |
2985 | |
2986 | void array_container_add_from_range(array_container_t *arr, uint32_t min, |
2987 | uint32_t max, uint16_t step) { |
2988 | for (uint32_t value = min; value < max; value += step) { |
2989 | array_container_append(arr, value); |
2990 | } |
2991 | } |
2992 | |
2993 | /* Computes the union of array1 and array2 and write the result to arrayout. |
2994 | * It is assumed that arrayout is distinct from both array1 and array2. |
2995 | */ |
2996 | void array_container_union(const array_container_t *array_1, |
2997 | const array_container_t *array_2, |
2998 | array_container_t *out) { |
2999 | const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; |
3000 | const int32_t max_cardinality = card_1 + card_2; |
3001 | |
3002 | if (out->capacity < max_cardinality) { |
3003 | array_container_grow(out, max_cardinality, false); |
3004 | } |
3005 | out->cardinality = (int32_t)fast_union_uint16(array_1->array, card_1, |
3006 | array_2->array, card_2, out->array); |
3007 | |
3008 | } |
3009 | |
3010 | /* Computes the difference of array1 and array2 and write the result |
3011 | * to array out. |
3012 | * Array out does not need to be distinct from array_1 |
3013 | */ |
3014 | void array_container_andnot(const array_container_t *array_1, |
3015 | const array_container_t *array_2, |
3016 | array_container_t *out) { |
3017 | if (out->capacity < array_1->cardinality) |
3018 | array_container_grow(out, array_1->cardinality, false); |
3019 | #ifdef ROARING_VECTOR_OPERATIONS_ENABLED |
3020 | out->cardinality = |
3021 | difference_vector16(array_1->array, array_1->cardinality, |
3022 | array_2->array, array_2->cardinality, out->array); |
3023 | #else |
3024 | out->cardinality = |
3025 | difference_uint16(array_1->array, array_1->cardinality, array_2->array, |
3026 | array_2->cardinality, out->array); |
3027 | #endif |
3028 | } |
3029 | |
3030 | /* Computes the symmetric difference of array1 and array2 and write the |
3031 | * result |
3032 | * to arrayout. |
3033 | * It is assumed that arrayout is distinct from both array1 and array2. |
3034 | */ |
3035 | void array_container_xor(const array_container_t *array_1, |
3036 | const array_container_t *array_2, |
3037 | array_container_t *out) { |
3038 | const int32_t card_1 = array_1->cardinality, card_2 = array_2->cardinality; |
3039 | const int32_t max_cardinality = card_1 + card_2; |
3040 | if (out->capacity < max_cardinality) { |
3041 | array_container_grow(out, max_cardinality, false); |
3042 | } |
3043 | |
3044 | #ifdef ROARING_VECTOR_OPERATIONS_ENABLED |
3045 | out->cardinality = |
3046 | xor_vector16(array_1->array, array_1->cardinality, array_2->array, |
3047 | array_2->cardinality, out->array); |
3048 | #else |
3049 | out->cardinality = |
3050 | xor_uint16(array_1->array, array_1->cardinality, array_2->array, |
3051 | array_2->cardinality, out->array); |
3052 | #endif |
3053 | } |
3054 | |
/* Smaller of two 32-bit signed integers (b when they are equal). */
static inline int32_t minimum_int32(int32_t a, int32_t b) {
    if (a < b) {
        return a;
    }
    return b;
}
3058 | |
3059 | /* computes the intersection of array1 and array2 and write the result to |
3060 | * arrayout. |
3061 | * It is assumed that arrayout is distinct from both array1 and array2. |
3062 | * */ |
3063 | void array_container_intersection(const array_container_t *array1, |
3064 | const array_container_t *array2, |
3065 | array_container_t *out) { |
3066 | int32_t card_1 = array1->cardinality, card_2 = array2->cardinality, |
3067 | min_card = minimum_int32(card_1, card_2); |
3068 | const int threshold = 64; // subject to tuning |
3069 | #ifdef USEAVX |
3070 | if (out->capacity < min_card) { |
3071 | array_container_grow(out, min_card + sizeof(__m128i) / sizeof(uint16_t), |
3072 | false); |
3073 | } |
3074 | #else |
3075 | if (out->capacity < min_card) { |
3076 | array_container_grow(out, min_card, false); |
3077 | } |
3078 | #endif |
3079 | |
3080 | if (card_1 * threshold < card_2) { |
3081 | out->cardinality = intersect_skewed_uint16( |
3082 | array1->array, card_1, array2->array, card_2, out->array); |
3083 | } else if (card_2 * threshold < card_1) { |
3084 | out->cardinality = intersect_skewed_uint16( |
3085 | array2->array, card_2, array1->array, card_1, out->array); |
3086 | } else { |
3087 | #ifdef USEAVX |
3088 | out->cardinality = intersect_vector16( |
3089 | array1->array, card_1, array2->array, card_2, out->array); |
3090 | #else |
3091 | out->cardinality = intersect_uint16(array1->array, card_1, |
3092 | array2->array, card_2, out->array); |
3093 | #endif |
3094 | } |
3095 | } |
3096 | |
3097 | /* computes the size of the intersection of array1 and array2 |
3098 | * */ |
3099 | int array_container_intersection_cardinality(const array_container_t *array1, |
3100 | const array_container_t *array2) { |
3101 | int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; |
3102 | const int threshold = 64; // subject to tuning |
3103 | if (card_1 * threshold < card_2) { |
3104 | return intersect_skewed_uint16_cardinality(array1->array, card_1, |
3105 | array2->array, card_2); |
3106 | } else if (card_2 * threshold < card_1) { |
3107 | return intersect_skewed_uint16_cardinality(array2->array, card_2, |
3108 | array1->array, card_1); |
3109 | } else { |
3110 | #ifdef USEAVX |
3111 | return intersect_vector16_cardinality(array1->array, card_1, |
3112 | array2->array, card_2); |
3113 | #else |
3114 | return intersect_uint16_cardinality(array1->array, card_1, |
3115 | array2->array, card_2); |
3116 | #endif |
3117 | } |
3118 | } |
3119 | |
3120 | bool array_container_intersect(const array_container_t *array1, |
3121 | const array_container_t *array2) { |
3122 | int32_t card_1 = array1->cardinality, card_2 = array2->cardinality; |
3123 | const int threshold = 64; // subject to tuning |
3124 | if (card_1 * threshold < card_2) { |
3125 | return intersect_skewed_uint16_nonempty( |
3126 | array1->array, card_1, array2->array, card_2); |
3127 | } else if (card_2 * threshold < card_1) { |
3128 | return intersect_skewed_uint16_nonempty( |
3129 | array2->array, card_2, array1->array, card_1); |
3130 | } else { |
3131 | // we do not bother vectorizing |
3132 | return intersect_uint16_nonempty(array1->array, card_1, |
3133 | array2->array, card_2); |
3134 | } |
3135 | } |
3136 | |
3137 | /* computes the intersection of array1 and array2 and write the result to |
3138 | * array1. |
3139 | * */ |
3140 | void array_container_intersection_inplace(array_container_t *src_1, |
3141 | const array_container_t *src_2) { |
3142 | // todo: can any of this be vectorized? |
3143 | int32_t card_1 = src_1->cardinality, card_2 = src_2->cardinality; |
3144 | const int threshold = 64; // subject to tuning |
3145 | if (card_1 * threshold < card_2) { |
3146 | src_1->cardinality = intersect_skewed_uint16( |
3147 | src_1->array, card_1, src_2->array, card_2, src_1->array); |
3148 | } else if (card_2 * threshold < card_1) { |
3149 | src_1->cardinality = intersect_skewed_uint16( |
3150 | src_2->array, card_2, src_1->array, card_1, src_1->array); |
3151 | } else { |
3152 | src_1->cardinality = intersect_uint16( |
3153 | src_1->array, card_1, src_2->array, card_2, src_1->array); |
3154 | } |
3155 | } |
3156 | |
3157 | int array_container_to_uint32_array(void *vout, const array_container_t *cont, |
3158 | uint32_t base) { |
3159 | int outpos = 0; |
3160 | uint32_t *out = (uint32_t *)vout; |
3161 | for (int i = 0; i < cont->cardinality; ++i) { |
3162 | const uint32_t val = base + cont->array[i]; |
3163 | memcpy(out + outpos, &val, |
3164 | sizeof(uint32_t)); // should be compiled as a MOV on x64 |
3165 | outpos++; |
3166 | } |
3167 | return outpos; |
3168 | } |
3169 | |
3170 | void array_container_printf(const array_container_t *v) { |
3171 | if (v->cardinality == 0) { |
3172 | printf("{}" ); |
3173 | return; |
3174 | } |
3175 | printf("{" ); |
3176 | printf("%d" , v->array[0]); |
3177 | for (int i = 1; i < v->cardinality; ++i) { |
3178 | printf(",%d" , v->array[i]); |
3179 | } |
3180 | printf("}" ); |
3181 | } |
3182 | |
3183 | void array_container_printf_as_uint32_array(const array_container_t *v, |
3184 | uint32_t base) { |
3185 | if (v->cardinality == 0) { |
3186 | return; |
3187 | } |
3188 | printf("%u" , v->array[0] + base); |
3189 | for (int i = 1; i < v->cardinality; ++i) { |
3190 | printf(",%u" , v->array[i] + base); |
3191 | } |
3192 | } |
3193 | |
3194 | /* Compute the number of runs */ |
3195 | int32_t array_container_number_of_runs(const array_container_t *a) { |
3196 | // Can SIMD work here? |
3197 | int32_t nr_runs = 0; |
3198 | int32_t prev = -2; |
3199 | for (const uint16_t *p = a->array; p != a->array + a->cardinality; ++p) { |
3200 | if (*p != prev + 1) nr_runs++; |
3201 | prev = *p; |
3202 | } |
3203 | return nr_runs; |
3204 | } |
3205 | |
3206 | int32_t array_container_serialize(const array_container_t *container, char *buf) { |
3207 | int32_t l, off; |
3208 | uint16_t cardinality = (uint16_t)container->cardinality; |
3209 | |
3210 | memcpy(buf, &cardinality, off = sizeof(cardinality)); |
3211 | l = sizeof(uint16_t) * container->cardinality; |
3212 | if (l) memcpy(&buf[off], container->array, l); |
3213 | |
3214 | return (off + l); |
3215 | } |
3216 | |
3217 | /** |
3218 | * Writes the underlying array to buf, outputs how many bytes were written. |
3219 | * The number of bytes written should be |
3220 | * array_container_size_in_bytes(container). |
3221 | * |
3222 | */ |
3223 | int32_t array_container_write(const array_container_t *container, char *buf) { |
3224 | memcpy(buf, container->array, container->cardinality * sizeof(uint16_t)); |
3225 | return array_container_size_in_bytes(container); |
3226 | } |
3227 | |
3228 | bool array_container_equals(const array_container_t *container1, |
3229 | const array_container_t *container2) { |
3230 | if (container1->cardinality != container2->cardinality) { |
3231 | return false; |
3232 | } |
3233 | // could be vectorized: |
3234 | for (int32_t i = 0; i < container1->cardinality; ++i) { |
3235 | if (container1->array[i] != container2->array[i]) return false; |
3236 | } |
3237 | return true; |
3238 | } |
3239 | |
3240 | bool array_container_is_subset(const array_container_t *container1, |
3241 | const array_container_t *container2) { |
3242 | if (container1->cardinality > container2->cardinality) { |
3243 | return false; |
3244 | } |
3245 | int i1 = 0, i2 = 0; |
3246 | while (i1 < container1->cardinality && i2 < container2->cardinality) { |
3247 | if (container1->array[i1] == container2->array[i2]) { |
3248 | i1++; |
3249 | i2++; |
3250 | } else if (container1->array[i1] > container2->array[i2]) { |
3251 | i2++; |
3252 | } else { // container1->array[i1] < container2->array[i2] |
3253 | return false; |
3254 | } |
3255 | } |
3256 | if (i1 == container1->cardinality) { |
3257 | return true; |
3258 | } else { |
3259 | return false; |
3260 | } |
3261 | } |
3262 | |
3263 | int32_t array_container_read(int32_t cardinality, array_container_t *container, |
3264 | const char *buf) { |
3265 | if (container->capacity < cardinality) { |
3266 | array_container_grow(container, cardinality, false); |
3267 | } |
3268 | container->cardinality = cardinality; |
3269 | memcpy(container->array, buf, container->cardinality * sizeof(uint16_t)); |
3270 | |
3271 | return array_container_size_in_bytes(container); |
3272 | } |
3273 | |
3274 | uint32_t array_container_serialization_len(const array_container_t *container) { |
3275 | return (sizeof(uint16_t) /* container->cardinality converted to 16 bit */ + |
3276 | (sizeof(uint16_t) * container->cardinality)); |
3277 | } |
3278 | |
3279 | void *array_container_deserialize(const char *buf, size_t buf_len) { |
3280 | array_container_t *ptr; |
3281 | |
3282 | if (buf_len < 2) /* capacity converted to 16 bit */ |
3283 | return (NULL); |
3284 | else |
3285 | buf_len -= 2; |
3286 | |
3287 | if ((ptr = (array_container_t *)malloc(sizeof(array_container_t))) != |
3288 | NULL) { |
3289 | size_t len; |
3290 | int32_t off; |
3291 | uint16_t cardinality; |
3292 | |
3293 | memcpy(&cardinality, buf, off = sizeof(cardinality)); |
3294 | |
3295 | ptr->capacity = ptr->cardinality = (uint32_t)cardinality; |
3296 | len = sizeof(uint16_t) * ptr->cardinality; |
3297 | |
3298 | if (len != buf_len) { |
3299 | free(ptr); |
3300 | return (NULL); |
3301 | } |
3302 | |
3303 | if ((ptr->array = (uint16_t *)malloc(sizeof(uint16_t) * |
3304 | ptr->capacity)) == NULL) { |
3305 | free(ptr); |
3306 | return (NULL); |
3307 | } |
3308 | |
3309 | if (len) memcpy(ptr->array, &buf[off], len); |
3310 | |
3311 | /* Check if returned values are monotonically increasing */ |
3312 | for (int32_t i = 0, j = 0; i < ptr->cardinality; i++) { |
3313 | if (ptr->array[i] < j) { |
3314 | free(ptr->array); |
3315 | free(ptr); |
3316 | return (NULL); |
3317 | } else |
3318 | j = ptr->array[i]; |
3319 | } |
3320 | } |
3321 | |
3322 | return (ptr); |
3323 | } |
3324 | |
3325 | bool array_container_iterate(const array_container_t *cont, uint32_t base, |
3326 | roaring_iterator iterator, void *ptr) { |
3327 | for (int i = 0; i < cont->cardinality; i++) |
3328 | if (!iterator(cont->array[i] + base, ptr)) return false; |
3329 | return true; |
3330 | } |
3331 | |
3332 | bool array_container_iterate64(const array_container_t *cont, uint32_t base, |
3333 | roaring_iterator64 iterator, uint64_t high_bits, |
3334 | void *ptr) { |
3335 | for (int i = 0; i < cont->cardinality; i++) |
3336 | if (!iterator(high_bits | (uint64_t)(cont->array[i] + base), ptr)) |
3337 | return false; |
3338 | return true; |
3339 | } |
3340 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/array.c */ |
3341 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */ |
3342 | /* |
3343 | * bitset.c |
3344 | * |
3345 | */ |
3346 | #ifndef _POSIX_C_SOURCE |
3347 | #define _POSIX_C_SOURCE 200809L |
3348 | #endif |
3349 | #include <assert.h> |
3350 | #include <stdio.h> |
3351 | #include <stdlib.h> |
3352 | #include <string.h> |
3353 | |
3354 | |
/*
 * External declarations of bitset container helpers. No bodies appear here;
 * presumably these functions are defined `inline` in the roaring header
 * (TODO confirm). Under C99 inline semantics, redeclaring an inline function
 * with `extern` in one translation unit makes the compiler emit its single
 * external definition in this file.
 */
extern int bitset_container_cardinality(const bitset_container_t *bitset);
extern bool bitset_container_nonzero_cardinality(bitset_container_t *bitset);
extern void bitset_container_set(bitset_container_t *bitset, uint16_t pos);
extern void bitset_container_unset(bitset_container_t *bitset, uint16_t pos);
extern inline bool bitset_container_get(const bitset_container_t *bitset,
                                        uint16_t pos);
extern int32_t bitset_container_serialized_size_in_bytes();
extern bool bitset_container_add(bitset_container_t *bitset, uint16_t pos);
extern bool bitset_container_remove(bitset_container_t *bitset, uint16_t pos);
extern inline bool bitset_container_contains(const bitset_container_t *bitset,
                                             uint16_t pos);
3366 | |
3367 | void bitset_container_clear(bitset_container_t *bitset) { |
3368 | memset(bitset->array, 0, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3369 | bitset->cardinality = 0; |
3370 | } |
3371 | |
3372 | void bitset_container_set_all(bitset_container_t *bitset) { |
3373 | memset(bitset->array, INT64_C(-1), |
3374 | sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3375 | bitset->cardinality = (1 << 16); |
3376 | } |
3377 | |
3378 | |
3379 | |
3380 | /* Create a new bitset. Return NULL in case of failure. */ |
3381 | bitset_container_t *bitset_container_create(void) { |
3382 | bitset_container_t *bitset = |
3383 | (bitset_container_t *)malloc(sizeof(bitset_container_t)); |
3384 | |
3385 | if (!bitset) { |
3386 | return NULL; |
3387 | } |
3388 | // sizeof(__m256i) == 32 |
3389 | bitset->array = (uint64_t *)aligned_malloc( |
3390 | 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3391 | if (!bitset->array) { |
3392 | free(bitset); |
3393 | return NULL; |
3394 | } |
3395 | bitset_container_clear(bitset); |
3396 | return bitset; |
3397 | } |
3398 | |
3399 | /* Copy one container into another. We assume that they are distinct. */ |
3400 | void bitset_container_copy(const bitset_container_t *source, |
3401 | bitset_container_t *dest) { |
3402 | dest->cardinality = source->cardinality; |
3403 | memcpy(dest->array, source->array, |
3404 | sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3405 | } |
3406 | |
/*
 * Add min, min + step, min + 2 * step, ... (all values below max) to the
 * bitset. A zero step is ignored.
 *
 * When step divides 64 the same bit pattern repeats in every word, so the
 * words are filled directly from one precomputed mask; otherwise the values
 * are added one at a time.
 *
 * NOTE(review): the fast path assigns the cardinality instead of adding to
 * it, and overwrites whole words when the range spans several of them, so
 * it looks like it expects the affected words to be empty beforehand --
 * confirm against callers.
 */
void bitset_container_add_from_range(bitset_container_t *bitset, uint32_t min,
                                     uint32_t max, uint16_t step) {
    if (step == 0) return;   // refuse to crash
    if ((64 % step) == 0) {  // step divides 64
        uint64_t mask = 0;   // construct the repeated mask
        // bit positions inside a word that are congruent to min modulo step
        for (uint32_t value = (min % step); value < 64; value += step) {
            mask |= ((uint64_t)1 << value);
        }
        uint32_t firstword = min / 64;
        uint32_t endword = (max - 1) / 64;  // word holding the last candidate
        // number of values min, min + step, ... below max (rounded up)
        bitset->cardinality = (max - min + step - 1) / step;
        if (firstword == endword) {
            // single word: keep bits at or above min % 64 and below
            // max % 64; (~max + 1) % 64 is (-max) mod 64, i.e. how many top
            // bits of the word lie at or beyond max
            bitset->array[firstword] |=
                mask & (((~UINT64_C(0)) << (min % 64)) &
                        ((~UINT64_C(0)) >> ((~max + 1) % 64)));
            return;
        }
        // first word: drop the bits below min
        bitset->array[firstword] = mask & ((~UINT64_C(0)) << (min % 64));
        // interior words receive the full pattern
        for (uint32_t i = firstword + 1; i < endword; i++)
            bitset->array[i] = mask;
        // last word: drop the bits at or beyond max
        bitset->array[endword] = mask & ((~UINT64_C(0)) >> ((~max + 1) % 64));
    } else {
        // generic path: insert the values one by one
        for (uint32_t value = min; value < max; value += step) {
            bitset_container_add(bitset, value);
        }
    }
}
3434 | |
3435 | /* Free memory. */ |
3436 | void bitset_container_free(bitset_container_t *bitset) { |
3437 | if(bitset->array != NULL) {// Jon Strabala reports that some tools complain otherwise |
3438 | aligned_free(bitset->array); |
3439 | bitset->array = NULL; // pedantic |
3440 | } |
3441 | free(bitset); |
3442 | } |
3443 | |
3444 | /* duplicate container. */ |
3445 | bitset_container_t *bitset_container_clone(const bitset_container_t *src) { |
3446 | bitset_container_t *bitset = |
3447 | (bitset_container_t *)malloc(sizeof(bitset_container_t)); |
3448 | |
3449 | if (!bitset) { |
3450 | return NULL; |
3451 | } |
3452 | // sizeof(__m256i) == 32 |
3453 | bitset->array = (uint64_t *)aligned_malloc( |
3454 | 32, sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3455 | if (!bitset->array) { |
3456 | free(bitset); |
3457 | return NULL; |
3458 | } |
3459 | bitset->cardinality = src->cardinality; |
3460 | memcpy(bitset->array, src->array, |
3461 | sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3462 | return bitset; |
3463 | } |
3464 | |
3465 | void bitset_container_set_range(bitset_container_t *bitset, uint32_t begin, |
3466 | uint32_t end) { |
3467 | bitset_set_range(bitset->array, begin, end); |
3468 | bitset->cardinality = |
3469 | bitset_container_compute_cardinality(bitset); // could be smarter |
3470 | } |
3471 | |
3472 | |
3473 | bool bitset_container_intersect(const bitset_container_t *src_1, |
3474 | const bitset_container_t *src_2) { |
3475 | // could vectorize, but this is probably already quite fast in practice |
3476 | const uint64_t * __restrict__ array_1 = src_1->array; |
3477 | const uint64_t * __restrict__ array_2 = src_2->array; |
3478 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i ++) { |
3479 | if((array_1[i] & array_2[i]) != 0) return true; |
3480 | } |
3481 | return false; |
3482 | } |
3483 | |
3484 | |
3485 | #ifdef USEAVX |
3486 | #ifndef WORDS_IN_AVX2_REG |
3487 | #define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) |
3488 | #endif |
3489 | /* Get the number of bits set (force computation) */ |
3490 | int bitset_container_compute_cardinality(const bitset_container_t *bitset) { |
3491 | return (int) avx2_harley_seal_popcount256( |
3492 | (const __m256i *)bitset->array, |
3493 | BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG)); |
3494 | } |
3495 | #else |
3496 | |
3497 | /* Get the number of bits set (force computation) */ |
3498 | int bitset_container_compute_cardinality(const bitset_container_t *bitset) { |
3499 | const uint64_t *array = bitset->array; |
3500 | int32_t sum = 0; |
3501 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 4) { |
3502 | sum += hamming(array[i]); |
3503 | sum += hamming(array[i + 1]); |
3504 | sum += hamming(array[i + 2]); |
3505 | sum += hamming(array[i + 3]); |
3506 | } |
3507 | return sum; |
3508 | } |
3509 | |
3510 | #endif |
3511 | |
3512 | #ifdef USEAVX |
3513 | |
3514 | #define BITSET_CONTAINER_FN_REPEAT 8 |
3515 | #ifndef WORDS_IN_AVX2_REG |
3516 | #define WORDS_IN_AVX2_REG sizeof(__m256i) / sizeof(uint64_t) |
3517 | #endif |
3518 | #define LOOP_SIZE \ |
3519 | BITSET_CONTAINER_SIZE_IN_WORDS / \ |
3520 | ((WORDS_IN_AVX2_REG)*BITSET_CONTAINER_FN_REPEAT) |
3521 | |
3522 | /* Computes a binary operation (eg union) on bitset1 and bitset2 and write the |
3523 | result to bitsetout */ |
3524 | // clang-format off |
3525 | #define BITSET_CONTAINER_FN(opname, opsymbol, avx_intrinsic) \ |
3526 | int bitset_container_##opname##_nocard(const bitset_container_t *src_1, \ |
3527 | const bitset_container_t *src_2, \ |
3528 | bitset_container_t *dst) { \ |
3529 | const uint8_t * __restrict__ array_1 = (const uint8_t *)src_1->array; \ |
3530 | const uint8_t * __restrict__ array_2 = (const uint8_t *)src_2->array; \ |
3531 | /* not using the blocking optimization for some reason*/ \ |
3532 | uint8_t *out = (uint8_t*)dst->array; \ |
3533 | const int innerloop = 8; \ |
3534 | for (size_t i = 0; \ |
3535 | i < BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG); \ |
3536 | i+=innerloop) {\ |
3537 | __m256i A1, A2, AO; \ |
3538 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1)); \ |
3539 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2)); \ |
3540 | AO = avx_intrinsic(A2, A1); \ |
3541 | _mm256_storeu_si256((__m256i *)out, AO); \ |
3542 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 32)); \ |
3543 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 32)); \ |
3544 | AO = avx_intrinsic(A2, A1); \ |
3545 | _mm256_storeu_si256((__m256i *)(out+32), AO); \ |
3546 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 64)); \ |
3547 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 64)); \ |
3548 | AO = avx_intrinsic(A2, A1); \ |
3549 | _mm256_storeu_si256((__m256i *)(out+64), AO); \ |
3550 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 96)); \ |
3551 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 96)); \ |
3552 | AO = avx_intrinsic(A2, A1); \ |
3553 | _mm256_storeu_si256((__m256i *)(out+96), AO); \ |
3554 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 128)); \ |
3555 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 128)); \ |
3556 | AO = avx_intrinsic(A2, A1); \ |
3557 | _mm256_storeu_si256((__m256i *)(out+128), AO); \ |
3558 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 160)); \ |
3559 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 160)); \ |
3560 | AO = avx_intrinsic(A2, A1); \ |
3561 | _mm256_storeu_si256((__m256i *)(out+160), AO); \ |
3562 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 192)); \ |
3563 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 192)); \ |
3564 | AO = avx_intrinsic(A2, A1); \ |
3565 | _mm256_storeu_si256((__m256i *)(out+192), AO); \ |
3566 | A1 = _mm256_lddqu_si256((const __m256i *)(array_1 + 224)); \ |
3567 | A2 = _mm256_lddqu_si256((const __m256i *)(array_2 + 224)); \ |
3568 | AO = avx_intrinsic(A2, A1); \ |
3569 | _mm256_storeu_si256((__m256i *)(out+224), AO); \ |
3570 | out+=256; \ |
3571 | array_1 += 256; \ |
3572 | array_2 += 256; \ |
3573 | } \ |
3574 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; \ |
3575 | return dst->cardinality; \ |
3576 | } \ |
3577 | /* next, a version that updates cardinality*/ \ |
3578 | int bitset_container_##opname(const bitset_container_t *src_1, \ |
3579 | const bitset_container_t *src_2, \ |
3580 | bitset_container_t *dst) { \ |
3581 | const __m256i * __restrict__ array_1 = (const __m256i *) src_1->array; \ |
3582 | const __m256i * __restrict__ array_2 = (const __m256i *) src_2->array; \ |
3583 | __m256i *out = (__m256i *) dst->array; \ |
3584 | dst->cardinality = (int32_t)avx2_harley_seal_popcount256andstore_##opname(array_2,\ |
3585 | array_1, out,BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ |
3586 | return dst->cardinality; \ |
3587 | } \ |
3588 | /* next, a version that just computes the cardinality*/ \ |
3589 | int bitset_container_##opname##_justcard(const bitset_container_t *src_1, \ |
3590 | const bitset_container_t *src_2) { \ |
3591 | const __m256i * __restrict__ data1 = (const __m256i *) src_1->array; \ |
3592 | const __m256i * __restrict__ data2 = (const __m256i *) src_2->array; \ |
3593 | return (int)avx2_harley_seal_popcount256_##opname(data2, \ |
3594 | data1, BITSET_CONTAINER_SIZE_IN_WORDS / (WORDS_IN_AVX2_REG));\ |
3595 | } |
3596 | |
3597 | |
3598 | |
3599 | #else /* not USEAVX */ |
3600 | |
/*
 * Scalar BITSET_CONTAINER_FN(opname, opsymbol, avxintrinsic): defines
 *
 *   bitset_container_<opname>          result words to dst, cardinality
 *                                      stored in dst and returned
 *   bitset_container_<opname>_nocard   result words to dst, cardinality
 *                                      marked as unknown
 *   bitset_container_<opname>_justcard cardinality of the result only
 *
 * Each result word is (word of src_1) opsymbol (word of src_2).
 * `avxintrinsic` is not used in this variant.
 */
#define BITSET_CONTAINER_FN(opname, opsymbol, avxintrinsic)                   \
int bitset_container_##opname(const bitset_container_t *src_1,                \
                              const bitset_container_t *src_2,                \
                              bitset_container_t *dst) {                      \
    const uint64_t * __restrict__ words_1 = src_1->array;                     \
    const uint64_t * __restrict__ words_2 = src_2->array;                     \
    uint64_t *result = dst->array;                                            \
    int32_t total = 0;                                                        \
    size_t pos = 0;                                                           \
    while (pos < BITSET_CONTAINER_SIZE_IN_WORDS) {                            \
        /* two words per pass; both are computed before either is stored */   \
        const uint64_t first = (words_1[pos])opsymbol(words_2[pos]);          \
        const uint64_t second = (words_1[pos + 1])opsymbol(words_2[pos + 1]); \
        result[pos] = first;                                                  \
        result[pos + 1] = second;                                             \
        total += hamming(first);                                              \
        total += hamming(second);                                             \
        pos += 2;                                                             \
    }                                                                         \
    dst->cardinality = total;                                                 \
    return dst->cardinality;                                                  \
}                                                                             \
int bitset_container_##opname##_nocard(const bitset_container_t *src_1,       \
                                       const bitset_container_t *src_2,       \
                                       bitset_container_t *dst) {             \
    const uint64_t * __restrict__ words_1 = src_1->array;                     \
    const uint64_t * __restrict__ words_2 = src_2->array;                     \
    uint64_t *result = dst->array;                                            \
    size_t pos = 0;                                                           \
    while (pos < BITSET_CONTAINER_SIZE_IN_WORDS) {                            \
        result[pos] = (words_1[pos])opsymbol(words_2[pos]);                   \
        pos++;                                                                \
    }                                                                         \
    dst->cardinality = BITSET_UNKNOWN_CARDINALITY;                            \
    return dst->cardinality;                                                  \
}                                                                             \
int bitset_container_##opname##_justcard(const bitset_container_t *src_1,     \
                                         const bitset_container_t *src_2) {   \
    const uint64_t * __restrict__ words_1 = src_1->array;                     \
    const uint64_t * __restrict__ words_2 = src_2->array;                     \
    int32_t total = 0;                                                        \
    size_t pos = 0;                                                           \
    while (pos < BITSET_CONTAINER_SIZE_IN_WORDS) {                            \
        const uint64_t first = (words_1[pos])opsymbol(words_2[pos]);          \
        const uint64_t second = (words_1[pos + 1])opsymbol(words_2[pos + 1]); \
        total += hamming(first);                                              \
        total += hamming(second);                                             \
        pos += 2;                                                             \
    }                                                                         \
    return total;                                                             \
}
3645 | |
3646 | #endif |
3647 | |
3648 | // we duplicate the function because other containers use the "or" term, makes API more consistent |
3649 | BITSET_CONTAINER_FN(or, |, _mm256_or_si256) |
3650 | BITSET_CONTAINER_FN(union, |, _mm256_or_si256) |
3651 | |
3652 | // we duplicate the function because other containers use the "intersection" term, makes API more consistent |
3653 | BITSET_CONTAINER_FN(and, &, _mm256_and_si256) |
3654 | BITSET_CONTAINER_FN(intersection, &, _mm256_and_si256) |
3655 | |
3656 | BITSET_CONTAINER_FN(xor, ^, _mm256_xor_si256) |
3657 | BITSET_CONTAINER_FN(andnot, &~, _mm256_andnot_si256) |
3658 | // clang-format On |
3659 | |
3660 | |
3661 | |
3662 | int bitset_container_to_uint32_array( void *vout, const bitset_container_t *cont, uint32_t base) { |
3663 | #ifdef USEAVX2FORDECODING |
3664 | if(cont->cardinality >= 8192)// heuristic |
3665 | return (int) bitset_extract_setbits_avx2(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,cont->cardinality,base); |
3666 | else |
3667 | return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); |
3668 | #else |
3669 | return (int) bitset_extract_setbits(cont->array, BITSET_CONTAINER_SIZE_IN_WORDS, vout,base); |
3670 | #endif |
3671 | } |
3672 | |
3673 | /* |
3674 | * Print this container using printf (useful for debugging). |
3675 | */ |
3676 | void bitset_container_printf(const bitset_container_t * v) { |
3677 | printf("{" ); |
3678 | uint32_t base = 0; |
3679 | bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable |
3680 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { |
3681 | uint64_t w = v->array[i]; |
3682 | while (w != 0) { |
3683 | uint64_t t = w & (~w + 1); |
3684 | int r = __builtin_ctzll(w); |
3685 | if(iamfirst) {// predicted to be false |
3686 | printf("%u" ,base + r); |
3687 | iamfirst = false; |
3688 | } else { |
3689 | printf(",%u" ,base + r); |
3690 | } |
3691 | w ^= t; |
3692 | } |
3693 | base += 64; |
3694 | } |
3695 | printf("}" ); |
3696 | } |
3697 | |
3698 | |
3699 | /* |
3700 | * Print this container using printf as a comma-separated list of 32-bit integers starting at base. |
3701 | */ |
3702 | void bitset_container_printf_as_uint32_array(const bitset_container_t * v, uint32_t base) { |
3703 | bool iamfirst = true;// TODO: rework so that this is not necessary yet still readable |
3704 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { |
3705 | uint64_t w = v->array[i]; |
3706 | while (w != 0) { |
3707 | uint64_t t = w & (~w + 1); |
3708 | int r = __builtin_ctzll(w); |
3709 | if(iamfirst) {// predicted to be false |
3710 | printf("%u" , r + base); |
3711 | iamfirst = false; |
3712 | } else { |
3713 | printf(",%u" ,r + base); |
3714 | } |
3715 | w ^= t; |
3716 | } |
3717 | base += 64; |
3718 | } |
3719 | } |
3720 | |
3721 | |
3722 | // TODO: use the fast lower bound, also |
3723 | int bitset_container_number_of_runs(bitset_container_t *b) { |
3724 | int num_runs = 0; |
3725 | uint64_t next_word = b->array[0]; |
3726 | |
3727 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS-1; ++i) { |
3728 | uint64_t word = next_word; |
3729 | next_word = b->array[i+1]; |
3730 | num_runs += hamming((~word) & (word << 1)) + ( (word >> 63) & ~next_word); |
3731 | } |
3732 | |
3733 | uint64_t word = next_word; |
3734 | num_runs += hamming((~word) & (word << 1)); |
3735 | if((word & 0x8000000000000000ULL) != 0) |
3736 | num_runs++; |
3737 | return num_runs; |
3738 | } |
3739 | |
3740 | int32_t bitset_container_serialize(const bitset_container_t *container, char *buf) { |
3741 | int32_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; |
3742 | memcpy(buf, container->array, l); |
3743 | return(l); |
3744 | } |
3745 | |
3746 | |
3747 | |
3748 | int32_t bitset_container_write(const bitset_container_t *container, |
3749 | char *buf) { |
3750 | memcpy(buf, container->array, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); |
3751 | return bitset_container_size_in_bytes(container); |
3752 | } |
3753 | |
3754 | |
3755 | int32_t bitset_container_read(int32_t cardinality, bitset_container_t *container, |
3756 | const char *buf) { |
3757 | container->cardinality = cardinality; |
3758 | memcpy(container->array, buf, BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t)); |
3759 | return bitset_container_size_in_bytes(container); |
3760 | } |
3761 | |
3762 | uint32_t bitset_container_serialization_len() { |
3763 | return(sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS); |
3764 | } |
3765 | |
3766 | void* bitset_container_deserialize(const char *buf, size_t buf_len) { |
3767 | bitset_container_t *ptr; |
3768 | size_t l = sizeof(uint64_t) * BITSET_CONTAINER_SIZE_IN_WORDS; |
3769 | |
3770 | if(l != buf_len) |
3771 | return(NULL); |
3772 | |
3773 | if((ptr = (bitset_container_t *)malloc(sizeof(bitset_container_t))) != NULL) { |
3774 | memcpy(ptr, buf, sizeof(bitset_container_t)); |
3775 | // sizeof(__m256i) == 32 |
3776 | ptr->array = (uint64_t *) aligned_malloc(32, l); |
3777 | if (! ptr->array) { |
3778 | free(ptr); |
3779 | return NULL; |
3780 | } |
3781 | memcpy(ptr->array, buf, l); |
3782 | ptr->cardinality = bitset_container_compute_cardinality(ptr); |
3783 | } |
3784 | |
3785 | return((void*)ptr); |
3786 | } |
3787 | |
3788 | bool bitset_container_iterate(const bitset_container_t *cont, uint32_t base, roaring_iterator iterator, void *ptr) { |
3789 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
3790 | uint64_t w = cont->array[i]; |
3791 | while (w != 0) { |
3792 | uint64_t t = w & (~w + 1); |
3793 | int r = __builtin_ctzll(w); |
3794 | if(!iterator(r + base, ptr)) return false; |
3795 | w ^= t; |
3796 | } |
3797 | base += 64; |
3798 | } |
3799 | return true; |
3800 | } |
3801 | |
3802 | bool bitset_container_iterate64(const bitset_container_t *cont, uint32_t base, roaring_iterator64 iterator, uint64_t high_bits, void *ptr) { |
3803 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
3804 | uint64_t w = cont->array[i]; |
3805 | while (w != 0) { |
3806 | uint64_t t = w & (~w + 1); |
3807 | int r = __builtin_ctzll(w); |
3808 | if(!iterator(high_bits | (uint64_t)(r + base), ptr)) return false; |
3809 | w ^= t; |
3810 | } |
3811 | base += 64; |
3812 | } |
3813 | return true; |
3814 | } |
3815 | |
3816 | |
3817 | bool bitset_container_equals(const bitset_container_t *container1, const bitset_container_t *container2) { |
3818 | if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { |
3819 | if(container1->cardinality != container2->cardinality) { |
3820 | return false; |
3821 | } |
3822 | } |
3823 | for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
3824 | if(container1->array[i] != container2->array[i]) { |
3825 | return false; |
3826 | } |
3827 | } |
3828 | return true; |
3829 | } |
3830 | |
3831 | bool bitset_container_is_subset(const bitset_container_t *container1, |
3832 | const bitset_container_t *container2) { |
3833 | if((container1->cardinality != BITSET_UNKNOWN_CARDINALITY) && (container2->cardinality != BITSET_UNKNOWN_CARDINALITY)) { |
3834 | if(container1->cardinality > container2->cardinality) { |
3835 | return false; |
3836 | } |
3837 | } |
3838 | for(int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
3839 | if((container1->array[i] & container2->array[i]) != container1->array[i]) { |
3840 | return false; |
3841 | } |
3842 | } |
3843 | return true; |
3844 | } |
3845 | |
3846 | bool bitset_container_select(const bitset_container_t *container, uint32_t *start_rank, uint32_t rank, uint32_t *element) { |
3847 | int card = bitset_container_cardinality(container); |
3848 | if(rank >= *start_rank + card) { |
3849 | *start_rank += card; |
3850 | return false; |
3851 | } |
3852 | const uint64_t *array = container->array; |
3853 | int32_t size; |
3854 | for (int i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; i += 1) { |
3855 | size = hamming(array[i]); |
3856 | if(rank <= *start_rank + size) { |
3857 | uint64_t w = container->array[i]; |
3858 | uint16_t base = i*64; |
3859 | while (w != 0) { |
3860 | uint64_t t = w & (~w + 1); |
3861 | int r = __builtin_ctzll(w); |
3862 | if(*start_rank == rank) { |
3863 | *element = r+base; |
3864 | return true; |
3865 | } |
3866 | w ^= t; |
3867 | *start_rank += 1; |
3868 | } |
3869 | } |
3870 | else |
3871 | *start_rank += size; |
3872 | } |
3873 | assert(false); |
3874 | __builtin_unreachable(); |
3875 | } |
3876 | |
3877 | |
3878 | /* Returns the smallest value (assumes not empty) */ |
3879 | uint16_t bitset_container_minimum(const bitset_container_t *container) { |
3880 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i ) { |
3881 | uint64_t w = container->array[i]; |
3882 | if (w != 0) { |
3883 | int r = __builtin_ctzll(w); |
3884 | return r + i * 64; |
3885 | } |
3886 | } |
3887 | return UINT16_MAX; |
3888 | } |
3889 | |
3890 | /* Returns the largest value (assumes not empty) */ |
3891 | uint16_t bitset_container_maximum(const bitset_container_t *container) { |
3892 | for (int32_t i = BITSET_CONTAINER_SIZE_IN_WORDS - 1; i > 0; --i ) { |
3893 | uint64_t w = container->array[i]; |
3894 | if (w != 0) { |
3895 | int r = __builtin_clzll(w); |
3896 | return i * 64 + 63 - r; |
3897 | } |
3898 | } |
3899 | return 0; |
3900 | } |
3901 | |
3902 | /* Returns the number of values equal or smaller than x */ |
3903 | int bitset_container_rank(const bitset_container_t *container, uint16_t x) { |
3904 | uint32_t x32 = x; |
3905 | int sum = 0; |
3906 | uint32_t k = 0; |
3907 | for (; k + 63 <= x32; k += 64) { |
3908 | sum += hamming(container->array[k / 64]); |
3909 | } |
3910 | // at this point, we have covered everything up to k, k not included. |
3911 | // we have that k < x, but not so large that k+63<=x |
3912 | // k is a power of 64 |
3913 | int bitsleft = x32 - k + 1;// will be in [0,64) |
3914 | uint64_t leftoverword = container->array[k / 64];// k / 64 should be within scope |
3915 | leftoverword = leftoverword & ((UINT64_C(1) << bitsleft) - 1); |
3916 | sum += hamming(leftoverword); |
3917 | return sum; |
3918 | } |
3919 | |
3920 | /* Returns the index of the first value equal or larger than x, or -1 */ |
3921 | int bitset_container_index_equalorlarger(const bitset_container_t *container, uint16_t x) { |
3922 | uint32_t x32 = x; |
3923 | uint32_t k = x32 / 64; |
3924 | uint64_t word = container->array[k]; |
3925 | const int diff = x32 - k * 64; // in [0,64) |
3926 | word = (word >> diff) << diff; // a mask is faster, but we don't care |
3927 | while(word == 0) { |
3928 | k++; |
3929 | if(k == BITSET_CONTAINER_SIZE_IN_WORDS) return -1; |
3930 | word = container->array[k]; |
3931 | } |
3932 | return k * 64 + __builtin_ctzll(word); |
3933 | } |
3934 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/bitset.c */ |
3935 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */ |
3936 | |
3937 | |
/*
 * Declarations of container helpers whose bodies are not in this file.
 * NOTE(review): presumably these exist to emit the external definitions of
 * inline functions defined in the header - confirm against the header.
 */
extern inline const void *
container_unwrap_shared(const void *candidate_shared_container, uint8_t *type);

extern inline void *
container_mutable_unwrap_shared(void *candidate_shared_container, uint8_t *type);

extern const char *get_container_name(uint8_t typecode);

extern int container_get_cardinality(const void *container, uint8_t typecode);

/* in-place binary operations */
extern void *container_iand(void *c1, uint8_t type1,
                            const void *c2, uint8_t type2,
                            uint8_t *result_type);

extern void *container_ior(void *c1, uint8_t type1,
                           const void *c2, uint8_t type2,
                           uint8_t *result_type);

extern void *container_ixor(void *c1, uint8_t type1,
                            const void *c2, uint8_t type2,
                            uint8_t *result_type);

extern void *container_iandnot(void *c1, uint8_t type1,
                               const void *c2, uint8_t type2,
                               uint8_t *result_type);
3958 | |
3959 | void container_free(void *container, uint8_t typecode) { |
3960 | switch (typecode) { |
3961 | case BITSET_CONTAINER_TYPE_CODE: |
3962 | bitset_container_free((bitset_container_t *)container); |
3963 | break; |
3964 | case ARRAY_CONTAINER_TYPE_CODE: |
3965 | array_container_free((array_container_t *)container); |
3966 | break; |
3967 | case RUN_CONTAINER_TYPE_CODE: |
3968 | run_container_free((run_container_t *)container); |
3969 | break; |
3970 | case SHARED_CONTAINER_TYPE_CODE: |
3971 | shared_container_free((shared_container_t *)container); |
3972 | break; |
3973 | default: |
3974 | assert(false); |
3975 | __builtin_unreachable(); |
3976 | } |
3977 | } |
3978 | |
3979 | void container_printf(const void *container, uint8_t typecode) { |
3980 | container = container_unwrap_shared(container, &typecode); |
3981 | switch (typecode) { |
3982 | case BITSET_CONTAINER_TYPE_CODE: |
3983 | bitset_container_printf((const bitset_container_t *)container); |
3984 | return; |
3985 | case ARRAY_CONTAINER_TYPE_CODE: |
3986 | array_container_printf((const array_container_t *)container); |
3987 | return; |
3988 | case RUN_CONTAINER_TYPE_CODE: |
3989 | run_container_printf((const run_container_t *)container); |
3990 | return; |
3991 | default: |
3992 | __builtin_unreachable(); |
3993 | } |
3994 | } |
3995 | |
3996 | void container_printf_as_uint32_array(const void *container, uint8_t typecode, |
3997 | uint32_t base) { |
3998 | container = container_unwrap_shared(container, &typecode); |
3999 | switch (typecode) { |
4000 | case BITSET_CONTAINER_TYPE_CODE: |
4001 | bitset_container_printf_as_uint32_array( |
4002 | (const bitset_container_t *)container, base); |
4003 | return; |
4004 | case ARRAY_CONTAINER_TYPE_CODE: |
4005 | array_container_printf_as_uint32_array( |
4006 | (const array_container_t *)container, base); |
4007 | return; |
4008 | case RUN_CONTAINER_TYPE_CODE: |
4009 | run_container_printf_as_uint32_array( |
4010 | (const run_container_t *)container, base); |
4011 | return; |
4012 | return; |
4013 | default: |
4014 | __builtin_unreachable(); |
4015 | } |
4016 | } |
4017 | |
4018 | int32_t container_serialize(const void *container, uint8_t typecode, |
4019 | char *buf) { |
4020 | container = container_unwrap_shared(container, &typecode); |
4021 | switch (typecode) { |
4022 | case BITSET_CONTAINER_TYPE_CODE: |
4023 | return (bitset_container_serialize((const bitset_container_t *)container, |
4024 | buf)); |
4025 | case ARRAY_CONTAINER_TYPE_CODE: |
4026 | return ( |
4027 | array_container_serialize((const array_container_t *)container, buf)); |
4028 | case RUN_CONTAINER_TYPE_CODE: |
4029 | return (run_container_serialize((const run_container_t *)container, buf)); |
4030 | default: |
4031 | assert(0); |
4032 | __builtin_unreachable(); |
4033 | return (-1); |
4034 | } |
4035 | } |
4036 | |
4037 | uint32_t container_serialization_len(const void *container, uint8_t typecode) { |
4038 | container = container_unwrap_shared(container, &typecode); |
4039 | switch (typecode) { |
4040 | case BITSET_CONTAINER_TYPE_CODE: |
4041 | return bitset_container_serialization_len(); |
4042 | case ARRAY_CONTAINER_TYPE_CODE: |
4043 | return array_container_serialization_len( |
4044 | (const array_container_t *)container); |
4045 | case RUN_CONTAINER_TYPE_CODE: |
4046 | return run_container_serialization_len( |
4047 | (const run_container_t *)container); |
4048 | default: |
4049 | assert(0); |
4050 | __builtin_unreachable(); |
4051 | return (0); |
4052 | } |
4053 | } |
4054 | |
4055 | void *container_deserialize(uint8_t typecode, const char *buf, size_t buf_len) { |
4056 | switch (typecode) { |
4057 | case BITSET_CONTAINER_TYPE_CODE: |
4058 | return (bitset_container_deserialize(buf, buf_len)); |
4059 | case ARRAY_CONTAINER_TYPE_CODE: |
4060 | return (array_container_deserialize(buf, buf_len)); |
4061 | case RUN_CONTAINER_TYPE_CODE: |
4062 | return (run_container_deserialize(buf, buf_len)); |
4063 | case SHARED_CONTAINER_TYPE_CODE: |
4064 | printf("this should never happen.\n" ); |
4065 | assert(0); |
4066 | __builtin_unreachable(); |
4067 | return (NULL); |
4068 | default: |
4069 | assert(0); |
4070 | __builtin_unreachable(); |
4071 | return (NULL); |
4072 | } |
4073 | } |
4074 | |
/*
 * Further declarations of container helpers.
 * NOTE(review): presumably these exist to emit the external definitions of
 * inline functions defined in the header - confirm against the header.
 */
extern bool container_nonzero_cardinality(const void *container, uint8_t typecode);

extern void container_free(void *container, uint8_t typecode);

extern int container_to_uint32_array(uint32_t *output, const void *container,
                                     uint8_t typecode, uint32_t base);

extern void *container_add(void *container, uint16_t val,
                           uint8_t typecode, uint8_t *new_typecode);

extern inline bool container_contains(const void *container, uint16_t val, uint8_t typecode);

extern void *container_clone(const void *container, uint8_t typecode);

/* binary operations */
extern void *container_and(const void *c1, uint8_t type1,
                           const void *c2, uint8_t type2,
                           uint8_t *result_type);

extern void *container_or(const void *c1, uint8_t type1,
                          const void *c2, uint8_t type2,
                          uint8_t *result_type);

extern void *container_xor(const void *c1, uint8_t type1,
                           const void *c2, uint8_t type2,
                           uint8_t *result_type);
4099 | |
4100 | void *get_copy_of_container(void *container, uint8_t *typecode, |
4101 | bool copy_on_write) { |
4102 | if (copy_on_write) { |
4103 | shared_container_t *shared_container; |
4104 | if (*typecode == SHARED_CONTAINER_TYPE_CODE) { |
4105 | shared_container = (shared_container_t *)container; |
4106 | shared_container->counter += 1; |
4107 | return shared_container; |
4108 | } |
4109 | assert(*typecode != SHARED_CONTAINER_TYPE_CODE); |
4110 | |
4111 | if ((shared_container = (shared_container_t *)malloc( |
4112 | sizeof(shared_container_t))) == NULL) { |
4113 | return NULL; |
4114 | } |
4115 | |
4116 | shared_container->container = container; |
4117 | shared_container->typecode = *typecode; |
4118 | |
4119 | shared_container->counter = 2; |
4120 | *typecode = SHARED_CONTAINER_TYPE_CODE; |
4121 | |
4122 | return shared_container; |
4123 | } // copy_on_write |
4124 | // otherwise, no copy on write... |
4125 | const void *actualcontainer = |
4126 | container_unwrap_shared((const void *)container, typecode); |
4127 | assert(*typecode != SHARED_CONTAINER_TYPE_CODE); |
4128 | return container_clone(actualcontainer, *typecode); |
4129 | } |
4130 | /** |
4131 | * Copies a container, requires a typecode. This allocates new memory, caller |
4132 | * is responsible for deallocation. |
4133 | */ |
4134 | void *container_clone(const void *container, uint8_t typecode) { |
4135 | container = container_unwrap_shared(container, &typecode); |
4136 | switch (typecode) { |
4137 | case BITSET_CONTAINER_TYPE_CODE: |
4138 | return bitset_container_clone((const bitset_container_t *)container); |
4139 | case ARRAY_CONTAINER_TYPE_CODE: |
4140 | return array_container_clone((const array_container_t *)container); |
4141 | case RUN_CONTAINER_TYPE_CODE: |
4142 | return run_container_clone((const run_container_t *)container); |
4143 | case SHARED_CONTAINER_TYPE_CODE: |
4144 | printf("shared containers are not cloneable\n" ); |
4145 | assert(false); |
4146 | return NULL; |
4147 | default: |
4148 | assert(false); |
4149 | __builtin_unreachable(); |
4150 | return NULL; |
4151 | } |
4152 | } |
4153 | |
4154 | void *(shared_container_t *container, |
4155 | uint8_t *typecode) { |
4156 | assert(container->counter > 0); |
4157 | assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); |
4158 | container->counter--; |
4159 | *typecode = container->typecode; |
4160 | void *answer; |
4161 | if (container->counter == 0) { |
4162 | answer = container->container; |
4163 | container->container = NULL; // paranoid |
4164 | free(container); |
4165 | } else { |
4166 | answer = container_clone(container->container, *typecode); |
4167 | } |
4168 | assert(*typecode != SHARED_CONTAINER_TYPE_CODE); |
4169 | return answer; |
4170 | } |
4171 | |
4172 | void shared_container_free(shared_container_t *container) { |
4173 | assert(container->counter > 0); |
4174 | container->counter--; |
4175 | if (container->counter == 0) { |
4176 | assert(container->typecode != SHARED_CONTAINER_TYPE_CODE); |
4177 | container_free(container->container, container->typecode); |
4178 | container->container = NULL; // paranoid |
4179 | free(container); |
4180 | } |
4181 | } |
4182 | |
/*
 * Declarations for negation, range, lazy xor and andnot helpers.
 * NOTE(review): presumably these exist to emit the external definitions of
 * inline functions defined in the header - confirm against the header.
 */
extern void *container_not(const void *c1, uint8_t type1, uint8_t *result_type);

extern void *container_not_range(const void *c1, uint8_t type1,
                                 uint32_t range_start, uint32_t range_end,
                                 uint8_t *result_type);

extern void *container_inot(void *c1, uint8_t type1, uint8_t *result_type);

extern void *container_inot_range(void *c1, uint8_t type1,
                                  uint32_t range_start, uint32_t range_end,
                                  uint8_t *result_type);

extern void *container_range_of_ones(uint32_t range_start, uint32_t range_end,
                                     uint8_t *result_type);

// where are the corresponding things for union and intersection??
extern void *container_lazy_xor(const void *c1, uint8_t type1,
                                const void *c2, uint8_t type2,
                                uint8_t *result_type);

extern void *container_lazy_ixor(void *c1, uint8_t type1,
                                 const void *c2, uint8_t type2,
                                 uint8_t *result_type);

extern void *container_andnot(const void *c1, uint8_t type1,
                              const void *c2, uint8_t type2,
                              uint8_t *result_type);
4206 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/containers.c */ |
4207 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */ |
4208 | #include <stdio.h> |
4209 | |
4210 | |
4211 | // file contains grubby stuff that must know impl. details of all container |
4212 | // types. |
4213 | bitset_container_t *bitset_container_from_array(const array_container_t *a) { |
4214 | bitset_container_t *ans = bitset_container_create(); |
4215 | int limit = array_container_cardinality(a); |
4216 | for (int i = 0; i < limit; ++i) bitset_container_set(ans, a->array[i]); |
4217 | return ans; |
4218 | } |
4219 | |
4220 | bitset_container_t *bitset_container_from_run(const run_container_t *arr) { |
4221 | int card = run_container_cardinality(arr); |
4222 | bitset_container_t *answer = bitset_container_create(); |
4223 | for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { |
4224 | rle16_t vl = arr->runs[rlepos]; |
4225 | bitset_set_lenrange(answer->array, vl.value, vl.length); |
4226 | } |
4227 | answer->cardinality = card; |
4228 | return answer; |
4229 | } |
4230 | |
4231 | array_container_t *array_container_from_run(const run_container_t *arr) { |
4232 | array_container_t *answer = |
4233 | array_container_create_given_capacity(run_container_cardinality(arr)); |
4234 | answer->cardinality = 0; |
4235 | for (int rlepos = 0; rlepos < arr->n_runs; ++rlepos) { |
4236 | int run_start = arr->runs[rlepos].value; |
4237 | int run_end = run_start + arr->runs[rlepos].length; |
4238 | |
4239 | for (int run_value = run_start; run_value <= run_end; ++run_value) { |
4240 | answer->array[answer->cardinality++] = (uint16_t)run_value; |
4241 | } |
4242 | } |
4243 | return answer; |
4244 | } |
4245 | |
4246 | array_container_t *array_container_from_bitset(const bitset_container_t *bits) { |
4247 | array_container_t *result = |
4248 | array_container_create_given_capacity(bits->cardinality); |
4249 | result->cardinality = bits->cardinality; |
4250 | // sse version ends up being slower here |
4251 | // (bitset_extract_setbits_sse_uint16) |
4252 | // because of the sparsity of the data |
4253 | bitset_extract_setbits_uint16(bits->array, BITSET_CONTAINER_SIZE_IN_WORDS, |
4254 | result->array, 0); |
4255 | return result; |
4256 | } |
4257 | |
4258 | /* assumes that container has adequate space. Run from [s,e] (inclusive) */ |
4259 | static void add_run(run_container_t *r, int s, int e) { |
4260 | r->runs[r->n_runs].value = s; |
4261 | r->runs[r->n_runs].length = e - s; |
4262 | r->n_runs++; |
4263 | } |
4264 | |
4265 | run_container_t *run_container_from_array(const array_container_t *c) { |
4266 | int32_t n_runs = array_container_number_of_runs(c); |
4267 | run_container_t *answer = run_container_create_given_capacity(n_runs); |
4268 | int prev = -2; |
4269 | int run_start = -1; |
4270 | int32_t card = c->cardinality; |
4271 | if (card == 0) return answer; |
4272 | for (int i = 0; i < card; ++i) { |
4273 | const uint16_t cur_val = c->array[i]; |
4274 | if (cur_val != prev + 1) { |
4275 | // new run starts; flush old one, if any |
4276 | if (run_start != -1) add_run(answer, run_start, prev); |
4277 | run_start = cur_val; |
4278 | } |
4279 | prev = c->array[i]; |
4280 | } |
4281 | // now prev is the last seen value |
4282 | add_run(answer, run_start, prev); |
4283 | // assert(run_container_cardinality(answer) == c->cardinality); |
4284 | return answer; |
4285 | } |
4286 | |
4287 | /** |
4288 | * Convert the runcontainer to either a Bitmap or an Array Container, depending |
4289 | * on the cardinality. Frees the container. |
4290 | * Allocates and returns new container, which caller is responsible for freeing |
4291 | */ |
4292 | |
4293 | void *convert_to_bitset_or_array_container(run_container_t *r, int32_t card, |
4294 | uint8_t *resulttype) { |
4295 | if (card <= DEFAULT_MAX_SIZE) { |
4296 | array_container_t *answer = array_container_create_given_capacity(card); |
4297 | answer->cardinality = 0; |
4298 | for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { |
4299 | uint16_t run_start = r->runs[rlepos].value; |
4300 | uint16_t run_end = run_start + r->runs[rlepos].length; |
4301 | for (uint16_t run_value = run_start; run_value <= run_end; |
4302 | ++run_value) { |
4303 | answer->array[answer->cardinality++] = run_value; |
4304 | } |
4305 | } |
4306 | assert(card == answer->cardinality); |
4307 | *resulttype = ARRAY_CONTAINER_TYPE_CODE; |
4308 | run_container_free(r); |
4309 | return answer; |
4310 | } |
4311 | bitset_container_t *answer = bitset_container_create(); |
4312 | for (int rlepos = 0; rlepos < r->n_runs; ++rlepos) { |
4313 | uint16_t run_start = r->runs[rlepos].value; |
4314 | bitset_set_lenrange(answer->array, run_start, r->runs[rlepos].length); |
4315 | } |
4316 | answer->cardinality = card; |
4317 | *resulttype = BITSET_CONTAINER_TYPE_CODE; |
4318 | run_container_free(r); |
4319 | return answer; |
4320 | } |
4321 | |
4322 | /* Converts a run container to either an array or a bitset, IF it saves space. |
4323 | */ |
4324 | /* If a conversion occurs, the caller is responsible to free the original |
4325 | * container and |
4326 | * he becomes responsible to free the new one. */ |
4327 | void *convert_run_to_efficient_container(run_container_t *c, |
4328 | uint8_t *typecode_after) { |
4329 | int32_t size_as_run_container = |
4330 | run_container_serialized_size_in_bytes(c->n_runs); |
4331 | |
4332 | int32_t size_as_bitset_container = |
4333 | bitset_container_serialized_size_in_bytes(); |
4334 | int32_t card = run_container_cardinality(c); |
4335 | int32_t size_as_array_container = |
4336 | array_container_serialized_size_in_bytes(card); |
4337 | |
4338 | int32_t min_size_non_run = |
4339 | size_as_bitset_container < size_as_array_container |
4340 | ? size_as_bitset_container |
4341 | : size_as_array_container; |
4342 | if (size_as_run_container <= min_size_non_run) { // no conversion |
4343 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4344 | return c; |
4345 | } |
4346 | if (card <= DEFAULT_MAX_SIZE) { |
4347 | // to array |
4348 | array_container_t *answer = array_container_create_given_capacity(card); |
4349 | answer->cardinality = 0; |
4350 | for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { |
4351 | int run_start = c->runs[rlepos].value; |
4352 | int run_end = run_start + c->runs[rlepos].length; |
4353 | |
4354 | for (int run_value = run_start; run_value <= run_end; ++run_value) { |
4355 | answer->array[answer->cardinality++] = (uint16_t)run_value; |
4356 | } |
4357 | } |
4358 | *typecode_after = ARRAY_CONTAINER_TYPE_CODE; |
4359 | return answer; |
4360 | } |
4361 | |
4362 | // else to bitset |
4363 | bitset_container_t *answer = bitset_container_create(); |
4364 | |
4365 | for (int rlepos = 0; rlepos < c->n_runs; ++rlepos) { |
4366 | int start = c->runs[rlepos].value; |
4367 | int end = start + c->runs[rlepos].length; |
4368 | bitset_set_range(answer->array, start, end + 1); |
4369 | } |
4370 | answer->cardinality = card; |
4371 | *typecode_after = BITSET_CONTAINER_TYPE_CODE; |
4372 | return answer; |
4373 | } |
4374 | |
4375 | // like convert_run_to_efficient_container but frees the old result if needed |
4376 | void *convert_run_to_efficient_container_and_free(run_container_t *c, |
4377 | uint8_t *typecode_after) { |
4378 | void *answer = convert_run_to_efficient_container(c, typecode_after); |
4379 | if (answer != c) run_container_free(c); |
4380 | return answer; |
4381 | } |
4382 | |
4383 | /* once converted, the original container is disposed here, rather than |
4384 | in roaring_array |
4385 | */ |
4386 | |
4387 | // TODO: split into run- array- and bitset- subfunctions for sanity; |
4388 | // a few function calls won't really matter. |
4389 | |
4390 | void *convert_run_optimize(void *c, uint8_t typecode_original, |
4391 | uint8_t *typecode_after) { |
4392 | if (typecode_original == RUN_CONTAINER_TYPE_CODE) { |
4393 | void *newc = convert_run_to_efficient_container((run_container_t *)c, |
4394 | typecode_after); |
4395 | if (newc != c) { |
4396 | container_free(c, typecode_original); |
4397 | } |
4398 | return newc; |
4399 | } else if (typecode_original == ARRAY_CONTAINER_TYPE_CODE) { |
4400 | // it might need to be converted to a run container. |
4401 | array_container_t *c_qua_array = (array_container_t *)c; |
4402 | int32_t n_runs = array_container_number_of_runs(c_qua_array); |
4403 | int32_t size_as_run_container = |
4404 | run_container_serialized_size_in_bytes(n_runs); |
4405 | int32_t card = array_container_cardinality(c_qua_array); |
4406 | int32_t size_as_array_container = |
4407 | array_container_serialized_size_in_bytes(card); |
4408 | |
4409 | if (size_as_run_container >= size_as_array_container) { |
4410 | *typecode_after = ARRAY_CONTAINER_TYPE_CODE; |
4411 | return c; |
4412 | } |
4413 | // else convert array to run container |
4414 | run_container_t *answer = run_container_create_given_capacity(n_runs); |
4415 | int prev = -2; |
4416 | int run_start = -1; |
4417 | |
4418 | assert(card > 0); |
4419 | for (int i = 0; i < card; ++i) { |
4420 | uint16_t cur_val = c_qua_array->array[i]; |
4421 | if (cur_val != prev + 1) { |
4422 | // new run starts; flush old one, if any |
4423 | if (run_start != -1) add_run(answer, run_start, prev); |
4424 | run_start = cur_val; |
4425 | } |
4426 | prev = c_qua_array->array[i]; |
4427 | } |
4428 | assert(run_start >= 0); |
4429 | // now prev is the last seen value |
4430 | add_run(answer, run_start, prev); |
4431 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4432 | array_container_free(c_qua_array); |
4433 | return answer; |
4434 | } else if (typecode_original == |
4435 | BITSET_CONTAINER_TYPE_CODE) { // run conversions on bitset |
4436 | // does bitset need conversion to run? |
4437 | bitset_container_t *c_qua_bitset = (bitset_container_t *)c; |
4438 | int32_t n_runs = bitset_container_number_of_runs(c_qua_bitset); |
4439 | int32_t size_as_run_container = |
4440 | run_container_serialized_size_in_bytes(n_runs); |
4441 | int32_t size_as_bitset_container = |
4442 | bitset_container_serialized_size_in_bytes(); |
4443 | |
4444 | if (size_as_bitset_container <= size_as_run_container) { |
4445 | // no conversion needed. |
4446 | *typecode_after = BITSET_CONTAINER_TYPE_CODE; |
4447 | return c; |
4448 | } |
4449 | // bitset to runcontainer (ported from Java RunContainer( |
4450 | // BitmapContainer bc, int nbrRuns)) |
4451 | assert(n_runs > 0); // no empty bitmaps |
4452 | run_container_t *answer = run_container_create_given_capacity(n_runs); |
4453 | |
4454 | int long_ctr = 0; |
4455 | uint64_t cur_word = c_qua_bitset->array[0]; |
4456 | int run_count = 0; |
4457 | while (true) { |
4458 | while (cur_word == UINT64_C(0) && |
4459 | long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) |
4460 | cur_word = c_qua_bitset->array[++long_ctr]; |
4461 | |
4462 | if (cur_word == UINT64_C(0)) { |
4463 | bitset_container_free(c_qua_bitset); |
4464 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4465 | return answer; |
4466 | } |
4467 | |
4468 | int local_run_start = __builtin_ctzll(cur_word); |
4469 | int run_start = local_run_start + 64 * long_ctr; |
4470 | uint64_t cur_word_with_1s = cur_word | (cur_word - 1); |
4471 | |
4472 | int run_end = 0; |
4473 | while (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF) && |
4474 | long_ctr < BITSET_CONTAINER_SIZE_IN_WORDS - 1) |
4475 | cur_word_with_1s = c_qua_bitset->array[++long_ctr]; |
4476 | |
4477 | if (cur_word_with_1s == UINT64_C(0xFFFFFFFFFFFFFFFF)) { |
4478 | run_end = 64 + long_ctr * 64; // exclusive, I guess |
4479 | add_run(answer, run_start, run_end - 1); |
4480 | bitset_container_free(c_qua_bitset); |
4481 | *typecode_after = RUN_CONTAINER_TYPE_CODE; |
4482 | return answer; |
4483 | } |
4484 | int local_run_end = __builtin_ctzll(~cur_word_with_1s); |
4485 | run_end = local_run_end + long_ctr * 64; |
4486 | add_run(answer, run_start, run_end - 1); |
4487 | run_count++; |
4488 | cur_word = cur_word_with_1s & (cur_word_with_1s + 1); |
4489 | } |
4490 | return answer; |
4491 | } else { |
4492 | assert(false); |
4493 | __builtin_unreachable(); |
4494 | return NULL; |
4495 | } |
4496 | } |
4497 | |
4498 | bitset_container_t *bitset_container_from_run_range(const run_container_t *run, |
4499 | uint32_t min, uint32_t max) { |
4500 | bitset_container_t *bitset = bitset_container_create(); |
4501 | int32_t union_cardinality = 0; |
4502 | for (int32_t i = 0; i < run->n_runs; ++i) { |
4503 | uint32_t rle_min = run->runs[i].value; |
4504 | uint32_t rle_max = rle_min + run->runs[i].length; |
4505 | bitset_set_lenrange(bitset->array, rle_min, rle_max - rle_min); |
4506 | union_cardinality += run->runs[i].length + 1; |
4507 | } |
4508 | union_cardinality += max - min + 1; |
4509 | union_cardinality -= bitset_lenrange_cardinality(bitset->array, min, max-min); |
4510 | bitset_set_lenrange(bitset->array, min, max - min); |
4511 | bitset->cardinality = union_cardinality; |
4512 | return bitset; |
4513 | } |
4514 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/convert.c */ |
4515 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */ |
4516 | /* |
4517 | * mixed_andnot.c. More methods since operation is not symmetric, |
4518 | * except no "wide" andnot , so no lazy options motivated. |
4519 | */ |
4520 | |
4521 | #include <assert.h> |
4522 | #include <string.h> |
4523 | |
4524 | |
4525 | /* Compute the andnot of src_1 and src_2 and write the result to |
4526 | * dst, a valid array container that could be the same as dst.*/ |
4527 | void array_bitset_container_andnot(const array_container_t *src_1, |
4528 | const bitset_container_t *src_2, |
4529 | array_container_t *dst) { |
4530 | // follows Java implementation as of June 2016 |
4531 | if (dst->capacity < src_1->cardinality) { |
4532 | array_container_grow(dst, src_1->cardinality, false); |
4533 | } |
4534 | int32_t newcard = 0; |
4535 | const int32_t origcard = src_1->cardinality; |
4536 | for (int i = 0; i < origcard; ++i) { |
4537 | uint16_t key = src_1->array[i]; |
4538 | dst->array[newcard] = key; |
4539 | newcard += 1 - bitset_container_contains(src_2, key); |
4540 | } |
4541 | dst->cardinality = newcard; |
4542 | } |
4543 | |
/* Compute the andnot of src_1 and src_2 and write the result to src_1
 * (in place). Thin wrapper that calls array_bitset_container_andnot with
 * src_1 as both the first operand and the destination. */

void array_bitset_container_iandnot(array_container_t *src_1,
                                    const bitset_container_t *src_2) {
    array_bitset_container_andnot(src_1, src_2, src_1);
}
4551 | |
4552 | /* Compute the andnot of src_1 and src_2 and write the result to |
4553 | * dst, which does not initially have a valid container. |
4554 | * Return true for a bitset result; false for array |
4555 | */ |
4556 | |
4557 | bool bitset_array_container_andnot(const bitset_container_t *src_1, |
4558 | const array_container_t *src_2, void **dst) { |
4559 | // Java did this directly, but we have option of asm or avx |
4560 | bitset_container_t *result = bitset_container_create(); |
4561 | bitset_container_copy(src_1, result); |
4562 | result->cardinality = |
4563 | (int32_t)bitset_clear_list(result->array, (uint64_t)result->cardinality, |
4564 | src_2->array, (uint64_t)src_2->cardinality); |
4565 | |
4566 | // do required type conversions. |
4567 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
4568 | *dst = array_container_from_bitset(result); |
4569 | bitset_container_free(result); |
4570 | return false; |
4571 | } |
4572 | *dst = result; |
4573 | return true; |
4574 | } |
4575 | |
4576 | /* Compute the andnot of src_1 and src_2 and write the result to |
4577 | * dst (which has no container initially). It will modify src_1 |
4578 | * to be dst if the result is a bitset. Otherwise, it will |
4579 | * free src_1 and dst will be a new array container. In both |
4580 | * cases, the caller is responsible for deallocating dst. |
4581 | * Returns true iff dst is a bitset */ |
4582 | |
4583 | bool bitset_array_container_iandnot(bitset_container_t *src_1, |
4584 | const array_container_t *src_2, |
4585 | void **dst) { |
4586 | *dst = src_1; |
4587 | src_1->cardinality = |
4588 | (int32_t)bitset_clear_list(src_1->array, (uint64_t)src_1->cardinality, |
4589 | src_2->array, (uint64_t)src_2->cardinality); |
4590 | |
4591 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
4592 | *dst = array_container_from_bitset(src_1); |
4593 | bitset_container_free(src_1); |
4594 | return false; // not bitset |
4595 | } else |
4596 | return true; |
4597 | } |
4598 | |
4599 | /* Compute the andnot of src_1 and src_2 and write the result to |
4600 | * dst. Result may be either a bitset or an array container |
4601 | * (returns "result is bitset"). dst does not initially have |
4602 | * any container, but becomes either a bitset container (return |
4603 | * result true) or an array container. |
4604 | */ |
4605 | |
4606 | bool run_bitset_container_andnot(const run_container_t *src_1, |
4607 | const bitset_container_t *src_2, void **dst) { |
4608 | // follows the Java implementation as of June 2016 |
4609 | int card = run_container_cardinality(src_1); |
4610 | if (card <= DEFAULT_MAX_SIZE) { |
4611 | // must be an array |
4612 | array_container_t *answer = array_container_create_given_capacity(card); |
4613 | answer->cardinality = 0; |
4614 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
4615 | rle16_t rle = src_1->runs[rlepos]; |
4616 | for (int run_value = rle.value; run_value <= rle.value + rle.length; |
4617 | ++run_value) { |
4618 | if (!bitset_container_get(src_2, (uint16_t)run_value)) { |
4619 | answer->array[answer->cardinality++] = (uint16_t)run_value; |
4620 | } |
4621 | } |
4622 | } |
4623 | *dst = answer; |
4624 | return false; |
4625 | } else { // we guess it will be a bitset, though have to check guess when |
4626 | // done |
4627 | bitset_container_t *answer = bitset_container_clone(src_2); |
4628 | |
4629 | uint32_t last_pos = 0; |
4630 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
4631 | rle16_t rle = src_1->runs[rlepos]; |
4632 | |
4633 | uint32_t start = rle.value; |
4634 | uint32_t end = start + rle.length + 1; |
4635 | bitset_reset_range(answer->array, last_pos, start); |
4636 | bitset_flip_range(answer->array, start, end); |
4637 | last_pos = end; |
4638 | } |
4639 | bitset_reset_range(answer->array, last_pos, (uint32_t)(1 << 16)); |
4640 | |
4641 | answer->cardinality = bitset_container_compute_cardinality(answer); |
4642 | |
4643 | if (answer->cardinality <= DEFAULT_MAX_SIZE) { |
4644 | *dst = array_container_from_bitset(answer); |
4645 | bitset_container_free(answer); |
4646 | return false; // not bitset |
4647 | } |
4648 | *dst = answer; |
4649 | return true; // bitset |
4650 | } |
4651 | } |
4652 | |
4653 | /* Compute the andnot of src_1 and src_2 and write the result to |
4654 | * dst. Result may be either a bitset or an array container |
4655 | * (returns "result is bitset"). dst does not initially have |
4656 | * any container, but becomes either a bitset container (return |
4657 | * result true) or an array container. |
4658 | */ |
4659 | |
4660 | bool run_bitset_container_iandnot(run_container_t *src_1, |
4661 | const bitset_container_t *src_2, void **dst) { |
4662 | // dummy implementation |
4663 | bool ans = run_bitset_container_andnot(src_1, src_2, dst); |
4664 | run_container_free(src_1); |
4665 | return ans; |
4666 | } |
4667 | |
4668 | /* Compute the andnot of src_1 and src_2 and write the result to |
4669 | * dst. Result may be either a bitset or an array container |
4670 | * (returns "result is bitset"). dst does not initially have |
4671 | * any container, but becomes either a bitset container (return |
4672 | * result true) or an array container. |
4673 | */ |
4674 | |
4675 | bool bitset_run_container_andnot(const bitset_container_t *src_1, |
4676 | const run_container_t *src_2, void **dst) { |
4677 | // follows Java implementation |
4678 | bitset_container_t *result = bitset_container_create(); |
4679 | |
4680 | bitset_container_copy(src_1, result); |
4681 | for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { |
4682 | rle16_t rle = src_2->runs[rlepos]; |
4683 | bitset_reset_range(result->array, rle.value, |
4684 | rle.value + rle.length + UINT32_C(1)); |
4685 | } |
4686 | result->cardinality = bitset_container_compute_cardinality(result); |
4687 | |
4688 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
4689 | *dst = array_container_from_bitset(result); |
4690 | bitset_container_free(result); |
4691 | return false; // not bitset |
4692 | } |
4693 | *dst = result; |
4694 | return true; // bitset |
4695 | } |
4696 | |
4697 | /* Compute the andnot of src_1 and src_2 and write the result to |
4698 | * dst (which has no container initially). It will modify src_1 |
4699 | * to be dst if the result is a bitset. Otherwise, it will |
4700 | * free src_1 and dst will be a new array container. In both |
4701 | * cases, the caller is responsible for deallocating dst. |
4702 | * Returns true iff dst is a bitset */ |
4703 | |
4704 | bool bitset_run_container_iandnot(bitset_container_t *src_1, |
4705 | const run_container_t *src_2, void **dst) { |
4706 | *dst = src_1; |
4707 | |
4708 | for (int32_t rlepos = 0; rlepos < src_2->n_runs; ++rlepos) { |
4709 | rle16_t rle = src_2->runs[rlepos]; |
4710 | bitset_reset_range(src_1->array, rle.value, |
4711 | rle.value + rle.length + UINT32_C(1)); |
4712 | } |
4713 | src_1->cardinality = bitset_container_compute_cardinality(src_1); |
4714 | |
4715 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
4716 | *dst = array_container_from_bitset(src_1); |
4717 | bitset_container_free(src_1); |
4718 | return false; // not bitset |
4719 | } else |
4720 | return true; |
4721 | } |
4722 | |
/* helper. Writes to a_out->array every value covered by the runs of r that
 * is not present in a_in. a_out must be a valid array container with
 * adequate capacity; only a_out->array is written here, the caller stores
 * the returned count as the cardinality.
 * Returns the cardinality of the output container. Partly Based on Java
 * implementation Util.unsignedDifference.
 *
 * TODO: Util.unsignedDifference does not use advanceUntil. Is it cheaper
 * to avoid advanceUntil?
 */

static int run_array_array_subtract(const run_container_t *r,
                                    const array_container_t *a_in,
                                    array_container_t *a_out) {
    int out_card = 0;
    int32_t in_array_pos =
        -1;  // since advanceUntil always assumes we start the search AFTER this

    for (int rlepos = 0; rlepos < r->n_runs; rlepos++) {
        int32_t start = r->runs[rlepos].value;
        int32_t end = start + r->runs[rlepos].length + 1;  // exclusive

        // Move the cursor in a_in forward, searching for `start`. The code
        // below treats a result >= a_in->cardinality as "nothing left in
        // a_in that could affect this run".
        in_array_pos = advanceUntil(a_in->array, in_array_pos,
                                    a_in->cardinality, (uint16_t)start);

        if (in_array_pos >= a_in->cardinality) {  // run has no items subtracted
            for (int32_t i = start; i < end; ++i)
                a_out->array[out_card++] = (uint16_t)i;
        } else {
            uint16_t next_nonincluded = a_in->array[in_array_pos];
            if (next_nonincluded >= end) {
                // another case when run goes unaltered
                for (int32_t i = start; i < end; ++i)
                    a_out->array[out_card++] = (uint16_t)i;
                in_array_pos--;  // ensure we see this item again if necessary
            } else {
                // Copy the run while skipping each value that matches the
                // current a_in element; on a match, move on to the next
                // a_in element (or to 0 once a_in is exhausted).
                for (int32_t i = start; i < end; ++i)
                    if (i != next_nonincluded)
                        a_out->array[out_card++] = (uint16_t)i;
                    else  // 0 should ensure we don't match
                        next_nonincluded =
                            (in_array_pos + 1 >= a_in->cardinality)
                                ? 0
                                : a_in->array[++in_array_pos];
                in_array_pos--;  // see again
            }
        }
    }
    return out_card;
}
4770 | |
4771 | /* dst does not indicate a valid container initially. Eventually it |
4772 | * can become any type of container. |
4773 | */ |
4774 | |
4775 | int run_array_container_andnot(const run_container_t *src_1, |
4776 | const array_container_t *src_2, void **dst) { |
4777 | // follows the Java impl as of June 2016 |
4778 | |
4779 | int card = run_container_cardinality(src_1); |
4780 | const int arbitrary_threshold = 32; |
4781 | |
4782 | if (card <= arbitrary_threshold) { |
4783 | if (src_2->cardinality == 0) { |
4784 | *dst = run_container_clone(src_1); |
4785 | return RUN_CONTAINER_TYPE_CODE; |
4786 | } |
4787 | // Java's "lazyandNot.toEfficientContainer" thing |
4788 | run_container_t *answer = run_container_create_given_capacity( |
4789 | card + array_container_cardinality(src_2)); |
4790 | |
4791 | int rlepos = 0; |
4792 | int xrlepos = 0; // "x" is src_2 |
4793 | rle16_t rle = src_1->runs[rlepos]; |
4794 | int32_t start = rle.value; |
4795 | int32_t end = start + rle.length + 1; |
4796 | int32_t xstart = src_2->array[xrlepos]; |
4797 | |
4798 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->cardinality)) { |
4799 | if (end <= xstart) { |
4800 | // output the first run |
4801 | answer->runs[answer->n_runs++] = |
4802 | (rle16_t){.value = (uint16_t)start, |
4803 | .length = (uint16_t)(end - start - 1)}; |
4804 | rlepos++; |
4805 | if (rlepos < src_1->n_runs) { |
4806 | start = src_1->runs[rlepos].value; |
4807 | end = start + src_1->runs[rlepos].length + 1; |
4808 | } |
4809 | } else if (xstart + 1 <= start) { |
4810 | // exit the second run |
4811 | xrlepos++; |
4812 | if (xrlepos < src_2->cardinality) { |
4813 | xstart = src_2->array[xrlepos]; |
4814 | } |
4815 | } else { |
4816 | if (start < xstart) { |
4817 | answer->runs[answer->n_runs++] = |
4818 | (rle16_t){.value = (uint16_t)start, |
4819 | .length = (uint16_t)(xstart - start - 1)}; |
4820 | } |
4821 | if (xstart + 1 < end) { |
4822 | start = xstart + 1; |
4823 | } else { |
4824 | rlepos++; |
4825 | if (rlepos < src_1->n_runs) { |
4826 | start = src_1->runs[rlepos].value; |
4827 | end = start + src_1->runs[rlepos].length + 1; |
4828 | } |
4829 | } |
4830 | } |
4831 | } |
4832 | if (rlepos < src_1->n_runs) { |
4833 | answer->runs[answer->n_runs++] = |
4834 | (rle16_t){.value = (uint16_t)start, |
4835 | .length = (uint16_t)(end - start - 1)}; |
4836 | rlepos++; |
4837 | if (rlepos < src_1->n_runs) { |
4838 | memcpy(answer->runs + answer->n_runs, src_1->runs + rlepos, |
4839 | (src_1->n_runs - rlepos) * sizeof(rle16_t)); |
4840 | answer->n_runs += (src_1->n_runs - rlepos); |
4841 | } |
4842 | } |
4843 | uint8_t return_type; |
4844 | *dst = convert_run_to_efficient_container(answer, &return_type); |
4845 | if (answer != *dst) run_container_free(answer); |
4846 | return return_type; |
4847 | } |
4848 | // else it's a bitmap or array |
4849 | |
4850 | if (card <= DEFAULT_MAX_SIZE) { |
4851 | array_container_t *ac = array_container_create_given_capacity(card); |
4852 | // nb Java code used a generic iterator-based merge to compute |
4853 | // difference |
4854 | ac->cardinality = run_array_array_subtract(src_1, src_2, ac); |
4855 | *dst = ac; |
4856 | return ARRAY_CONTAINER_TYPE_CODE; |
4857 | } |
4858 | bitset_container_t *ans = bitset_container_from_run(src_1); |
4859 | bool result_is_bitset = bitset_array_container_iandnot(ans, src_2, dst); |
4860 | return (result_is_bitset ? BITSET_CONTAINER_TYPE_CODE |
4861 | : ARRAY_CONTAINER_TYPE_CODE); |
4862 | } |
4863 | |
4864 | /* Compute the andnot of src_1 and src_2 and write the result to |
4865 | * dst (which has no container initially). It will modify src_1 |
4866 | * to be dst if the result is a bitset. Otherwise, it will |
4867 | * free src_1 and dst will be a new array container. In both |
4868 | * cases, the caller is responsible for deallocating dst. |
4869 | * Returns true iff dst is a bitset */ |
4870 | |
4871 | int run_array_container_iandnot(run_container_t *src_1, |
4872 | const array_container_t *src_2, void **dst) { |
4873 | // dummy implementation same as June 2016 Java |
4874 | int ans = run_array_container_andnot(src_1, src_2, dst); |
4875 | run_container_free(src_1); |
4876 | return ans; |
4877 | } |
4878 | |
4879 | /* dst must be a valid array container, allowed to be src_1 */ |
4880 | |
4881 | void array_run_container_andnot(const array_container_t *src_1, |
4882 | const run_container_t *src_2, |
4883 | array_container_t *dst) { |
4884 | // basically following Java impl as of June 2016 |
4885 | if (src_1->cardinality > dst->capacity) { |
4886 | array_container_grow(dst, src_1->cardinality, false); |
4887 | } |
4888 | |
4889 | if (src_2->n_runs == 0) { |
4890 | memmove(dst->array, src_1->array, |
4891 | sizeof(uint16_t) * src_1->cardinality); |
4892 | dst->cardinality = src_1->cardinality; |
4893 | return; |
4894 | } |
4895 | int32_t run_start = src_2->runs[0].value; |
4896 | int32_t run_end = run_start + src_2->runs[0].length; |
4897 | int which_run = 0; |
4898 | |
4899 | uint16_t val = 0; |
4900 | int dest_card = 0; |
4901 | for (int i = 0; i < src_1->cardinality; ++i) { |
4902 | val = src_1->array[i]; |
4903 | if (val < run_start) |
4904 | dst->array[dest_card++] = val; |
4905 | else if (val <= run_end) { |
4906 | ; // omitted item |
4907 | } else { |
4908 | do { |
4909 | if (which_run + 1 < src_2->n_runs) { |
4910 | ++which_run; |
4911 | run_start = src_2->runs[which_run].value; |
4912 | run_end = run_start + src_2->runs[which_run].length; |
4913 | |
4914 | } else |
4915 | run_start = run_end = (1 << 16) + 1; |
4916 | } while (val > run_end); |
4917 | --i; |
4918 | } |
4919 | } |
4920 | dst->cardinality = dest_card; |
4921 | } |
4922 | |
/* In-place variant of array_run_container_andnot: there is no separate dst;
 * src_1 is passed as both the first operand and the destination, so the
 * result is written into src_1.
 */

void array_run_container_iandnot(array_container_t *src_1,
                                 const run_container_t *src_2) {
    array_run_container_andnot(src_1, src_2, src_1);
}
4931 | |
4932 | /* dst does not indicate a valid container initially. Eventually it |
4933 | * can become any kind of container. |
4934 | */ |
4935 | |
4936 | int run_run_container_andnot(const run_container_t *src_1, |
4937 | const run_container_t *src_2, void **dst) { |
4938 | run_container_t *ans = run_container_create(); |
4939 | run_container_andnot(src_1, src_2, ans); |
4940 | uint8_t typecode_after; |
4941 | *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); |
4942 | return typecode_after; |
4943 | } |
4944 | |
4945 | /* Compute the andnot of src_1 and src_2 and write the result to |
4946 | * dst (which has no container initially). It will modify src_1 |
4947 | * to be dst if the result is a bitset. Otherwise, it will |
4948 | * free src_1 and dst will be a new array container. In both |
4949 | * cases, the caller is responsible for deallocating dst. |
4950 | * Returns true iff dst is a bitset */ |
4951 | |
4952 | int run_run_container_iandnot(run_container_t *src_1, |
4953 | const run_container_t *src_2, void **dst) { |
4954 | // following Java impl as of June 2016 (dummy) |
4955 | int ans = run_run_container_andnot(src_1, src_2, dst); |
4956 | run_container_free(src_1); |
4957 | return ans; |
4958 | } |
4959 | |
/*
 * Andnot of two array containers: forwards to array_container_andnot.
 * dst is a valid array container and may be the same as src_1
 */

void array_array_container_andnot(const array_container_t *src_1,
                                  const array_container_t *src_2,
                                  array_container_t *dst) {
    array_container_andnot(src_1, src_2, dst);
}
4969 | |
/* In-place array-array andnot: forwards to array_container_andnot with
 * src_1 as both first operand and destination. The in-place version will
 * always be able to reuse the space of src_1 */
void array_array_container_iandnot(array_container_t *src_1,
                                   const array_container_t *src_2) {
    array_container_andnot(src_1, src_2, src_1);
}
4976 | |
4977 | /* Compute the andnot of src_1 and src_2 and write the result to |
4978 | * dst (which has no container initially). Return value is |
4979 | * "dst is a bitset" |
4980 | */ |
4981 | |
4982 | bool bitset_bitset_container_andnot(const bitset_container_t *src_1, |
4983 | const bitset_container_t *src_2, |
4984 | void **dst) { |
4985 | bitset_container_t *ans = bitset_container_create(); |
4986 | int card = bitset_container_andnot(src_1, src_2, ans); |
4987 | if (card <= DEFAULT_MAX_SIZE) { |
4988 | *dst = array_container_from_bitset(ans); |
4989 | bitset_container_free(ans); |
4990 | return false; // not bitset |
4991 | } else { |
4992 | *dst = ans; |
4993 | return true; |
4994 | } |
4995 | } |
4996 | |
4997 | /* Compute the andnot of src_1 and src_2 and write the result to |
4998 | * dst (which has no container initially). It will modify src_1 |
4999 | * to be dst if the result is a bitset. Otherwise, it will |
5000 | * free src_1 and dst will be a new array container. In both |
5001 | * cases, the caller is responsible for deallocating dst. |
5002 | * Returns true iff dst is a bitset */ |
5003 | |
5004 | bool bitset_bitset_container_iandnot(bitset_container_t *src_1, |
5005 | const bitset_container_t *src_2, |
5006 | void **dst) { |
5007 | int card = bitset_container_andnot(src_1, src_2, src_1); |
5008 | if (card <= DEFAULT_MAX_SIZE) { |
5009 | *dst = array_container_from_bitset(src_1); |
5010 | bitset_container_free(src_1); |
5011 | return false; // not bitset |
5012 | } else { |
5013 | *dst = src_1; |
5014 | return true; |
5015 | } |
5016 | } |
5017 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_andnot.c */ |
5018 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */ |
5019 | |
5020 | bool array_container_equal_bitset(const array_container_t* container1, |
5021 | const bitset_container_t* container2) { |
5022 | if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5023 | if (container2->cardinality != container1->cardinality) { |
5024 | return false; |
5025 | } |
5026 | } |
5027 | int32_t pos = 0; |
5028 | for (int32_t i = 0; i < BITSET_CONTAINER_SIZE_IN_WORDS; ++i) { |
5029 | uint64_t w = container2->array[i]; |
5030 | while (w != 0) { |
5031 | uint64_t t = w & (~w + 1); |
5032 | uint16_t r = i * 64 + __builtin_ctzll(w); |
5033 | if (pos >= container1->cardinality) { |
5034 | return false; |
5035 | } |
5036 | if (container1->array[pos] != r) { |
5037 | return false; |
5038 | } |
5039 | ++pos; |
5040 | w ^= t; |
5041 | } |
5042 | } |
5043 | return (pos == container1->cardinality); |
5044 | } |
5045 | |
5046 | bool run_container_equals_array(const run_container_t* container1, |
5047 | const array_container_t* container2) { |
5048 | if (run_container_cardinality(container1) != container2->cardinality) |
5049 | return false; |
5050 | int32_t pos = 0; |
5051 | for (int i = 0; i < container1->n_runs; ++i) { |
5052 | const uint32_t run_start = container1->runs[i].value; |
5053 | const uint32_t le = container1->runs[i].length; |
5054 | |
5055 | if (container2->array[pos] != run_start) { |
5056 | return false; |
5057 | } |
5058 | |
5059 | if (container2->array[pos + le] != run_start + le) { |
5060 | return false; |
5061 | } |
5062 | |
5063 | pos += le + 1; |
5064 | } |
5065 | return true; |
5066 | } |
5067 | |
5068 | bool run_container_equals_bitset(const run_container_t* container1, |
5069 | const bitset_container_t* container2) { |
5070 | if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5071 | if (container2->cardinality != run_container_cardinality(container1)) { |
5072 | return false; |
5073 | } |
5074 | } else { |
5075 | int32_t card = bitset_container_compute_cardinality( |
5076 | container2); // modify container2? |
5077 | if (card != run_container_cardinality(container1)) { |
5078 | return false; |
5079 | } |
5080 | } |
5081 | for (int i = 0; i < container1->n_runs; ++i) { |
5082 | uint32_t run_start = container1->runs[i].value; |
5083 | uint32_t le = container1->runs[i].length; |
5084 | for (uint32_t j = run_start; j <= run_start + le; ++j) { |
5085 | // todo: this code could be much faster |
5086 | if (!bitset_container_contains(container2, j)) { |
5087 | return false; |
5088 | } |
5089 | } |
5090 | } |
5091 | return true; |
5092 | } |
5093 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_equal.c */ |
5094 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */ |
5095 | /* |
5096 | * mixed_intersection.c |
5097 | * |
5098 | */ |
5099 | |
5100 | |
5101 | /* Compute the intersection of src_1 and src_2 and write the result to |
5102 | * dst. */ |
5103 | void array_bitset_container_intersection(const array_container_t *src_1, |
5104 | const bitset_container_t *src_2, |
5105 | array_container_t *dst) { |
5106 | if (dst->capacity < src_1->cardinality) { |
5107 | array_container_grow(dst, src_1->cardinality, false); |
5108 | } |
5109 | int32_t newcard = 0; // dst could be src_1 |
5110 | const int32_t origcard = src_1->cardinality; |
5111 | for (int i = 0; i < origcard; ++i) { |
5112 | uint16_t key = src_1->array[i]; |
5113 | // this branchless approach is much faster... |
5114 | dst->array[newcard] = key; |
5115 | newcard += bitset_container_contains(src_2, key); |
5116 | /** |
5117 | * we could do it this way instead... |
5118 | * if (bitset_container_contains(src_2, key)) { |
5119 | * dst->array[newcard++] = key; |
5120 | * } |
5121 | * but if the result is unpredictible, the processor generates |
5122 | * many mispredicted branches. |
5123 | * Difference can be huge (from 3 cycles when predictible all the way |
5124 | * to 16 cycles when unpredictible. |
5125 | * See |
5126 | * https://github.com/lemire/Code-used-on-Daniel-Lemire-s-blog/blob/master/extra/bitset/c/arraybitsetintersection.c |
5127 | */ |
5128 | } |
5129 | dst->cardinality = newcard; |
5130 | } |
5131 | |
5132 | /* Compute the size of the intersection of src_1 and src_2. */ |
5133 | int array_bitset_container_intersection_cardinality( |
5134 | const array_container_t *src_1, const bitset_container_t *src_2) { |
5135 | int32_t newcard = 0; |
5136 | const int32_t origcard = src_1->cardinality; |
5137 | for (int i = 0; i < origcard; ++i) { |
5138 | uint16_t key = src_1->array[i]; |
5139 | newcard += bitset_container_contains(src_2, key); |
5140 | } |
5141 | return newcard; |
5142 | } |
5143 | |
5144 | |
5145 | bool array_bitset_container_intersect(const array_container_t *src_1, |
5146 | const bitset_container_t *src_2) { |
5147 | const int32_t origcard = src_1->cardinality; |
5148 | for (int i = 0; i < origcard; ++i) { |
5149 | uint16_t key = src_1->array[i]; |
5150 | if(bitset_container_contains(src_2, key)) return true; |
5151 | } |
5152 | return false; |
5153 | } |
5154 | |
5155 | /* Compute the intersection of src_1 and src_2 and write the result to |
5156 | * dst. It is allowed for dst to be equal to src_1. We assume that dst is a |
5157 | * valid container. */ |
5158 | void array_run_container_intersection(const array_container_t *src_1, |
5159 | const run_container_t *src_2, |
5160 | array_container_t *dst) { |
5161 | if (run_container_is_full(src_2)) { |
5162 | if (dst != src_1) array_container_copy(src_1, dst); |
5163 | return; |
5164 | } |
5165 | if (dst->capacity < src_1->cardinality) { |
5166 | array_container_grow(dst, src_1->cardinality, false); |
5167 | } |
5168 | if (src_2->n_runs == 0) { |
5169 | return; |
5170 | } |
5171 | int32_t rlepos = 0; |
5172 | int32_t arraypos = 0; |
5173 | rle16_t rle = src_2->runs[rlepos]; |
5174 | int32_t newcard = 0; |
5175 | while (arraypos < src_1->cardinality) { |
5176 | const uint16_t arrayval = src_1->array[arraypos]; |
5177 | while (rle.value + rle.length < |
5178 | arrayval) { // this will frequently be false |
5179 | ++rlepos; |
5180 | if (rlepos == src_2->n_runs) { |
5181 | dst->cardinality = newcard; |
5182 | return; // we are done |
5183 | } |
5184 | rle = src_2->runs[rlepos]; |
5185 | } |
5186 | if (rle.value > arrayval) { |
5187 | arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, |
5188 | rle.value); |
5189 | } else { |
5190 | dst->array[newcard] = arrayval; |
5191 | newcard++; |
5192 | arraypos++; |
5193 | } |
5194 | } |
5195 | dst->cardinality = newcard; |
5196 | } |
5197 | |
5198 | /* Compute the intersection of src_1 and src_2 and write the result to |
5199 | * *dst. If the result is true then the result is a bitset_container_t |
5200 | * otherwise is a array_container_t. If *dst == src_2, an in-place processing |
5201 | * is attempted.*/ |
5202 | bool run_bitset_container_intersection(const run_container_t *src_1, |
5203 | const bitset_container_t *src_2, |
5204 | void **dst) { |
5205 | if (run_container_is_full(src_1)) { |
5206 | if (*dst != src_2) *dst = bitset_container_clone(src_2); |
5207 | return true; |
5208 | } |
5209 | int32_t card = run_container_cardinality(src_1); |
5210 | if (card <= DEFAULT_MAX_SIZE) { |
5211 | // result can only be an array (assuming that we never make a |
5212 | // RunContainer) |
5213 | if (card > src_2->cardinality) { |
5214 | card = src_2->cardinality; |
5215 | } |
5216 | array_container_t *answer = array_container_create_given_capacity(card); |
5217 | *dst = answer; |
5218 | if (*dst == NULL) { |
5219 | return false; |
5220 | } |
5221 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5222 | rle16_t rle = src_1->runs[rlepos]; |
5223 | uint32_t endofrun = (uint32_t)rle.value + rle.length; |
5224 | for (uint32_t runValue = rle.value; runValue <= endofrun; |
5225 | ++runValue) { |
5226 | answer->array[answer->cardinality] = (uint16_t)runValue; |
5227 | answer->cardinality += |
5228 | bitset_container_contains(src_2, runValue); |
5229 | } |
5230 | } |
5231 | return false; |
5232 | } |
5233 | if (*dst == src_2) { // we attempt in-place |
5234 | bitset_container_t *answer = (bitset_container_t *)*dst; |
5235 | uint32_t start = 0; |
5236 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5237 | const rle16_t rle = src_1->runs[rlepos]; |
5238 | uint32_t end = rle.value; |
5239 | bitset_reset_range(src_2->array, start, end); |
5240 | |
5241 | start = end + rle.length + 1; |
5242 | } |
5243 | bitset_reset_range(src_2->array, start, UINT32_C(1) << 16); |
5244 | answer->cardinality = bitset_container_compute_cardinality(answer); |
5245 | if (src_2->cardinality > DEFAULT_MAX_SIZE) { |
5246 | return true; |
5247 | } else { |
5248 | array_container_t *newanswer = array_container_from_bitset(src_2); |
5249 | if (newanswer == NULL) { |
5250 | *dst = NULL; |
5251 | return false; |
5252 | } |
5253 | *dst = newanswer; |
5254 | return false; |
5255 | } |
5256 | } else { // no inplace |
5257 | // we expect the answer to be a bitmap (if we are lucky) |
5258 | bitset_container_t *answer = bitset_container_clone(src_2); |
5259 | |
5260 | *dst = answer; |
5261 | if (answer == NULL) { |
5262 | return true; |
5263 | } |
5264 | uint32_t start = 0; |
5265 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5266 | const rle16_t rle = src_1->runs[rlepos]; |
5267 | uint32_t end = rle.value; |
5268 | bitset_reset_range(answer->array, start, end); |
5269 | start = end + rle.length + 1; |
5270 | } |
5271 | bitset_reset_range(answer->array, start, UINT32_C(1) << 16); |
5272 | answer->cardinality = bitset_container_compute_cardinality(answer); |
5273 | |
5274 | if (answer->cardinality > DEFAULT_MAX_SIZE) { |
5275 | return true; |
5276 | } else { |
5277 | array_container_t *newanswer = array_container_from_bitset(answer); |
5278 | bitset_container_free((bitset_container_t *)*dst); |
5279 | if (newanswer == NULL) { |
5280 | *dst = NULL; |
5281 | return false; |
5282 | } |
5283 | *dst = newanswer; |
5284 | return false; |
5285 | } |
5286 | } |
5287 | } |
5288 | |
5289 | /* Compute the size of the intersection between src_1 and src_2 . */ |
5290 | int array_run_container_intersection_cardinality(const array_container_t *src_1, |
5291 | const run_container_t *src_2) { |
5292 | if (run_container_is_full(src_2)) { |
5293 | return src_1->cardinality; |
5294 | } |
5295 | if (src_2->n_runs == 0) { |
5296 | return 0; |
5297 | } |
5298 | int32_t rlepos = 0; |
5299 | int32_t arraypos = 0; |
5300 | rle16_t rle = src_2->runs[rlepos]; |
5301 | int32_t newcard = 0; |
5302 | while (arraypos < src_1->cardinality) { |
5303 | const uint16_t arrayval = src_1->array[arraypos]; |
5304 | while (rle.value + rle.length < |
5305 | arrayval) { // this will frequently be false |
5306 | ++rlepos; |
5307 | if (rlepos == src_2->n_runs) { |
5308 | return newcard; // we are done |
5309 | } |
5310 | rle = src_2->runs[rlepos]; |
5311 | } |
5312 | if (rle.value > arrayval) { |
5313 | arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, |
5314 | rle.value); |
5315 | } else { |
5316 | newcard++; |
5317 | arraypos++; |
5318 | } |
5319 | } |
5320 | return newcard; |
5321 | } |
5322 | |
5323 | /* Compute the intersection between src_1 and src_2 |
5324 | **/ |
5325 | int run_bitset_container_intersection_cardinality( |
5326 | const run_container_t *src_1, const bitset_container_t *src_2) { |
5327 | if (run_container_is_full(src_1)) { |
5328 | return bitset_container_cardinality(src_2); |
5329 | } |
5330 | int answer = 0; |
5331 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5332 | rle16_t rle = src_1->runs[rlepos]; |
5333 | answer += |
5334 | bitset_lenrange_cardinality(src_2->array, rle.value, rle.length); |
5335 | } |
5336 | return answer; |
5337 | } |
5338 | |
5339 | |
5340 | bool array_run_container_intersect(const array_container_t *src_1, |
5341 | const run_container_t *src_2) { |
5342 | if( run_container_is_full(src_2) ) { |
5343 | return !array_container_empty(src_1); |
5344 | } |
5345 | if (src_2->n_runs == 0) { |
5346 | return false; |
5347 | } |
5348 | int32_t rlepos = 0; |
5349 | int32_t arraypos = 0; |
5350 | rle16_t rle = src_2->runs[rlepos]; |
5351 | while (arraypos < src_1->cardinality) { |
5352 | const uint16_t arrayval = src_1->array[arraypos]; |
5353 | while (rle.value + rle.length < |
5354 | arrayval) { // this will frequently be false |
5355 | ++rlepos; |
5356 | if (rlepos == src_2->n_runs) { |
5357 | return false; // we are done |
5358 | } |
5359 | rle = src_2->runs[rlepos]; |
5360 | } |
5361 | if (rle.value > arrayval) { |
5362 | arraypos = advanceUntil(src_1->array, arraypos, src_1->cardinality, |
5363 | rle.value); |
5364 | } else { |
5365 | return true; |
5366 | } |
5367 | } |
5368 | return false; |
5369 | } |
5370 | |
5371 | /* Compute the intersection between src_1 and src_2 |
5372 | **/ |
5373 | bool run_bitset_container_intersect(const run_container_t *src_1, |
5374 | const bitset_container_t *src_2) { |
5375 | if( run_container_is_full(src_1) ) { |
5376 | return !bitset_container_empty(src_2); |
5377 | } |
5378 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5379 | rle16_t rle = src_1->runs[rlepos]; |
5380 | if(!bitset_lenrange_empty(src_2->array, rle.value,rle.length)) return true; |
5381 | } |
5382 | return false; |
5383 | } |
5384 | |
5385 | /* |
5386 | * Compute the intersection between src_1 and src_2 and write the result |
5387 | * to *dst. If the return function is true, the result is a bitset_container_t |
5388 | * otherwise is a array_container_t. |
5389 | */ |
5390 | bool bitset_bitset_container_intersection(const bitset_container_t *src_1, |
5391 | const bitset_container_t *src_2, |
5392 | void **dst) { |
5393 | const int newCardinality = bitset_container_and_justcard(src_1, src_2); |
5394 | if (newCardinality > DEFAULT_MAX_SIZE) { |
5395 | *dst = bitset_container_create(); |
5396 | if (*dst != NULL) { |
5397 | bitset_container_and_nocard(src_1, src_2, |
5398 | (bitset_container_t *)*dst); |
5399 | ((bitset_container_t *)*dst)->cardinality = newCardinality; |
5400 | } |
5401 | return true; // it is a bitset |
5402 | } |
5403 | *dst = array_container_create_given_capacity(newCardinality); |
5404 | if (*dst != NULL) { |
5405 | ((array_container_t *)*dst)->cardinality = newCardinality; |
5406 | bitset_extract_intersection_setbits_uint16( |
5407 | ((const bitset_container_t *)src_1)->array, |
5408 | ((const bitset_container_t *)src_2)->array, |
5409 | BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, |
5410 | 0); |
5411 | } |
5412 | return false; // not a bitset |
5413 | } |
5414 | |
5415 | bool bitset_bitset_container_intersection_inplace( |
5416 | bitset_container_t *src_1, const bitset_container_t *src_2, void **dst) { |
5417 | const int newCardinality = bitset_container_and_justcard(src_1, src_2); |
5418 | if (newCardinality > DEFAULT_MAX_SIZE) { |
5419 | *dst = src_1; |
5420 | bitset_container_and_nocard(src_1, src_2, src_1); |
5421 | ((bitset_container_t *)*dst)->cardinality = newCardinality; |
5422 | return true; // it is a bitset |
5423 | } |
5424 | *dst = array_container_create_given_capacity(newCardinality); |
5425 | if (*dst != NULL) { |
5426 | ((array_container_t *)*dst)->cardinality = newCardinality; |
5427 | bitset_extract_intersection_setbits_uint16( |
5428 | ((const bitset_container_t *)src_1)->array, |
5429 | ((const bitset_container_t *)src_2)->array, |
5430 | BITSET_CONTAINER_SIZE_IN_WORDS, ((array_container_t *)*dst)->array, |
5431 | 0); |
5432 | } |
5433 | return false; // not a bitset |
5434 | } |
5435 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_intersection.c */ |
5436 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */ |
5437 | /* |
5438 | * mixed_negation.c |
5439 | * |
5440 | */ |
5441 | |
5442 | #include <assert.h> |
5443 | #include <string.h> |
5444 | |
5445 | |
5446 | // TODO: make simplified and optimized negation code across |
5447 | // the full range. |
5448 | |
5449 | /* Negation across the entire range of the container. |
5450 | * Compute the negation of src and write the result |
5451 | * to *dst. The complement of a |
5452 | * sufficiently sparse set will always be dense and a hence a bitmap |
5453 | ' * We assume that dst is pre-allocated and a valid bitset container |
5454 | * There can be no in-place version. |
5455 | */ |
5456 | void array_container_negation(const array_container_t *src, |
5457 | bitset_container_t *dst) { |
5458 | uint64_t card = UINT64_C(1 << 16); |
5459 | bitset_container_set_all(dst); |
5460 | |
5461 | dst->cardinality = (int32_t)bitset_clear_list(dst->array, card, src->array, |
5462 | (uint64_t)src->cardinality); |
5463 | } |
5464 | |
5465 | /* Negation across the entire range of the container |
5466 | * Compute the negation of src and write the result |
5467 | * to *dst. A true return value indicates a bitset result, |
5468 | * otherwise the result is an array container. |
5469 | * We assume that dst is not pre-allocated. In |
5470 | * case of failure, *dst will be NULL. |
5471 | */ |
5472 | bool bitset_container_negation(const bitset_container_t *src, void **dst) { |
5473 | return bitset_container_negation_range(src, 0, (1 << 16), dst); |
5474 | } |
5475 | |
5476 | /* inplace version */ |
5477 | /* |
5478 | * Same as bitset_container_negation except that if the output is to |
5479 | * be a |
5480 | * bitset_container_t, then src is modified and no allocation is made. |
5481 | * If the output is to be an array_container_t, then caller is responsible |
5482 | * to free the container. |
5483 | * In all cases, the result is in *dst. |
5484 | */ |
5485 | bool bitset_container_negation_inplace(bitset_container_t *src, void **dst) { |
5486 | return bitset_container_negation_range_inplace(src, 0, (1 << 16), dst); |
5487 | } |
5488 | |
5489 | /* Negation across the entire range of container |
5490 | * Compute the negation of src and write the result |
5491 | * to *dst. Return values are the *_TYPECODES as defined * in containers.h |
5492 | * We assume that dst is not pre-allocated. In |
5493 | * case of failure, *dst will be NULL. |
5494 | */ |
5495 | int run_container_negation(const run_container_t *src, void **dst) { |
5496 | return run_container_negation_range(src, 0, (1 << 16), dst); |
5497 | } |
5498 | |
5499 | /* |
5500 | * Same as run_container_negation except that if the output is to |
5501 | * be a |
5502 | * run_container_t, and has the capacity to hold the result, |
5503 | * then src is modified and no allocation is made. |
5504 | * In all cases, the result is in *dst. |
5505 | */ |
5506 | int run_container_negation_inplace(run_container_t *src, void **dst) { |
5507 | return run_container_negation_range_inplace(src, 0, (1 << 16), dst); |
5508 | } |
5509 | |
5510 | /* Negation across a range of the container. |
5511 | * Compute the negation of src and write the result |
5512 | * to *dst. Returns true if the result is a bitset container |
5513 | * and false for an array container. *dst is not preallocated. |
5514 | */ |
5515 | bool array_container_negation_range(const array_container_t *src, |
5516 | const int range_start, const int range_end, |
5517 | void **dst) { |
5518 | /* close port of the Java implementation */ |
5519 | if (range_start >= range_end) { |
5520 | *dst = array_container_clone(src); |
5521 | return false; |
5522 | } |
5523 | |
5524 | int32_t start_index = |
5525 | binarySearch(src->array, src->cardinality, (uint16_t)range_start); |
5526 | if (start_index < 0) start_index = -start_index - 1; |
5527 | |
5528 | int32_t last_index = |
5529 | binarySearch(src->array, src->cardinality, (uint16_t)(range_end - 1)); |
5530 | if (last_index < 0) last_index = -last_index - 2; |
5531 | |
5532 | const int32_t current_values_in_range = last_index - start_index + 1; |
5533 | const int32_t span_to_be_flipped = range_end - range_start; |
5534 | const int32_t new_values_in_range = |
5535 | span_to_be_flipped - current_values_in_range; |
5536 | const int32_t cardinality_change = |
5537 | new_values_in_range - current_values_in_range; |
5538 | const int32_t new_cardinality = src->cardinality + cardinality_change; |
5539 | |
5540 | if (new_cardinality > DEFAULT_MAX_SIZE) { |
5541 | bitset_container_t *temp = bitset_container_from_array(src); |
5542 | bitset_flip_range(temp->array, (uint32_t)range_start, |
5543 | (uint32_t)range_end); |
5544 | temp->cardinality = new_cardinality; |
5545 | *dst = temp; |
5546 | return true; |
5547 | } |
5548 | |
5549 | array_container_t *arr = |
5550 | array_container_create_given_capacity(new_cardinality); |
5551 | *dst = (void *)arr; |
5552 | if(new_cardinality == 0) { |
5553 | arr->cardinality = new_cardinality; |
5554 | return false; // we are done. |
5555 | } |
5556 | // copy stuff before the active area |
5557 | memcpy(arr->array, src->array, start_index * sizeof(uint16_t)); |
5558 | |
5559 | // work on the range |
5560 | int32_t out_pos = start_index, in_pos = start_index; |
5561 | int32_t val_in_range = range_start; |
5562 | for (; val_in_range < range_end && in_pos <= last_index; ++val_in_range) { |
5563 | if ((uint16_t)val_in_range != src->array[in_pos]) { |
5564 | arr->array[out_pos++] = (uint16_t)val_in_range; |
5565 | } else { |
5566 | ++in_pos; |
5567 | } |
5568 | } |
5569 | for (; val_in_range < range_end; ++val_in_range) |
5570 | arr->array[out_pos++] = (uint16_t)val_in_range; |
5571 | |
5572 | // content after the active range |
5573 | memcpy(arr->array + out_pos, src->array + (last_index + 1), |
5574 | (src->cardinality - (last_index + 1)) * sizeof(uint16_t)); |
5575 | arr->cardinality = new_cardinality; |
5576 | return false; |
5577 | } |
5578 | |
5579 | /* Even when the result would fit, it is unclear how to make an |
5580 | * inplace version without inefficient copying. |
5581 | */ |
5582 | |
5583 | bool array_container_negation_range_inplace(array_container_t *src, |
5584 | const int range_start, |
5585 | const int range_end, void **dst) { |
5586 | bool ans = array_container_negation_range(src, range_start, range_end, dst); |
5587 | // TODO : try a real inplace version |
5588 | array_container_free(src); |
5589 | return ans; |
5590 | } |
5591 | |
5592 | /* Negation across a range of the container |
5593 | * Compute the negation of src and write the result |
5594 | * to *dst. A true return value indicates a bitset result, |
5595 | * otherwise the result is an array container. |
5596 | * We assume that dst is not pre-allocated. In |
5597 | * case of failure, *dst will be NULL. |
5598 | */ |
5599 | bool bitset_container_negation_range(const bitset_container_t *src, |
5600 | const int range_start, const int range_end, |
5601 | void **dst) { |
5602 | // TODO maybe consider density-based estimate |
5603 | // and sometimes build result directly as array, with |
5604 | // conversion back to bitset if wrong. Or determine |
5605 | // actual result cardinality, then go directly for the known final cont. |
5606 | |
5607 | // keep computation using bitsets as long as possible. |
5608 | bitset_container_t *t = bitset_container_clone(src); |
5609 | bitset_flip_range(t->array, (uint32_t)range_start, (uint32_t)range_end); |
5610 | t->cardinality = bitset_container_compute_cardinality(t); |
5611 | |
5612 | if (t->cardinality > DEFAULT_MAX_SIZE) { |
5613 | *dst = t; |
5614 | return true; |
5615 | } else { |
5616 | *dst = array_container_from_bitset(t); |
5617 | bitset_container_free(t); |
5618 | return false; |
5619 | } |
5620 | } |
5621 | |
5622 | /* inplace version */ |
5623 | /* |
5624 | * Same as bitset_container_negation except that if the output is to |
5625 | * be a |
5626 | * bitset_container_t, then src is modified and no allocation is made. |
5627 | * If the output is to be an array_container_t, then caller is responsible |
5628 | * to free the container. |
5629 | * In all cases, the result is in *dst. |
5630 | */ |
5631 | bool bitset_container_negation_range_inplace(bitset_container_t *src, |
5632 | const int range_start, |
5633 | const int range_end, void **dst) { |
5634 | bitset_flip_range(src->array, (uint32_t)range_start, (uint32_t)range_end); |
5635 | src->cardinality = bitset_container_compute_cardinality(src); |
5636 | if (src->cardinality > DEFAULT_MAX_SIZE) { |
5637 | *dst = src; |
5638 | return true; |
5639 | } |
5640 | *dst = array_container_from_bitset(src); |
5641 | bitset_container_free(src); |
5642 | return false; |
5643 | } |
5644 | |
5645 | /* Negation across a range of container |
5646 | * Compute the negation of src and write the result |
5647 | * to *dst. Return values are the *_TYPECODES as defined * in containers.h |
5648 | * We assume that dst is not pre-allocated. In |
5649 | * case of failure, *dst will be NULL. |
5650 | */ |
5651 | int run_container_negation_range(const run_container_t *src, |
5652 | const int range_start, const int range_end, |
5653 | void **dst) { |
5654 | uint8_t return_typecode; |
5655 | |
5656 | // follows the Java implementation |
5657 | if (range_end <= range_start) { |
5658 | *dst = run_container_clone(src); |
5659 | return RUN_CONTAINER_TYPE_CODE; |
5660 | } |
5661 | |
5662 | run_container_t *ans = run_container_create_given_capacity( |
5663 | src->n_runs + 1); // src->n_runs + 1); |
5664 | int k = 0; |
5665 | for (; k < src->n_runs && src->runs[k].value < range_start; ++k) { |
5666 | ans->runs[k] = src->runs[k]; |
5667 | ans->n_runs++; |
5668 | } |
5669 | |
5670 | run_container_smart_append_exclusive( |
5671 | ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1)); |
5672 | |
5673 | for (; k < src->n_runs; ++k) { |
5674 | run_container_smart_append_exclusive(ans, src->runs[k].value, |
5675 | src->runs[k].length); |
5676 | } |
5677 | |
5678 | *dst = convert_run_to_efficient_container(ans, &return_typecode); |
5679 | if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans); |
5680 | |
5681 | return return_typecode; |
5682 | } |
5683 | |
/*
 * Same as run_container_negation_range except that if the output is to
 * be a run_container_t, and src has the capacity to hold the result,
 * then src is modified and no allocation is made.
 * In all cases, the result is in *dst. The return value is the typecode
 * reported for *dst.
 *
 * Ownership as visible here: src is freed when the function falls back to
 * run_container_negation_range, and also when the final conversion yields a
 * container whose typecode is not RUN_CONTAINER_TYPE_CODE.
 */
int run_container_negation_range_inplace(run_container_t *src,
                                         const int range_start,
                                         const int range_end, void **dst) {
    uint8_t return_typecode;

    // empty range: nothing to flip, src itself is the result
    if (range_end <= range_start) {
        *dst = src;
        return RUN_CONTAINER_TYPE_CODE;
    }

    // TODO: efficient special case when range is 0 to 65535 inclusive

    if (src->capacity == src->n_runs) {
        // no excess room.  More checking to see if result can fit
        // The four flags record membership of the values just outside and
        // just inside each end of the range; values outside 0..65535 count
        // as absent.
        bool last_val_before_range = false;
        bool first_val_in_range = false;
        bool last_val_in_range = false;
        bool first_val_past_range = false;

        if (range_start > 0)
            last_val_before_range =
                run_container_contains(src, (uint16_t)(range_start - 1));
        first_val_in_range = run_container_contains(src, (uint16_t)range_start);

        if (last_val_before_range == first_val_in_range) {
            last_val_in_range =
                run_container_contains(src, (uint16_t)(range_end - 1));
            if (range_end != 0x10000)
                first_val_past_range =
                    run_container_contains(src, (uint16_t)range_end);

            // Membership agrees across both ends of the range; presumably
            // the flip then needs more runs than src can hold, so the
            // allocating version is used and src is released.
            if (last_val_in_range ==
                first_val_past_range) {  // no space for inplace
                int ans = run_container_negation_range(src, range_start,
                                                       range_end, dst);
                run_container_free(src);
                return ans;
            }
        }
    }
    // all other cases: result will fit

    // ans aliases src: the runs are rewritten in the same buffer
    run_container_t *ans = src;
    int my_nbr_runs = src->n_runs;  // original run count, saved before reset

    ans->n_runs = 0;
    int k = 0;
    // runs starting before the range stay where they are
    for (; (k < my_nbr_runs) && (src->runs[k].value < range_start); ++k) {
        // ans->runs[k] = src->runs[k]; (would be self-copy)
        ans->n_runs++;
    }

    // as with Java implementation, use locals to give self a buffer of depth 1
    // (each pending run is read into a local before the appends below can
    // write over its slot)
    rle16_t buffered = (rle16_t){.value = (uint16_t)0, .length = (uint16_t)0};
    rle16_t next = buffered;
    if (k < my_nbr_runs) buffered = src->runs[k];

    // append the flipped range itself
    run_container_smart_append_exclusive(
        ans, (uint16_t)range_start, (uint16_t)(range_end - range_start - 1));

    // then the remaining original runs, always reading one run ahead
    for (; k < my_nbr_runs; ++k) {
        if (k + 1 < my_nbr_runs) next = src->runs[k + 1];

        run_container_smart_append_exclusive(ans, buffered.value,
                                             buffered.length);
        buffered = next;
    }

    *dst = convert_run_to_efficient_container(ans, &return_typecode);
    // a non-run typecode means *dst is a different container; release src
    if (return_typecode != RUN_CONTAINER_TYPE_CODE) run_container_free(ans);

    return return_typecode;
}
5764 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_negation.c */ |
5765 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */ |
5766 | |
5767 | bool array_container_is_subset_bitset(const array_container_t* container1, |
5768 | const bitset_container_t* container2) { |
5769 | if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5770 | if (container2->cardinality < container1->cardinality) { |
5771 | return false; |
5772 | } |
5773 | } |
5774 | for (int i = 0; i < container1->cardinality; ++i) { |
5775 | if (!bitset_container_contains(container2, container1->array[i])) { |
5776 | return false; |
5777 | } |
5778 | } |
5779 | return true; |
5780 | } |
5781 | |
5782 | bool run_container_is_subset_array(const run_container_t* container1, |
5783 | const array_container_t* container2) { |
5784 | if (run_container_cardinality(container1) > container2->cardinality) |
5785 | return false; |
5786 | int32_t start_pos = -1, stop_pos = -1; |
5787 | for (int i = 0; i < container1->n_runs; ++i) { |
5788 | int32_t start = container1->runs[i].value; |
5789 | int32_t stop = start + container1->runs[i].length; |
5790 | start_pos = advanceUntil(container2->array, stop_pos, |
5791 | container2->cardinality, start); |
5792 | stop_pos = advanceUntil(container2->array, stop_pos, |
5793 | container2->cardinality, stop); |
5794 | if (start_pos == container2->cardinality) { |
5795 | return false; |
5796 | } else if (stop_pos - start_pos != stop - start || |
5797 | container2->array[start_pos] != start || |
5798 | container2->array[stop_pos] != stop) { |
5799 | return false; |
5800 | } |
5801 | } |
5802 | return true; |
5803 | } |
5804 | |
5805 | bool array_container_is_subset_run(const array_container_t* container1, |
5806 | const run_container_t* container2) { |
5807 | if (container1->cardinality > run_container_cardinality(container2)) |
5808 | return false; |
5809 | int i_array = 0, i_run = 0; |
5810 | while (i_array < container1->cardinality && i_run < container2->n_runs) { |
5811 | uint32_t start = container2->runs[i_run].value; |
5812 | uint32_t stop = start + container2->runs[i_run].length; |
5813 | if (container1->array[i_array] < start) { |
5814 | return false; |
5815 | } else if (container1->array[i_array] > stop) { |
5816 | i_run++; |
5817 | } else { // the value of the array is in the run |
5818 | i_array++; |
5819 | } |
5820 | } |
5821 | if (i_array == container1->cardinality) { |
5822 | return true; |
5823 | } else { |
5824 | return false; |
5825 | } |
5826 | } |
5827 | |
5828 | bool run_container_is_subset_bitset(const run_container_t* container1, |
5829 | const bitset_container_t* container2) { |
5830 | // todo: this code could be much faster |
5831 | if (container2->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5832 | if (container2->cardinality < run_container_cardinality(container1)) { |
5833 | return false; |
5834 | } |
5835 | } else { |
5836 | int32_t card = bitset_container_compute_cardinality( |
5837 | container2); // modify container2? |
5838 | if (card < run_container_cardinality(container1)) { |
5839 | return false; |
5840 | } |
5841 | } |
5842 | for (int i = 0; i < container1->n_runs; ++i) { |
5843 | uint32_t run_start = container1->runs[i].value; |
5844 | uint32_t le = container1->runs[i].length; |
5845 | for (uint32_t j = run_start; j <= run_start + le; ++j) { |
5846 | if (!bitset_container_contains(container2, j)) { |
5847 | return false; |
5848 | } |
5849 | } |
5850 | } |
5851 | return true; |
5852 | } |
5853 | |
5854 | bool bitset_container_is_subset_run(const bitset_container_t* container1, |
5855 | const run_container_t* container2) { |
5856 | // todo: this code could be much faster |
5857 | if (container1->cardinality != BITSET_UNKNOWN_CARDINALITY) { |
5858 | if (container1->cardinality > run_container_cardinality(container2)) { |
5859 | return false; |
5860 | } |
5861 | } |
5862 | int32_t i_bitset = 0, i_run = 0; |
5863 | while (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS && |
5864 | i_run < container2->n_runs) { |
5865 | uint64_t w = container1->array[i_bitset]; |
5866 | while (w != 0 && i_run < container2->n_runs) { |
5867 | uint32_t start = container2->runs[i_run].value; |
5868 | uint32_t stop = start + container2->runs[i_run].length; |
5869 | uint64_t t = w & (~w + 1); |
5870 | uint16_t r = i_bitset * 64 + __builtin_ctzll(w); |
5871 | if (r < start) { |
5872 | return false; |
5873 | } else if (r > stop) { |
5874 | i_run++; |
5875 | continue; |
5876 | } else { |
5877 | w ^= t; |
5878 | } |
5879 | } |
5880 | if (w == 0) { |
5881 | i_bitset++; |
5882 | } else { |
5883 | return false; |
5884 | } |
5885 | } |
5886 | if (i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS) { |
5887 | // terminated iterating on the run containers, check that rest of bitset |
5888 | // is empty |
5889 | for (; i_bitset < BITSET_CONTAINER_SIZE_IN_WORDS; i_bitset++) { |
5890 | if (container1->array[i_bitset] != 0) { |
5891 | return false; |
5892 | } |
5893 | } |
5894 | } |
5895 | return true; |
5896 | } |
5897 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_subset.c */ |
5898 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */ |
5899 | /* |
5900 | * mixed_union.c |
5901 | * |
5902 | */ |
5903 | |
5904 | #include <assert.h> |
5905 | #include <string.h> |
5906 | |
5907 | |
5908 | /* Compute the union of src_1 and src_2 and write the result to |
5909 | * dst. */ |
5910 | void array_bitset_container_union(const array_container_t *src_1, |
5911 | const bitset_container_t *src_2, |
5912 | bitset_container_t *dst) { |
5913 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
5914 | dst->cardinality = (int32_t)bitset_set_list_withcard( |
5915 | dst->array, dst->cardinality, src_1->array, src_1->cardinality); |
5916 | } |
5917 | |
5918 | /* Compute the union of src_1 and src_2 and write the result to |
5919 | * dst. It is allowed for src_2 to be dst. This version does not |
5920 | * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). */ |
5921 | void array_bitset_container_lazy_union(const array_container_t *src_1, |
5922 | const bitset_container_t *src_2, |
5923 | bitset_container_t *dst) { |
5924 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
5925 | bitset_set_list(dst->array, src_1->array, src_1->cardinality); |
5926 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
5927 | } |
5928 | |
5929 | void run_bitset_container_union(const run_container_t *src_1, |
5930 | const bitset_container_t *src_2, |
5931 | bitset_container_t *dst) { |
5932 | assert(!run_container_is_full(src_1)); // catch this case upstream |
5933 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
5934 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5935 | rle16_t rle = src_1->runs[rlepos]; |
5936 | bitset_set_lenrange(dst->array, rle.value, rle.length); |
5937 | } |
5938 | dst->cardinality = bitset_container_compute_cardinality(dst); |
5939 | } |
5940 | |
5941 | void run_bitset_container_lazy_union(const run_container_t *src_1, |
5942 | const bitset_container_t *src_2, |
5943 | bitset_container_t *dst) { |
5944 | assert(!run_container_is_full(src_1)); // catch this case upstream |
5945 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
5946 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
5947 | rle16_t rle = src_1->runs[rlepos]; |
5948 | bitset_set_lenrange(dst->array, rle.value, rle.length); |
5949 | } |
5950 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
5951 | } |
5952 | |
5953 | // why do we leave the result as a run container?? |
5954 | void array_run_container_union(const array_container_t *src_1, |
5955 | const run_container_t *src_2, |
5956 | run_container_t *dst) { |
5957 | if (run_container_is_full(src_2)) { |
5958 | run_container_copy(src_2, dst); |
5959 | return; |
5960 | } |
5961 | // TODO: see whether the "2*" is spurious |
5962 | run_container_grow(dst, 2 * (src_1->cardinality + src_2->n_runs), false); |
5963 | int32_t rlepos = 0; |
5964 | int32_t arraypos = 0; |
5965 | rle16_t previousrle; |
5966 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
5967 | previousrle = run_container_append_first(dst, src_2->runs[rlepos]); |
5968 | rlepos++; |
5969 | } else { |
5970 | previousrle = |
5971 | run_container_append_value_first(dst, src_1->array[arraypos]); |
5972 | arraypos++; |
5973 | } |
5974 | while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { |
5975 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
5976 | run_container_append(dst, src_2->runs[rlepos], &previousrle); |
5977 | rlepos++; |
5978 | } else { |
5979 | run_container_append_value(dst, src_1->array[arraypos], |
5980 | &previousrle); |
5981 | arraypos++; |
5982 | } |
5983 | } |
5984 | if (arraypos < src_1->cardinality) { |
5985 | while (arraypos < src_1->cardinality) { |
5986 | run_container_append_value(dst, src_1->array[arraypos], |
5987 | &previousrle); |
5988 | arraypos++; |
5989 | } |
5990 | } else { |
5991 | while (rlepos < src_2->n_runs) { |
5992 | run_container_append(dst, src_2->runs[rlepos], &previousrle); |
5993 | rlepos++; |
5994 | } |
5995 | } |
5996 | } |
5997 | |
5998 | void array_run_container_inplace_union(const array_container_t *src_1, |
5999 | run_container_t *src_2) { |
6000 | if (run_container_is_full(src_2)) { |
6001 | return; |
6002 | } |
6003 | const int32_t maxoutput = src_1->cardinality + src_2->n_runs; |
6004 | const int32_t neededcapacity = maxoutput + src_2->n_runs; |
6005 | if (src_2->capacity < neededcapacity) |
6006 | run_container_grow(src_2, neededcapacity, true); |
6007 | memmove(src_2->runs + maxoutput, src_2->runs, |
6008 | src_2->n_runs * sizeof(rle16_t)); |
6009 | rle16_t *inputsrc2 = src_2->runs + maxoutput; |
6010 | int32_t rlepos = 0; |
6011 | int32_t arraypos = 0; |
6012 | int src2nruns = src_2->n_runs; |
6013 | src_2->n_runs = 0; |
6014 | |
6015 | rle16_t previousrle; |
6016 | |
6017 | if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { |
6018 | previousrle = run_container_append_first(src_2, inputsrc2[rlepos]); |
6019 | rlepos++; |
6020 | } else { |
6021 | previousrle = |
6022 | run_container_append_value_first(src_2, src_1->array[arraypos]); |
6023 | arraypos++; |
6024 | } |
6025 | |
6026 | while ((rlepos < src2nruns) && (arraypos < src_1->cardinality)) { |
6027 | if (inputsrc2[rlepos].value <= src_1->array[arraypos]) { |
6028 | run_container_append(src_2, inputsrc2[rlepos], &previousrle); |
6029 | rlepos++; |
6030 | } else { |
6031 | run_container_append_value(src_2, src_1->array[arraypos], |
6032 | &previousrle); |
6033 | arraypos++; |
6034 | } |
6035 | } |
6036 | if (arraypos < src_1->cardinality) { |
6037 | while (arraypos < src_1->cardinality) { |
6038 | run_container_append_value(src_2, src_1->array[arraypos], |
6039 | &previousrle); |
6040 | arraypos++; |
6041 | } |
6042 | } else { |
6043 | while (rlepos < src2nruns) { |
6044 | run_container_append(src_2, inputsrc2[rlepos], &previousrle); |
6045 | rlepos++; |
6046 | } |
6047 | } |
6048 | } |
6049 | |
6050 | bool array_array_container_union(const array_container_t *src_1, |
6051 | const array_container_t *src_2, void **dst) { |
6052 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6053 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
6054 | *dst = array_container_create_given_capacity(totalCardinality); |
6055 | if (*dst != NULL) { |
6056 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
6057 | } else { |
6058 | return true; // otherwise failure won't be caught |
6059 | } |
6060 | return false; // not a bitset |
6061 | } |
6062 | *dst = bitset_container_create(); |
6063 | bool returnval = true; // expect a bitset |
6064 | if (*dst != NULL) { |
6065 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6066 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
6067 | ourbitset->cardinality = (int32_t)bitset_set_list_withcard( |
6068 | ourbitset->array, src_1->cardinality, src_2->array, |
6069 | src_2->cardinality); |
6070 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
6071 | // need to convert! |
6072 | *dst = array_container_from_bitset(ourbitset); |
6073 | bitset_container_free(ourbitset); |
6074 | returnval = false; // not going to be a bitset |
6075 | } |
6076 | } |
6077 | return returnval; |
6078 | } |
6079 | |
6080 | bool array_array_container_inplace_union(array_container_t *src_1, |
6081 | const array_container_t *src_2, void **dst) { |
6082 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6083 | *dst = NULL; |
6084 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
6085 | if(src_1->capacity < totalCardinality) { |
6086 | *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous |
6087 | if (*dst != NULL) { |
6088 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
6089 | } else { |
6090 | return true; // otherwise failure won't be caught |
6091 | } |
6092 | return false; // not a bitset |
6093 | } else { |
6094 | memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); |
6095 | src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, |
6096 | src_2->array, src_2->cardinality, src_1->array); |
6097 | return false; // not a bitset |
6098 | } |
6099 | } |
6100 | *dst = bitset_container_create(); |
6101 | bool returnval = true; // expect a bitset |
6102 | if (*dst != NULL) { |
6103 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6104 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
6105 | ourbitset->cardinality = (int32_t)bitset_set_list_withcard( |
6106 | ourbitset->array, src_1->cardinality, src_2->array, |
6107 | src_2->cardinality); |
6108 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
6109 | // need to convert! |
6110 | if(src_1->capacity < ourbitset->cardinality) { |
6111 | array_container_grow(src_1, ourbitset->cardinality, false); |
6112 | } |
6113 | |
6114 | bitset_extract_setbits_uint16(ourbitset->array, BITSET_CONTAINER_SIZE_IN_WORDS, |
6115 | src_1->array, 0); |
6116 | src_1->cardinality = ourbitset->cardinality; |
6117 | *dst = src_1; |
6118 | bitset_container_free(ourbitset); |
6119 | returnval = false; // not going to be a bitset |
6120 | } |
6121 | } |
6122 | return returnval; |
6123 | } |
6124 | |
6125 | |
6126 | bool array_array_container_lazy_union(const array_container_t *src_1, |
6127 | const array_container_t *src_2, |
6128 | void **dst) { |
6129 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6130 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
6131 | *dst = array_container_create_given_capacity(totalCardinality); |
6132 | if (*dst != NULL) { |
6133 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
6134 | } else { |
6135 | return true; // otherwise failure won't be caught |
6136 | } |
6137 | return false; // not a bitset |
6138 | } |
6139 | *dst = bitset_container_create(); |
6140 | bool returnval = true; // expect a bitset |
6141 | if (*dst != NULL) { |
6142 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6143 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
6144 | bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); |
6145 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6146 | } |
6147 | return returnval; |
6148 | } |
6149 | |
6150 | |
6151 | bool array_array_container_lazy_inplace_union(array_container_t *src_1, |
6152 | const array_container_t *src_2, |
6153 | void **dst) { |
6154 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6155 | *dst = NULL; |
6156 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
6157 | if(src_1->capacity < totalCardinality) { |
6158 | *dst = array_container_create_given_capacity(2 * totalCardinality); // be purposefully generous |
6159 | if (*dst != NULL) { |
6160 | array_container_union(src_1, src_2, (array_container_t *)*dst); |
6161 | } else { |
6162 | return true; // otherwise failure won't be caught |
6163 | } |
6164 | return false; // not a bitset |
6165 | } else { |
6166 | memmove(src_1->array + src_2->cardinality, src_1->array, src_1->cardinality * sizeof(uint16_t)); |
6167 | src_1->cardinality = (int32_t)fast_union_uint16(src_1->array + src_2->cardinality, src_1->cardinality, |
6168 | src_2->array, src_2->cardinality, src_1->array); |
6169 | return false; // not a bitset |
6170 | } |
6171 | } |
6172 | *dst = bitset_container_create(); |
6173 | bool returnval = true; // expect a bitset |
6174 | if (*dst != NULL) { |
6175 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6176 | bitset_set_list(ourbitset->array, src_1->array, src_1->cardinality); |
6177 | bitset_set_list(ourbitset->array, src_2->array, src_2->cardinality); |
6178 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6179 | } |
6180 | return returnval; |
6181 | } |
6182 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_union.c */ |
6183 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */ |
6184 | /* |
6185 | * mixed_xor.c |
6186 | */ |
6187 | |
6188 | #include <assert.h> |
6189 | #include <string.h> |
6190 | |
6191 | |
6192 | /* Compute the xor of src_1 and src_2 and write the result to |
6193 | * dst (which has no container initially). |
6194 | * Result is true iff dst is a bitset */ |
6195 | bool array_bitset_container_xor(const array_container_t *src_1, |
6196 | const bitset_container_t *src_2, void **dst) { |
6197 | bitset_container_t *result = bitset_container_create(); |
6198 | bitset_container_copy(src_2, result); |
6199 | result->cardinality = (int32_t)bitset_flip_list_withcard( |
6200 | result->array, result->cardinality, src_1->array, src_1->cardinality); |
6201 | |
6202 | // do required type conversions. |
6203 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
6204 | *dst = array_container_from_bitset(result); |
6205 | bitset_container_free(result); |
6206 | return false; // not bitset |
6207 | } |
6208 | *dst = result; |
6209 | return true; // bitset |
6210 | } |
6211 | |
6212 | /* Compute the xor of src_1 and src_2 and write the result to |
6213 | * dst. It is allowed for src_2 to be dst. This version does not |
6214 | * update the cardinality of dst (it is set to BITSET_UNKNOWN_CARDINALITY). |
6215 | */ |
6216 | |
6217 | void array_bitset_container_lazy_xor(const array_container_t *src_1, |
6218 | const bitset_container_t *src_2, |
6219 | bitset_container_t *dst) { |
6220 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
6221 | bitset_flip_list(dst->array, src_1->array, src_1->cardinality); |
6222 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6223 | } |
6224 | |
6225 | /* Compute the xor of src_1 and src_2 and write the result to |
6226 | * dst. Result may be either a bitset or an array container |
6227 | * (returns "result is bitset"). dst does not initially have |
6228 | * any container, but becomes either a bitset container (return |
6229 | * result true) or an array container. |
6230 | */ |
6231 | |
6232 | bool run_bitset_container_xor(const run_container_t *src_1, |
6233 | const bitset_container_t *src_2, void **dst) { |
6234 | bitset_container_t *result = bitset_container_create(); |
6235 | |
6236 | bitset_container_copy(src_2, result); |
6237 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
6238 | rle16_t rle = src_1->runs[rlepos]; |
6239 | bitset_flip_range(result->array, rle.value, |
6240 | rle.value + rle.length + UINT32_C(1)); |
6241 | } |
6242 | result->cardinality = bitset_container_compute_cardinality(result); |
6243 | |
6244 | if (result->cardinality <= DEFAULT_MAX_SIZE) { |
6245 | *dst = array_container_from_bitset(result); |
6246 | bitset_container_free(result); |
6247 | return false; // not bitset |
6248 | } |
6249 | *dst = result; |
6250 | return true; // bitset |
6251 | } |
6252 | |
6253 | /* lazy xor. Dst is initialized and may be equal to src_2. |
6254 | * Result is left as a bitset container, even if actual |
6255 | * cardinality would dictate an array container. |
6256 | */ |
6257 | |
6258 | void run_bitset_container_lazy_xor(const run_container_t *src_1, |
6259 | const bitset_container_t *src_2, |
6260 | bitset_container_t *dst) { |
6261 | if (src_2 != dst) bitset_container_copy(src_2, dst); |
6262 | for (int32_t rlepos = 0; rlepos < src_1->n_runs; ++rlepos) { |
6263 | rle16_t rle = src_1->runs[rlepos]; |
6264 | bitset_flip_range(dst->array, rle.value, |
6265 | rle.value + rle.length + UINT32_C(1)); |
6266 | } |
6267 | dst->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6268 | } |
6269 | |
6270 | /* dst does not indicate a valid container initially. Eventually it |
6271 | * can become any kind of container. |
6272 | */ |
6273 | |
6274 | int array_run_container_xor(const array_container_t *src_1, |
6275 | const run_container_t *src_2, void **dst) { |
6276 | // semi following Java XOR implementation as of May 2016 |
6277 | // the C OR implementation works quite differently and can return a run |
6278 | // container |
6279 | // TODO could optimize for full run containers. |
6280 | |
6281 | // use of lazy following Java impl. |
6282 | const int arbitrary_threshold = 32; |
6283 | if (src_1->cardinality < arbitrary_threshold) { |
6284 | run_container_t *ans = run_container_create(); |
6285 | array_run_container_lazy_xor(src_1, src_2, ans); // keeps runs. |
6286 | uint8_t typecode_after; |
6287 | *dst = |
6288 | convert_run_to_efficient_container_and_free(ans, &typecode_after); |
6289 | return typecode_after; |
6290 | } |
6291 | |
6292 | int card = run_container_cardinality(src_2); |
6293 | if (card <= DEFAULT_MAX_SIZE) { |
6294 | // Java implementation works with the array, xoring the run elements via |
6295 | // iterator |
6296 | array_container_t *temp = array_container_from_run(src_2); |
6297 | bool ret_is_bitset = array_array_container_xor(temp, src_1, dst); |
6298 | array_container_free(temp); |
6299 | return ret_is_bitset ? BITSET_CONTAINER_TYPE_CODE |
6300 | : ARRAY_CONTAINER_TYPE_CODE; |
6301 | |
6302 | } else { // guess that it will end up as a bitset |
6303 | bitset_container_t *result = bitset_container_from_run(src_2); |
6304 | bool is_bitset = bitset_array_container_ixor(result, src_1, dst); |
6305 | // any necessary type conversion has been done by the ixor |
6306 | int retval = (is_bitset ? BITSET_CONTAINER_TYPE_CODE |
6307 | : ARRAY_CONTAINER_TYPE_CODE); |
6308 | return retval; |
6309 | } |
6310 | } |
6311 | |
6312 | /* Dst is a valid run container. (Can it be src_2? Let's say not.) |
6313 | * Leaves result as run container, even if other options are |
6314 | * smaller. |
6315 | */ |
6316 | |
6317 | void array_run_container_lazy_xor(const array_container_t *src_1, |
6318 | const run_container_t *src_2, |
6319 | run_container_t *dst) { |
6320 | run_container_grow(dst, src_1->cardinality + src_2->n_runs, false); |
6321 | int32_t rlepos = 0; |
6322 | int32_t arraypos = 0; |
6323 | dst->n_runs = 0; |
6324 | |
6325 | while ((rlepos < src_2->n_runs) && (arraypos < src_1->cardinality)) { |
6326 | if (src_2->runs[rlepos].value <= src_1->array[arraypos]) { |
6327 | run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, |
6328 | src_2->runs[rlepos].length); |
6329 | rlepos++; |
6330 | } else { |
6331 | run_container_smart_append_exclusive(dst, src_1->array[arraypos], |
6332 | 0); |
6333 | arraypos++; |
6334 | } |
6335 | } |
6336 | while (arraypos < src_1->cardinality) { |
6337 | run_container_smart_append_exclusive(dst, src_1->array[arraypos], 0); |
6338 | arraypos++; |
6339 | } |
6340 | while (rlepos < src_2->n_runs) { |
6341 | run_container_smart_append_exclusive(dst, src_2->runs[rlepos].value, |
6342 | src_2->runs[rlepos].length); |
6343 | rlepos++; |
6344 | } |
6345 | } |
6346 | |
6347 | /* dst does not indicate a valid container initially. Eventually it |
6348 | * can become any kind of container. |
6349 | */ |
6350 | |
6351 | int run_run_container_xor(const run_container_t *src_1, |
6352 | const run_container_t *src_2, void **dst) { |
6353 | run_container_t *ans = run_container_create(); |
6354 | run_container_xor(src_1, src_2, ans); |
6355 | uint8_t typecode_after; |
6356 | *dst = convert_run_to_efficient_container_and_free(ans, &typecode_after); |
6357 | return typecode_after; |
6358 | } |
6359 | |
6360 | /* |
6361 | * Java implementation (as of May 2016) for array_run, run_run |
6362 | * and bitset_run don't do anything different for inplace. |
6363 | * Could adopt the mixed_union.c approach instead (ie, using |
6364 | * smart_append_exclusive) |
6365 | * |
6366 | */ |
6367 | |
6368 | bool array_array_container_xor(const array_container_t *src_1, |
6369 | const array_container_t *src_2, void **dst) { |
6370 | int totalCardinality = |
6371 | src_1->cardinality + src_2->cardinality; // upper bound |
6372 | if (totalCardinality <= DEFAULT_MAX_SIZE) { |
6373 | *dst = array_container_create_given_capacity(totalCardinality); |
6374 | array_container_xor(src_1, src_2, (array_container_t *)*dst); |
6375 | return false; // not a bitset |
6376 | } |
6377 | *dst = bitset_container_from_array(src_1); |
6378 | bool returnval = true; // expect a bitset |
6379 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6380 | ourbitset->cardinality = (uint32_t)bitset_flip_list_withcard( |
6381 | ourbitset->array, src_1->cardinality, src_2->array, src_2->cardinality); |
6382 | if (ourbitset->cardinality <= DEFAULT_MAX_SIZE) { |
6383 | // need to convert! |
6384 | *dst = array_container_from_bitset(ourbitset); |
6385 | bitset_container_free(ourbitset); |
6386 | returnval = false; // not going to be a bitset |
6387 | } |
6388 | |
6389 | return returnval; |
6390 | } |
6391 | |
6392 | bool array_array_container_lazy_xor(const array_container_t *src_1, |
6393 | const array_container_t *src_2, |
6394 | void **dst) { |
6395 | int totalCardinality = src_1->cardinality + src_2->cardinality; |
6396 | // upper bound, but probably poor estimate for xor |
6397 | if (totalCardinality <= ARRAY_LAZY_LOWERBOUND) { |
6398 | *dst = array_container_create_given_capacity(totalCardinality); |
6399 | if (*dst != NULL) |
6400 | array_container_xor(src_1, src_2, (array_container_t *)*dst); |
6401 | return false; // not a bitset |
6402 | } |
6403 | *dst = bitset_container_from_array(src_1); |
6404 | bool returnval = true; // expect a bitset (maybe, for XOR??) |
6405 | if (*dst != NULL) { |
6406 | bitset_container_t *ourbitset = (bitset_container_t *)*dst; |
6407 | bitset_flip_list(ourbitset->array, src_2->array, src_2->cardinality); |
6408 | ourbitset->cardinality = BITSET_UNKNOWN_CARDINALITY; |
6409 | } |
6410 | return returnval; |
6411 | } |
6412 | |
6413 | /* Compute the xor of src_1 and src_2 and write the result to |
6414 | * dst (which has no container initially). Return value is |
6415 | * "dst is a bitset" |
6416 | */ |
6417 | |
6418 | bool bitset_bitset_container_xor(const bitset_container_t *src_1, |
6419 | const bitset_container_t *src_2, void **dst) { |
6420 | bitset_container_t *ans = bitset_container_create(); |
6421 | int card = bitset_container_xor(src_1, src_2, ans); |
6422 | if (card <= DEFAULT_MAX_SIZE) { |
6423 | *dst = array_container_from_bitset(ans); |
6424 | bitset_container_free(ans); |
6425 | return false; // not bitset |
6426 | } else { |
6427 | *dst = ans; |
6428 | return true; |
6429 | } |
6430 | } |
6431 | |
6432 | /* Compute the xor of src_1 and src_2 and write the result to |
6433 | * dst (which has no container initially). It will modify src_1 |
6434 | * to be dst if the result is a bitset. Otherwise, it will |
6435 | * free src_1 and dst will be a new array container. In both |
6436 | * cases, the caller is responsible for deallocating dst. |
6437 | * Returns true iff dst is a bitset */ |
6438 | |
6439 | bool bitset_array_container_ixor(bitset_container_t *src_1, |
6440 | const array_container_t *src_2, void **dst) { |
6441 | *dst = src_1; |
6442 | src_1->cardinality = (uint32_t)bitset_flip_list_withcard( |
6443 | src_1->array, src_1->cardinality, src_2->array, src_2->cardinality); |
6444 | |
6445 | if (src_1->cardinality <= DEFAULT_MAX_SIZE) { |
6446 | *dst = array_container_from_bitset(src_1); |
6447 | bitset_container_free(src_1); |
6448 | return false; // not bitset |
6449 | } else |
6450 | return true; |
6451 | } |
6452 | |
6453 | /* a bunch of in-place, some of which may not *really* be inplace. |
6454 | * TODO: write actual inplace routine if efficiency warrants it |
6455 | * Anything inplace with a bitset is a good candidate |
6456 | */ |
6457 | |
6458 | bool bitset_bitset_container_ixor(bitset_container_t *src_1, |
6459 | const bitset_container_t *src_2, void **dst) { |
6460 | bool ans = bitset_bitset_container_xor(src_1, src_2, dst); |
6461 | bitset_container_free(src_1); |
6462 | return ans; |
6463 | } |
6464 | |
6465 | bool array_bitset_container_ixor(array_container_t *src_1, |
6466 | const bitset_container_t *src_2, void **dst) { |
6467 | bool ans = array_bitset_container_xor(src_1, src_2, dst); |
6468 | array_container_free(src_1); |
6469 | return ans; |
6470 | } |
6471 | |
6472 | /* Compute the xor of src_1 and src_2 and write the result to |
6473 | * dst. Result may be either a bitset or an array container |
6474 | * (returns "result is bitset"). dst does not initially have |
6475 | * any container, but becomes either a bitset container (return |
6476 | * result true) or an array container. |
6477 | */ |
6478 | |
6479 | bool run_bitset_container_ixor(run_container_t *src_1, |
6480 | const bitset_container_t *src_2, void **dst) { |
6481 | bool ans = run_bitset_container_xor(src_1, src_2, dst); |
6482 | run_container_free(src_1); |
6483 | return ans; |
6484 | } |
6485 | |
6486 | bool bitset_run_container_ixor(bitset_container_t *src_1, |
6487 | const run_container_t *src_2, void **dst) { |
6488 | bool ans = run_bitset_container_xor(src_2, src_1, dst); |
6489 | bitset_container_free(src_1); |
6490 | return ans; |
6491 | } |
6492 | |
6493 | /* dst does not indicate a valid container initially. Eventually it |
6494 | * can become any kind of container. |
6495 | */ |
6496 | |
6497 | int array_run_container_ixor(array_container_t *src_1, |
6498 | const run_container_t *src_2, void **dst) { |
6499 | int ans = array_run_container_xor(src_1, src_2, dst); |
6500 | array_container_free(src_1); |
6501 | return ans; |
6502 | } |
6503 | |
6504 | int run_array_container_ixor(run_container_t *src_1, |
6505 | const array_container_t *src_2, void **dst) { |
6506 | int ans = array_run_container_xor(src_2, src_1, dst); |
6507 | run_container_free(src_1); |
6508 | return ans; |
6509 | } |
6510 | |
6511 | bool array_array_container_ixor(array_container_t *src_1, |
6512 | const array_container_t *src_2, void **dst) { |
6513 | bool ans = array_array_container_xor(src_1, src_2, dst); |
6514 | array_container_free(src_1); |
6515 | return ans; |
6516 | } |
6517 | |
6518 | int run_run_container_ixor(run_container_t *src_1, const run_container_t *src_2, |
6519 | void **dst) { |
6520 | int ans = run_run_container_xor(src_1, src_2, dst); |
6521 | run_container_free(src_1); |
6522 | return ans; |
6523 | } |
6524 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/mixed_xor.c */ |
6525 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */ |
6526 | #include <stdio.h> |
6527 | #include <stdlib.h> |
6528 | |
6529 | |
6530 | extern inline uint16_t run_container_minimum(const run_container_t *run); |
6531 | extern inline uint16_t run_container_maximum(const run_container_t *run); |
6532 | extern inline int32_t interleavedBinarySearch(const rle16_t *array, |
6533 | int32_t lenarray, uint16_t ikey); |
6534 | extern inline bool run_container_contains(const run_container_t *run, |
6535 | uint16_t pos); |
6536 | extern inline int run_container_index_equalorlarger(const run_container_t *arr, uint16_t x); |
6537 | extern bool run_container_is_full(const run_container_t *run); |
6538 | extern bool run_container_nonzero_cardinality(const run_container_t *r); |
6539 | extern void run_container_clear(run_container_t *run); |
6540 | extern int32_t run_container_serialized_size_in_bytes(int32_t num_runs); |
6541 | extern run_container_t *run_container_create_range(uint32_t start, |
6542 | uint32_t stop); |
6543 | |
6544 | bool run_container_add(run_container_t *run, uint16_t pos) { |
6545 | int32_t index = interleavedBinarySearch(run->runs, run->n_runs, pos); |
6546 | if (index >= 0) return false; // already there |
6547 | index = -index - 2; // points to preceding value, possibly -1 |
6548 | if (index >= 0) { // possible match |
6549 | int32_t offset = pos - run->runs[index].value; |
6550 | int32_t le = run->runs[index].length; |
6551 | if (offset <= le) return false; // already there |
6552 | if (offset == le + 1) { |
6553 | // we may need to fuse |
6554 | if (index + 1 < run->n_runs) { |
6555 | if (run->runs[index + 1].value == pos + 1) { |
6556 | // indeed fusion is needed |
6557 | run->runs[index].length = run->runs[index + 1].value + |
6558 | run->runs[index + 1].length - |
6559 | run->runs[index].value; |
6560 | recoverRoomAtIndex(run, (uint16_t)(index + 1)); |
6561 | return true; |
6562 | } |
6563 | } |
6564 | run->runs[index].length++; |
6565 | return true; |
6566 | } |
6567 | if (index + 1 < run->n_runs) { |
6568 | // we may need to fuse |
6569 | if (run->runs[index + 1].value == pos + 1) { |
6570 | // indeed fusion is needed |
6571 | run->runs[index + 1].value = pos; |
6572 | run->runs[index + 1].length = run->runs[index + 1].length + 1; |
6573 | return true; |
6574 | } |
6575 | } |
6576 | } |
6577 | if (index == -1) { |
6578 | // we may need to extend the first run |
6579 | if (0 < run->n_runs) { |
6580 | if (run->runs[0].value == pos + 1) { |
6581 | run->runs[0].length++; |
6582 | run->runs[0].value--; |
6583 | return true; |
6584 | } |
6585 | } |
6586 | } |
6587 | makeRoomAtIndex(run, (uint16_t)(index + 1)); |
6588 | run->runs[index + 1].value = pos; |
6589 | run->runs[index + 1].length = 0; |
6590 | return true; |
6591 | } |
6592 | |
6593 | /* Create a new run container. Return NULL in case of failure. */ |
6594 | run_container_t *run_container_create_given_capacity(int32_t size) { |
6595 | run_container_t *run; |
6596 | /* Allocate the run container itself. */ |
6597 | if ((run = (run_container_t *)malloc(sizeof(run_container_t))) == NULL) { |
6598 | return NULL; |
6599 | } |
6600 | if (size <= 0 ) { // we don't want to rely on malloc(0) |
6601 | run->runs = NULL; |
6602 | } else if ((run->runs = (rle16_t *)malloc(sizeof(rle16_t) * size)) == NULL) { |
6603 | free(run); |
6604 | return NULL; |
6605 | } |
6606 | run->capacity = size; |
6607 | run->n_runs = 0; |
6608 | return run; |
6609 | } |
6610 | |
6611 | int run_container_shrink_to_fit(run_container_t *src) { |
6612 | if (src->n_runs == src->capacity) return 0; // nothing to do |
6613 | int savings = src->capacity - src->n_runs; |
6614 | src->capacity = src->n_runs; |
6615 | rle16_t *oldruns = src->runs; |
6616 | src->runs = (rle16_t *)realloc(oldruns, src->capacity * sizeof(rle16_t)); |
6617 | if (src->runs == NULL) free(oldruns); // should never happen? |
6618 | return savings; |
6619 | } |
6620 | /* Create a new run container. Return NULL in case of failure. */ |
6621 | run_container_t *run_container_create(void) { |
6622 | return run_container_create_given_capacity(RUN_DEFAULT_INIT_SIZE); |
6623 | } |
6624 | |
6625 | run_container_t *run_container_clone(const run_container_t *src) { |
6626 | run_container_t *run = run_container_create_given_capacity(src->capacity); |
6627 | if (run == NULL) return NULL; |
6628 | run->capacity = src->capacity; |
6629 | run->n_runs = src->n_runs; |
6630 | memcpy(run->runs, src->runs, src->n_runs * sizeof(rle16_t)); |
6631 | return run; |
6632 | } |
6633 | |
6634 | /* Free memory. */ |
6635 | void run_container_free(run_container_t *run) { |
6636 | if(run->runs != NULL) {// Jon Strabala reports that some tools complain otherwise |
6637 | free(run->runs); |
6638 | run->runs = NULL; // pedantic |
6639 | } |
6640 | free(run); |
6641 | } |
6642 | |
6643 | void run_container_grow(run_container_t *run, int32_t min, bool copy) { |
6644 | int32_t newCapacity = |
6645 | (run->capacity == 0) |
6646 | ? RUN_DEFAULT_INIT_SIZE |
6647 | : run->capacity < 64 ? run->capacity * 2 |
6648 | : run->capacity < 1024 ? run->capacity * 3 / 2 |
6649 | : run->capacity * 5 / 4; |
6650 | if (newCapacity < min) newCapacity = min; |
6651 | run->capacity = newCapacity; |
6652 | assert(run->capacity >= min); |
6653 | if (copy) { |
6654 | rle16_t *oldruns = run->runs; |
6655 | run->runs = |
6656 | (rle16_t *)realloc(oldruns, run->capacity * sizeof(rle16_t)); |
6657 | if (run->runs == NULL) free(oldruns); |
6658 | } else { |
6659 | // Jon Strabala reports that some tools complain otherwise |
6660 | if (run->runs != NULL) { |
6661 | free(run->runs); |
6662 | } |
6663 | run->runs = (rle16_t *)malloc(run->capacity * sizeof(rle16_t)); |
6664 | } |
6665 | // handle the case where realloc fails |
6666 | if (run->runs == NULL) { |
6667 | fprintf(stderr, "could not allocate memory\n" ); |
6668 | } |
6669 | assert(run->runs != NULL); |
6670 | } |
6671 | |
6672 | /* copy one container into another */ |
6673 | void run_container_copy(const run_container_t *src, run_container_t *dst) { |
6674 | const int32_t n_runs = src->n_runs; |
6675 | if (src->n_runs > dst->capacity) { |
6676 | run_container_grow(dst, n_runs, false); |
6677 | } |
6678 | dst->n_runs = n_runs; |
6679 | memcpy(dst->runs, src->runs, sizeof(rle16_t) * n_runs); |
6680 | } |
6681 | |
6682 | /* Compute the union of `src_1' and `src_2' and write the result to `dst' |
6683 | * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ |
6684 | void run_container_union(const run_container_t *src_1, |
6685 | const run_container_t *src_2, run_container_t *dst) { |
6686 | // TODO: this could be a lot more efficient |
6687 | |
6688 | // we start out with inexpensive checks |
6689 | const bool if1 = run_container_is_full(src_1); |
6690 | const bool if2 = run_container_is_full(src_2); |
6691 | if (if1 || if2) { |
6692 | if (if1) { |
6693 | run_container_copy(src_1, dst); |
6694 | return; |
6695 | } |
6696 | if (if2) { |
6697 | run_container_copy(src_2, dst); |
6698 | return; |
6699 | } |
6700 | } |
6701 | const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; |
6702 | if (dst->capacity < neededcapacity) |
6703 | run_container_grow(dst, neededcapacity, false); |
6704 | dst->n_runs = 0; |
6705 | int32_t rlepos = 0; |
6706 | int32_t xrlepos = 0; |
6707 | |
6708 | rle16_t previousrle; |
6709 | if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { |
6710 | previousrle = run_container_append_first(dst, src_1->runs[rlepos]); |
6711 | rlepos++; |
6712 | } else { |
6713 | previousrle = run_container_append_first(dst, src_2->runs[xrlepos]); |
6714 | xrlepos++; |
6715 | } |
6716 | |
6717 | while ((xrlepos < src_2->n_runs) && (rlepos < src_1->n_runs)) { |
6718 | rle16_t newrl; |
6719 | if (src_1->runs[rlepos].value <= src_2->runs[xrlepos].value) { |
6720 | newrl = src_1->runs[rlepos]; |
6721 | rlepos++; |
6722 | } else { |
6723 | newrl = src_2->runs[xrlepos]; |
6724 | xrlepos++; |
6725 | } |
6726 | run_container_append(dst, newrl, &previousrle); |
6727 | } |
6728 | while (xrlepos < src_2->n_runs) { |
6729 | run_container_append(dst, src_2->runs[xrlepos], &previousrle); |
6730 | xrlepos++; |
6731 | } |
6732 | while (rlepos < src_1->n_runs) { |
6733 | run_container_append(dst, src_1->runs[rlepos], &previousrle); |
6734 | rlepos++; |
6735 | } |
6736 | } |
6737 | |
6738 | /* Compute the union of `src_1' and `src_2' and write the result to `src_1' |
6739 | */ |
6740 | void run_container_union_inplace(run_container_t *src_1, |
6741 | const run_container_t *src_2) { |
6742 | // TODO: this could be a lot more efficient |
6743 | |
6744 | // we start out with inexpensive checks |
6745 | const bool if1 = run_container_is_full(src_1); |
6746 | const bool if2 = run_container_is_full(src_2); |
6747 | if (if1 || if2) { |
6748 | if (if1) { |
6749 | return; |
6750 | } |
6751 | if (if2) { |
6752 | run_container_copy(src_2, src_1); |
6753 | return; |
6754 | } |
6755 | } |
6756 | // we move the data to the end of the current array |
6757 | const int32_t maxoutput = src_1->n_runs + src_2->n_runs; |
6758 | const int32_t neededcapacity = maxoutput + src_1->n_runs; |
6759 | if (src_1->capacity < neededcapacity) |
6760 | run_container_grow(src_1, neededcapacity, true); |
6761 | memmove(src_1->runs + maxoutput, src_1->runs, |
6762 | src_1->n_runs * sizeof(rle16_t)); |
6763 | rle16_t *inputsrc1 = src_1->runs + maxoutput; |
6764 | const int32_t input1nruns = src_1->n_runs; |
6765 | src_1->n_runs = 0; |
6766 | int32_t rlepos = 0; |
6767 | int32_t xrlepos = 0; |
6768 | |
6769 | rle16_t previousrle; |
6770 | if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { |
6771 | previousrle = run_container_append_first(src_1, inputsrc1[rlepos]); |
6772 | rlepos++; |
6773 | } else { |
6774 | previousrle = run_container_append_first(src_1, src_2->runs[xrlepos]); |
6775 | xrlepos++; |
6776 | } |
6777 | while ((xrlepos < src_2->n_runs) && (rlepos < input1nruns)) { |
6778 | rle16_t newrl; |
6779 | if (inputsrc1[rlepos].value <= src_2->runs[xrlepos].value) { |
6780 | newrl = inputsrc1[rlepos]; |
6781 | rlepos++; |
6782 | } else { |
6783 | newrl = src_2->runs[xrlepos]; |
6784 | xrlepos++; |
6785 | } |
6786 | run_container_append(src_1, newrl, &previousrle); |
6787 | } |
6788 | while (xrlepos < src_2->n_runs) { |
6789 | run_container_append(src_1, src_2->runs[xrlepos], &previousrle); |
6790 | xrlepos++; |
6791 | } |
6792 | while (rlepos < input1nruns) { |
6793 | run_container_append(src_1, inputsrc1[rlepos], &previousrle); |
6794 | rlepos++; |
6795 | } |
6796 | } |
6797 | |
6798 | /* Compute the symmetric difference of `src_1' and `src_2' and write the result |
6799 | * to `dst' |
6800 | * It is assumed that `dst' is distinct from both `src_1' and `src_2'. */ |
6801 | void run_container_xor(const run_container_t *src_1, |
6802 | const run_container_t *src_2, run_container_t *dst) { |
6803 | // don't bother to convert xor with full range into negation |
6804 | // since negation is implemented similarly |
6805 | |
6806 | const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; |
6807 | if (dst->capacity < neededcapacity) |
6808 | run_container_grow(dst, neededcapacity, false); |
6809 | |
6810 | int32_t pos1 = 0; |
6811 | int32_t pos2 = 0; |
6812 | dst->n_runs = 0; |
6813 | |
6814 | while ((pos1 < src_1->n_runs) && (pos2 < src_2->n_runs)) { |
6815 | if (src_1->runs[pos1].value <= src_2->runs[pos2].value) { |
6816 | run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, |
6817 | src_1->runs[pos1].length); |
6818 | pos1++; |
6819 | } else { |
6820 | run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, |
6821 | src_2->runs[pos2].length); |
6822 | pos2++; |
6823 | } |
6824 | } |
6825 | while (pos1 < src_1->n_runs) { |
6826 | run_container_smart_append_exclusive(dst, src_1->runs[pos1].value, |
6827 | src_1->runs[pos1].length); |
6828 | pos1++; |
6829 | } |
6830 | |
6831 | while (pos2 < src_2->n_runs) { |
6832 | run_container_smart_append_exclusive(dst, src_2->runs[pos2].value, |
6833 | src_2->runs[pos2].length); |
6834 | pos2++; |
6835 | } |
6836 | } |
6837 | |
6838 | /* Compute the intersection of src_1 and src_2 and write the result to |
6839 | * dst. It is assumed that dst is distinct from both src_1 and src_2. */ |
6840 | void run_container_intersection(const run_container_t *src_1, |
6841 | const run_container_t *src_2, |
6842 | run_container_t *dst) { |
6843 | const bool if1 = run_container_is_full(src_1); |
6844 | const bool if2 = run_container_is_full(src_2); |
6845 | if (if1 || if2) { |
6846 | if (if1) { |
6847 | run_container_copy(src_2, dst); |
6848 | return; |
6849 | } |
6850 | if (if2) { |
6851 | run_container_copy(src_1, dst); |
6852 | return; |
6853 | } |
6854 | } |
6855 | // TODO: this could be a lot more efficient, could use SIMD optimizations |
6856 | const int32_t neededcapacity = src_1->n_runs + src_2->n_runs; |
6857 | if (dst->capacity < neededcapacity) |
6858 | run_container_grow(dst, neededcapacity, false); |
6859 | dst->n_runs = 0; |
6860 | int32_t rlepos = 0; |
6861 | int32_t xrlepos = 0; |
6862 | int32_t start = src_1->runs[rlepos].value; |
6863 | int32_t end = start + src_1->runs[rlepos].length + 1; |
6864 | int32_t xstart = src_2->runs[xrlepos].value; |
6865 | int32_t xend = xstart + src_2->runs[xrlepos].length + 1; |
6866 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { |
6867 | if (end <= xstart) { |
6868 | ++rlepos; |
6869 | if (rlepos < src_1->n_runs) { |
6870 | start = src_1->runs[rlepos].value; |
6871 | end = start + src_1->runs[rlepos].length + 1; |
6872 | } |
6873 | } else if (xend <= start) { |
6874 | ++xrlepos; |
6875 | if (xrlepos < src_2->n_runs) { |
6876 | xstart = src_2->runs[xrlepos].value; |
6877 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6878 | } |
6879 | } else { // they overlap |
6880 | const int32_t lateststart = start > xstart ? start : xstart; |
6881 | int32_t earliestend; |
6882 | if (end == xend) { // improbable |
6883 | earliestend = end; |
6884 | rlepos++; |
6885 | xrlepos++; |
6886 | if (rlepos < src_1->n_runs) { |
6887 | start = src_1->runs[rlepos].value; |
6888 | end = start + src_1->runs[rlepos].length + 1; |
6889 | } |
6890 | if (xrlepos < src_2->n_runs) { |
6891 | xstart = src_2->runs[xrlepos].value; |
6892 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6893 | } |
6894 | } else if (end < xend) { |
6895 | earliestend = end; |
6896 | rlepos++; |
6897 | if (rlepos < src_1->n_runs) { |
6898 | start = src_1->runs[rlepos].value; |
6899 | end = start + src_1->runs[rlepos].length + 1; |
6900 | } |
6901 | |
6902 | } else { // end > xend |
6903 | earliestend = xend; |
6904 | xrlepos++; |
6905 | if (xrlepos < src_2->n_runs) { |
6906 | xstart = src_2->runs[xrlepos].value; |
6907 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6908 | } |
6909 | } |
6910 | dst->runs[dst->n_runs].value = (uint16_t)lateststart; |
6911 | dst->runs[dst->n_runs].length = |
6912 | (uint16_t)(earliestend - lateststart - 1); |
6913 | dst->n_runs++; |
6914 | } |
6915 | } |
6916 | } |
6917 | |
6918 | /* Compute the size of the intersection of src_1 and src_2 . */ |
6919 | int run_container_intersection_cardinality(const run_container_t *src_1, |
6920 | const run_container_t *src_2) { |
6921 | const bool if1 = run_container_is_full(src_1); |
6922 | const bool if2 = run_container_is_full(src_2); |
6923 | if (if1 || if2) { |
6924 | if (if1) { |
6925 | return run_container_cardinality(src_2); |
6926 | } |
6927 | if (if2) { |
6928 | return run_container_cardinality(src_1); |
6929 | } |
6930 | } |
6931 | int answer = 0; |
6932 | int32_t rlepos = 0; |
6933 | int32_t xrlepos = 0; |
6934 | int32_t start = src_1->runs[rlepos].value; |
6935 | int32_t end = start + src_1->runs[rlepos].length + 1; |
6936 | int32_t xstart = src_2->runs[xrlepos].value; |
6937 | int32_t xend = xstart + src_2->runs[xrlepos].length + 1; |
6938 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { |
6939 | if (end <= xstart) { |
6940 | ++rlepos; |
6941 | if (rlepos < src_1->n_runs) { |
6942 | start = src_1->runs[rlepos].value; |
6943 | end = start + src_1->runs[rlepos].length + 1; |
6944 | } |
6945 | } else if (xend <= start) { |
6946 | ++xrlepos; |
6947 | if (xrlepos < src_2->n_runs) { |
6948 | xstart = src_2->runs[xrlepos].value; |
6949 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6950 | } |
6951 | } else { // they overlap |
6952 | const int32_t lateststart = start > xstart ? start : xstart; |
6953 | int32_t earliestend; |
6954 | if (end == xend) { // improbable |
6955 | earliestend = end; |
6956 | rlepos++; |
6957 | xrlepos++; |
6958 | if (rlepos < src_1->n_runs) { |
6959 | start = src_1->runs[rlepos].value; |
6960 | end = start + src_1->runs[rlepos].length + 1; |
6961 | } |
6962 | if (xrlepos < src_2->n_runs) { |
6963 | xstart = src_2->runs[xrlepos].value; |
6964 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6965 | } |
6966 | } else if (end < xend) { |
6967 | earliestend = end; |
6968 | rlepos++; |
6969 | if (rlepos < src_1->n_runs) { |
6970 | start = src_1->runs[rlepos].value; |
6971 | end = start + src_1->runs[rlepos].length + 1; |
6972 | } |
6973 | |
6974 | } else { // end > xend |
6975 | earliestend = xend; |
6976 | xrlepos++; |
6977 | if (xrlepos < src_2->n_runs) { |
6978 | xstart = src_2->runs[xrlepos].value; |
6979 | xend = xstart + src_2->runs[xrlepos].length + 1; |
6980 | } |
6981 | } |
6982 | answer += earliestend - lateststart; |
6983 | } |
6984 | } |
6985 | return answer; |
6986 | } |
6987 | |
6988 | bool run_container_intersect(const run_container_t *src_1, |
6989 | const run_container_t *src_2) { |
6990 | const bool if1 = run_container_is_full(src_1); |
6991 | const bool if2 = run_container_is_full(src_2); |
6992 | if (if1 || if2) { |
6993 | if (if1) { |
6994 | return !run_container_empty(src_2); |
6995 | } |
6996 | if (if2) { |
6997 | return !run_container_empty(src_1); |
6998 | } |
6999 | } |
7000 | int32_t rlepos = 0; |
7001 | int32_t xrlepos = 0; |
7002 | int32_t start = src_1->runs[rlepos].value; |
7003 | int32_t end = start + src_1->runs[rlepos].length + 1; |
7004 | int32_t xstart = src_2->runs[xrlepos].value; |
7005 | int32_t xend = xstart + src_2->runs[xrlepos].length + 1; |
7006 | while ((rlepos < src_1->n_runs) && (xrlepos < src_2->n_runs)) { |
7007 | if (end <= xstart) { |
7008 | ++rlepos; |
7009 | if (rlepos < src_1->n_runs) { |
7010 | start = src_1->runs[rlepos].value; |
7011 | end = start + src_1->runs[rlepos].length + 1; |
7012 | } |
7013 | } else if (xend <= start) { |
7014 | ++xrlepos; |
7015 | if (xrlepos < src_2->n_runs) { |
7016 | xstart = src_2->runs[xrlepos].value; |
7017 | xend = xstart + src_2->runs[xrlepos].length + 1; |
7018 | } |
7019 | } else { // they overlap |
7020 | return true; |
7021 | } |
7022 | } |
7023 | return false; |
7024 | } |
7025 | |
7026 | |
7027 | /* Compute the difference of src_1 and src_2 and write the result to |
7028 | * dst. It is assumed that dst is distinct from both src_1 and src_2. */ |
7029 | void run_container_andnot(const run_container_t *src_1, |
7030 | const run_container_t *src_2, run_container_t *dst) { |
7031 | // following Java implementation as of June 2016 |
7032 | |
7033 | if (dst->capacity < src_1->n_runs + src_2->n_runs) |
7034 | run_container_grow(dst, src_1->n_runs + src_2->n_runs, false); |
7035 | |
7036 | dst->n_runs = 0; |
7037 | |
7038 | int rlepos1 = 0; |
7039 | int rlepos2 = 0; |
7040 | int32_t start = src_1->runs[rlepos1].value; |
7041 | int32_t end = start + src_1->runs[rlepos1].length + 1; |
7042 | int32_t start2 = src_2->runs[rlepos2].value; |
7043 | int32_t end2 = start2 + src_2->runs[rlepos2].length + 1; |
7044 | |
7045 | while ((rlepos1 < src_1->n_runs) && (rlepos2 < src_2->n_runs)) { |
7046 | if (end <= start2) { |
7047 | // output the first run |
7048 | dst->runs[dst->n_runs++] = |
7049 | (rle16_t){.value = (uint16_t)start, |
7050 | .length = (uint16_t)(end - start - 1)}; |
7051 | rlepos1++; |
7052 | if (rlepos1 < src_1->n_runs) { |
7053 | start = src_1->runs[rlepos1].value; |
7054 | end = start + src_1->runs[rlepos1].length + 1; |
7055 | } |
7056 | } else if (end2 <= start) { |
7057 | // exit the second run |
7058 | rlepos2++; |
7059 | if (rlepos2 < src_2->n_runs) { |
7060 | start2 = src_2->runs[rlepos2].value; |
7061 | end2 = start2 + src_2->runs[rlepos2].length + 1; |
7062 | } |
7063 | } else { |
7064 | if (start < start2) { |
7065 | dst->runs[dst->n_runs++] = |
7066 | (rle16_t){.value = (uint16_t)start, |
7067 | .length = (uint16_t)(start2 - start - 1)}; |
7068 | } |
7069 | if (end2 < end) { |
7070 | start = end2; |
7071 | } else { |
7072 | rlepos1++; |
7073 | if (rlepos1 < src_1->n_runs) { |
7074 | start = src_1->runs[rlepos1].value; |
7075 | end = start + src_1->runs[rlepos1].length + 1; |
7076 | } |
7077 | } |
7078 | } |
7079 | } |
7080 | if (rlepos1 < src_1->n_runs) { |
7081 | dst->runs[dst->n_runs++] = (rle16_t){ |
7082 | .value = (uint16_t)start, .length = (uint16_t)(end - start - 1)}; |
7083 | rlepos1++; |
7084 | if (rlepos1 < src_1->n_runs) { |
7085 | memcpy(dst->runs + dst->n_runs, src_1->runs + rlepos1, |
7086 | sizeof(rle16_t) * (src_1->n_runs - rlepos1)); |
7087 | dst->n_runs += src_1->n_runs - rlepos1; |
7088 | } |
7089 | } |
7090 | } |
7091 | |
7092 | int run_container_to_uint32_array(void *vout, const run_container_t *cont, |
7093 | uint32_t base) { |
7094 | int outpos = 0; |
7095 | uint32_t *out = (uint32_t *)vout; |
7096 | for (int i = 0; i < cont->n_runs; ++i) { |
7097 | uint32_t run_start = base + cont->runs[i].value; |
7098 | uint16_t le = cont->runs[i].length; |
7099 | for (int j = 0; j <= le; ++j) { |
7100 | uint32_t val = run_start + j; |
7101 | memcpy(out + outpos, &val, |
7102 | sizeof(uint32_t)); // should be compiled as a MOV on x64 |
7103 | outpos++; |
7104 | } |
7105 | } |
7106 | return outpos; |
7107 | } |
7108 | |
7109 | /* |
7110 | * Print this container using printf (useful for debugging). |
7111 | */ |
7112 | void run_container_printf(const run_container_t *cont) { |
7113 | for (int i = 0; i < cont->n_runs; ++i) { |
7114 | uint16_t run_start = cont->runs[i].value; |
7115 | uint16_t le = cont->runs[i].length; |
7116 | printf("[%d,%d]" , run_start, run_start + le); |
7117 | } |
7118 | } |
7119 | |
7120 | /* |
7121 | * Print this container using printf as a comma-separated list of 32-bit |
7122 | * integers starting at base. |
7123 | */ |
7124 | void run_container_printf_as_uint32_array(const run_container_t *cont, |
7125 | uint32_t base) { |
7126 | if (cont->n_runs == 0) return; |
7127 | { |
7128 | uint32_t run_start = base + cont->runs[0].value; |
7129 | uint16_t le = cont->runs[0].length; |
7130 | printf("%u" , run_start); |
7131 | for (uint32_t j = 1; j <= le; ++j) printf(",%u" , run_start + j); |
7132 | } |
7133 | for (int32_t i = 1; i < cont->n_runs; ++i) { |
7134 | uint32_t run_start = base + cont->runs[i].value; |
7135 | uint16_t le = cont->runs[i].length; |
7136 | for (uint32_t j = 0; j <= le; ++j) printf(",%u" , run_start + j); |
7137 | } |
7138 | } |
7139 | |
7140 | int32_t run_container_serialize(const run_container_t *container, char *buf) { |
7141 | int32_t l, off; |
7142 | |
7143 | memcpy(buf, &container->n_runs, off = sizeof(container->n_runs)); |
7144 | memcpy(&buf[off], &container->capacity, sizeof(container->capacity)); |
7145 | off += sizeof(container->capacity); |
7146 | |
7147 | l = sizeof(rle16_t) * container->n_runs; |
7148 | memcpy(&buf[off], container->runs, l); |
7149 | return (off + l); |
7150 | } |
7151 | |
7152 | int32_t run_container_write(const run_container_t *container, char *buf) { |
7153 | memcpy(buf, &container->n_runs, sizeof(uint16_t)); |
7154 | memcpy(buf + sizeof(uint16_t), container->runs, |
7155 | container->n_runs * sizeof(rle16_t)); |
7156 | return run_container_size_in_bytes(container); |
7157 | } |
7158 | |
7159 | int32_t run_container_read(int32_t cardinality, run_container_t *container, |
7160 | const char *buf) { |
7161 | (void)cardinality; |
7162 | memcpy(&container->n_runs, buf, sizeof(uint16_t)); |
7163 | if (container->n_runs > container->capacity) |
7164 | run_container_grow(container, container->n_runs, false); |
7165 | if(container->n_runs > 0) { |
7166 | memcpy(container->runs, buf + sizeof(uint16_t), |
7167 | container->n_runs * sizeof(rle16_t)); |
7168 | } |
7169 | return run_container_size_in_bytes(container); |
7170 | } |
7171 | |
7172 | uint32_t run_container_serialization_len(const run_container_t *container) { |
7173 | return (sizeof(container->n_runs) + sizeof(container->capacity) + |
7174 | sizeof(rle16_t) * container->n_runs); |
7175 | } |
7176 | |
7177 | void *run_container_deserialize(const char *buf, size_t buf_len) { |
7178 | run_container_t *ptr; |
7179 | |
7180 | if (buf_len < 8 /* n_runs + capacity */) |
7181 | return (NULL); |
7182 | else |
7183 | buf_len -= 8; |
7184 | |
7185 | if ((ptr = (run_container_t *)malloc(sizeof(run_container_t))) != NULL) { |
7186 | size_t len; |
7187 | int32_t off; |
7188 | |
7189 | memcpy(&ptr->n_runs, buf, off = 4); |
7190 | memcpy(&ptr->capacity, &buf[off], 4); |
7191 | off += 4; |
7192 | |
7193 | len = sizeof(rle16_t) * ptr->n_runs; |
7194 | |
7195 | if (len != buf_len) { |
7196 | free(ptr); |
7197 | return (NULL); |
7198 | } |
7199 | |
7200 | if ((ptr->runs = (rle16_t *)malloc(len)) == NULL) { |
7201 | free(ptr); |
7202 | return (NULL); |
7203 | } |
7204 | |
7205 | memcpy(ptr->runs, &buf[off], len); |
7206 | |
7207 | /* Check if returned values are monotonically increasing */ |
7208 | for (int32_t i = 0, j = 0; i < ptr->n_runs; i++) { |
7209 | if (ptr->runs[i].value < j) { |
7210 | free(ptr->runs); |
7211 | free(ptr); |
7212 | return (NULL); |
7213 | } else |
7214 | j = ptr->runs[i].value; |
7215 | } |
7216 | } |
7217 | |
7218 | return (ptr); |
7219 | } |
7220 | |
7221 | bool run_container_iterate(const run_container_t *cont, uint32_t base, |
7222 | roaring_iterator iterator, void *ptr) { |
7223 | for (int i = 0; i < cont->n_runs; ++i) { |
7224 | uint32_t run_start = base + cont->runs[i].value; |
7225 | uint16_t le = cont->runs[i].length; |
7226 | |
7227 | for (int j = 0; j <= le; ++j) |
7228 | if (!iterator(run_start + j, ptr)) return false; |
7229 | } |
7230 | return true; |
7231 | } |
7232 | |
7233 | bool run_container_iterate64(const run_container_t *cont, uint32_t base, |
7234 | roaring_iterator64 iterator, uint64_t high_bits, |
7235 | void *ptr) { |
7236 | for (int i = 0; i < cont->n_runs; ++i) { |
7237 | uint32_t run_start = base + cont->runs[i].value; |
7238 | uint16_t le = cont->runs[i].length; |
7239 | |
7240 | for (int j = 0; j <= le; ++j) |
7241 | if (!iterator(high_bits | (uint64_t)(run_start + j), ptr)) |
7242 | return false; |
7243 | } |
7244 | return true; |
7245 | } |
7246 | |
7247 | bool run_container_equals(const run_container_t *container1, |
7248 | const run_container_t *container2) { |
7249 | if (container1->n_runs != container2->n_runs) { |
7250 | return false; |
7251 | } |
7252 | for (int32_t i = 0; i < container1->n_runs; ++i) { |
7253 | if ((container1->runs[i].value != container2->runs[i].value) || |
7254 | (container1->runs[i].length != container2->runs[i].length)) |
7255 | return false; |
7256 | } |
7257 | return true; |
7258 | } |
7259 | |
7260 | bool run_container_is_subset(const run_container_t *container1, |
7261 | const run_container_t *container2) { |
7262 | int i1 = 0, i2 = 0; |
7263 | while (i1 < container1->n_runs && i2 < container2->n_runs) { |
7264 | int start1 = container1->runs[i1].value; |
7265 | int stop1 = start1 + container1->runs[i1].length; |
7266 | int start2 = container2->runs[i2].value; |
7267 | int stop2 = start2 + container2->runs[i2].length; |
7268 | if (start1 < start2) { |
7269 | return false; |
7270 | } else { // start1 >= start2 |
7271 | if (stop1 < stop2) { |
7272 | i1++; |
7273 | } else if (stop1 == stop2) { |
7274 | i1++; |
7275 | i2++; |
7276 | } else { // stop1 > stop2 |
7277 | i2++; |
7278 | } |
7279 | } |
7280 | } |
7281 | if (i1 == container1->n_runs) { |
7282 | return true; |
7283 | } else { |
7284 | return false; |
7285 | } |
7286 | } |
7287 | |
7288 | // TODO: write smart_append_exclusive version to match the overloaded 1 param |
7289 | // Java version (or is it even used?) |
7290 | |
7291 | // follows the Java implementation closely |
7292 | // length is the rle-value. Ie, run [10,12) uses a length value 1. |
7293 | void run_container_smart_append_exclusive(run_container_t *src, |
7294 | const uint16_t start, |
7295 | const uint16_t length) { |
7296 | int old_end; |
7297 | rle16_t *last_run = src->n_runs ? src->runs + (src->n_runs - 1) : NULL; |
7298 | rle16_t *appended_last_run = src->runs + src->n_runs; |
7299 | |
7300 | if (!src->n_runs || |
7301 | (start > (old_end = last_run->value + last_run->length + 1))) { |
7302 | *appended_last_run = (rle16_t){.value = start, .length = length}; |
7303 | src->n_runs++; |
7304 | return; |
7305 | } |
7306 | if (old_end == start) { |
7307 | // we merge |
7308 | last_run->length += (length + 1); |
7309 | return; |
7310 | } |
7311 | int new_end = start + length + 1; |
7312 | |
7313 | if (start == last_run->value) { |
7314 | // wipe out previous |
7315 | if (new_end < old_end) { |
7316 | *last_run = (rle16_t){.value = (uint16_t)new_end, |
7317 | .length = (uint16_t)(old_end - new_end - 1)}; |
7318 | return; |
7319 | } else if (new_end > old_end) { |
7320 | *last_run = (rle16_t){.value = (uint16_t)old_end, |
7321 | .length = (uint16_t)(new_end - old_end - 1)}; |
7322 | return; |
7323 | } else { |
7324 | src->n_runs--; |
7325 | return; |
7326 | } |
7327 | } |
7328 | last_run->length = start - last_run->value - 1; |
7329 | if (new_end < old_end) { |
7330 | *appended_last_run = |
7331 | (rle16_t){.value = (uint16_t)new_end, |
7332 | .length = (uint16_t)(old_end - new_end - 1)}; |
7333 | src->n_runs++; |
7334 | } else if (new_end > old_end) { |
7335 | *appended_last_run = |
7336 | (rle16_t){.value = (uint16_t)old_end, |
7337 | .length = (uint16_t)(new_end - old_end - 1)}; |
7338 | src->n_runs++; |
7339 | } |
7340 | } |
7341 | |
7342 | bool run_container_select(const run_container_t *container, |
7343 | uint32_t *start_rank, uint32_t rank, |
7344 | uint32_t *element) { |
7345 | for (int i = 0; i < container->n_runs; i++) { |
7346 | uint16_t length = container->runs[i].length; |
7347 | if (rank <= *start_rank + length) { |
7348 | uint16_t value = container->runs[i].value; |
7349 | *element = value + rank - (*start_rank); |
7350 | return true; |
7351 | } else |
7352 | *start_rank += length + 1; |
7353 | } |
7354 | return false; |
7355 | } |
7356 | |
7357 | int run_container_rank(const run_container_t *container, uint16_t x) { |
7358 | int sum = 0; |
7359 | uint32_t x32 = x; |
7360 | for (int i = 0; i < container->n_runs; i++) { |
7361 | uint32_t startpoint = container->runs[i].value; |
7362 | uint32_t length = container->runs[i].length; |
7363 | uint32_t endpoint = length + startpoint; |
7364 | if (x <= endpoint) { |
7365 | if (x < startpoint) break; |
7366 | return sum + (x32 - startpoint) + 1; |
7367 | } else { |
7368 | sum += length + 1; |
7369 | } |
7370 | } |
7371 | return sum; |
7372 | } |
7373 | /* end file /opt/bitmap/CRoaring-0.2.57/src/containers/run.c */ |
7374 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */ |
7375 | #include <assert.h> |
7376 | #include <stdarg.h> |
7377 | #include <stdint.h> |
7378 | #include <stdio.h> |
7379 | #include <string.h> |
7380 | #include <inttypes.h> |
7381 | |
7382 | extern inline bool roaring_bitmap_contains(const roaring_bitmap_t *r, |
7383 | uint32_t val); |
7384 | |
7385 | // this is like roaring_bitmap_add, but it populates pointer arguments in such a |
7386 | // way |
7387 | // that we can recover the container touched, which, in turn can be used to |
7388 | // accelerate some functions (when you repeatedly need to add to the same |
7389 | // container) |
7390 | void *containerptr_roaring_bitmap_add(roaring_bitmap_t *r, |
7391 | uint32_t val, |
7392 | uint8_t *typecode, |
7393 | int *index) { |
7394 | uint16_t hb = val >> 16; |
7395 | const int i = ra_get_index(&r->high_low_container, hb); |
7396 | if (i >= 0) { |
7397 | ra_unshare_container_at_index(&r->high_low_container, i); |
7398 | void *container = |
7399 | ra_get_container_at_index(&r->high_low_container, i, typecode); |
7400 | uint8_t newtypecode = *typecode; |
7401 | void *container2 = |
7402 | container_add(container, val & 0xFFFF, *typecode, &newtypecode); |
7403 | *index = i; |
7404 | if (container2 != container) { |
7405 | container_free(container, *typecode); |
7406 | ra_set_container_at_index(&r->high_low_container, i, container2, |
7407 | newtypecode); |
7408 | *typecode = newtypecode; |
7409 | return container2; |
7410 | } else { |
7411 | return container; |
7412 | } |
7413 | } else { |
7414 | array_container_t *newac = array_container_create(); |
7415 | void *container = container_add(newac, val & 0xFFFF, |
7416 | ARRAY_CONTAINER_TYPE_CODE, typecode); |
7417 | // we could just assume that it stays an array container |
7418 | ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, |
7419 | container, *typecode); |
7420 | *index = -i - 1; |
7421 | return container; |
7422 | } |
7423 | } |
7424 | |
7425 | roaring_bitmap_t *roaring_bitmap_create() { |
7426 | roaring_bitmap_t *ans = |
7427 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
7428 | if (!ans) { |
7429 | return NULL; |
7430 | } |
7431 | bool is_ok = ra_init(&ans->high_low_container); |
7432 | if (!is_ok) { |
7433 | free(ans); |
7434 | return NULL; |
7435 | } |
7436 | ans->copy_on_write = false; |
7437 | return ans; |
7438 | } |
7439 | |
7440 | roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) { |
7441 | roaring_bitmap_t *ans = |
7442 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
7443 | if (!ans) { |
7444 | return NULL; |
7445 | } |
7446 | bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap); |
7447 | if (!is_ok) { |
7448 | free(ans); |
7449 | return NULL; |
7450 | } |
7451 | ans->copy_on_write = false; |
7452 | return ans; |
7453 | } |
7454 | |
7455 | void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args, |
7456 | const uint32_t *vals) { |
7457 | void *container = NULL; // hold value of last container touched |
7458 | uint8_t typecode = 0; // typecode of last container touched |
7459 | uint32_t prev = 0; // previous valued inserted |
7460 | size_t i = 0; // index of value |
7461 | int containerindex = 0; |
7462 | if (n_args == 0) return; |
7463 | uint32_t val; |
7464 | memcpy(&val, vals + i, sizeof(val)); |
7465 | container = |
7466 | containerptr_roaring_bitmap_add(r, val, &typecode, &containerindex); |
7467 | prev = val; |
7468 | i++; |
7469 | for (; i < n_args; i++) { |
7470 | memcpy(&val, vals + i, sizeof(val)); |
7471 | if (((prev ^ val) >> 16) == |
7472 | 0) { // no need to seek the container, it is at hand |
7473 | // because we already have the container at hand, we can do the |
7474 | // insertion |
7475 | // automatically, bypassing the roaring_bitmap_add call |
7476 | uint8_t newtypecode = typecode; |
7477 | void *container2 = |
7478 | container_add(container, val & 0xFFFF, typecode, &newtypecode); |
7479 | if (container2 != container) { // rare instance when we need to |
7480 | // change the container type |
7481 | container_free(container, typecode); |
7482 | ra_set_container_at_index(&r->high_low_container, |
7483 | containerindex, container2, |
7484 | newtypecode); |
7485 | typecode = newtypecode; |
7486 | container = container2; |
7487 | } |
7488 | } else { |
7489 | container = containerptr_roaring_bitmap_add(r, val, &typecode, |
7490 | &containerindex); |
7491 | } |
7492 | prev = val; |
7493 | } |
7494 | } |
7495 | |
7496 | roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) { |
7497 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
7498 | roaring_bitmap_add_many(answer, n_args, vals); |
7499 | return answer; |
7500 | } |
7501 | |
7502 | roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) { |
7503 | // todo: could be greatly optimized but we do not expect this call to ever |
7504 | // include long lists |
7505 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
7506 | va_list ap; |
7507 | va_start(ap, n_args); |
7508 | for (size_t i = 1; i <= n_args; i++) { |
7509 | uint32_t val = va_arg(ap, uint32_t); |
7510 | roaring_bitmap_add(answer, val); |
7511 | } |
7512 | va_end(ap); |
7513 | return answer; |
7514 | } |
7515 | |
/* Smaller of two unsigned 32-bit values. */
static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
    if (b <= a) {
        return b;
    }
    return a;
}
7519 | |
/* Smaller of two unsigned 64-bit values. */
static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
    if (b <= a) {
        return b;
    }
    return a;
}
7523 | |
7524 | roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max, |
7525 | uint32_t step) { |
7526 | if(max >= UINT64_C(0x100000000)) { |
7527 | max = UINT64_C(0x100000000); |
7528 | } |
7529 | if (step == 0) return NULL; |
7530 | if (max <= min) return NULL; |
7531 | roaring_bitmap_t *answer = roaring_bitmap_create(); |
7532 | if (step >= (1 << 16)) { |
7533 | for (uint32_t value = (uint32_t)min; value < max; value += step) { |
7534 | roaring_bitmap_add(answer, value); |
7535 | } |
7536 | return answer; |
7537 | } |
7538 | uint64_t min_tmp = min; |
7539 | do { |
7540 | uint32_t key = (uint32_t)min_tmp >> 16; |
7541 | uint32_t container_min = min_tmp & 0xFFFF; |
7542 | uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16); |
7543 | uint8_t type; |
7544 | void *container = container_from_range(&type, container_min, |
7545 | container_max, (uint16_t)step); |
7546 | ra_append(&answer->high_low_container, key, container, type); |
7547 | uint32_t gap = container_max - container_min + step - 1; |
7548 | min_tmp += gap - (gap % step); |
7549 | } while (min_tmp < max); |
7550 | // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step |
7551 | return answer; |
7552 | } |
7553 | |
/**
 * Add every value of the closed interval [min, max] to the bitmap.
 * Does nothing when min > max.
 */
void roaring_bitmap_add_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
    if (min > max) {
        return;
    }

    // The high 16 bits of a value are its container key; the low 16 bits are
    // the position inside that container.
    uint32_t min_key = min >> 16;
    uint32_t max_key = max >> 16;

    // One container is needed for each key in [min_key, max_key].
    int32_t num_required_containers = max_key - min_key + 1;
    // NOTE(review): presumably the number of stored keys above max_key —
    // confirm against count_greater.
    int32_t suffix_length = count_greater(ra->high_low_container.keys,
                                          ra->high_low_container.size,
                                          max_key);
    // NOTE(review): presumably the number of stored keys below min_key —
    // confirm against count_less.
    int32_t prefix_length = count_less(ra->high_low_container.keys,
                                       ra->high_low_container.size - suffix_length,
                                       min_key);
    // Whatever is neither prefix nor suffix lies within the key span.
    int32_t common_length = ra->high_low_container.size - prefix_length - suffix_length;

    if (num_required_containers > common_length) {
        // NOTE(review): presumably moves the last suffix_length entries up to
        // make room for the missing containers and grows size — confirm
        // against ra_shift_tail.
        ra_shift_tail(&ra->high_low_container, suffix_length,
                      num_required_containers - common_length);
    }

    // src: last pre-existing entry inside the key span.
    // dst: last slot of the (possibly enlarged) span. Both move downwards.
    int32_t src = prefix_length + common_length - 1;
    int32_t dst = ra->high_low_container.size - suffix_length - 1;
    // key is unsigned: when min_key is 0, min_key-1 wraps to UINT32_MAX and so
    // does key after its final decrement, which ends the loop.
    for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0
        // Only the first and last containers are partially covered; the ones
        // in between receive the full 0..0xffff range.
        uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
        uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
        void* new_container;
        uint8_t new_type;

        if (src >= 0 && ra->high_low_container.keys[src] == key) {
            // A container already exists for this key: extend it.
            ra_unshare_container_at_index(&ra->high_low_container, src);
            new_container = container_add_range(ra->high_low_container.containers[src],
                                                ra->high_low_container.typecodes[src],
                                                container_min, container_max, &new_type);
            if (new_container != ra->high_low_container.containers[src]) {
                // A different container came back; release the old one.
                container_free(ra->high_low_container.containers[src],
                               ra->high_low_container.typecodes[src]);
            }
            src--;
        } else {
            // No container for this key yet: build one covering the range.
            // container_max is inclusive here, hence the +1.
            new_container = container_from_range(&new_type, container_min,
                                                 container_max+1, 1);
        }
        ra_replace_key_and_container_at_index(&ra->high_low_container, dst,
                                              key, new_container, new_type);
        dst--;
    }
}
7603 | |
/**
 * Remove every value of the closed interval [min, max] from the bitmap.
 * Does nothing when min > max.
 */
void roaring_bitmap_remove_range_closed(roaring_bitmap_t *ra, uint32_t min, uint32_t max) {
    if (min > max) {
        return;
    }

    // The high 16 bits of a value are its container key.
    uint32_t min_key = min >> 16;
    uint32_t max_key = max >> 16;

    // NOTE(review): presumably the index of the first stored key >= min_key —
    // confirm against count_less.
    int32_t src = count_less(ra->high_low_container.keys, ra->high_low_container.size, min_key);
    // dst trails src: surviving containers are compacted towards dst.
    int32_t dst = src;
    while (src < ra->high_low_container.size && ra->high_low_container.keys[src] <= max_key) {
        // Only the first and last containers are partially covered; the ones
        // in between lose the full 0..0xffff range.
        uint32_t container_min = (min_key == ra->high_low_container.keys[src]) ? (min & 0xffff) : 0;
        uint32_t container_max = (max_key == ra->high_low_container.keys[src]) ? (max & 0xffff) : 0xffff;
        ra_unshare_container_at_index(&ra->high_low_container, src);
        void *new_container;
        uint8_t new_type;
        new_container = container_remove_range(ra->high_low_container.containers[src],
                                               ra->high_low_container.typecodes[src],
                                               container_min, container_max,
                                               &new_type);
        if (new_container != ra->high_low_container.containers[src]) {
            // A different pointer (possibly NULL) came back; release the old
            // container.
            container_free(ra->high_low_container.containers[src],
                           ra->high_low_container.typecodes[src]);
        }
        if (new_container) {
            // Something is left: keep it at the next compacted slot.
            ra_replace_key_and_container_at_index(&ra->high_low_container, dst,
                                                  ra->high_low_container.keys[src],
                                                  new_container, new_type);
            dst++;
        }
        src++;
    }
    if (src > dst) {
        // Some containers vanished. dst - src is negative here.
        // NOTE(review): presumably this moves the remaining tail down over
        // the gap and shrinks size — confirm against ra_shift_tail.
        ra_shift_tail(&ra->high_low_container, ra->high_low_container.size - src, dst - src);
    }
}
7640 | |
// Half-open-range counterparts of the *_range_closed functions above, declared
// here without a body.
// NOTE(review): presumably these are defined `inline` in roaring.h and the
// declarations below emit their external definitions in this translation
// unit — confirm against the header.
void roaring_bitmap_add_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max);
void roaring_bitmap_remove_range(roaring_bitmap_t *ra, uint64_t min, uint64_t max);
7643 | |
7644 | void roaring_bitmap_printf(const roaring_bitmap_t *ra) { |
7645 | printf("{" ); |
7646 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
7647 | container_printf_as_uint32_array( |
7648 | ra->high_low_container.containers[i], |
7649 | ra->high_low_container.typecodes[i], |
7650 | ((uint32_t)ra->high_low_container.keys[i]) << 16); |
7651 | if (i + 1 < ra->high_low_container.size) printf("," ); |
7652 | } |
7653 | printf("}" ); |
7654 | } |
7655 | |
7656 | void roaring_bitmap_printf_describe(const roaring_bitmap_t *ra) { |
7657 | printf("{" ); |
7658 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
7659 | printf("%d: %s (%d)" , ra->high_low_container.keys[i], |
7660 | get_full_container_name(ra->high_low_container.containers[i], |
7661 | ra->high_low_container.typecodes[i]), |
7662 | container_get_cardinality(ra->high_low_container.containers[i], |
7663 | ra->high_low_container.typecodes[i])); |
7664 | if (ra->high_low_container.typecodes[i] == SHARED_CONTAINER_TYPE_CODE) { |
7665 | printf( |
7666 | "(shared count = %" PRIu32 " )" , |
7667 | ((shared_container_t *)(ra->high_low_container.containers[i])) |
7668 | ->counter); |
7669 | } |
7670 | |
7671 | if (i + 1 < ra->high_low_container.size) printf(", " ); |
7672 | } |
7673 | printf("}" ); |
7674 | } |
7675 | |
/* Running minimum, maximum and sum of the values seen so far. */
typedef struct min_max_sum_s {
    uint32_t min;  // smallest value seen
    uint32_t max;  // largest value seen
    uint64_t sum;  // 64-bit total of all values seen
} min_max_sum_t;

/**
 * Iteration callback folding one value into a min_max_sum_t accumulator.
 *
 * param must point to a min_max_sum_t. Always returns true, so the iteration
 * is never cut short.
 */
static bool min_max_sum_fnc(uint32_t value, void *param) {
    min_max_sum_t *acc = (min_max_sum_t *)param;
    acc->max = (acc->max < value) ? value : acc->max;
    acc->min = (value < acc->min) ? value : acc->min;
    acc->sum += value;
    return true;
}
7689 | |
7690 | /** |
7691 | * (For advanced users.) |
7692 | * Collect statistics about the bitmap |
7693 | */ |
7694 | void roaring_bitmap_statistics(const roaring_bitmap_t *ra, |
7695 | roaring_statistics_t *stat) { |
7696 | memset(stat, 0, sizeof(*stat)); |
7697 | stat->n_containers = ra->high_low_container.size; |
7698 | stat->cardinality = roaring_bitmap_get_cardinality(ra); |
7699 | min_max_sum_t mms; |
7700 | mms.min = UINT32_C(0xFFFFFFFF); |
7701 | mms.max = UINT32_C(0); |
7702 | mms.sum = 0; |
7703 | roaring_iterate(ra, &min_max_sum_fnc, &mms); |
7704 | stat->min_value = mms.min; |
7705 | stat->max_value = mms.max; |
7706 | stat->sum_value = mms.sum; |
7707 | |
7708 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
7709 | uint8_t truetype = |
7710 | get_container_type(ra->high_low_container.containers[i], |
7711 | ra->high_low_container.typecodes[i]); |
7712 | uint32_t card = |
7713 | container_get_cardinality(ra->high_low_container.containers[i], |
7714 | ra->high_low_container.typecodes[i]); |
7715 | uint32_t sbytes = |
7716 | container_size_in_bytes(ra->high_low_container.containers[i], |
7717 | ra->high_low_container.typecodes[i]); |
7718 | switch (truetype) { |
7719 | case BITSET_CONTAINER_TYPE_CODE: |
7720 | stat->n_bitset_containers++; |
7721 | stat->n_values_bitset_containers += card; |
7722 | stat->n_bytes_bitset_containers += sbytes; |
7723 | break; |
7724 | case ARRAY_CONTAINER_TYPE_CODE: |
7725 | stat->n_array_containers++; |
7726 | stat->n_values_array_containers += card; |
7727 | stat->n_bytes_array_containers += sbytes; |
7728 | break; |
7729 | case RUN_CONTAINER_TYPE_CODE: |
7730 | stat->n_run_containers++; |
7731 | stat->n_values_run_containers += card; |
7732 | stat->n_bytes_run_containers += sbytes; |
7733 | break; |
7734 | default: |
7735 | assert(false); |
7736 | __builtin_unreachable(); |
7737 | } |
7738 | } |
7739 | } |
7740 | |
7741 | roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) { |
7742 | roaring_bitmap_t *ans = |
7743 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
7744 | if (!ans) { |
7745 | return NULL; |
7746 | } |
7747 | bool is_ok = ra_copy(&r->high_low_container, &ans->high_low_container, |
7748 | r->copy_on_write); |
7749 | if (!is_ok) { |
7750 | free(ans); |
7751 | return NULL; |
7752 | } |
7753 | ans->copy_on_write = r->copy_on_write; |
7754 | return ans; |
7755 | } |
7756 | |
7757 | bool roaring_bitmap_overwrite(roaring_bitmap_t *dest, |
7758 | const roaring_bitmap_t *src) { |
7759 | return ra_overwrite(&src->high_low_container, &dest->high_low_container, |
7760 | src->copy_on_write); |
7761 | } |
7762 | |
7763 | void roaring_bitmap_free(roaring_bitmap_t *r) { |
7764 | ra_clear(&r->high_low_container); |
7765 | free(r); |
7766 | } |
7767 | |
7768 | void roaring_bitmap_clear(roaring_bitmap_t *r) { |
7769 | ra_reset(&r->high_low_container); |
7770 | } |
7771 | |
7772 | void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) { |
7773 | const uint16_t hb = val >> 16; |
7774 | const int i = ra_get_index(&r->high_low_container, hb); |
7775 | uint8_t typecode; |
7776 | if (i >= 0) { |
7777 | ra_unshare_container_at_index(&r->high_low_container, i); |
7778 | void *container = |
7779 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
7780 | uint8_t newtypecode = typecode; |
7781 | void *container2 = |
7782 | container_add(container, val & 0xFFFF, typecode, &newtypecode); |
7783 | if (container2 != container) { |
7784 | container_free(container, typecode); |
7785 | ra_set_container_at_index(&r->high_low_container, i, container2, |
7786 | newtypecode); |
7787 | } |
7788 | } else { |
7789 | array_container_t *newac = array_container_create(); |
7790 | void *container = container_add(newac, val & 0xFFFF, |
7791 | ARRAY_CONTAINER_TYPE_CODE, &typecode); |
7792 | // we could just assume that it stays an array container |
7793 | ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, |
7794 | container, typecode); |
7795 | } |
7796 | } |
7797 | |
7798 | bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) { |
7799 | const uint16_t hb = val >> 16; |
7800 | const int i = ra_get_index(&r->high_low_container, hb); |
7801 | uint8_t typecode; |
7802 | bool result = false; |
7803 | if (i >= 0) { |
7804 | ra_unshare_container_at_index(&r->high_low_container, i); |
7805 | void *container = |
7806 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
7807 | |
7808 | const int oldCardinality = |
7809 | container_get_cardinality(container, typecode); |
7810 | |
7811 | uint8_t newtypecode = typecode; |
7812 | void *container2 = |
7813 | container_add(container, val & 0xFFFF, typecode, &newtypecode); |
7814 | if (container2 != container) { |
7815 | container_free(container, typecode); |
7816 | ra_set_container_at_index(&r->high_low_container, i, container2, |
7817 | newtypecode); |
7818 | result = true; |
7819 | } else { |
7820 | const int newCardinality = |
7821 | container_get_cardinality(container, newtypecode); |
7822 | |
7823 | result = oldCardinality != newCardinality; |
7824 | } |
7825 | } else { |
7826 | array_container_t *newac = array_container_create(); |
7827 | void *container = container_add(newac, val & 0xFFFF, |
7828 | ARRAY_CONTAINER_TYPE_CODE, &typecode); |
7829 | // we could just assume that it stays an array container |
7830 | ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb, |
7831 | container, typecode); |
7832 | result = true; |
7833 | } |
7834 | |
7835 | return result; |
7836 | } |
7837 | |
7838 | void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) { |
7839 | const uint16_t hb = val >> 16; |
7840 | const int i = ra_get_index(&r->high_low_container, hb); |
7841 | uint8_t typecode; |
7842 | if (i >= 0) { |
7843 | ra_unshare_container_at_index(&r->high_low_container, i); |
7844 | void *container = |
7845 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
7846 | uint8_t newtypecode = typecode; |
7847 | void *container2 = |
7848 | container_remove(container, val & 0xFFFF, typecode, &newtypecode); |
7849 | if (container2 != container) { |
7850 | container_free(container, typecode); |
7851 | ra_set_container_at_index(&r->high_low_container, i, container2, |
7852 | newtypecode); |
7853 | } |
7854 | if (container_get_cardinality(container2, newtypecode) != 0) { |
7855 | ra_set_container_at_index(&r->high_low_container, i, container2, |
7856 | newtypecode); |
7857 | } else { |
7858 | ra_remove_at_index_and_free(&r->high_low_container, i); |
7859 | } |
7860 | } |
7861 | } |
7862 | |
7863 | bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) { |
7864 | const uint16_t hb = val >> 16; |
7865 | const int i = ra_get_index(&r->high_low_container, hb); |
7866 | uint8_t typecode; |
7867 | bool result = false; |
7868 | if (i >= 0) { |
7869 | ra_unshare_container_at_index(&r->high_low_container, i); |
7870 | void *container = |
7871 | ra_get_container_at_index(&r->high_low_container, i, &typecode); |
7872 | |
7873 | const int oldCardinality = |
7874 | container_get_cardinality(container, typecode); |
7875 | |
7876 | uint8_t newtypecode = typecode; |
7877 | void *container2 = |
7878 | container_remove(container, val & 0xFFFF, typecode, &newtypecode); |
7879 | if (container2 != container) { |
7880 | container_free(container, typecode); |
7881 | ra_set_container_at_index(&r->high_low_container, i, container2, |
7882 | newtypecode); |
7883 | } |
7884 | |
7885 | const int newCardinality = |
7886 | container_get_cardinality(container2, newtypecode); |
7887 | |
7888 | if (newCardinality != 0) { |
7889 | ra_set_container_at_index(&r->high_low_container, i, container2, |
7890 | newtypecode); |
7891 | } else { |
7892 | ra_remove_at_index_and_free(&r->high_low_container, i); |
7893 | } |
7894 | |
7895 | result = oldCardinality != newCardinality; |
7896 | } |
7897 | return result; |
7898 | } |
7899 | |
7900 | void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args, |
7901 | const uint32_t *vals) { |
7902 | if (n_args == 0 || r->high_low_container.size == 0) { |
7903 | return; |
7904 | } |
7905 | int32_t pos = -1; // position of the container used in the previous iteration |
7906 | for (size_t i = 0; i < n_args; i++) { |
7907 | uint16_t key = (uint16_t)(vals[i] >> 16); |
7908 | if (pos < 0 || key != r->high_low_container.keys[pos]) { |
7909 | pos = ra_get_index(&r->high_low_container, key); |
7910 | } |
7911 | if (pos >= 0) { |
7912 | uint8_t new_typecode; |
7913 | void *new_container; |
7914 | new_container = container_remove(r->high_low_container.containers[pos], |
7915 | vals[i] & 0xffff, |
7916 | r->high_low_container.typecodes[pos], |
7917 | &new_typecode); |
7918 | if (new_container != r->high_low_container.containers[pos]) { |
7919 | container_free(r->high_low_container.containers[pos], |
7920 | r->high_low_container.typecodes[pos]); |
7921 | ra_replace_key_and_container_at_index(&r->high_low_container, |
7922 | pos, key, new_container, |
7923 | new_typecode); |
7924 | } |
7925 | if (!container_nonzero_cardinality(new_container, new_typecode)) { |
7926 | container_free(new_container, new_typecode); |
7927 | ra_remove_at_index(&r->high_low_container, pos); |
7928 | pos = -1; |
7929 | } |
7930 | } |
7931 | } |
7932 | } |
7933 | |
7934 | // there should be some SIMD optimizations possible here |
7935 | roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1, |
7936 | const roaring_bitmap_t *x2) { |
7937 | uint8_t container_result_type = 0; |
7938 | const int length1 = x1->high_low_container.size, |
7939 | length2 = x2->high_low_container.size; |
7940 | uint32_t neededcap = length1 > length2 ? length2 : length1; |
7941 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); |
7942 | answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; |
7943 | |
7944 | int pos1 = 0, pos2 = 0; |
7945 | |
7946 | while (pos1 < length1 && pos2 < length2) { |
7947 | const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
7948 | const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
7949 | |
7950 | if (s1 == s2) { |
7951 | uint8_t container_type_1, container_type_2; |
7952 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
7953 | &container_type_1); |
7954 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
7955 | &container_type_2); |
7956 | void *c = container_and(c1, container_type_1, c2, container_type_2, |
7957 | &container_result_type); |
7958 | if (container_nonzero_cardinality(c, container_result_type)) { |
7959 | ra_append(&answer->high_low_container, s1, c, |
7960 | container_result_type); |
7961 | } else { |
7962 | container_free( |
7963 | c, container_result_type); // otherwise:memory leak! |
7964 | } |
7965 | ++pos1; |
7966 | ++pos2; |
7967 | } else if (s1 < s2) { // s1 < s2 |
7968 | pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); |
7969 | } else { // s1 > s2 |
7970 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
7971 | } |
7972 | } |
7973 | return answer; |
7974 | } |
7975 | |
7976 | /** |
7977 | * Compute the union of 'number' bitmaps. |
7978 | */ |
7979 | roaring_bitmap_t *roaring_bitmap_or_many(size_t number, |
7980 | const roaring_bitmap_t **x) { |
7981 | if (number == 0) { |
7982 | return roaring_bitmap_create(); |
7983 | } |
7984 | if (number == 1) { |
7985 | return roaring_bitmap_copy(x[0]); |
7986 | } |
7987 | roaring_bitmap_t *answer = |
7988 | roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION); |
7989 | for (size_t i = 2; i < number; i++) { |
7990 | roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION); |
7991 | } |
7992 | roaring_bitmap_repair_after_lazy(answer); |
7993 | return answer; |
7994 | } |
7995 | |
7996 | /** |
7997 | * Compute the xor of 'number' bitmaps. |
7998 | */ |
7999 | roaring_bitmap_t *roaring_bitmap_xor_many(size_t number, |
8000 | const roaring_bitmap_t **x) { |
8001 | if (number == 0) { |
8002 | return roaring_bitmap_create(); |
8003 | } |
8004 | if (number == 1) { |
8005 | return roaring_bitmap_copy(x[0]); |
8006 | } |
8007 | roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]); |
8008 | for (size_t i = 2; i < number; i++) { |
8009 | roaring_bitmap_lazy_xor_inplace(answer, x[i]); |
8010 | } |
8011 | roaring_bitmap_repair_after_lazy(answer); |
8012 | return answer; |
8013 | } |
8014 | |
/**
 * In-place intersection: x1 becomes x1 AND x2 (modifies its first argument).
 *
 * Surviving containers are compacted to the front of x1's array; containers
 * of x1 that are skipped over or become empty are freed.
 */
void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
                                const roaring_bitmap_t *x2) {
    // Intersecting a bitmap with itself changes nothing.
    if (x1 == x2) return;
    // intersection_size counts the containers kept so far and is also the
    // slot in x1 where the next surviving container is written.
    int pos1 = 0, pos2 = 0, intersection_size = 0;
    const int length1 = ra_get_size(&x1->high_low_container);
    const int length2 = ra_get_size(&x2->high_low_container);

    // any skipped-over or newly emptied containers in x1
    // have to be freed.
    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
        const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        if (s1 == s2) {
            uint8_t typecode1, typecode2, typecode_result;
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &typecode1);
            // Obtain a writable container before modifying it in place.
            c1 = get_writable_copy_if_shared(c1, &typecode1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &typecode2);
            void *c =
                container_iand(c1, typecode1, c2, typecode2, &typecode_result);
            if (c != c1) {  // in this instance a new container was created, and
                            // we need to free the old one
                container_free(c1, typecode1);
            }
            if (container_nonzero_cardinality(c, typecode_result)) {
                // Keep the non-empty result at the next compacted slot.
                ra_replace_key_and_container_at_index(&x1->high_low_container,
                                                      intersection_size, s1, c,
                                                      typecode_result);
                intersection_size++;
            } else {
                // Empty intersection for this key: discard it.
                container_free(c, typecode_result);
            }
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {
            // x1 containers below s2 have no counterpart in x2.
            // NOTE(review): presumably ra_advance_until_freeing frees the
            // containers it skips — confirm against its definition.
            pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
        }
    }

    // if we ended early because x2 ran out, then all remaining in x1 should be
    // freed
    while (pos1 < length1) {
        container_free(x1->high_low_container.containers[pos1],
                       x1->high_low_container.typecodes[pos1]);
        ++pos1;
    }

    // all containers after this have either been copied or freed
    ra_downsize(&x1->high_low_container, intersection_size);
}
8070 | |
/**
 * Compute the union of x1 and x2 as a new bitmap.
 *
 * If either input has no containers, a copy of the other is returned.
 * Otherwise both key lists are merged: containers with equal keys are OR-ed,
 * containers present on one side only are copied into the answer. The result
 * has copy_on_write set only if both inputs have it set.
 */
roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
                                    const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    // Capacity for the case where no key is shared between the inputs.
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    // Both inputs are non-empty here, so reading index 0 is valid.
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    // The loop exits only through the breaks taken when a side is exhausted.
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c = container_or(c1, container_type_1, c2, container_type_2,
                                   &container_result_type);
            // since we assume that the initial containers are non-empty, the
            // result here
            // can only be non-empty
            ra_append(&answer->high_low_container, s1, c,
                      container_result_type);
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            // Key only in x1 so far: the answer receives a copy of it.
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 =
                get_copy_of_container(c1, &container_type_1, x1->copy_on_write);
            if (x1->copy_on_write) {
                // Under copy-on-write the returned container is also written
                // back into x1.
                // NOTE(review): presumably so both bitmaps reference the same
                // shared container — confirm against get_copy_of_container.
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          container_type_1);
            }
            ra_append(&answer->high_low_container, s1, c1, container_type_1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            // Key only in x2 so far: the answer receives a copy of it.
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 =
                get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
            if (x2->copy_on_write) {
                // Same write-back as for x1 above.
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_append(&answer->high_low_container, s2, c2, container_type_2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    // One side is exhausted: append whatever remains of the other side.
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             x2->copy_on_write);
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             x1->copy_on_write);
    }
    return answer;
}
8151 | |
/**
 * In-place union: x1 becomes x1 OR x2 (modifies its first argument).
 *
 * Does nothing if x2 has no containers; if x1 has none, x1 is overwritten
 * with x2. Otherwise containers with equal keys are OR-ed in place and
 * containers present only in x2 are copied into x1.
 */
void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
                               const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    // length1 is not const: it grows whenever a container is inserted into x1.
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    // Both inputs are non-empty here, so reading index 0 is valid.
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    // The loop exits only through the breaks taken when a side is exhausted.
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            // A container reported as full is left untouched; the union
            // cannot add anything to it.
            if (!container_is_full(c1, container_type_1)) {
                // Obtain a writable container before modifying it in place.
                c1 = get_writable_copy_if_shared(c1, &container_type_1);

                void *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                     pos2, &container_type_2);
                void *c =
                    container_ior(c1, container_type_1, c2, container_type_2,
                                  &container_result_type);
                if (c !=
                    c1) {  // in this instance a new container was created, and
                           // we need to free the old one
                    container_free(c1, container_type_1);
                }

                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          container_result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            // Key only in x1: it already belongs to the result.
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            // Key only in x2: insert a copy of its container into x1.
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 =
                get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
            if (x2->copy_on_write) {
                // Under copy-on-write the returned container is also written
                // back into x2.
                // NOTE(review): presumably so both bitmaps reference the same
                // shared container — confirm against get_copy_of_container.
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }

            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       container_type_2);
            // The insertion moved the container previously at pos1 one slot
            // up; step past the new entry (s1 itself is unchanged) and
            // account for the longer array.
            pos1++;
            length1++;
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    // x1 exhausted: everything left in x2 is appended.
    if (pos1 == length1) {
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, x2->copy_on_write);
    }
}
8227 | |
/**
 * Compute the symmetric difference of x1 and x2 as a new bitmap.
 *
 * If either input has no containers, a copy of the other is returned.
 * Otherwise both key lists are merged: containers with equal keys are XOR-ed
 * (empty results are discarded), containers present on one side only are
 * copied into the answer. The result has copy_on_write set only if both
 * inputs have it set.
 */
roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
                                     const roaring_bitmap_t *x2) {
    uint8_t container_result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    // Capacity for the case where no key is shared between the inputs.
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    answer->copy_on_write = x1->copy_on_write && x2->copy_on_write;
    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    // Both inputs are non-empty here, so reading index 0 is valid.
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    // The loop exits only through the breaks taken when a side is exhausted.
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            void *c = container_xor(c1, container_type_1, c2, container_type_2,
                                    &container_result_type);

            // Unlike OR, XOR of two non-empty containers can be empty.
            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_append(&answer->high_low_container, s1, c,
                          container_result_type);
            } else {
                container_free(c, container_result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            // Key only in x1 so far: the answer receives a copy of it.
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            c1 =
                get_copy_of_container(c1, &container_type_1, x1->copy_on_write);
            if (x1->copy_on_write) {
                // Under copy-on-write the returned container is also written
                // back into x1.
                // NOTE(review): presumably so both bitmaps reference the same
                // shared container — confirm against get_copy_of_container.
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          container_type_1);
            }
            ra_append(&answer->high_low_container, s1, c1, container_type_1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            // Key only in x2 so far: the answer receives a copy of it.
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 =
                get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
            if (x2->copy_on_write) {
                // Same write-back as for x1 above.
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }
            ra_append(&answer->high_low_container, s2, c2, container_type_2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    // One side is exhausted: append whatever remains of the other side.
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             x2->copy_on_write);
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             x1->copy_on_write);
    }
    return answer;
}
8308 | |
/**
 * In-place symmetric difference: x1 becomes x1 XOR x2 (modifies its first
 * argument).
 *
 * x1 and x2 must be distinct objects (checked with assert). Does nothing if
 * x2 has no containers; if x1 has none, x1 is overwritten with x2.
 */
void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
                                const roaring_bitmap_t *x2) {
    assert(x1 != x2);
    uint8_t container_result_type = 0;
    // length1 is not const: it changes as containers are inserted into or
    // removed from x1.
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }

    // XOR can have new containers inserted from x2, but can also
    // lose containers when x1 and x2 are nonempty and identical.

    int pos1 = 0, pos2 = 0;
    uint8_t container_type_1, container_type_2;
    // Both inputs are non-empty here, so reading index 0 is valid.
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
    // The loop exits only through the breaks taken when a side is exhausted.
    while (true) {
        if (s1 == s2) {
            void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1,
                                                 &container_type_1);
            // Obtain a writable container before modifying it in place.
            c1 = get_writable_copy_if_shared(c1, &container_type_1);

            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            // NOTE(review): c1 is not freed here when c differs from it,
            // unlike the in-place OR — presumably container_ixor disposes of
            // its first argument itself; confirm against its definition.
            void *c = container_ixor(c1, container_type_1, c2, container_type_2,
                                     &container_result_type);

            if (container_nonzero_cardinality(c, container_result_type)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          container_result_type);
                ++pos1;
            } else {
                // The containers cancelled out: drop the entry from x1.
                // pos1 is not advanced because the following entry has moved
                // into this slot.
                container_free(c, container_result_type);
                ra_remove_at_index(&x1->high_low_container, pos1);
                --length1;
            }

            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);

        } else if (s1 < s2) {  // s1 < s2
            // Key only in x1: it already belongs to the result.
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, pos1);

        } else {  // s1 > s2
            // Key only in x2: insert a copy of its container into x1.
            void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2,
                                                 &container_type_2);
            c2 =
                get_copy_of_container(c2, &container_type_2, x2->copy_on_write);
            if (x2->copy_on_write) {
                // Under copy-on-write the returned container is also written
                // back into x2.
                // NOTE(review): presumably so both bitmaps reference the same
                // shared container — confirm against get_copy_of_container.
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          container_type_2);
            }

            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       container_type_2);
            // The insertion moved the container previously at pos1 one slot
            // up; step past the new entry (s1 itself is unchanged) and
            // account for the longer array.
            pos1++;
            length1++;
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
        }
    }
    // x1 exhausted: everything left in x2 is appended.
    if (pos1 == length1) {
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, x2->copy_on_write);
    }
}
8388 | |
8389 | roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1, |
8390 | const roaring_bitmap_t *x2) { |
8391 | uint8_t container_result_type = 0; |
8392 | const int length1 = x1->high_low_container.size, |
8393 | length2 = x2->high_low_container.size; |
8394 | if (0 == length1) { |
8395 | roaring_bitmap_t *empty_bitmap = roaring_bitmap_create(); |
8396 | empty_bitmap->copy_on_write = x1->copy_on_write && x2->copy_on_write; |
8397 | return empty_bitmap; |
8398 | } |
8399 | if (0 == length2) { |
8400 | return roaring_bitmap_copy(x1); |
8401 | } |
8402 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1); |
8403 | answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; |
8404 | |
8405 | int pos1 = 0, pos2 = 0; |
8406 | uint8_t container_type_1, container_type_2; |
8407 | uint16_t s1 = 0; |
8408 | uint16_t s2 = 0; |
8409 | while (true) { |
8410 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
8411 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
8412 | |
8413 | if (s1 == s2) { |
8414 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
8415 | &container_type_1); |
8416 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
8417 | &container_type_2); |
8418 | void *c = |
8419 | container_andnot(c1, container_type_1, c2, container_type_2, |
8420 | &container_result_type); |
8421 | |
8422 | if (container_nonzero_cardinality(c, container_result_type)) { |
8423 | ra_append(&answer->high_low_container, s1, c, |
8424 | container_result_type); |
8425 | } else { |
8426 | container_free(c, container_result_type); |
8427 | } |
8428 | ++pos1; |
8429 | ++pos2; |
8430 | if (pos1 == length1) break; |
8431 | if (pos2 == length2) break; |
8432 | } else if (s1 < s2) { // s1 < s2 |
8433 | const int next_pos1 = |
8434 | ra_advance_until(&x1->high_low_container, s2, pos1); |
8435 | ra_append_copy_range(&answer->high_low_container, |
8436 | &x1->high_low_container, pos1, next_pos1, |
8437 | x1->copy_on_write); |
8438 | // TODO : perhaps some of the copy_on_write should be based on |
8439 | // answer rather than x1 (more stringent?). Many similar cases |
8440 | pos1 = next_pos1; |
8441 | if (pos1 == length1) break; |
8442 | } else { // s1 > s2 |
8443 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
8444 | if (pos2 == length2) break; |
8445 | } |
8446 | } |
8447 | if (pos2 == length2) { |
8448 | ra_append_copy_range(&answer->high_low_container, |
8449 | &x1->high_low_container, pos1, length1, |
8450 | x1->copy_on_write); |
8451 | } |
8452 | return answer; |
8453 | } |
8454 | |
8455 | // inplace andnot (modifies its first argument). |
8456 | |
8457 | void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1, |
8458 | const roaring_bitmap_t *x2) { |
8459 | assert(x1 != x2); |
8460 | |
8461 | uint8_t container_result_type = 0; |
8462 | int length1 = x1->high_low_container.size; |
8463 | const int length2 = x2->high_low_container.size; |
8464 | int intersection_size = 0; |
8465 | |
8466 | if (0 == length2) return; |
8467 | |
8468 | if (0 == length1) { |
8469 | roaring_bitmap_clear(x1); |
8470 | return; |
8471 | } |
8472 | |
8473 | int pos1 = 0, pos2 = 0; |
8474 | uint8_t container_type_1, container_type_2; |
8475 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
8476 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
8477 | while (true) { |
8478 | if (s1 == s2) { |
8479 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
8480 | &container_type_1); |
8481 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
8482 | |
8483 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
8484 | &container_type_2); |
8485 | void *c = |
8486 | container_iandnot(c1, container_type_1, c2, container_type_2, |
8487 | &container_result_type); |
8488 | |
8489 | if (container_nonzero_cardinality(c, container_result_type)) { |
8490 | ra_replace_key_and_container_at_index(&x1->high_low_container, |
8491 | intersection_size++, s1, |
8492 | c, container_result_type); |
8493 | } else { |
8494 | container_free(c, container_result_type); |
8495 | } |
8496 | |
8497 | ++pos1; |
8498 | ++pos2; |
8499 | if (pos1 == length1) break; |
8500 | if (pos2 == length2) break; |
8501 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
8502 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
8503 | |
8504 | } else if (s1 < s2) { // s1 < s2 |
8505 | if (pos1 != intersection_size) { |
8506 | void *c1 = ra_get_container_at_index(&x1->high_low_container, |
8507 | pos1, &container_type_1); |
8508 | |
8509 | ra_replace_key_and_container_at_index(&x1->high_low_container, |
8510 | intersection_size, s1, c1, |
8511 | container_type_1); |
8512 | } |
8513 | intersection_size++; |
8514 | pos1++; |
8515 | if (pos1 == length1) break; |
8516 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
8517 | |
8518 | } else { // s1 > s2 |
8519 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
8520 | if (pos2 == length2) break; |
8521 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
8522 | } |
8523 | } |
8524 | |
8525 | if (pos1 < length1) { |
8526 | // all containers between intersection_size and |
8527 | // pos1 are junk. However, they have either been moved |
8528 | // (thus still referenced) or involved in an iandnot |
8529 | // that will clean up all containers that could not be reused. |
8530 | // Thus we should not free the junk containers between |
8531 | // intersection_size and pos1. |
8532 | if (pos1 > intersection_size) { |
8533 | // left slide of remaining items |
8534 | ra_copy_range(&x1->high_low_container, pos1, length1, |
8535 | intersection_size); |
8536 | } |
8537 | // else current placement is fine |
8538 | intersection_size += (length1 - pos1); |
8539 | } |
8540 | ra_downsize(&x1->high_low_container, intersection_size); |
8541 | } |
8542 | |
8543 | uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *ra) { |
8544 | uint64_t card = 0; |
8545 | for (int i = 0; i < ra->high_low_container.size; ++i) |
8546 | card += container_get_cardinality(ra->high_low_container.containers[i], |
8547 | ra->high_low_container.typecodes[i]); |
8548 | return card; |
8549 | } |
8550 | |
8551 | uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *ra, |
8552 | uint64_t range_start, |
8553 | uint64_t range_end) { |
8554 | if (range_end > UINT32_MAX) { |
8555 | range_end = UINT32_MAX + UINT64_C(1); |
8556 | } |
8557 | if (range_start >= range_end) { |
8558 | return 0; |
8559 | } |
8560 | range_end--; // make range_end inclusive |
8561 | // now we have: 0 <= range_start <= range_end <= UINT32_MAX |
8562 | |
8563 | int minhb = range_start >> 16; |
8564 | int maxhb = range_end >> 16; |
8565 | |
8566 | uint64_t card = 0; |
8567 | |
8568 | int i = ra_get_index(&ra->high_low_container, minhb); |
8569 | if (i >= 0) { |
8570 | if (minhb == maxhb) { |
8571 | card += container_rank(ra->high_low_container.containers[i], |
8572 | ra->high_low_container.typecodes[i], |
8573 | range_end & 0xffff); |
8574 | } else { |
8575 | card += container_get_cardinality(ra->high_low_container.containers[i], |
8576 | ra->high_low_container.typecodes[i]); |
8577 | } |
8578 | if ((range_start & 0xffff) != 0) { |
8579 | card -= container_rank(ra->high_low_container.containers[i], |
8580 | ra->high_low_container.typecodes[i], |
8581 | (range_start & 0xffff) - 1); |
8582 | } |
8583 | i++; |
8584 | } else { |
8585 | i = -i - 1; |
8586 | } |
8587 | |
8588 | for (; i < ra->high_low_container.size; i++) { |
8589 | uint16_t key = ra->high_low_container.keys[i]; |
8590 | if (key < maxhb) { |
8591 | card += container_get_cardinality(ra->high_low_container.containers[i], |
8592 | ra->high_low_container.typecodes[i]); |
8593 | } else if (key == maxhb) { |
8594 | card += container_rank(ra->high_low_container.containers[i], |
8595 | ra->high_low_container.typecodes[i], |
8596 | range_end & 0xffff); |
8597 | break; |
8598 | } else { |
8599 | break; |
8600 | } |
8601 | } |
8602 | |
8603 | return card; |
8604 | } |
8605 | |
8606 | |
8607 | bool roaring_bitmap_is_empty(const roaring_bitmap_t *ra) { |
8608 | return ra->high_low_container.size == 0; |
8609 | } |
8610 | |
8611 | void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *ra, uint32_t *ans) { |
8612 | ra_to_uint32_array(&ra->high_low_container, ans); |
8613 | } |
8614 | |
8615 | bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *ra, size_t offset, size_t limit, uint32_t *ans) { |
8616 | return ra_range_uint32_array(&ra->high_low_container, offset, limit, ans); |
8617 | } |
8618 | |
8619 | /** convert array and bitmap containers to run containers when it is more |
8620 | * efficient; |
8621 | * also convert from run containers when more space efficient. Returns |
8622 | * true if the result has at least one run container. |
8623 | */ |
8624 | bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) { |
8625 | bool answer = false; |
8626 | for (int i = 0; i < r->high_low_container.size; i++) { |
8627 | uint8_t typecode_original, typecode_after; |
8628 | ra_unshare_container_at_index( |
8629 | &r->high_low_container, i); // TODO: this introduces extra cloning! |
8630 | void *c = ra_get_container_at_index(&r->high_low_container, i, |
8631 | &typecode_original); |
8632 | void *c1 = convert_run_optimize(c, typecode_original, &typecode_after); |
8633 | if (typecode_after == RUN_CONTAINER_TYPE_CODE) answer = true; |
8634 | ra_set_container_at_index(&r->high_low_container, i, c1, |
8635 | typecode_after); |
8636 | } |
8637 | return answer; |
8638 | } |
8639 | |
8640 | size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) { |
8641 | size_t answer = 0; |
8642 | for (int i = 0; i < r->high_low_container.size; i++) { |
8643 | uint8_t typecode_original; |
8644 | void *c = ra_get_container_at_index(&r->high_low_container, i, |
8645 | &typecode_original); |
8646 | answer += container_shrink_to_fit(c, typecode_original); |
8647 | } |
8648 | answer += ra_shrink_to_fit(&r->high_low_container); |
8649 | return answer; |
8650 | } |
8651 | |
8652 | /** |
8653 | * Remove run-length encoding even when it is more space efficient |
8654 | * return whether a change was applied |
8655 | */ |
8656 | bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) { |
8657 | bool answer = false; |
8658 | for (int i = 0; i < r->high_low_container.size; i++) { |
8659 | uint8_t typecode_original, typecode_after; |
8660 | void *c = ra_get_container_at_index(&r->high_low_container, i, |
8661 | &typecode_original); |
8662 | if (get_container_type(c, typecode_original) == |
8663 | RUN_CONTAINER_TYPE_CODE) { |
8664 | answer = true; |
8665 | if (typecode_original == SHARED_CONTAINER_TYPE_CODE) { |
8666 | run_container_t *truec = |
8667 | (run_container_t *)((shared_container_t *)c)->container; |
8668 | int32_t card = run_container_cardinality(truec); |
8669 | void *c1 = convert_to_bitset_or_array_container( |
8670 | truec, card, &typecode_after); |
8671 | shared_container_free((shared_container_t *)c); |
8672 | ra_set_container_at_index(&r->high_low_container, i, c1, |
8673 | typecode_after); |
8674 | |
8675 | } else { |
8676 | int32_t card = run_container_cardinality((run_container_t *)c); |
8677 | void *c1 = convert_to_bitset_or_array_container( |
8678 | (run_container_t *)c, card, &typecode_after); |
8679 | ra_set_container_at_index(&r->high_low_container, i, c1, |
8680 | typecode_after); |
8681 | } |
8682 | } |
8683 | } |
8684 | return answer; |
8685 | } |
8686 | |
8687 | size_t roaring_bitmap_serialize(const roaring_bitmap_t *ra, char *buf) { |
8688 | size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); |
8689 | uint64_t cardinality = roaring_bitmap_get_cardinality(ra); |
8690 | uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t); |
8691 | if (portablesize < sizeasarray) { |
8692 | buf[0] = SERIALIZATION_CONTAINER; |
8693 | return roaring_bitmap_portable_serialize(ra, buf + 1) + 1; |
8694 | } else { |
8695 | buf[0] = SERIALIZATION_ARRAY_UINT32; |
8696 | memcpy(buf + 1, &cardinality, sizeof(uint32_t)); |
8697 | roaring_bitmap_to_uint32_array( |
8698 | ra, (uint32_t *)(buf + 1 + sizeof(uint32_t))); |
8699 | return 1 + (size_t)sizeasarray; |
8700 | } |
8701 | } |
8702 | |
8703 | size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *ra) { |
8704 | size_t portablesize = roaring_bitmap_portable_size_in_bytes(ra); |
8705 | uint64_t sizeasarray = roaring_bitmap_get_cardinality(ra) * sizeof(uint32_t) + |
8706 | sizeof(uint32_t); |
8707 | return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1; |
8708 | } |
8709 | |
8710 | size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *ra) { |
8711 | return ra_portable_size_in_bytes(&ra->high_low_container); |
8712 | } |
8713 | |
8714 | |
8715 | roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) { |
8716 | roaring_bitmap_t *ans = |
8717 | (roaring_bitmap_t *)malloc(sizeof(roaring_bitmap_t)); |
8718 | if (ans == NULL) { |
8719 | return NULL; |
8720 | } |
8721 | size_t bytesread; |
8722 | bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread); |
8723 | if(is_ok) assert(bytesread <= maxbytes); |
8724 | ans->copy_on_write = false; |
8725 | if (!is_ok) { |
8726 | free(ans); |
8727 | return NULL; |
8728 | } |
8729 | return ans; |
8730 | } |
8731 | |
8732 | roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) { |
8733 | return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX); |
8734 | } |
8735 | |
8736 | |
/**
 * Forwards to ra_portable_deserialize_size() and returns its result
 * unchanged.
 */
size_t roaring_bitmap_portable_deserialize_size(const char *buf,
                                                size_t maxbytes) {
    const size_t result = ra_portable_deserialize_size(buf, maxbytes);
    return result;
}
8740 | |
8741 | |
8742 | size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *ra, |
8743 | char *buf) { |
8744 | return ra_portable_serialize(&ra->high_low_container, buf); |
8745 | } |
8746 | |
8747 | roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) { |
8748 | const char *bufaschar = (const char *)buf; |
8749 | if (*(const unsigned char *)buf == SERIALIZATION_ARRAY_UINT32) { |
8750 | /* This looks like a compressed set of uint32_t elements */ |
8751 | uint32_t card; |
8752 | memcpy(&card, bufaschar + 1, sizeof(uint32_t)); |
8753 | const uint32_t *elems = |
8754 | (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t)); |
8755 | |
8756 | return roaring_bitmap_of_ptr(card, elems); |
8757 | } else if (bufaschar[0] == SERIALIZATION_CONTAINER) { |
8758 | return roaring_bitmap_portable_deserialize(bufaschar + 1); |
8759 | } else |
8760 | return (NULL); |
8761 | } |
8762 | |
8763 | bool roaring_iterate(const roaring_bitmap_t *ra, roaring_iterator iterator, |
8764 | void *ptr) { |
8765 | for (int i = 0; i < ra->high_low_container.size; ++i) |
8766 | if (!container_iterate(ra->high_low_container.containers[i], |
8767 | ra->high_low_container.typecodes[i], |
8768 | ((uint32_t)ra->high_low_container.keys[i]) << 16, |
8769 | iterator, ptr)) { |
8770 | return false; |
8771 | } |
8772 | return true; |
8773 | } |
8774 | |
8775 | bool roaring_iterate64(const roaring_bitmap_t *ra, roaring_iterator64 iterator, |
8776 | uint64_t high_bits, void *ptr) { |
8777 | for (int i = 0; i < ra->high_low_container.size; ++i) |
8778 | if (!container_iterate64( |
8779 | ra->high_low_container.containers[i], |
8780 | ra->high_low_container.typecodes[i], |
8781 | ((uint32_t)ra->high_low_container.keys[i]) << 16, iterator, |
8782 | high_bits, ptr)) { |
8783 | return false; |
8784 | } |
8785 | return true; |
8786 | } |
8787 | |
8788 | /**** |
8789 | * begin roaring_uint32_iterator_t |
8790 | *****/ |
8791 | |
8792 | static bool loadfirstvalue(roaring_uint32_iterator_t *newit) { |
8793 | newit->in_container_index = 0; |
8794 | newit->run_index = 0; |
8795 | newit->current_value = 0; |
8796 | if (newit->container_index >= |
8797 | newit->parent->high_low_container.size) { // otherwise nothing |
8798 | newit->current_value = UINT32_MAX; |
8799 | return (newit->has_value = false); |
8800 | } |
8801 | // assume not empty |
8802 | newit->has_value = true; |
8803 | // we precompute container, typecode and highbits so that successive |
8804 | // iterators do not have to grab them from odd memory locations |
8805 | // and have to worry about the (easily predicted) container_unwrap_shared |
8806 | // call. |
8807 | newit->container = |
8808 | newit->parent->high_low_container.containers[newit->container_index]; |
8809 | newit->typecode = |
8810 | newit->parent->high_low_container.typecodes[newit->container_index]; |
8811 | newit->highbits = |
8812 | ((uint32_t) |
8813 | newit->parent->high_low_container.keys[newit->container_index]) |
8814 | << 16; |
8815 | newit->container = |
8816 | container_unwrap_shared(newit->container, &(newit->typecode)); |
8817 | uint32_t wordindex; |
8818 | uint64_t word; // used for bitsets |
8819 | switch (newit->typecode) { |
8820 | case BITSET_CONTAINER_TYPE_CODE: |
8821 | wordindex = 0; |
8822 | while ((word = ((const bitset_container_t *)(newit->container)) |
8823 | ->array[wordindex]) == 0) |
8824 | wordindex++; // advance |
8825 | // here "word" is non-zero |
8826 | newit->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
8827 | newit->current_value = newit->highbits | newit->in_container_index; |
8828 | break; |
8829 | case ARRAY_CONTAINER_TYPE_CODE: |
8830 | newit->current_value = |
8831 | newit->highbits | |
8832 | ((const array_container_t *)(newit->container))->array[0]; |
8833 | break; |
8834 | case RUN_CONTAINER_TYPE_CODE: |
8835 | newit->current_value = |
8836 | newit->highbits | |
8837 | (((const run_container_t *)(newit->container))->runs[0].value); |
8838 | newit->in_run_index = |
8839 | newit->current_value + |
8840 | (((const run_container_t *)(newit->container))->runs[0].length); |
8841 | break; |
8842 | default: |
8843 | // if this ever happens, bug! |
8844 | assert(false); |
8845 | } // switch (typecode) |
8846 | return true; |
8847 | } |
8848 | |
8849 | // prerequesite: the value should be in range of the container |
8850 | static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) { |
8851 | uint16_t lb = val & 0xFFFF; |
8852 | newit->in_container_index = 0; |
8853 | newit->run_index = 0; |
8854 | newit->current_value = 0; |
8855 | // assume it is found |
8856 | newit->has_value = true; |
8857 | newit->container = |
8858 | newit->parent->high_low_container.containers[newit->container_index]; |
8859 | newit->typecode = |
8860 | newit->parent->high_low_container.typecodes[newit->container_index]; |
8861 | newit->highbits = |
8862 | ((uint32_t) |
8863 | newit->parent->high_low_container.keys[newit->container_index]) |
8864 | << 16; |
8865 | newit->container = |
8866 | container_unwrap_shared(newit->container, &(newit->typecode)); |
8867 | switch (newit->typecode) { |
8868 | case BITSET_CONTAINER_TYPE_CODE: |
8869 | newit->in_container_index = bitset_container_index_equalorlarger((const bitset_container_t *)(newit->container), lb); |
8870 | newit->current_value = newit->highbits | newit->in_container_index; |
8871 | break; |
8872 | case ARRAY_CONTAINER_TYPE_CODE: |
8873 | newit->in_container_index = array_container_index_equalorlarger((const array_container_t *)(newit->container), lb); |
8874 | newit->current_value = |
8875 | newit->highbits | |
8876 | ((const array_container_t *)(newit->container))->array[newit->in_container_index]; |
8877 | break; |
8878 | case RUN_CONTAINER_TYPE_CODE: |
8879 | newit->run_index = run_container_index_equalorlarger((const run_container_t *)(newit->container), lb); |
8880 | if(((const run_container_t *)(newit->container))->runs[newit->run_index].value <= lb) { |
8881 | newit->current_value = val; |
8882 | } else { |
8883 | newit->current_value = |
8884 | newit->highbits | |
8885 | (((const run_container_t *)(newit->container))->runs[newit->run_index].value); |
8886 | } |
8887 | newit->in_run_index = |
8888 | (newit->highbits | (((const run_container_t *)(newit->container))->runs[newit->run_index].value)) + |
8889 | (((const run_container_t *)(newit->container))->runs[newit->run_index].length); |
8890 | |
8891 | break; |
8892 | default: |
8893 | // if this ever happens, bug! |
8894 | assert(false); |
8895 | } // switch (typecode) |
8896 | return true; |
8897 | } |
8898 | |
8899 | void roaring_init_iterator(const roaring_bitmap_t *ra, |
8900 | roaring_uint32_iterator_t *newit) { |
8901 | newit->parent = ra; |
8902 | newit->container_index = 0; |
8903 | newit->has_value = loadfirstvalue(newit); |
8904 | } |
8905 | |
8906 | roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *ra) { |
8907 | roaring_uint32_iterator_t *newit = |
8908 | (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); |
8909 | if (newit == NULL) return NULL; |
8910 | roaring_init_iterator(ra, newit); |
8911 | return newit; |
8912 | } |
8913 | |
8914 | roaring_uint32_iterator_t *roaring_copy_uint32_iterator( |
8915 | const roaring_uint32_iterator_t *it) { |
8916 | roaring_uint32_iterator_t *newit = |
8917 | (roaring_uint32_iterator_t *)malloc(sizeof(roaring_uint32_iterator_t)); |
8918 | memcpy(newit, it, sizeof(roaring_uint32_iterator_t)); |
8919 | return newit; |
8920 | } |
8921 | |
8922 | bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) { |
8923 | uint16_t hb = val >> 16; |
8924 | const int i = ra_get_index(& it->parent->high_low_container, hb); |
8925 | if (i >= 0) { |
8926 | uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]); |
8927 | uint16_t lb = val & 0xFFFF; |
8928 | if(lowvalue < lb ) { |
8929 | it->container_index = i+1; // will have to load first value of next container |
8930 | } else {// the value is necessarily within the range of the container |
8931 | it->container_index = i; |
8932 | it->has_value = loadfirstvalue_largeorequal(it, val); |
8933 | return it->has_value; |
8934 | } |
8935 | } else { |
8936 | // there is no matching, so we are going for the next container |
8937 | it->container_index = -i-1; |
8938 | } |
8939 | it->has_value = loadfirstvalue(it); |
8940 | return it->has_value; |
8941 | } |
8942 | |
8943 | |
8944 | bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) { |
8945 | if (it->container_index >= it->parent->high_low_container.size) { |
8946 | return (it->has_value = false); |
8947 | } |
8948 | uint32_t wordindex; // used for bitsets |
8949 | uint64_t word; // used for bitsets |
8950 | switch (it->typecode) { |
8951 | case BITSET_CONTAINER_TYPE_CODE: |
8952 | it->in_container_index++; |
8953 | wordindex = it->in_container_index / 64; |
8954 | if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break; |
8955 | word = ((const bitset_container_t *)(it->container)) |
8956 | ->array[wordindex] & |
8957 | (UINT64_MAX << (it->in_container_index % 64)); |
8958 | // next part could be optimized/simplified |
8959 | while ((word == 0) && |
8960 | (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) { |
8961 | wordindex++; |
8962 | word = ((const bitset_container_t *)(it->container)) |
8963 | ->array[wordindex]; |
8964 | } |
8965 | if (word != 0) { |
8966 | it->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
8967 | it->current_value = it->highbits | it->in_container_index; |
8968 | return (it->has_value = true); |
8969 | } |
8970 | break; |
8971 | case ARRAY_CONTAINER_TYPE_CODE: |
8972 | it->in_container_index++; |
8973 | if (it->in_container_index < |
8974 | ((const array_container_t *)(it->container))->cardinality) { |
8975 | it->current_value = it->highbits | |
8976 | ((const array_container_t *)(it->container)) |
8977 | ->array[it->in_container_index]; |
8978 | return true; |
8979 | } |
8980 | break; |
8981 | case RUN_CONTAINER_TYPE_CODE: |
8982 | if(it->current_value == UINT32_MAX) { |
8983 | return (it->has_value = false); // without this, we risk an overflow to zero |
8984 | } |
8985 | it->current_value++; |
8986 | if (it->current_value <= it->in_run_index) { |
8987 | return (it->has_value = true); |
8988 | } |
8989 | it->run_index++; |
8990 | if (it->run_index < |
8991 | ((const run_container_t *)(it->container))->n_runs) { |
8992 | it->current_value = |
8993 | it->highbits | (((const run_container_t *)(it->container)) |
8994 | ->runs[it->run_index] |
8995 | .value); |
8996 | it->in_run_index = it->current_value + |
8997 | ((const run_container_t *)(it->container)) |
8998 | ->runs[it->run_index] |
8999 | .length; |
9000 | return (it->has_value = true); |
9001 | } |
9002 | break; |
9003 | default: |
9004 | // if this ever happens, bug! |
9005 | assert(false); |
9006 | } // switch (typecode) |
9007 | // moving to next container |
9008 | it->container_index++; |
9009 | return (it->has_value = loadfirstvalue(it)); |
9010 | } |
9011 | |
9012 | uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) { |
9013 | uint32_t ret = 0; |
9014 | uint32_t num_values; |
9015 | uint32_t wordindex; // used for bitsets |
9016 | uint64_t word; // used for bitsets |
9017 | const array_container_t* acont; //TODO remove |
9018 | const run_container_t* rcont; //TODO remove |
9019 | const bitset_container_t* bcont; //TODO remove |
9020 | |
9021 | while (it->has_value && ret < count) { |
9022 | switch (it->typecode) { |
9023 | case BITSET_CONTAINER_TYPE_CODE: |
9024 | bcont = (const bitset_container_t*)(it->container); |
9025 | wordindex = it->in_container_index / 64; |
9026 | word = bcont->array[wordindex] & (UINT64_MAX << (it->in_container_index % 64)); |
9027 | do { |
9028 | while (word != 0 && ret < count) { |
9029 | buf[0] = it->highbits | (wordindex * 64 + __builtin_ctzll(word)); |
9030 | word = word & (word - 1); |
9031 | buf++; |
9032 | ret++; |
9033 | } |
9034 | while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) { |
9035 | wordindex++; |
9036 | word = bcont->array[wordindex]; |
9037 | } |
9038 | } while (word != 0 && ret < count); |
9039 | it->has_value = (word != 0); |
9040 | if (it->has_value) { |
9041 | it->in_container_index = wordindex * 64 + __builtin_ctzll(word); |
9042 | it->current_value = it->highbits | it->in_container_index; |
9043 | } |
9044 | break; |
9045 | case ARRAY_CONTAINER_TYPE_CODE: |
9046 | acont = (const array_container_t *)(it->container); |
9047 | num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret); |
9048 | for (uint32_t i = 0; i < num_values; i++) { |
9049 | buf[i] = it->highbits | acont->array[it->in_container_index + i]; |
9050 | } |
9051 | buf += num_values; |
9052 | ret += num_values; |
9053 | it->in_container_index += num_values; |
9054 | it->has_value = (it->in_container_index < acont->cardinality); |
9055 | if (it->has_value) { |
9056 | it->current_value = it->highbits | acont->array[it->in_container_index]; |
9057 | } |
9058 | break; |
9059 | case RUN_CONTAINER_TYPE_CODE: |
9060 | rcont = (const run_container_t*)(it->container); |
9061 | //"in_run_index" name is misleading, read it as "max_value_in_current_run" |
9062 | do { |
9063 | num_values = minimum_uint32(it->in_run_index - it->current_value + 1, count - ret); |
9064 | for (uint32_t i = 0; i < num_values; i++) { |
9065 | buf[i] = it->current_value + i; |
9066 | } |
9067 | it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0 |
9068 | buf += num_values; |
9069 | ret += num_values; |
9070 | |
9071 | if (it->current_value > it->in_run_index || it->current_value == 0) { |
9072 | it->run_index++; |
9073 | if (it->run_index < rcont->n_runs) { |
9074 | it->current_value = it->highbits | rcont->runs[it->run_index].value; |
9075 | it->in_run_index = it->current_value + rcont->runs[it->run_index].length; |
9076 | } else { |
9077 | it->has_value = false; |
9078 | } |
9079 | } |
9080 | } while ((ret < count) && it->has_value); |
9081 | break; |
9082 | default: |
9083 | assert(false); |
9084 | } |
9085 | if (it->has_value) { |
9086 | assert(ret == count); |
9087 | return ret; |
9088 | } |
9089 | it->container_index++; |
9090 | it->has_value = loadfirstvalue(it); |
9091 | } |
9092 | return ret; |
9093 | } |
9094 | |
9095 | |
9096 | |
9097 | void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { free(it); } |
9098 | |
9099 | /**** |
9100 | * end of roaring_uint32_iterator_t |
9101 | *****/ |
9102 | |
9103 | bool roaring_bitmap_equals(const roaring_bitmap_t *ra1, |
9104 | const roaring_bitmap_t *ra2) { |
9105 | if (ra1->high_low_container.size != ra2->high_low_container.size) { |
9106 | return false; |
9107 | } |
9108 | for (int i = 0; i < ra1->high_low_container.size; ++i) { |
9109 | if (ra1->high_low_container.keys[i] != |
9110 | ra2->high_low_container.keys[i]) { |
9111 | return false; |
9112 | } |
9113 | } |
9114 | for (int i = 0; i < ra1->high_low_container.size; ++i) { |
9115 | bool areequal = container_equals(ra1->high_low_container.containers[i], |
9116 | ra1->high_low_container.typecodes[i], |
9117 | ra2->high_low_container.containers[i], |
9118 | ra2->high_low_container.typecodes[i]); |
9119 | if (!areequal) { |
9120 | return false; |
9121 | } |
9122 | } |
9123 | return true; |
9124 | } |
9125 | |
9126 | bool roaring_bitmap_is_subset(const roaring_bitmap_t *ra1, |
9127 | const roaring_bitmap_t *ra2) { |
9128 | const int length1 = ra1->high_low_container.size, |
9129 | length2 = ra2->high_low_container.size; |
9130 | |
9131 | int pos1 = 0, pos2 = 0; |
9132 | |
9133 | while (pos1 < length1 && pos2 < length2) { |
9134 | const uint16_t s1 = ra_get_key_at_index(&ra1->high_low_container, pos1); |
9135 | const uint16_t s2 = ra_get_key_at_index(&ra2->high_low_container, pos2); |
9136 | |
9137 | if (s1 == s2) { |
9138 | uint8_t container_type_1, container_type_2; |
9139 | void *c1 = ra_get_container_at_index(&ra1->high_low_container, pos1, |
9140 | &container_type_1); |
9141 | void *c2 = ra_get_container_at_index(&ra2->high_low_container, pos2, |
9142 | &container_type_2); |
9143 | bool subset = |
9144 | container_is_subset(c1, container_type_1, c2, container_type_2); |
9145 | if (!subset) return false; |
9146 | ++pos1; |
9147 | ++pos2; |
9148 | } else if (s1 < s2) { // s1 < s2 |
9149 | return false; |
9150 | } else { // s1 > s2 |
9151 | pos2 = ra_advance_until(&ra2->high_low_container, s1, pos2); |
9152 | } |
9153 | } |
9154 | if (pos1 == length1) |
9155 | return true; |
9156 | else |
9157 | return false; |
9158 | } |
9159 | |
9160 | static void insert_flipped_container(roaring_array_t *ans_arr, |
9161 | const roaring_array_t *x1_arr, uint16_t hb, |
9162 | uint16_t lb_start, uint16_t lb_end) { |
9163 | const int i = ra_get_index(x1_arr, hb); |
9164 | const int j = ra_get_index(ans_arr, hb); |
9165 | uint8_t ctype_in, ctype_out; |
9166 | void *flipped_container = NULL; |
9167 | if (i >= 0) { |
9168 | void *container_to_flip = |
9169 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
9170 | flipped_container = |
9171 | container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start, |
9172 | (uint32_t)(lb_end + 1), &ctype_out); |
9173 | |
9174 | if (container_get_cardinality(flipped_container, ctype_out)) |
9175 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
9176 | ctype_out); |
9177 | else { |
9178 | container_free(flipped_container, ctype_out); |
9179 | } |
9180 | } else { |
9181 | flipped_container = container_range_of_ones( |
9182 | (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); |
9183 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
9184 | ctype_out); |
9185 | } |
9186 | } |
9187 | |
9188 | static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb, |
9189 | uint16_t lb_start, uint16_t lb_end) { |
9190 | const int i = ra_get_index(x1_arr, hb); |
9191 | uint8_t ctype_in, ctype_out; |
9192 | void *flipped_container = NULL; |
9193 | if (i >= 0) { |
9194 | void *container_to_flip = |
9195 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
9196 | flipped_container = container_inot_range( |
9197 | container_to_flip, ctype_in, (uint32_t)lb_start, |
9198 | (uint32_t)(lb_end + 1), &ctype_out); |
9199 | // if a new container was created, the old one was already freed |
9200 | if (container_get_cardinality(flipped_container, ctype_out)) { |
9201 | ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); |
9202 | } else { |
9203 | container_free(flipped_container, ctype_out); |
9204 | ra_remove_at_index(x1_arr, i); |
9205 | } |
9206 | |
9207 | } else { |
9208 | flipped_container = container_range_of_ones( |
9209 | (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out); |
9210 | ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, |
9211 | ctype_out); |
9212 | } |
9213 | } |
9214 | |
9215 | static void insert_fully_flipped_container(roaring_array_t *ans_arr, |
9216 | const roaring_array_t *x1_arr, |
9217 | uint16_t hb) { |
9218 | const int i = ra_get_index(x1_arr, hb); |
9219 | const int j = ra_get_index(ans_arr, hb); |
9220 | uint8_t ctype_in, ctype_out; |
9221 | void *flipped_container = NULL; |
9222 | if (i >= 0) { |
9223 | void *container_to_flip = |
9224 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
9225 | flipped_container = |
9226 | container_not(container_to_flip, ctype_in, &ctype_out); |
9227 | if (container_get_cardinality(flipped_container, ctype_out)) |
9228 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
9229 | ctype_out); |
9230 | else { |
9231 | container_free(flipped_container, ctype_out); |
9232 | } |
9233 | } else { |
9234 | flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); |
9235 | ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container, |
9236 | ctype_out); |
9237 | } |
9238 | } |
9239 | |
9240 | static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) { |
9241 | const int i = ra_get_index(x1_arr, hb); |
9242 | uint8_t ctype_in, ctype_out; |
9243 | void *flipped_container = NULL; |
9244 | if (i >= 0) { |
9245 | void *container_to_flip = |
9246 | ra_get_container_at_index(x1_arr, i, &ctype_in); |
9247 | flipped_container = |
9248 | container_inot(container_to_flip, ctype_in, &ctype_out); |
9249 | |
9250 | if (container_get_cardinality(flipped_container, ctype_out)) { |
9251 | ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out); |
9252 | } else { |
9253 | container_free(flipped_container, ctype_out); |
9254 | ra_remove_at_index(x1_arr, i); |
9255 | } |
9256 | |
9257 | } else { |
9258 | flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out); |
9259 | ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container, |
9260 | ctype_out); |
9261 | } |
9262 | } |
9263 | |
9264 | roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1, |
9265 | uint64_t range_start, |
9266 | uint64_t range_end) { |
9267 | if (range_start >= range_end) { |
9268 | return roaring_bitmap_copy(x1); |
9269 | } |
9270 | if(range_end >= UINT64_C(0x100000000)) { |
9271 | range_end = UINT64_C(0x100000000); |
9272 | } |
9273 | |
9274 | roaring_bitmap_t *ans = roaring_bitmap_create(); |
9275 | ans->copy_on_write = x1->copy_on_write; |
9276 | |
9277 | uint16_t hb_start = (uint16_t)(range_start >> 16); |
9278 | const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF; |
9279 | uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); |
9280 | const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF; |
9281 | |
9282 | ra_append_copies_until(&ans->high_low_container, &x1->high_low_container, |
9283 | hb_start, x1->copy_on_write); |
9284 | if (hb_start == hb_end) { |
9285 | insert_flipped_container(&ans->high_low_container, |
9286 | &x1->high_low_container, hb_start, lb_start, |
9287 | lb_end); |
9288 | } else { |
9289 | // start and end containers are distinct |
9290 | if (lb_start > 0) { |
9291 | // handle first (partial) container |
9292 | insert_flipped_container(&ans->high_low_container, |
9293 | &x1->high_low_container, hb_start, |
9294 | lb_start, 0xFFFF); |
9295 | ++hb_start; // for the full containers. Can't wrap. |
9296 | } |
9297 | |
9298 | if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block |
9299 | |
9300 | for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { |
9301 | insert_fully_flipped_container(&ans->high_low_container, |
9302 | &x1->high_low_container, hb); |
9303 | } |
9304 | |
9305 | // handle a partial final container |
9306 | if (lb_end != 0xFFFF) { |
9307 | insert_flipped_container(&ans->high_low_container, |
9308 | &x1->high_low_container, hb_end + 1, 0, |
9309 | lb_end); |
9310 | ++hb_end; |
9311 | } |
9312 | } |
9313 | ra_append_copies_after(&ans->high_low_container, &x1->high_low_container, |
9314 | hb_end, x1->copy_on_write); |
9315 | return ans; |
9316 | } |
9317 | |
9318 | void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start, |
9319 | uint64_t range_end) { |
9320 | if (range_start >= range_end) { |
9321 | return; // empty range |
9322 | } |
9323 | if(range_end >= UINT64_C(0x100000000)) { |
9324 | range_end = UINT64_C(0x100000000); |
9325 | } |
9326 | |
9327 | uint16_t hb_start = (uint16_t)(range_start >> 16); |
9328 | const uint16_t lb_start = (uint16_t)range_start; |
9329 | uint16_t hb_end = (uint16_t)((range_end - 1) >> 16); |
9330 | const uint16_t lb_end = (uint16_t)(range_end - 1); |
9331 | |
9332 | if (hb_start == hb_end) { |
9333 | inplace_flip_container(&x1->high_low_container, hb_start, lb_start, |
9334 | lb_end); |
9335 | } else { |
9336 | // start and end containers are distinct |
9337 | if (lb_start > 0) { |
9338 | // handle first (partial) container |
9339 | inplace_flip_container(&x1->high_low_container, hb_start, lb_start, |
9340 | 0xFFFF); |
9341 | ++hb_start; // for the full containers. Can't wrap. |
9342 | } |
9343 | |
9344 | if (lb_end != 0xFFFF) --hb_end; |
9345 | |
9346 | for (uint32_t hb = hb_start; hb <= hb_end; ++hb) { |
9347 | inplace_fully_flip_container(&x1->high_low_container, hb); |
9348 | } |
9349 | // handle a partial final container |
9350 | if (lb_end != 0xFFFF) { |
9351 | inplace_flip_container(&x1->high_low_container, hb_end + 1, 0, |
9352 | lb_end); |
9353 | ++hb_end; |
9354 | } |
9355 | } |
9356 | } |
9357 | |
9358 | roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1, |
9359 | const roaring_bitmap_t *x2, |
9360 | const bool bitsetconversion) { |
9361 | uint8_t container_result_type = 0; |
9362 | const int length1 = x1->high_low_container.size, |
9363 | length2 = x2->high_low_container.size; |
9364 | if (0 == length1) { |
9365 | return roaring_bitmap_copy(x2); |
9366 | } |
9367 | if (0 == length2) { |
9368 | return roaring_bitmap_copy(x1); |
9369 | } |
9370 | roaring_bitmap_t *answer = |
9371 | roaring_bitmap_create_with_capacity(length1 + length2); |
9372 | answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; |
9373 | int pos1 = 0, pos2 = 0; |
9374 | uint8_t container_type_1, container_type_2; |
9375 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9376 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9377 | while (true) { |
9378 | if (s1 == s2) { |
9379 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
9380 | &container_type_1); |
9381 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9382 | &container_type_2); |
9383 | void *c; |
9384 | if (bitsetconversion && (get_container_type(c1, container_type_1) != |
9385 | BITSET_CONTAINER_TYPE_CODE) && |
9386 | (get_container_type(c2, container_type_2) != |
9387 | BITSET_CONTAINER_TYPE_CODE)) { |
9388 | void *newc1 = |
9389 | container_mutable_unwrap_shared(c1, &container_type_1); |
9390 | newc1 = container_to_bitset(newc1, container_type_1); |
9391 | container_type_1 = BITSET_CONTAINER_TYPE_CODE; |
9392 | c = container_lazy_ior(newc1, container_type_1, c2, |
9393 | container_type_2, |
9394 | &container_result_type); |
9395 | if (c != newc1) { // should not happen |
9396 | container_free(newc1, container_type_1); |
9397 | } |
9398 | } else { |
9399 | c = container_lazy_or(c1, container_type_1, c2, |
9400 | container_type_2, &container_result_type); |
9401 | } |
9402 | // since we assume that the initial containers are non-empty, |
9403 | // the |
9404 | // result here |
9405 | // can only be non-empty |
9406 | ra_append(&answer->high_low_container, s1, c, |
9407 | container_result_type); |
9408 | ++pos1; |
9409 | ++pos2; |
9410 | if (pos1 == length1) break; |
9411 | if (pos2 == length2) break; |
9412 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9413 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9414 | |
9415 | } else if (s1 < s2) { // s1 < s2 |
9416 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
9417 | &container_type_1); |
9418 | c1 = |
9419 | get_copy_of_container(c1, &container_type_1, x1->copy_on_write); |
9420 | if (x1->copy_on_write) { |
9421 | ra_set_container_at_index(&x1->high_low_container, pos1, c1, |
9422 | container_type_1); |
9423 | } |
9424 | ra_append(&answer->high_low_container, s1, c1, container_type_1); |
9425 | pos1++; |
9426 | if (pos1 == length1) break; |
9427 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9428 | |
9429 | } else { // s1 > s2 |
9430 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9431 | &container_type_2); |
9432 | c2 = |
9433 | get_copy_of_container(c2, &container_type_2, x2->copy_on_write); |
9434 | if (x2->copy_on_write) { |
9435 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
9436 | container_type_2); |
9437 | } |
9438 | ra_append(&answer->high_low_container, s2, c2, container_type_2); |
9439 | pos2++; |
9440 | if (pos2 == length2) break; |
9441 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9442 | } |
9443 | } |
9444 | if (pos1 == length1) { |
9445 | ra_append_copy_range(&answer->high_low_container, |
9446 | &x2->high_low_container, pos2, length2, |
9447 | x2->copy_on_write); |
9448 | } else if (pos2 == length2) { |
9449 | ra_append_copy_range(&answer->high_low_container, |
9450 | &x1->high_low_container, pos1, length1, |
9451 | x1->copy_on_write); |
9452 | } |
9453 | return answer; |
9454 | } |
9455 | |
9456 | void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1, |
9457 | const roaring_bitmap_t *x2, |
9458 | const bool bitsetconversion) { |
9459 | uint8_t container_result_type = 0; |
9460 | int length1 = x1->high_low_container.size; |
9461 | const int length2 = x2->high_low_container.size; |
9462 | |
9463 | if (0 == length2) return; |
9464 | |
9465 | if (0 == length1) { |
9466 | roaring_bitmap_overwrite(x1, x2); |
9467 | return; |
9468 | } |
9469 | int pos1 = 0, pos2 = 0; |
9470 | uint8_t container_type_1, container_type_2; |
9471 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9472 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9473 | while (true) { |
9474 | if (s1 == s2) { |
9475 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
9476 | &container_type_1); |
9477 | if (!container_is_full(c1, container_type_1)) { |
9478 | if ((bitsetconversion == false) || |
9479 | (get_container_type(c1, container_type_1) == |
9480 | BITSET_CONTAINER_TYPE_CODE)) { |
9481 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
9482 | } else { |
9483 | // convert to bitset |
9484 | void *oldc1 = c1; |
9485 | uint8_t oldt1 = container_type_1; |
9486 | c1 = container_mutable_unwrap_shared(c1, &container_type_1); |
9487 | c1 = container_to_bitset(c1, container_type_1); |
9488 | container_free(oldc1, oldt1); |
9489 | container_type_1 = BITSET_CONTAINER_TYPE_CODE; |
9490 | } |
9491 | |
9492 | void *c2 = ra_get_container_at_index(&x2->high_low_container, |
9493 | pos2, &container_type_2); |
9494 | void *c = container_lazy_ior(c1, container_type_1, c2, |
9495 | container_type_2, |
9496 | &container_result_type); |
9497 | if (c != |
9498 | c1) { // in this instance a new container was created, and |
9499 | // we need to free the old one |
9500 | container_free(c1, container_type_1); |
9501 | } |
9502 | |
9503 | ra_set_container_at_index(&x1->high_low_container, pos1, c, |
9504 | container_result_type); |
9505 | } |
9506 | ++pos1; |
9507 | ++pos2; |
9508 | if (pos1 == length1) break; |
9509 | if (pos2 == length2) break; |
9510 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9511 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9512 | |
9513 | } else if (s1 < s2) { // s1 < s2 |
9514 | pos1++; |
9515 | if (pos1 == length1) break; |
9516 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9517 | |
9518 | } else { // s1 > s2 |
9519 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9520 | &container_type_2); |
9521 | // void *c2_clone = container_clone(c2, container_type_2); |
9522 | c2 = |
9523 | get_copy_of_container(c2, &container_type_2, x2->copy_on_write); |
9524 | if (x2->copy_on_write) { |
9525 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
9526 | container_type_2); |
9527 | } |
9528 | ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, |
9529 | container_type_2); |
9530 | pos1++; |
9531 | length1++; |
9532 | pos2++; |
9533 | if (pos2 == length2) break; |
9534 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9535 | } |
9536 | } |
9537 | if (pos1 == length1) { |
9538 | ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, |
9539 | pos2, length2, x2->copy_on_write); |
9540 | } |
9541 | } |
9542 | |
9543 | roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1, |
9544 | const roaring_bitmap_t *x2) { |
9545 | uint8_t container_result_type = 0; |
9546 | const int length1 = x1->high_low_container.size, |
9547 | length2 = x2->high_low_container.size; |
9548 | if (0 == length1) { |
9549 | return roaring_bitmap_copy(x2); |
9550 | } |
9551 | if (0 == length2) { |
9552 | return roaring_bitmap_copy(x1); |
9553 | } |
9554 | roaring_bitmap_t *answer = |
9555 | roaring_bitmap_create_with_capacity(length1 + length2); |
9556 | answer->copy_on_write = x1->copy_on_write && x2->copy_on_write; |
9557 | int pos1 = 0, pos2 = 0; |
9558 | uint8_t container_type_1, container_type_2; |
9559 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9560 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9561 | while (true) { |
9562 | if (s1 == s2) { |
9563 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
9564 | &container_type_1); |
9565 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9566 | &container_type_2); |
9567 | void *c = |
9568 | container_lazy_xor(c1, container_type_1, c2, container_type_2, |
9569 | &container_result_type); |
9570 | |
9571 | if (container_nonzero_cardinality(c, container_result_type)) { |
9572 | ra_append(&answer->high_low_container, s1, c, |
9573 | container_result_type); |
9574 | } else { |
9575 | container_free(c, container_result_type); |
9576 | } |
9577 | |
9578 | ++pos1; |
9579 | ++pos2; |
9580 | if (pos1 == length1) break; |
9581 | if (pos2 == length2) break; |
9582 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9583 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9584 | |
9585 | } else if (s1 < s2) { // s1 < s2 |
9586 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
9587 | &container_type_1); |
9588 | c1 = |
9589 | get_copy_of_container(c1, &container_type_1, x1->copy_on_write); |
9590 | if (x1->copy_on_write) { |
9591 | ra_set_container_at_index(&x1->high_low_container, pos1, c1, |
9592 | container_type_1); |
9593 | } |
9594 | ra_append(&answer->high_low_container, s1, c1, container_type_1); |
9595 | pos1++; |
9596 | if (pos1 == length1) break; |
9597 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9598 | |
9599 | } else { // s1 > s2 |
9600 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9601 | &container_type_2); |
9602 | c2 = |
9603 | get_copy_of_container(c2, &container_type_2, x2->copy_on_write); |
9604 | if (x2->copy_on_write) { |
9605 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
9606 | container_type_2); |
9607 | } |
9608 | ra_append(&answer->high_low_container, s2, c2, container_type_2); |
9609 | pos2++; |
9610 | if (pos2 == length2) break; |
9611 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9612 | } |
9613 | } |
9614 | if (pos1 == length1) { |
9615 | ra_append_copy_range(&answer->high_low_container, |
9616 | &x2->high_low_container, pos2, length2, |
9617 | x2->copy_on_write); |
9618 | } else if (pos2 == length2) { |
9619 | ra_append_copy_range(&answer->high_low_container, |
9620 | &x1->high_low_container, pos1, length1, |
9621 | x1->copy_on_write); |
9622 | } |
9623 | return answer; |
9624 | } |
9625 | |
9626 | void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1, |
9627 | const roaring_bitmap_t *x2) { |
9628 | assert(x1 != x2); |
9629 | uint8_t container_result_type = 0; |
9630 | int length1 = x1->high_low_container.size; |
9631 | const int length2 = x2->high_low_container.size; |
9632 | |
9633 | if (0 == length2) return; |
9634 | |
9635 | if (0 == length1) { |
9636 | roaring_bitmap_overwrite(x1, x2); |
9637 | return; |
9638 | } |
9639 | int pos1 = 0, pos2 = 0; |
9640 | uint8_t container_type_1, container_type_2; |
9641 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9642 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9643 | while (true) { |
9644 | if (s1 == s2) { |
9645 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
9646 | &container_type_1); |
9647 | c1 = get_writable_copy_if_shared(c1, &container_type_1); |
9648 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9649 | &container_type_2); |
9650 | void *c = |
9651 | container_lazy_ixor(c1, container_type_1, c2, container_type_2, |
9652 | &container_result_type); |
9653 | if (container_nonzero_cardinality(c, container_result_type)) { |
9654 | ra_set_container_at_index(&x1->high_low_container, pos1, c, |
9655 | container_result_type); |
9656 | ++pos1; |
9657 | } else { |
9658 | container_free(c, container_result_type); |
9659 | ra_remove_at_index(&x1->high_low_container, pos1); |
9660 | --length1; |
9661 | } |
9662 | ++pos2; |
9663 | if (pos1 == length1) break; |
9664 | if (pos2 == length2) break; |
9665 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9666 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9667 | |
9668 | } else if (s1 < s2) { // s1 < s2 |
9669 | pos1++; |
9670 | if (pos1 == length1) break; |
9671 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9672 | |
9673 | } else { // s1 > s2 |
9674 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9675 | &container_type_2); |
9676 | // void *c2_clone = container_clone(c2, container_type_2); |
9677 | c2 = |
9678 | get_copy_of_container(c2, &container_type_2, x2->copy_on_write); |
9679 | if (x2->copy_on_write) { |
9680 | ra_set_container_at_index(&x2->high_low_container, pos2, c2, |
9681 | container_type_2); |
9682 | } |
9683 | ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2, |
9684 | container_type_2); |
9685 | pos1++; |
9686 | length1++; |
9687 | pos2++; |
9688 | if (pos2 == length2) break; |
9689 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9690 | } |
9691 | } |
9692 | if (pos1 == length1) { |
9693 | ra_append_copy_range(&x1->high_low_container, &x2->high_low_container, |
9694 | pos2, length2, x2->copy_on_write); |
9695 | } |
9696 | } |
9697 | |
9698 | void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *ra) { |
9699 | for (int i = 0; i < ra->high_low_container.size; ++i) { |
9700 | const uint8_t original_typecode = ra->high_low_container.typecodes[i]; |
9701 | void *container = ra->high_low_container.containers[i]; |
9702 | uint8_t new_typecode = original_typecode; |
9703 | void *newcontainer = |
9704 | container_repair_after_lazy(container, &new_typecode); |
9705 | ra->high_low_container.containers[i] = newcontainer; |
9706 | ra->high_low_container.typecodes[i] = new_typecode; |
9707 | } |
9708 | } |
9709 | |
9710 | |
9711 | |
9712 | /** |
9713 | * roaring_bitmap_rank returns the number of integers that are smaller or equal |
9714 | * to x. |
9715 | */ |
9716 | uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) { |
9717 | uint64_t size = 0; |
9718 | uint32_t xhigh = x >> 16; |
9719 | for (int i = 0; i < bm->high_low_container.size; i++) { |
9720 | uint32_t key = bm->high_low_container.keys[i]; |
9721 | if (xhigh > key) { |
9722 | size += |
9723 | container_get_cardinality(bm->high_low_container.containers[i], |
9724 | bm->high_low_container.typecodes[i]); |
9725 | } else if (xhigh == key) { |
9726 | return size + container_rank(bm->high_low_container.containers[i], |
9727 | bm->high_low_container.typecodes[i], |
9728 | x & 0xFFFF); |
9729 | } else { |
9730 | return size; |
9731 | } |
9732 | } |
9733 | return size; |
9734 | } |
9735 | |
9736 | /** |
9737 | * roaring_bitmap_smallest returns the smallest value in the set. |
9738 | * Returns UINT32_MAX if the set is empty. |
9739 | */ |
9740 | uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) { |
9741 | if (bm->high_low_container.size > 0) { |
9742 | void *container = bm->high_low_container.containers[0]; |
9743 | uint8_t typecode = bm->high_low_container.typecodes[0]; |
9744 | uint32_t key = bm->high_low_container.keys[0]; |
9745 | uint32_t lowvalue = container_minimum(container, typecode); |
9746 | return lowvalue | (key << 16); |
9747 | } |
9748 | return UINT32_MAX; |
9749 | } |
9750 | |
9751 | /** |
9752 | * roaring_bitmap_smallest returns the greatest value in the set. |
9753 | * Returns 0 if the set is empty. |
9754 | */ |
9755 | uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) { |
9756 | if (bm->high_low_container.size > 0) { |
9757 | void *container = |
9758 | bm->high_low_container.containers[bm->high_low_container.size - 1]; |
9759 | uint8_t typecode = |
9760 | bm->high_low_container.typecodes[bm->high_low_container.size - 1]; |
9761 | uint32_t key = |
9762 | bm->high_low_container.keys[bm->high_low_container.size - 1]; |
9763 | uint32_t lowvalue = container_maximum(container, typecode); |
9764 | return lowvalue | (key << 16); |
9765 | } |
9766 | return 0; |
9767 | } |
9768 | |
9769 | bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank, |
9770 | uint32_t *element) { |
9771 | void *container; |
9772 | uint8_t typecode; |
9773 | uint16_t key; |
9774 | uint32_t start_rank = 0; |
9775 | int i = 0; |
9776 | bool valid = false; |
9777 | while (!valid && i < bm->high_low_container.size) { |
9778 | container = bm->high_low_container.containers[i]; |
9779 | typecode = bm->high_low_container.typecodes[i]; |
9780 | valid = |
9781 | container_select(container, typecode, &start_rank, rank, element); |
9782 | i++; |
9783 | } |
9784 | |
9785 | if (valid) { |
9786 | key = bm->high_low_container.keys[i - 1]; |
9787 | *element |= (key << 16); |
9788 | return true; |
9789 | } else |
9790 | return false; |
9791 | } |
9792 | |
9793 | bool roaring_bitmap_intersect(const roaring_bitmap_t *x1, |
9794 | const roaring_bitmap_t *x2) { |
9795 | const int length1 = x1->high_low_container.size, |
9796 | length2 = x2->high_low_container.size; |
9797 | uint64_t answer = 0; |
9798 | int pos1 = 0, pos2 = 0; |
9799 | |
9800 | while (pos1 < length1 && pos2 < length2) { |
9801 | const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1); |
9802 | const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2); |
9803 | |
9804 | if (s1 == s2) { |
9805 | uint8_t container_type_1, container_type_2; |
9806 | void *c1 = ra_get_container_at_index(& x1->high_low_container, pos1, |
9807 | &container_type_1); |
9808 | void *c2 = ra_get_container_at_index(& x2->high_low_container, pos2, |
9809 | &container_type_2); |
9810 | if( container_intersect(c1, container_type_1, c2, container_type_2) ) return true; |
9811 | ++pos1; |
9812 | ++pos2; |
9813 | } else if (s1 < s2) { // s1 < s2 |
9814 | pos1 = ra_advance_until(& x1->high_low_container, s2, pos1); |
9815 | } else { // s1 > s2 |
9816 | pos2 = ra_advance_until(& x2->high_low_container, s1, pos2); |
9817 | } |
9818 | } |
9819 | return answer; |
9820 | } |
9821 | |
9822 | |
9823 | uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1, |
9824 | const roaring_bitmap_t *x2) { |
9825 | const int length1 = x1->high_low_container.size, |
9826 | length2 = x2->high_low_container.size; |
9827 | uint64_t answer = 0; |
9828 | int pos1 = 0, pos2 = 0; |
9829 | |
9830 | while (pos1 < length1 && pos2 < length2) { |
9831 | const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
9832 | const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
9833 | |
9834 | if (s1 == s2) { |
9835 | uint8_t container_type_1, container_type_2; |
9836 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
9837 | &container_type_1); |
9838 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
9839 | &container_type_2); |
9840 | answer += container_and_cardinality(c1, container_type_1, c2, |
9841 | container_type_2); |
9842 | ++pos1; |
9843 | ++pos2; |
9844 | } else if (s1 < s2) { // s1 < s2 |
9845 | pos1 = ra_advance_until(&x1->high_low_container, s2, pos1); |
9846 | } else { // s1 > s2 |
9847 | pos2 = ra_advance_until(&x2->high_low_container, s1, pos2); |
9848 | } |
9849 | } |
9850 | return answer; |
9851 | } |
9852 | |
9853 | double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1, |
9854 | const roaring_bitmap_t *x2) { |
9855 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
9856 | const uint64_t c2 = roaring_bitmap_get_cardinality(x2); |
9857 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
9858 | return (double)inter / (double)(c1 + c2 - inter); |
9859 | } |
9860 | |
9861 | uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1, |
9862 | const roaring_bitmap_t *x2) { |
9863 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
9864 | const uint64_t c2 = roaring_bitmap_get_cardinality(x2); |
9865 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
9866 | return c1 + c2 - inter; |
9867 | } |
9868 | |
9869 | uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1, |
9870 | const roaring_bitmap_t *x2) { |
9871 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
9872 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
9873 | return c1 - inter; |
9874 | } |
9875 | |
9876 | uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1, |
9877 | const roaring_bitmap_t *x2) { |
9878 | const uint64_t c1 = roaring_bitmap_get_cardinality(x1); |
9879 | const uint64_t c2 = roaring_bitmap_get_cardinality(x2); |
9880 | const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2); |
9881 | return c1 + c2 - 2 * inter; |
9882 | } |
9883 | |
9884 | |
9885 | /** |
9886 | * Check whether a range of values from range_start (included) to range_end (excluded) is present |
9887 | */ |
9888 | bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) { |
9889 | if(range_end >= UINT64_C(0x100000000)) { |
9890 | range_end = UINT64_C(0x100000000); |
9891 | } |
9892 | if (range_start >= range_end) return true; // empty range are always contained! |
9893 | if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start); |
9894 | uint16_t hb_rs = (uint16_t)(range_start >> 16); |
9895 | uint16_t hb_re = (uint16_t)((range_end - 1) >> 16); |
9896 | const int32_t span = hb_re - hb_rs; |
9897 | const int32_t hlc_sz = ra_get_size(&r->high_low_container); |
9898 | if (hlc_sz < span + 1) { |
9899 | return false; |
9900 | } |
9901 | int32_t is = ra_get_index(&r->high_low_container, hb_rs); |
9902 | int32_t ie = ra_get_index(&r->high_low_container, hb_re); |
9903 | ie = (ie < 0 ? -ie - 1 : ie); |
9904 | if ((is < 0) || ((ie - is) != span)) { |
9905 | return false; |
9906 | } |
9907 | const uint32_t lb_rs = range_start & 0xFFFF; |
9908 | const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1; |
9909 | uint8_t typecode; |
9910 | void *container = ra_get_container_at_index(&r->high_low_container, is, &typecode); |
9911 | if (hb_rs == hb_re) { |
9912 | return container_contains_range(container, lb_rs, lb_re, typecode); |
9913 | } |
9914 | if (!container_contains_range(container, lb_rs, 1 << 16, typecode)) { |
9915 | return false; |
9916 | } |
9917 | assert(ie < hlc_sz); // would indicate an algorithmic bug |
9918 | container = ra_get_container_at_index(&r->high_low_container, ie, &typecode); |
9919 | if (!container_contains_range(container, 0, lb_re, typecode)) { |
9920 | return false; |
9921 | } |
9922 | for (int32_t i = is + 1; i < ie; ++i) { |
9923 | container = ra_get_container_at_index(&r->high_low_container, i, &typecode); |
9924 | if (!container_is_full(container, typecode) ) { |
9925 | return false; |
9926 | } |
9927 | } |
9928 | return true; |
9929 | } |
9930 | |
9931 | |
9932 | bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *ra1, |
9933 | const roaring_bitmap_t *ra2) { |
9934 | return (roaring_bitmap_get_cardinality(ra2) > |
9935 | roaring_bitmap_get_cardinality(ra1) && |
9936 | roaring_bitmap_is_subset(ra1, ra2)); |
9937 | } |
9938 | /* end file /opt/bitmap/CRoaring-0.2.57/src/roaring.c */ |
9939 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */ |
9940 | #include <assert.h> |
9941 | #include <stdbool.h> |
9942 | #include <stdio.h> |
9943 | #include <stdlib.h> |
9944 | #include <string.h> |
9945 | #include <inttypes.h> |
9946 | |
9947 | |
9948 | // Convention: [0,ra->size) all elements are initialized |
9949 | // [ra->size, ra->allocation_size) is junk and contains nothing needing freeing |
9950 | |
9951 | extern inline int32_t ra_get_size(const roaring_array_t *ra); |
9952 | extern inline int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); |
9953 | extern inline void *ra_get_container_at_index(const roaring_array_t *ra, |
9954 | uint16_t i, uint8_t *typecode); |
9955 | extern inline void ra_unshare_container_at_index(roaring_array_t *ra, |
9956 | uint16_t i); |
9957 | extern inline void ra_replace_key_and_container_at_index(roaring_array_t *ra, |
9958 | int32_t i, |
9959 | uint16_t key, void *c, |
9960 | uint8_t typecode); |
9961 | extern inline void ra_set_container_at_index(const roaring_array_t *ra, |
9962 | int32_t i, void *c, |
9963 | uint8_t typecode); |
9964 | |
9965 | #define INITIAL_CAPACITY 4 |
9966 | |
9967 | static bool realloc_array(roaring_array_t *ra, int32_t new_capacity) { |
9968 | // because we combine the allocations, it is not possible to use realloc |
9969 | /*ra->keys = |
9970 | (uint16_t *)realloc(ra->keys, sizeof(uint16_t) * new_capacity); |
9971 | ra->containers = |
9972 | (void **)realloc(ra->containers, sizeof(void *) * new_capacity); |
9973 | ra->typecodes = |
9974 | (uint8_t *)realloc(ra->typecodes, sizeof(uint8_t) * new_capacity); |
9975 | if (!ra->keys || !ra->containers || !ra->typecodes) { |
9976 | free(ra->keys); |
9977 | free(ra->containers); |
9978 | free(ra->typecodes); |
9979 | return false; |
9980 | }*/ |
9981 | |
9982 | if ( new_capacity == 0 ) { |
9983 | free(ra->containers); |
9984 | ra->containers = NULL; |
9985 | ra->keys = NULL; |
9986 | ra->typecodes = NULL; |
9987 | ra->allocation_size = 0; |
9988 | return true; |
9989 | } |
9990 | const size_t memoryneeded = |
9991 | new_capacity * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
9992 | void *bigalloc = malloc(memoryneeded); |
9993 | if (!bigalloc) return false; |
9994 | void *oldbigalloc = ra->containers; |
9995 | void **newcontainers = (void **)bigalloc; |
9996 | uint16_t *newkeys = (uint16_t *)(newcontainers + new_capacity); |
9997 | uint8_t *newtypecodes = (uint8_t *)(newkeys + new_capacity); |
9998 | assert((char *)(newtypecodes + new_capacity) == |
9999 | (char *)bigalloc + memoryneeded); |
10000 | if(ra->size > 0) { |
10001 | memcpy(newcontainers, ra->containers, sizeof(void *) * ra->size); |
10002 | memcpy(newkeys, ra->keys, sizeof(uint16_t) * ra->size); |
10003 | memcpy(newtypecodes, ra->typecodes, sizeof(uint8_t) * ra->size); |
10004 | } |
10005 | ra->containers = newcontainers; |
10006 | ra->keys = newkeys; |
10007 | ra->typecodes = newtypecodes; |
10008 | ra->allocation_size = new_capacity; |
10009 | free(oldbigalloc); |
10010 | return true; |
10011 | } |
10012 | |
10013 | bool ra_init_with_capacity(roaring_array_t *new_ra, uint32_t cap) { |
10014 | if (!new_ra) return false; |
10015 | new_ra->keys = NULL; |
10016 | new_ra->containers = NULL; |
10017 | new_ra->typecodes = NULL; |
10018 | |
10019 | new_ra->allocation_size = cap; |
10020 | new_ra->size = 0; |
10021 | if(cap > 0) { |
10022 | void *bigalloc = |
10023 | malloc(cap * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t))); |
10024 | if( bigalloc == NULL ) return false; |
10025 | new_ra->containers = (void **)bigalloc; |
10026 | new_ra->keys = (uint16_t *)(new_ra->containers + cap); |
10027 | new_ra->typecodes = (uint8_t *)(new_ra->keys + cap); |
10028 | } |
10029 | return true; |
10030 | } |
10031 | |
10032 | int ra_shrink_to_fit(roaring_array_t *ra) { |
10033 | int savings = (ra->allocation_size - ra->size) * |
10034 | (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
10035 | if (!realloc_array(ra, ra->size)) { |
10036 | return 0; |
10037 | } |
10038 | ra->allocation_size = ra->size; |
10039 | return savings; |
10040 | } |
10041 | |
10042 | bool ra_init(roaring_array_t *t) { |
10043 | return ra_init_with_capacity(t, INITIAL_CAPACITY); |
10044 | } |
10045 | |
10046 | bool ra_copy(const roaring_array_t *source, roaring_array_t *dest, |
10047 | bool copy_on_write) { |
10048 | if (!ra_init_with_capacity(dest, source->size)) return false; |
10049 | dest->size = source->size; |
10050 | dest->allocation_size = source->size; |
10051 | if(dest->size > 0) { |
10052 | memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); |
10053 | } |
10054 | // we go through the containers, turning them into shared containers... |
10055 | if (copy_on_write) { |
10056 | for (int32_t i = 0; i < dest->size; ++i) { |
10057 | source->containers[i] = get_copy_of_container( |
10058 | source->containers[i], &source->typecodes[i], copy_on_write); |
10059 | } |
10060 | // we do a shallow copy to the other bitmap |
10061 | if(dest->size > 0) { |
10062 | memcpy(dest->containers, source->containers, |
10063 | dest->size * sizeof(void *)); |
10064 | memcpy(dest->typecodes, source->typecodes, |
10065 | dest->size * sizeof(uint8_t)); |
10066 | } |
10067 | } else { |
10068 | if(dest->size > 0) { |
10069 | memcpy(dest->typecodes, source->typecodes, |
10070 | dest->size * sizeof(uint8_t)); |
10071 | } |
10072 | for (int32_t i = 0; i < dest->size; i++) { |
10073 | dest->containers[i] = |
10074 | container_clone(source->containers[i], source->typecodes[i]); |
10075 | if (dest->containers[i] == NULL) { |
10076 | for (int32_t j = 0; j < i; j++) { |
10077 | container_free(dest->containers[j], dest->typecodes[j]); |
10078 | } |
10079 | ra_clear_without_containers(dest); |
10080 | return false; |
10081 | } |
10082 | } |
10083 | } |
10084 | return true; |
10085 | } |
10086 | |
10087 | bool ra_overwrite(const roaring_array_t *source, roaring_array_t *dest, |
10088 | bool copy_on_write) { |
10089 | ra_clear_containers(dest); // we are going to overwrite them |
10090 | if (dest->allocation_size < source->size) { |
10091 | if (!realloc_array(dest, source->size)) { |
10092 | return false; |
10093 | } |
10094 | } |
10095 | dest->size = source->size; |
10096 | memcpy(dest->keys, source->keys, dest->size * sizeof(uint16_t)); |
10097 | // we go through the containers, turning them into shared containers... |
10098 | if (copy_on_write) { |
10099 | for (int32_t i = 0; i < dest->size; ++i) { |
10100 | source->containers[i] = get_copy_of_container( |
10101 | source->containers[i], &source->typecodes[i], copy_on_write); |
10102 | } |
10103 | // we do a shallow copy to the other bitmap |
10104 | memcpy(dest->containers, source->containers, |
10105 | dest->size * sizeof(void *)); |
10106 | memcpy(dest->typecodes, source->typecodes, |
10107 | dest->size * sizeof(uint8_t)); |
10108 | } else { |
10109 | memcpy(dest->typecodes, source->typecodes, |
10110 | dest->size * sizeof(uint8_t)); |
10111 | for (int32_t i = 0; i < dest->size; i++) { |
10112 | dest->containers[i] = |
10113 | container_clone(source->containers[i], source->typecodes[i]); |
10114 | if (dest->containers[i] == NULL) { |
10115 | for (int32_t j = 0; j < i; j++) { |
10116 | container_free(dest->containers[j], dest->typecodes[j]); |
10117 | } |
10118 | ra_clear_without_containers(dest); |
10119 | return false; |
10120 | } |
10121 | } |
10122 | } |
10123 | return true; |
10124 | } |
10125 | |
10126 | void ra_clear_containers(roaring_array_t *ra) { |
10127 | for (int32_t i = 0; i < ra->size; ++i) { |
10128 | container_free(ra->containers[i], ra->typecodes[i]); |
10129 | } |
10130 | } |
10131 | |
10132 | void ra_reset(roaring_array_t *ra) { |
10133 | ra_clear_containers(ra); |
10134 | ra->size = 0; |
10135 | ra_shrink_to_fit(ra); |
10136 | } |
10137 | |
10138 | void ra_clear_without_containers(roaring_array_t *ra) { |
10139 | free(ra->containers); // keys and typecodes are allocated with containers |
10140 | ra->size = 0; |
10141 | ra->allocation_size = 0; |
10142 | ra->containers = NULL; |
10143 | ra->keys = NULL; |
10144 | ra->typecodes = NULL; |
10145 | } |
10146 | |
10147 | void ra_clear(roaring_array_t *ra) { |
10148 | ra_clear_containers(ra); |
10149 | ra_clear_without_containers(ra); |
10150 | } |
10151 | |
10152 | bool extend_array(roaring_array_t *ra, int32_t k) { |
10153 | int32_t desired_size = ra->size + k; |
10154 | assert(desired_size <= MAX_CONTAINERS); |
10155 | if (desired_size > ra->allocation_size) { |
10156 | int32_t new_capacity = |
10157 | (ra->size < 1024) ? 2 * desired_size : 5 * desired_size / 4; |
10158 | if (new_capacity > MAX_CONTAINERS) { |
10159 | new_capacity = MAX_CONTAINERS; |
10160 | } |
10161 | |
10162 | return realloc_array(ra, new_capacity); |
10163 | } |
10164 | return true; |
10165 | } |
10166 | |
10167 | void ra_append(roaring_array_t *ra, uint16_t key, void *container, |
10168 | uint8_t typecode) { |
10169 | extend_array(ra, 1); |
10170 | const int32_t pos = ra->size; |
10171 | |
10172 | ra->keys[pos] = key; |
10173 | ra->containers[pos] = container; |
10174 | ra->typecodes[pos] = typecode; |
10175 | ra->size++; |
10176 | } |
10177 | |
10178 | void ra_append_copy(roaring_array_t *ra, const roaring_array_t *sa, |
10179 | uint16_t index, bool copy_on_write) { |
10180 | extend_array(ra, 1); |
10181 | const int32_t pos = ra->size; |
10182 | |
10183 | // old contents is junk not needing freeing |
10184 | ra->keys[pos] = sa->keys[index]; |
10185 | // the shared container will be in two bitmaps |
10186 | if (copy_on_write) { |
10187 | sa->containers[index] = get_copy_of_container( |
10188 | sa->containers[index], &sa->typecodes[index], copy_on_write); |
10189 | ra->containers[pos] = sa->containers[index]; |
10190 | ra->typecodes[pos] = sa->typecodes[index]; |
10191 | } else { |
10192 | ra->containers[pos] = |
10193 | container_clone(sa->containers[index], sa->typecodes[index]); |
10194 | ra->typecodes[pos] = sa->typecodes[index]; |
10195 | } |
10196 | ra->size++; |
10197 | } |
10198 | |
10199 | void ra_append_copies_until(roaring_array_t *ra, const roaring_array_t *sa, |
10200 | uint16_t stopping_key, bool copy_on_write) { |
10201 | for (int32_t i = 0; i < sa->size; ++i) { |
10202 | if (sa->keys[i] >= stopping_key) break; |
10203 | ra_append_copy(ra, sa, i, copy_on_write); |
10204 | } |
10205 | } |
10206 | |
10207 | void ra_append_copy_range(roaring_array_t *ra, const roaring_array_t *sa, |
10208 | int32_t start_index, int32_t end_index, |
10209 | bool copy_on_write) { |
10210 | extend_array(ra, end_index - start_index); |
10211 | for (int32_t i = start_index; i < end_index; ++i) { |
10212 | const int32_t pos = ra->size; |
10213 | ra->keys[pos] = sa->keys[i]; |
10214 | if (copy_on_write) { |
10215 | sa->containers[i] = get_copy_of_container( |
10216 | sa->containers[i], &sa->typecodes[i], copy_on_write); |
10217 | ra->containers[pos] = sa->containers[i]; |
10218 | ra->typecodes[pos] = sa->typecodes[i]; |
10219 | } else { |
10220 | ra->containers[pos] = |
10221 | container_clone(sa->containers[i], sa->typecodes[i]); |
10222 | ra->typecodes[pos] = sa->typecodes[i]; |
10223 | } |
10224 | ra->size++; |
10225 | } |
10226 | } |
10227 | |
10228 | void ra_append_copies_after(roaring_array_t *ra, const roaring_array_t *sa, |
10229 | uint16_t before_start, bool copy_on_write) { |
10230 | int start_location = ra_get_index(sa, before_start); |
10231 | if (start_location >= 0) |
10232 | ++start_location; |
10233 | else |
10234 | start_location = -start_location - 1; |
10235 | ra_append_copy_range(ra, sa, start_location, sa->size, copy_on_write); |
10236 | } |
10237 | |
10238 | void ra_append_move_range(roaring_array_t *ra, roaring_array_t *sa, |
10239 | int32_t start_index, int32_t end_index) { |
10240 | extend_array(ra, end_index - start_index); |
10241 | |
10242 | for (int32_t i = start_index; i < end_index; ++i) { |
10243 | const int32_t pos = ra->size; |
10244 | |
10245 | ra->keys[pos] = sa->keys[i]; |
10246 | ra->containers[pos] = sa->containers[i]; |
10247 | ra->typecodes[pos] = sa->typecodes[i]; |
10248 | ra->size++; |
10249 | } |
10250 | } |
10251 | |
10252 | void ra_append_range(roaring_array_t *ra, roaring_array_t *sa, |
10253 | int32_t start_index, int32_t end_index, |
10254 | bool copy_on_write) { |
10255 | extend_array(ra, end_index - start_index); |
10256 | |
10257 | for (int32_t i = start_index; i < end_index; ++i) { |
10258 | const int32_t pos = ra->size; |
10259 | ra->keys[pos] = sa->keys[i]; |
10260 | if (copy_on_write) { |
10261 | sa->containers[i] = get_copy_of_container( |
10262 | sa->containers[i], &sa->typecodes[i], copy_on_write); |
10263 | ra->containers[pos] = sa->containers[i]; |
10264 | ra->typecodes[pos] = sa->typecodes[i]; |
10265 | } else { |
10266 | ra->containers[pos] = |
10267 | container_clone(sa->containers[i], sa->typecodes[i]); |
10268 | ra->typecodes[pos] = sa->typecodes[i]; |
10269 | } |
10270 | ra->size++; |
10271 | } |
10272 | } |
10273 | |
10274 | void *ra_get_container(roaring_array_t *ra, uint16_t x, uint8_t *typecode) { |
10275 | int i = binarySearch(ra->keys, (int32_t)ra->size, x); |
10276 | if (i < 0) return NULL; |
10277 | *typecode = ra->typecodes[i]; |
10278 | return ra->containers[i]; |
10279 | } |
10280 | |
10281 | extern void *ra_get_container_at_index(const roaring_array_t *ra, uint16_t i, |
10282 | uint8_t *typecode); |
10283 | |
10284 | void *ra_get_writable_container(roaring_array_t *ra, uint16_t x, |
10285 | uint8_t *typecode) { |
10286 | int i = binarySearch(ra->keys, (int32_t)ra->size, x); |
10287 | if (i < 0) return NULL; |
10288 | *typecode = ra->typecodes[i]; |
10289 | return get_writable_copy_if_shared(ra->containers[i], typecode); |
10290 | } |
10291 | |
10292 | void *ra_get_writable_container_at_index(roaring_array_t *ra, uint16_t i, |
10293 | uint8_t *typecode) { |
10294 | assert(i < ra->size); |
10295 | *typecode = ra->typecodes[i]; |
10296 | return get_writable_copy_if_shared(ra->containers[i], typecode); |
10297 | } |
10298 | |
10299 | uint16_t ra_get_key_at_index(const roaring_array_t *ra, uint16_t i) { |
10300 | return ra->keys[i]; |
10301 | } |
10302 | |
10303 | extern int32_t ra_get_index(const roaring_array_t *ra, uint16_t x); |
10304 | |
10305 | extern int32_t ra_advance_until(const roaring_array_t *ra, uint16_t x, |
10306 | int32_t pos); |
10307 | |
10308 | // everything skipped over is freed |
10309 | int32_t ra_advance_until_freeing(roaring_array_t *ra, uint16_t x, int32_t pos) { |
10310 | while (pos < ra->size && ra->keys[pos] < x) { |
10311 | container_free(ra->containers[pos], ra->typecodes[pos]); |
10312 | ++pos; |
10313 | } |
10314 | return pos; |
10315 | } |
10316 | |
10317 | void ra_insert_new_key_value_at(roaring_array_t *ra, int32_t i, uint16_t key, |
10318 | void *container, uint8_t typecode) { |
10319 | extend_array(ra, 1); |
10320 | // May be an optimization opportunity with DIY memmove |
10321 | memmove(&(ra->keys[i + 1]), &(ra->keys[i]), |
10322 | sizeof(uint16_t) * (ra->size - i)); |
10323 | memmove(&(ra->containers[i + 1]), &(ra->containers[i]), |
10324 | sizeof(void *) * (ra->size - i)); |
10325 | memmove(&(ra->typecodes[i + 1]), &(ra->typecodes[i]), |
10326 | sizeof(uint8_t) * (ra->size - i)); |
10327 | ra->keys[i] = key; |
10328 | ra->containers[i] = container; |
10329 | ra->typecodes[i] = typecode; |
10330 | ra->size++; |
10331 | } |
10332 | |
10333 | // note: Java routine set things to 0, enabling GC. |
10334 | // Java called it "resize" but it was always used to downsize. |
10335 | // Allowing upsize would break the conventions about |
10336 | // valid containers below ra->size. |
10337 | |
10338 | void ra_downsize(roaring_array_t *ra, int32_t new_length) { |
10339 | assert(new_length <= ra->size); |
10340 | ra->size = new_length; |
10341 | } |
10342 | |
10343 | void ra_remove_at_index(roaring_array_t *ra, int32_t i) { |
10344 | memmove(&(ra->containers[i]), &(ra->containers[i + 1]), |
10345 | sizeof(void *) * (ra->size - i - 1)); |
10346 | memmove(&(ra->keys[i]), &(ra->keys[i + 1]), |
10347 | sizeof(uint16_t) * (ra->size - i - 1)); |
10348 | memmove(&(ra->typecodes[i]), &(ra->typecodes[i + 1]), |
10349 | sizeof(uint8_t) * (ra->size - i - 1)); |
10350 | ra->size--; |
10351 | } |
10352 | |
10353 | void ra_remove_at_index_and_free(roaring_array_t *ra, int32_t i) { |
10354 | container_free(ra->containers[i], ra->typecodes[i]); |
10355 | ra_remove_at_index(ra, i); |
10356 | } |
10357 | |
10358 | // used in inplace andNot only, to slide left the containers from |
10359 | // the mutated RoaringBitmap that are after the largest container of |
10360 | // the argument RoaringBitmap. In use it should be followed by a call to |
10361 | // downsize. |
10362 | // |
10363 | void ra_copy_range(roaring_array_t *ra, uint32_t begin, uint32_t end, |
10364 | uint32_t new_begin) { |
10365 | assert(begin <= end); |
10366 | assert(new_begin < begin); |
10367 | |
10368 | const int range = end - begin; |
10369 | |
10370 | // We ensure to previously have freed overwritten containers |
10371 | // that are not copied elsewhere |
10372 | |
10373 | memmove(&(ra->containers[new_begin]), &(ra->containers[begin]), |
10374 | sizeof(void *) * range); |
10375 | memmove(&(ra->keys[new_begin]), &(ra->keys[begin]), |
10376 | sizeof(uint16_t) * range); |
10377 | memmove(&(ra->typecodes[new_begin]), &(ra->typecodes[begin]), |
10378 | sizeof(uint8_t) * range); |
10379 | } |
10380 | |
10381 | void ra_shift_tail(roaring_array_t *ra, int32_t count, int32_t distance) { |
10382 | if (distance > 0) { |
10383 | extend_array(ra, distance); |
10384 | } |
10385 | int32_t srcpos = ra->size - count; |
10386 | int32_t dstpos = srcpos + distance; |
10387 | memmove(&(ra->keys[dstpos]), &(ra->keys[srcpos]), |
10388 | sizeof(uint16_t) * count); |
10389 | memmove(&(ra->containers[dstpos]), &(ra->containers[srcpos]), |
10390 | sizeof(void *) * count); |
10391 | memmove(&(ra->typecodes[dstpos]), &(ra->typecodes[srcpos]), |
10392 | sizeof(uint8_t) * count); |
10393 | ra->size += distance; |
10394 | } |
10395 | |
10396 | |
10397 | size_t ra_size_in_bytes(roaring_array_t *ra) { |
10398 | size_t cardinality = 0; |
10399 | size_t tot_len = |
10400 | 1 /* initial byte type */ + 4 /* tot_len */ + sizeof(roaring_array_t) + |
10401 | ra->size * (sizeof(uint16_t) + sizeof(void *) + sizeof(uint8_t)); |
10402 | for (int32_t i = 0; i < ra->size; i++) { |
10403 | tot_len += |
10404 | (container_serialization_len(ra->containers[i], ra->typecodes[i]) + |
10405 | sizeof(uint16_t)); |
10406 | cardinality += |
10407 | container_get_cardinality(ra->containers[i], ra->typecodes[i]); |
10408 | } |
10409 | |
10410 | if ((cardinality * sizeof(uint32_t) + sizeof(uint32_t)) < tot_len) { |
10411 | return cardinality * sizeof(uint32_t) + 1 + sizeof(uint32_t); |
10412 | } |
10413 | return tot_len; |
10414 | } |
10415 | |
10416 | void ra_to_uint32_array(const roaring_array_t *ra, uint32_t *ans) { |
10417 | size_t ctr = 0; |
10418 | for (int32_t i = 0; i < ra->size; ++i) { |
10419 | int num_added = container_to_uint32_array( |
10420 | ans + ctr, ra->containers[i], ra->typecodes[i], |
10421 | ((uint32_t)ra->keys[i]) << 16); |
10422 | ctr += num_added; |
10423 | } |
10424 | } |
10425 | |
10426 | bool ra_range_uint32_array(const roaring_array_t *ra, size_t offset, size_t limit, uint32_t *ans) { |
10427 | size_t ctr = 0; |
10428 | size_t dtr = 0; |
10429 | |
10430 | size_t t_limit = 0; |
10431 | |
10432 | bool first = false; |
10433 | size_t first_skip = 0; |
10434 | |
10435 | uint32_t *t_ans = NULL; |
10436 | size_t cur_len = 0; |
10437 | |
10438 | for (int i = 0; i < ra->size; ++i) { |
10439 | |
10440 | const void *container = container_unwrap_shared(ra->containers[i], &ra->typecodes[i]); |
10441 | switch (ra->typecodes[i]) { |
10442 | case BITSET_CONTAINER_TYPE_CODE: |
10443 | t_limit = ((const bitset_container_t *)container)->cardinality; |
10444 | break; |
10445 | case ARRAY_CONTAINER_TYPE_CODE: |
10446 | t_limit = ((const array_container_t *)container)->cardinality; |
10447 | break; |
10448 | case RUN_CONTAINER_TYPE_CODE: |
10449 | t_limit = run_container_cardinality((const run_container_t *)container); |
10450 | break; |
10451 | } |
10452 | if (ctr + t_limit - 1 >= offset && ctr < offset + limit){ |
10453 | if (!first){ |
10454 | //first_skip = t_limit - (ctr + t_limit - offset); |
10455 | first_skip = offset - ctr; |
10456 | first = true; |
10457 | t_ans = (uint32_t *)malloc(sizeof(*t_ans) * (first_skip + limit)); |
10458 | if(t_ans == NULL) { |
10459 | return false; |
10460 | } |
10461 | memset(t_ans, 0, sizeof(*t_ans) * (first_skip + limit)) ; |
10462 | cur_len = first_skip + limit; |
10463 | } |
10464 | if (dtr + t_limit > cur_len){ |
10465 | uint32_t * append_ans = (uint32_t *)malloc(sizeof(*append_ans) * (cur_len + t_limit)); |
10466 | if(append_ans == NULL) { |
10467 | if(t_ans != NULL) free(t_ans); |
10468 | return false; |
10469 | } |
10470 | memset(append_ans, 0, sizeof(*append_ans) * (cur_len + t_limit)); |
10471 | cur_len = cur_len + t_limit; |
10472 | memcpy(append_ans, t_ans, dtr * sizeof(uint32_t)); |
10473 | free(t_ans); |
10474 | t_ans = append_ans; |
10475 | } |
10476 | switch (ra->typecodes[i]) { |
10477 | case BITSET_CONTAINER_TYPE_CODE: |
10478 | container_to_uint32_array( |
10479 | t_ans + dtr, (const bitset_container_t *)container, ra->typecodes[i], |
10480 | ((uint32_t)ra->keys[i]) << 16); |
10481 | break; |
10482 | case ARRAY_CONTAINER_TYPE_CODE: |
10483 | container_to_uint32_array( |
10484 | t_ans + dtr, (const array_container_t *)container, ra->typecodes[i], |
10485 | ((uint32_t)ra->keys[i]) << 16); |
10486 | break; |
10487 | case RUN_CONTAINER_TYPE_CODE: |
10488 | container_to_uint32_array( |
10489 | t_ans + dtr, (const run_container_t *)container, ra->typecodes[i], |
10490 | ((uint32_t)ra->keys[i]) << 16); |
10491 | break; |
10492 | } |
10493 | dtr += t_limit; |
10494 | } |
10495 | ctr += t_limit; |
10496 | if (dtr-first_skip >= limit) break; |
10497 | } |
10498 | if(t_ans != NULL) { |
10499 | memcpy(ans, t_ans+first_skip, limit * sizeof(uint32_t)); |
10500 | free(t_ans); |
10501 | } |
10502 | return true; |
10503 | } |
10504 | |
10505 | bool ra_has_run_container(const roaring_array_t *ra) { |
10506 | for (int32_t k = 0; k < ra->size; ++k) { |
10507 | if (get_container_type(ra->containers[k], ra->typecodes[k]) == |
10508 | RUN_CONTAINER_TYPE_CODE) |
10509 | return true; |
10510 | } |
10511 | return false; |
10512 | } |
10513 | |
10514 | uint32_t (const roaring_array_t *ra) { |
10515 | if (ra_has_run_container(ra)) { |
10516 | if (ra->size < |
10517 | NO_OFFSET_THRESHOLD) { // for small bitmaps, we omit the offsets |
10518 | return 4 + (ra->size + 7) / 8 + 4 * ra->size; |
10519 | } |
10520 | return 4 + (ra->size + 7) / 8 + |
10521 | 8 * ra->size; // - 4 because we pack the size with the cookie |
10522 | } else { |
10523 | return 4 + 4 + 8 * ra->size; |
10524 | } |
10525 | } |
10526 | |
10527 | size_t ra_portable_size_in_bytes(const roaring_array_t *ra) { |
10528 | size_t count = ra_portable_header_size(ra); |
10529 | |
10530 | for (int32_t k = 0; k < ra->size; ++k) { |
10531 | count += container_size_in_bytes(ra->containers[k], ra->typecodes[k]); |
10532 | } |
10533 | return count; |
10534 | } |
10535 | |
10536 | size_t ra_portable_serialize(const roaring_array_t *ra, char *buf) { |
10537 | char *initbuf = buf; |
10538 | uint32_t startOffset = 0; |
10539 | bool hasrun = ra_has_run_container(ra); |
10540 | if (hasrun) { |
10541 | uint32_t cookie = SERIAL_COOKIE | ((ra->size - 1) << 16); |
10542 | memcpy(buf, &cookie, sizeof(cookie)); |
10543 | buf += sizeof(cookie); |
10544 | uint32_t s = (ra->size + 7) / 8; |
10545 | uint8_t *bitmapOfRunContainers = (uint8_t *)calloc(s, 1); |
10546 | assert(bitmapOfRunContainers != NULL); // todo: handle |
10547 | for (int32_t i = 0; i < ra->size; ++i) { |
10548 | if (get_container_type(ra->containers[i], ra->typecodes[i]) == |
10549 | RUN_CONTAINER_TYPE_CODE) { |
10550 | bitmapOfRunContainers[i / 8] |= (1 << (i % 8)); |
10551 | } |
10552 | } |
10553 | memcpy(buf, bitmapOfRunContainers, s); |
10554 | buf += s; |
10555 | free(bitmapOfRunContainers); |
10556 | if (ra->size < NO_OFFSET_THRESHOLD) { |
10557 | startOffset = 4 + 4 * ra->size + s; |
10558 | } else { |
10559 | startOffset = 4 + 8 * ra->size + s; |
10560 | } |
10561 | } else { // backwards compatibility |
10562 | uint32_t cookie = SERIAL_COOKIE_NO_RUNCONTAINER; |
10563 | |
10564 | memcpy(buf, &cookie, sizeof(cookie)); |
10565 | buf += sizeof(cookie); |
10566 | memcpy(buf, &ra->size, sizeof(ra->size)); |
10567 | buf += sizeof(ra->size); |
10568 | |
10569 | startOffset = 4 + 4 + 4 * ra->size + 4 * ra->size; |
10570 | } |
10571 | for (int32_t k = 0; k < ra->size; ++k) { |
10572 | memcpy(buf, &ra->keys[k], sizeof(ra->keys[k])); |
10573 | buf += sizeof(ra->keys[k]); |
10574 | // get_cardinality returns a value in [1,1<<16], subtracting one |
10575 | // we get [0,1<<16 - 1] which fits in 16 bits |
10576 | uint16_t card = (uint16_t)( |
10577 | container_get_cardinality(ra->containers[k], ra->typecodes[k]) - 1); |
10578 | memcpy(buf, &card, sizeof(card)); |
10579 | buf += sizeof(card); |
10580 | } |
10581 | if ((!hasrun) || (ra->size >= NO_OFFSET_THRESHOLD)) { |
10582 | // writing the containers offsets |
10583 | for (int32_t k = 0; k < ra->size; k++) { |
10584 | memcpy(buf, &startOffset, sizeof(startOffset)); |
10585 | buf += sizeof(startOffset); |
10586 | startOffset = |
10587 | startOffset + |
10588 | container_size_in_bytes(ra->containers[k], ra->typecodes[k]); |
10589 | } |
10590 | } |
10591 | for (int32_t k = 0; k < ra->size; ++k) { |
10592 | buf += container_write(ra->containers[k], ra->typecodes[k], buf); |
10593 | } |
10594 | return buf - initbuf; |
10595 | } |
10596 | |
10597 | // Quickly checks whether there is a serialized bitmap at the pointer, |
10598 | // not exceeding size "maxbytes" in bytes. This function does not allocate |
10599 | // memory dynamically. |
10600 | // |
10601 | // This function returns 0 if and only if no valid bitmap is found. |
10602 | // Otherwise, it returns how many bytes are occupied. |
10603 | // |
10604 | size_t ra_portable_deserialize_size(const char *buf, const size_t maxbytes) { |
10605 | size_t bytestotal = sizeof(int32_t);// for cookie |
10606 | if(bytestotal > maxbytes) return 0; |
10607 | uint32_t cookie; |
10608 | memcpy(&cookie, buf, sizeof(int32_t)); |
10609 | buf += sizeof(uint32_t); |
10610 | if ((cookie & 0xFFFF) != SERIAL_COOKIE && |
10611 | cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { |
10612 | return 0; |
10613 | } |
10614 | int32_t size; |
10615 | |
10616 | if ((cookie & 0xFFFF) == SERIAL_COOKIE) |
10617 | size = (cookie >> 16) + 1; |
10618 | else { |
10619 | bytestotal += sizeof(int32_t); |
10620 | if(bytestotal > maxbytes) return 0; |
10621 | memcpy(&size, buf, sizeof(int32_t)); |
10622 | buf += sizeof(uint32_t); |
10623 | } |
10624 | if (size > (1<<16)) { |
10625 | return 0; // logically impossible |
10626 | } |
10627 | char *bitmapOfRunContainers = NULL; |
10628 | bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; |
10629 | if (hasrun) { |
10630 | int32_t s = (size + 7) / 8; |
10631 | bytestotal += s; |
10632 | if(bytestotal > maxbytes) return 0; |
10633 | bitmapOfRunContainers = (char *)buf; |
10634 | buf += s; |
10635 | } |
10636 | bytestotal += size * 2 * sizeof(uint16_t); |
10637 | if(bytestotal > maxbytes) return 0; |
10638 | uint16_t *keyscards = (uint16_t *)buf; |
10639 | buf += size * 2 * sizeof(uint16_t); |
10640 | if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { |
10641 | // skipping the offsets |
10642 | bytestotal += size * 4; |
10643 | if(bytestotal > maxbytes) return 0; |
10644 | buf += size * 4; |
10645 | } |
10646 | // Reading the containers |
10647 | for (int32_t k = 0; k < size; ++k) { |
10648 | uint16_t tmp; |
10649 | memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); |
10650 | uint32_t thiscard = tmp + 1; |
10651 | bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); |
10652 | bool isrun = false; |
10653 | if(hasrun) { |
10654 | if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { |
10655 | isbitmap = false; |
10656 | isrun = true; |
10657 | } |
10658 | } |
10659 | if (isbitmap) { |
10660 | size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
10661 | bytestotal += containersize; |
10662 | if(bytestotal > maxbytes) return 0; |
10663 | buf += containersize; |
10664 | } else if (isrun) { |
10665 | bytestotal += sizeof(uint16_t); |
10666 | if(bytestotal > maxbytes) return 0; |
10667 | uint16_t n_runs; |
10668 | memcpy(&n_runs, buf, sizeof(uint16_t)); |
10669 | buf += sizeof(uint16_t); |
10670 | size_t containersize = n_runs * sizeof(rle16_t); |
10671 | bytestotal += containersize; |
10672 | if(bytestotal > maxbytes) return 0; |
10673 | buf += containersize; |
10674 | } else { |
10675 | size_t containersize = thiscard * sizeof(uint16_t); |
10676 | bytestotal += containersize; |
10677 | if(bytestotal > maxbytes) return 0; |
10678 | buf += containersize; |
10679 | } |
10680 | } |
10681 | return bytestotal; |
10682 | } |
10683 | |
10684 | |
10685 | // this function populates answer from the content of buf (reading up to maxbytes bytes). |
10686 | // The function returns false if a properly serialized bitmap cannot be found. |
10687 | // if it returns true, readbytes is populated by how many bytes were read, we have that *readbytes <= maxbytes. |
10688 | bool ra_portable_deserialize(roaring_array_t *answer, const char *buf, const size_t maxbytes, size_t * readbytes) { |
10689 | *readbytes = sizeof(int32_t);// for cookie |
10690 | if(*readbytes > maxbytes) { |
10691 | fprintf(stderr, "Ran out of bytes while reading first 4 bytes.\n" ); |
10692 | return false; |
10693 | } |
10694 | uint32_t cookie; |
10695 | memcpy(&cookie, buf, sizeof(int32_t)); |
10696 | buf += sizeof(uint32_t); |
10697 | if ((cookie & 0xFFFF) != SERIAL_COOKIE && |
10698 | cookie != SERIAL_COOKIE_NO_RUNCONTAINER) { |
10699 | fprintf(stderr, "I failed to find one of the right cookies. Found %" PRIu32 "\n" , |
10700 | cookie); |
10701 | return false; |
10702 | } |
10703 | int32_t size; |
10704 | |
10705 | if ((cookie & 0xFFFF) == SERIAL_COOKIE) |
10706 | size = (cookie >> 16) + 1; |
10707 | else { |
10708 | *readbytes += sizeof(int32_t); |
10709 | if(*readbytes > maxbytes) { |
10710 | fprintf(stderr, "Ran out of bytes while reading second part of the cookie.\n" ); |
10711 | return false; |
10712 | } |
10713 | memcpy(&size, buf, sizeof(int32_t)); |
10714 | buf += sizeof(uint32_t); |
10715 | } |
10716 | if (size > (1<<16)) { |
10717 | fprintf(stderr, "You cannot have so many containers, the data must be corrupted: %" PRId32 "\n" , |
10718 | size); |
10719 | return false; // logically impossible |
10720 | } |
10721 | const char *bitmapOfRunContainers = NULL; |
10722 | bool hasrun = (cookie & 0xFFFF) == SERIAL_COOKIE; |
10723 | if (hasrun) { |
10724 | int32_t s = (size + 7) / 8; |
10725 | *readbytes += s; |
10726 | if(*readbytes > maxbytes) {// data is corrupted? |
10727 | fprintf(stderr, "Ran out of bytes while reading run bitmap.\n" ); |
10728 | return false; |
10729 | } |
10730 | bitmapOfRunContainers = buf; |
10731 | buf += s; |
10732 | } |
10733 | uint16_t *keyscards = (uint16_t *)buf; |
10734 | |
10735 | *readbytes += size * 2 * sizeof(uint16_t); |
10736 | if(*readbytes > maxbytes) { |
10737 | fprintf(stderr, "Ran out of bytes while reading key-cardinality array.\n" ); |
10738 | return false; |
10739 | } |
10740 | buf += size * 2 * sizeof(uint16_t); |
10741 | |
10742 | bool is_ok = ra_init_with_capacity(answer, size); |
10743 | if (!is_ok) { |
10744 | fprintf(stderr, "Failed to allocate memory for roaring array. Bailing out.\n" ); |
10745 | return false; |
10746 | } |
10747 | |
10748 | for (int32_t k = 0; k < size; ++k) { |
10749 | uint16_t tmp; |
10750 | memcpy(&tmp, keyscards + 2*k, sizeof(tmp)); |
10751 | answer->keys[k] = tmp; |
10752 | } |
10753 | if ((!hasrun) || (size >= NO_OFFSET_THRESHOLD)) { |
10754 | *readbytes += size * 4; |
10755 | if(*readbytes > maxbytes) {// data is corrupted? |
10756 | fprintf(stderr, "Ran out of bytes while reading offsets.\n" ); |
10757 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10758 | return false; |
10759 | } |
10760 | |
10761 | // skipping the offsets |
10762 | buf += size * 4; |
10763 | } |
10764 | // Reading the containers |
10765 | for (int32_t k = 0; k < size; ++k) { |
10766 | uint16_t tmp; |
10767 | memcpy(&tmp, keyscards + 2*k+1, sizeof(tmp)); |
10768 | uint32_t thiscard = tmp + 1; |
10769 | bool isbitmap = (thiscard > DEFAULT_MAX_SIZE); |
10770 | bool isrun = false; |
10771 | if(hasrun) { |
10772 | if((bitmapOfRunContainers[k / 8] & (1 << (k % 8))) != 0) { |
10773 | isbitmap = false; |
10774 | isrun = true; |
10775 | } |
10776 | } |
10777 | if (isbitmap) { |
10778 | // we check that the read is allowed |
10779 | size_t containersize = BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t); |
10780 | *readbytes += containersize; |
10781 | if(*readbytes > maxbytes) { |
10782 | fprintf(stderr, "Running out of bytes while reading a bitset container.\n" ); |
10783 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10784 | return false; |
10785 | } |
10786 | // it is now safe to read |
10787 | bitset_container_t *c = bitset_container_create(); |
10788 | if(c == NULL) {// memory allocation failure |
10789 | fprintf(stderr, "Failed to allocate memory for a bitset container.\n" ); |
10790 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10791 | return false; |
10792 | } |
10793 | answer->size++; |
10794 | buf += bitset_container_read(thiscard, c, buf); |
10795 | answer->containers[k] = c; |
10796 | answer->typecodes[k] = BITSET_CONTAINER_TYPE_CODE; |
10797 | } else if (isrun) { |
10798 | // we check that the read is allowed |
10799 | *readbytes += sizeof(uint16_t); |
10800 | if(*readbytes > maxbytes) { |
10801 | fprintf(stderr, "Running out of bytes while reading a run container (header).\n" ); |
10802 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10803 | return false; |
10804 | } |
10805 | uint16_t n_runs; |
10806 | memcpy(&n_runs, buf, sizeof(uint16_t)); |
10807 | size_t containersize = n_runs * sizeof(rle16_t); |
10808 | *readbytes += containersize; |
10809 | if(*readbytes > maxbytes) {// data is corrupted? |
10810 | fprintf(stderr, "Running out of bytes while reading a run container.\n" ); |
10811 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10812 | return false; |
10813 | } |
10814 | // it is now safe to read |
10815 | |
10816 | run_container_t *c = run_container_create(); |
10817 | if(c == NULL) {// memory allocation failure |
10818 | fprintf(stderr, "Failed to allocate memory for a run container.\n" ); |
10819 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10820 | return false; |
10821 | } |
10822 | answer->size++; |
10823 | buf += run_container_read(thiscard, c, buf); |
10824 | answer->containers[k] = c; |
10825 | answer->typecodes[k] = RUN_CONTAINER_TYPE_CODE; |
10826 | } else { |
10827 | // we check that the read is allowed |
10828 | size_t containersize = thiscard * sizeof(uint16_t); |
10829 | *readbytes += containersize; |
10830 | if(*readbytes > maxbytes) {// data is corrupted? |
10831 | fprintf(stderr, "Running out of bytes while reading an array container.\n" ); |
10832 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10833 | return false; |
10834 | } |
10835 | // it is now safe to read |
10836 | array_container_t *c = |
10837 | array_container_create_given_capacity(thiscard); |
10838 | if(c == NULL) {// memory allocation failure |
10839 | fprintf(stderr, "Failed to allocate memory for an array container.\n" ); |
10840 | ra_clear(answer);// we need to clear the containers already allocated, and the roaring array |
10841 | return false; |
10842 | } |
10843 | answer->size++; |
10844 | buf += array_container_read(thiscard, c, buf); |
10845 | answer->containers[k] = c; |
10846 | answer->typecodes[k] = ARRAY_CONTAINER_TYPE_CODE; |
10847 | } |
10848 | } |
10849 | return true; |
10850 | } |
10851 | /* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_array.c */ |
10852 | /* begin file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */ |
10853 | |
10854 | struct roaring_pq_element_s { |
10855 | uint64_t size; |
10856 | bool is_temporary; |
10857 | roaring_bitmap_t *bitmap; |
10858 | }; |
10859 | |
10860 | typedef struct roaring_pq_element_s roaring_pq_element_t; |
10861 | |
10862 | struct roaring_pq_s { |
10863 | roaring_pq_element_t *elements; |
10864 | uint64_t size; |
10865 | }; |
10866 | |
10867 | typedef struct roaring_pq_s roaring_pq_t; |
10868 | |
10869 | static inline bool compare(roaring_pq_element_t *t1, roaring_pq_element_t *t2) { |
10870 | return t1->size < t2->size; |
10871 | } |
10872 | |
10873 | static void pq_add(roaring_pq_t *pq, roaring_pq_element_t *t) { |
10874 | uint64_t i = pq->size; |
10875 | pq->elements[pq->size++] = *t; |
10876 | while (i > 0) { |
10877 | uint64_t p = (i - 1) >> 1; |
10878 | roaring_pq_element_t ap = pq->elements[p]; |
10879 | if (!compare(t, &ap)) break; |
10880 | pq->elements[i] = ap; |
10881 | i = p; |
10882 | } |
10883 | pq->elements[i] = *t; |
10884 | } |
10885 | |
10886 | static void pq_free(roaring_pq_t *pq) { |
10887 | free(pq->elements); |
10888 | pq->elements = NULL; // paranoid |
10889 | free(pq); |
10890 | } |
10891 | |
10892 | static void percolate_down(roaring_pq_t *pq, uint32_t i) { |
10893 | uint32_t size = (uint32_t)pq->size; |
10894 | uint32_t hsize = size >> 1; |
10895 | roaring_pq_element_t ai = pq->elements[i]; |
10896 | while (i < hsize) { |
10897 | uint32_t l = (i << 1) + 1; |
10898 | uint32_t r = l + 1; |
10899 | roaring_pq_element_t bestc = pq->elements[l]; |
10900 | if (r < size) { |
10901 | if (compare(pq->elements + r, &bestc)) { |
10902 | l = r; |
10903 | bestc = pq->elements[r]; |
10904 | } |
10905 | } |
10906 | if (!compare(&bestc, &ai)) { |
10907 | break; |
10908 | } |
10909 | pq->elements[i] = bestc; |
10910 | i = l; |
10911 | } |
10912 | pq->elements[i] = ai; |
10913 | } |
10914 | |
10915 | static roaring_pq_t *create_pq(const roaring_bitmap_t **arr, uint32_t length) { |
10916 | roaring_pq_t *answer = (roaring_pq_t *)malloc(sizeof(roaring_pq_t)); |
10917 | answer->elements = |
10918 | (roaring_pq_element_t *)malloc(sizeof(roaring_pq_element_t) * length); |
10919 | answer->size = length; |
10920 | for (uint32_t i = 0; i < length; i++) { |
10921 | answer->elements[i].bitmap = (roaring_bitmap_t *)arr[i]; |
10922 | answer->elements[i].is_temporary = false; |
10923 | answer->elements[i].size = |
10924 | roaring_bitmap_portable_size_in_bytes(arr[i]); |
10925 | } |
10926 | for (int32_t i = (length >> 1); i >= 0; i--) { |
10927 | percolate_down(answer, i); |
10928 | } |
10929 | return answer; |
10930 | } |
10931 | |
10932 | static roaring_pq_element_t pq_poll(roaring_pq_t *pq) { |
10933 | roaring_pq_element_t ans = *pq->elements; |
10934 | if (pq->size > 1) { |
10935 | pq->elements[0] = pq->elements[--pq->size]; |
10936 | percolate_down(pq, 0); |
10937 | } else |
10938 | --pq->size; |
10939 | // memmove(pq->elements,pq->elements+1,(pq->size-1)*sizeof(roaring_pq_element_t));--pq->size; |
10940 | return ans; |
10941 | } |
10942 | |
10943 | // this function consumes and frees the inputs |
10944 | static roaring_bitmap_t *lazy_or_from_lazy_inputs(roaring_bitmap_t *x1, |
10945 | roaring_bitmap_t *x2) { |
10946 | uint8_t container_result_type = 0; |
10947 | const int length1 = ra_get_size(&x1->high_low_container), |
10948 | length2 = ra_get_size(&x2->high_low_container); |
10949 | if (0 == length1) { |
10950 | roaring_bitmap_free(x1); |
10951 | return x2; |
10952 | } |
10953 | if (0 == length2) { |
10954 | roaring_bitmap_free(x2); |
10955 | return x1; |
10956 | } |
10957 | uint32_t neededcap = length1 > length2 ? length2 : length1; |
10958 | roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap); |
10959 | int pos1 = 0, pos2 = 0; |
10960 | uint8_t container_type_1, container_type_2; |
10961 | uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
10962 | uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
10963 | while (true) { |
10964 | if (s1 == s2) { |
10965 | // todo: unsharing can be inefficient as it may create a clone where |
10966 | // none |
10967 | // is needed, but it has the benefit of being easy to reason about. |
10968 | ra_unshare_container_at_index(&x1->high_low_container, pos1); |
10969 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
10970 | &container_type_1); |
10971 | assert(container_type_1 != SHARED_CONTAINER_TYPE_CODE); |
10972 | ra_unshare_container_at_index(&x2->high_low_container, pos2); |
10973 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
10974 | &container_type_2); |
10975 | assert(container_type_2 != SHARED_CONTAINER_TYPE_CODE); |
10976 | void *c; |
10977 | |
10978 | if ((container_type_2 == BITSET_CONTAINER_TYPE_CODE) && |
10979 | (container_type_1 != BITSET_CONTAINER_TYPE_CODE)) { |
10980 | c = container_lazy_ior(c2, container_type_2, c1, |
10981 | container_type_1, |
10982 | &container_result_type); |
10983 | container_free(c1, container_type_1); |
10984 | if (c != c2) { |
10985 | container_free(c2, container_type_2); |
10986 | } |
10987 | } else { |
10988 | c = container_lazy_ior(c1, container_type_1, c2, |
10989 | container_type_2, |
10990 | &container_result_type); |
10991 | container_free(c2, container_type_2); |
10992 | if (c != c1) { |
10993 | container_free(c1, container_type_1); |
10994 | } |
10995 | } |
10996 | // since we assume that the initial containers are non-empty, the |
10997 | // result here |
10998 | // can only be non-empty |
10999 | ra_append(&answer->high_low_container, s1, c, |
11000 | container_result_type); |
11001 | ++pos1; |
11002 | ++pos2; |
11003 | if (pos1 == length1) break; |
11004 | if (pos2 == length2) break; |
11005 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
11006 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
11007 | |
11008 | } else if (s1 < s2) { // s1 < s2 |
11009 | void *c1 = ra_get_container_at_index(&x1->high_low_container, pos1, |
11010 | &container_type_1); |
11011 | ra_append(&answer->high_low_container, s1, c1, container_type_1); |
11012 | pos1++; |
11013 | if (pos1 == length1) break; |
11014 | s1 = ra_get_key_at_index(&x1->high_low_container, pos1); |
11015 | |
11016 | } else { // s1 > s2 |
11017 | void *c2 = ra_get_container_at_index(&x2->high_low_container, pos2, |
11018 | &container_type_2); |
11019 | ra_append(&answer->high_low_container, s2, c2, container_type_2); |
11020 | pos2++; |
11021 | if (pos2 == length2) break; |
11022 | s2 = ra_get_key_at_index(&x2->high_low_container, pos2); |
11023 | } |
11024 | } |
11025 | if (pos1 == length1) { |
11026 | ra_append_move_range(&answer->high_low_container, |
11027 | &x2->high_low_container, pos2, length2); |
11028 | } else if (pos2 == length2) { |
11029 | ra_append_move_range(&answer->high_low_container, |
11030 | &x1->high_low_container, pos1, length1); |
11031 | } |
11032 | ra_clear_without_containers(&x1->high_low_container); |
11033 | ra_clear_without_containers(&x2->high_low_container); |
11034 | free(x1); |
11035 | free(x2); |
11036 | return answer; |
11037 | } |
11038 | |
11039 | /** |
11040 | * Compute the union of 'number' bitmaps using a heap. This can |
11041 | * sometimes be faster than roaring_bitmap_or_many which uses |
11042 | * a naive algorithm. Caller is responsible for freeing the |
11043 | * result. |
11044 | */ |
11045 | roaring_bitmap_t *roaring_bitmap_or_many_heap(uint32_t number, |
11046 | const roaring_bitmap_t **x) { |
11047 | if (number == 0) { |
11048 | return roaring_bitmap_create(); |
11049 | } |
11050 | if (number == 1) { |
11051 | return roaring_bitmap_copy(x[0]); |
11052 | } |
11053 | roaring_pq_t *pq = create_pq(x, number); |
11054 | while (pq->size > 1) { |
11055 | roaring_pq_element_t x1 = pq_poll(pq); |
11056 | roaring_pq_element_t x2 = pq_poll(pq); |
11057 | |
11058 | if (x1.is_temporary && x2.is_temporary) { |
11059 | roaring_bitmap_t *newb = |
11060 | lazy_or_from_lazy_inputs(x1.bitmap, x2.bitmap); |
11061 | // should normally return a fresh new bitmap *except* that |
11062 | // it can return x1.bitmap or x2.bitmap in degenerate cases |
11063 | bool temporary = !((newb == x1.bitmap) && (newb == x2.bitmap)); |
11064 | uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); |
11065 | roaring_pq_element_t newelement = { |
11066 | .size = bsize, .is_temporary = temporary, .bitmap = newb}; |
11067 | pq_add(pq, &newelement); |
11068 | } else if (x2.is_temporary) { |
11069 | roaring_bitmap_lazy_or_inplace(x2.bitmap, x1.bitmap, false); |
11070 | x2.size = roaring_bitmap_portable_size_in_bytes(x2.bitmap); |
11071 | pq_add(pq, &x2); |
11072 | } else if (x1.is_temporary) { |
11073 | roaring_bitmap_lazy_or_inplace(x1.bitmap, x2.bitmap, false); |
11074 | x1.size = roaring_bitmap_portable_size_in_bytes(x1.bitmap); |
11075 | |
11076 | pq_add(pq, &x1); |
11077 | } else { |
11078 | roaring_bitmap_t *newb = |
11079 | roaring_bitmap_lazy_or(x1.bitmap, x2.bitmap, false); |
11080 | uint64_t bsize = roaring_bitmap_portable_size_in_bytes(newb); |
11081 | roaring_pq_element_t newelement = { |
11082 | .size = bsize, .is_temporary = true, .bitmap = newb}; |
11083 | |
11084 | pq_add(pq, &newelement); |
11085 | } |
11086 | } |
11087 | roaring_pq_element_t X = pq_poll(pq); |
11088 | roaring_bitmap_t *answer = X.bitmap; |
11089 | roaring_bitmap_repair_after_lazy(answer); |
11090 | pq_free(pq); |
11091 | return answer; |
11092 | } |
11093 | /* end file /opt/bitmap/CRoaring-0.2.57/src/roaring_priority_queue.c */ |
11094 | |