1 | #ifndef ARRAY_UTIL_H |
2 | #define ARRAY_UTIL_H |
3 | |
4 | #include <stddef.h> // for size_t |
5 | #include <stdint.h> |
6 | |
7 | #include <roaring/portability.h> |
8 | |
9 | /* |
10 | * Good old binary search. |
11 | * Assumes that array is sorted, has logarithmic complexity. |
12 | * if the result is x, then: |
13 | * if ( x>0 ) you have array[x] = ikey |
14 | * if ( x<0 ) then inserting ikey at position -x-1 in array (insuring that array[-x-1]=ikey) |
15 | * keys the array sorted. |
16 | */ |
17 | inline int32_t binarySearch(const uint16_t *array, int32_t lenarray, |
18 | uint16_t ikey) { |
19 | int32_t low = 0; |
20 | int32_t high = lenarray - 1; |
21 | while (low <= high) { |
22 | int32_t middleIndex = (low + high) >> 1; |
23 | uint16_t middleValue = array[middleIndex]; |
24 | if (middleValue < ikey) { |
25 | low = middleIndex + 1; |
26 | } else if (middleValue > ikey) { |
27 | high = middleIndex - 1; |
28 | } else { |
29 | return middleIndex; |
30 | } |
31 | } |
32 | return -(low + 1); |
33 | } |
34 | |
35 | /** |
36 | * Galloping search |
37 | * Assumes that array is sorted, has logarithmic complexity. |
38 | * if the result is x, then if x = length, you have that all values in array between pos and length |
39 | * are smaller than min. |
40 | * otherwise returns the first index x such that array[x] >= min. |
41 | */ |
42 | static inline int32_t advanceUntil(const uint16_t *array, int32_t pos, |
43 | int32_t length, uint16_t min) { |
44 | int32_t lower = pos + 1; |
45 | |
46 | if ((lower >= length) || (array[lower] >= min)) { |
47 | return lower; |
48 | } |
49 | |
50 | int32_t spansize = 1; |
51 | |
52 | while ((lower + spansize < length) && (array[lower + spansize] < min)) { |
53 | spansize <<= 1; |
54 | } |
55 | int32_t upper = (lower + spansize < length) ? lower + spansize : length - 1; |
56 | |
57 | if (array[upper] == min) { |
58 | return upper; |
59 | } |
60 | if (array[upper] < min) { |
61 | // means |
62 | // array |
63 | // has no |
64 | // item |
65 | // >= min |
66 | // pos = array.length; |
67 | return length; |
68 | } |
69 | |
70 | // we know that the next-smallest span was too small |
71 | lower += (spansize >> 1); |
72 | |
73 | int32_t mid = 0; |
74 | while (lower + 1 != upper) { |
75 | mid = (lower + upper) >> 1; |
76 | if (array[mid] == min) { |
77 | return mid; |
78 | } else if (array[mid] < min) { |
79 | lower = mid; |
80 | } else { |
81 | upper = mid; |
82 | } |
83 | } |
84 | return upper; |
85 | } |
86 | |
87 | /** |
88 | * Returns number of elements which are less then $ikey. |
89 | * Array elements must be unique and sorted. |
90 | */ |
91 | static inline int32_t count_less(const uint16_t *array, int32_t lenarray, |
92 | uint16_t ikey) { |
93 | if (lenarray == 0) return 0; |
94 | int32_t pos = binarySearch(array, lenarray, ikey); |
95 | return pos >= 0 ? pos : -(pos+1); |
96 | } |
97 | |
98 | /** |
99 | * Returns number of elements which are greater then $ikey. |
100 | * Array elements must be unique and sorted. |
101 | */ |
102 | static inline int32_t count_greater(const uint16_t *array, int32_t lenarray, |
103 | uint16_t ikey) { |
104 | if (lenarray == 0) return 0; |
105 | int32_t pos = binarySearch(array, lenarray, ikey); |
106 | if (pos >= 0) { |
107 | return lenarray - (pos+1); |
108 | } else { |
109 | return lenarray - (-pos-1); |
110 | } |
111 | } |
112 | |
113 | /** |
114 | * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions |
115 | * Optimized by D. Lemire on May 3rd 2013 |
116 | * |
117 | * C should have capacity greater than the minimum of s_1 and s_b + 8 |
118 | * where 8 is sizeof(__m128i)/sizeof(uint16_t). |
119 | */ |
120 | int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a, |
121 | const uint16_t *__restrict__ B, size_t s_b, |
122 | uint16_t *C); |
123 | |
124 | /** |
125 | * Compute the cardinality of the intersection using SSE4 instructions |
126 | */ |
127 | int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A, |
128 | size_t s_a, |
129 | const uint16_t *__restrict__ B, |
130 | size_t s_b); |
131 | |
132 | /* Computes the intersection between one small and one large set of uint16_t. |
133 | * Stores the result into buffer and return the number of elements. */ |
134 | int32_t intersect_skewed_uint16(const uint16_t *smallarray, size_t size_s, |
135 | const uint16_t *largearray, size_t size_l, |
136 | uint16_t *buffer); |
137 | |
138 | /* Computes the size of the intersection between one small and one large set of |
139 | * uint16_t. */ |
140 | int32_t intersect_skewed_uint16_cardinality(const uint16_t *smallarray, |
141 | size_t size_s, |
142 | const uint16_t *largearray, |
143 | size_t size_l); |
144 | |
145 | |
146 | /* Check whether the size of the intersection between one small and one large set of uint16_t is non-zero. */ |
147 | bool intersect_skewed_uint16_nonempty(const uint16_t *smallarray, size_t size_s, |
148 | const uint16_t *largearray, size_t size_l); |
149 | /** |
150 | * Generic intersection function. |
151 | */ |
152 | int32_t intersect_uint16(const uint16_t *A, const size_t lenA, |
153 | const uint16_t *B, const size_t lenB, uint16_t *out); |
154 | /** |
155 | * Compute the size of the intersection (generic). |
156 | */ |
157 | int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA, |
158 | const uint16_t *B, const size_t lenB); |
159 | |
160 | /** |
161 | * Checking whether the size of the intersection is non-zero. |
162 | */ |
163 | bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA, |
164 | const uint16_t *B, const size_t lenB); |
165 | /** |
166 | * Generic union function. |
167 | */ |
168 | size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, |
169 | size_t size_2, uint16_t *buffer); |
170 | |
171 | /** |
172 | * Generic XOR function. |
173 | */ |
174 | int32_t xor_uint16(const uint16_t *array_1, int32_t card_1, |
175 | const uint16_t *array_2, int32_t card_2, uint16_t *out); |
176 | |
177 | /** |
178 | * Generic difference function (ANDNOT). |
179 | */ |
180 | int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2, |
181 | int length2, uint16_t *a_out); |
182 | |
183 | /** |
184 | * Generic intersection function. |
185 | */ |
186 | size_t intersection_uint32(const uint32_t *A, const size_t lenA, |
187 | const uint32_t *B, const size_t lenB, uint32_t *out); |
188 | |
189 | /** |
190 | * Generic intersection function, returns just the cardinality. |
191 | */ |
192 | size_t intersection_uint32_card(const uint32_t *A, const size_t lenA, |
193 | const uint32_t *B, const size_t lenB); |
194 | |
195 | /** |
196 | * Generic union function. |
197 | */ |
198 | size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2, |
199 | size_t size_2, uint32_t *buffer); |
200 | |
201 | /** |
202 | * A fast SSE-based union function. |
203 | */ |
204 | uint32_t union_vector16(const uint16_t *__restrict__ set_1, uint32_t size_1, |
205 | const uint16_t *__restrict__ set_2, uint32_t size_2, |
206 | uint16_t *__restrict__ buffer); |
207 | /** |
208 | * A fast SSE-based XOR function. |
209 | */ |
210 | uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1, |
211 | const uint16_t *__restrict__ array2, uint32_t length2, |
212 | uint16_t *__restrict__ output); |
213 | |
214 | /** |
215 | * A fast SSE-based difference function. |
216 | */ |
217 | int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a, |
218 | const uint16_t *__restrict__ B, size_t s_b, |
219 | uint16_t *C); |
220 | |
221 | /** |
222 | * Generic union function, returns just the cardinality. |
223 | */ |
224 | size_t union_uint32_card(const uint32_t *set_1, size_t size_1, |
225 | const uint32_t *set_2, size_t size_2); |
226 | |
227 | /** |
228 | * combines union_uint16 and union_vector16 optimally |
229 | */ |
230 | size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2, |
231 | size_t size_2, uint16_t *buffer); |
232 | |
233 | |
234 | bool memequals(const void *s1, const void *s2, size_t n); |
235 | |
236 | #endif |
237 | |