1 | /* |
2 | * Bitmap Module |
3 | * |
4 | * Stolen from linux/src/lib/bitmap.c |
5 | * |
6 | * Copyright (C) 2010 Corentin Chary |
7 | * |
8 | * This source code is licensed under the GNU General Public License, |
9 | * Version 2. |
10 | */ |
11 | |
12 | #include "qemu/osdep.h" |
13 | #include "qemu/bitops.h" |
14 | #include "qemu/bitmap.h" |
15 | #include "qemu/atomic.h" |
16 | |
17 | /* |
18 | * bitmaps provide an array of bits, implemented using an |
19 | * array of unsigned longs. The number of valid bits in a |
20 | * given bitmap does _not_ need to be an exact multiple of |
21 | * BITS_PER_LONG. |
22 | * |
23 | * The possible unused bits in the last, partially used word |
24 | * of a bitmap are 'don't care'. The implementation makes |
25 | * no particular effort to keep them zero. It ensures that |
26 | * their value will not affect the results of any operation. |
27 | * The bitmap operations that return Boolean (bitmap_empty, |
28 | * for example) or scalar (bitmap_weight, for example) results |
29 | * carefully filter out these unused bits from impacting their |
30 | * results. |
31 | * |
32 | * These operations actually hold to a slightly stronger rule: |
33 | * if you don't input any bitmaps to these ops that have some |
34 | * unused bits set, then they won't output any set unused bits |
35 | * in output bitmaps. |
36 | * |
37 | * The byte ordering of bitmaps is more natural on little |
38 | * endian architectures. |
39 | */ |
40 | |
41 | int slow_bitmap_empty(const unsigned long *bitmap, long bits) |
42 | { |
43 | long k, lim = bits/BITS_PER_LONG; |
44 | |
45 | for (k = 0; k < lim; ++k) { |
46 | if (bitmap[k]) { |
47 | return 0; |
48 | } |
49 | } |
50 | if (bits % BITS_PER_LONG) { |
51 | if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) { |
52 | return 0; |
53 | } |
54 | } |
55 | |
56 | return 1; |
57 | } |
58 | |
59 | int slow_bitmap_full(const unsigned long *bitmap, long bits) |
60 | { |
61 | long k, lim = bits/BITS_PER_LONG; |
62 | |
63 | for (k = 0; k < lim; ++k) { |
64 | if (~bitmap[k]) { |
65 | return 0; |
66 | } |
67 | } |
68 | |
69 | if (bits % BITS_PER_LONG) { |
70 | if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) { |
71 | return 0; |
72 | } |
73 | } |
74 | |
75 | return 1; |
76 | } |
77 | |
78 | int slow_bitmap_equal(const unsigned long *bitmap1, |
79 | const unsigned long *bitmap2, long bits) |
80 | { |
81 | long k, lim = bits/BITS_PER_LONG; |
82 | |
83 | for (k = 0; k < lim; ++k) { |
84 | if (bitmap1[k] != bitmap2[k]) { |
85 | return 0; |
86 | } |
87 | } |
88 | |
89 | if (bits % BITS_PER_LONG) { |
90 | if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) { |
91 | return 0; |
92 | } |
93 | } |
94 | |
95 | return 1; |
96 | } |
97 | |
98 | void slow_bitmap_complement(unsigned long *dst, const unsigned long *src, |
99 | long bits) |
100 | { |
101 | long k, lim = bits/BITS_PER_LONG; |
102 | |
103 | for (k = 0; k < lim; ++k) { |
104 | dst[k] = ~src[k]; |
105 | } |
106 | |
107 | if (bits % BITS_PER_LONG) { |
108 | dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); |
109 | } |
110 | } |
111 | |
112 | int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1, |
113 | const unsigned long *bitmap2, long bits) |
114 | { |
115 | long k; |
116 | long nr = BITS_TO_LONGS(bits); |
117 | unsigned long result = 0; |
118 | |
119 | for (k = 0; k < nr; k++) { |
120 | result |= (dst[k] = bitmap1[k] & bitmap2[k]); |
121 | } |
122 | return result != 0; |
123 | } |
124 | |
125 | void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1, |
126 | const unsigned long *bitmap2, long bits) |
127 | { |
128 | long k; |
129 | long nr = BITS_TO_LONGS(bits); |
130 | |
131 | for (k = 0; k < nr; k++) { |
132 | dst[k] = bitmap1[k] | bitmap2[k]; |
133 | } |
134 | } |
135 | |
136 | void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, |
137 | const unsigned long *bitmap2, long bits) |
138 | { |
139 | long k; |
140 | long nr = BITS_TO_LONGS(bits); |
141 | |
142 | for (k = 0; k < nr; k++) { |
143 | dst[k] = bitmap1[k] ^ bitmap2[k]; |
144 | } |
145 | } |
146 | |
147 | int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, |
148 | const unsigned long *bitmap2, long bits) |
149 | { |
150 | long k; |
151 | long nr = BITS_TO_LONGS(bits); |
152 | unsigned long result = 0; |
153 | |
154 | for (k = 0; k < nr; k++) { |
155 | result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); |
156 | } |
157 | return result != 0; |
158 | } |
159 | |
160 | void bitmap_set(unsigned long *map, long start, long nr) |
161 | { |
162 | unsigned long *p = map + BIT_WORD(start); |
163 | const long size = start + nr; |
164 | int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); |
165 | unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); |
166 | |
167 | assert(start >= 0 && nr >= 0); |
168 | |
169 | while (nr - bits_to_set >= 0) { |
170 | *p |= mask_to_set; |
171 | nr -= bits_to_set; |
172 | bits_to_set = BITS_PER_LONG; |
173 | mask_to_set = ~0UL; |
174 | p++; |
175 | } |
176 | if (nr) { |
177 | mask_to_set &= BITMAP_LAST_WORD_MASK(size); |
178 | *p |= mask_to_set; |
179 | } |
180 | } |
181 | |
182 | void bitmap_set_atomic(unsigned long *map, long start, long nr) |
183 | { |
184 | unsigned long *p = map + BIT_WORD(start); |
185 | const long size = start + nr; |
186 | int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); |
187 | unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); |
188 | |
189 | assert(start >= 0 && nr >= 0); |
190 | |
191 | /* First word */ |
192 | if (nr - bits_to_set > 0) { |
193 | atomic_or(p, mask_to_set); |
194 | nr -= bits_to_set; |
195 | bits_to_set = BITS_PER_LONG; |
196 | mask_to_set = ~0UL; |
197 | p++; |
198 | } |
199 | |
200 | /* Full words */ |
201 | if (bits_to_set == BITS_PER_LONG) { |
202 | while (nr >= BITS_PER_LONG) { |
203 | *p = ~0UL; |
204 | nr -= BITS_PER_LONG; |
205 | p++; |
206 | } |
207 | } |
208 | |
209 | /* Last word */ |
210 | if (nr) { |
211 | mask_to_set &= BITMAP_LAST_WORD_MASK(size); |
212 | atomic_or(p, mask_to_set); |
213 | } else { |
214 | /* If we avoided the full barrier in atomic_or(), issue a |
215 | * barrier to account for the assignments in the while loop. |
216 | */ |
217 | smp_mb(); |
218 | } |
219 | } |
220 | |
221 | void bitmap_clear(unsigned long *map, long start, long nr) |
222 | { |
223 | unsigned long *p = map + BIT_WORD(start); |
224 | const long size = start + nr; |
225 | int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); |
226 | unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); |
227 | |
228 | assert(start >= 0 && nr >= 0); |
229 | |
230 | while (nr - bits_to_clear >= 0) { |
231 | *p &= ~mask_to_clear; |
232 | nr -= bits_to_clear; |
233 | bits_to_clear = BITS_PER_LONG; |
234 | mask_to_clear = ~0UL; |
235 | p++; |
236 | } |
237 | if (nr) { |
238 | mask_to_clear &= BITMAP_LAST_WORD_MASK(size); |
239 | *p &= ~mask_to_clear; |
240 | } |
241 | } |
242 | |
243 | bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr) |
244 | { |
245 | unsigned long *p = map + BIT_WORD(start); |
246 | const long size = start + nr; |
247 | int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); |
248 | unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); |
249 | unsigned long dirty = 0; |
250 | unsigned long old_bits; |
251 | |
252 | assert(start >= 0 && nr >= 0); |
253 | |
254 | /* First word */ |
255 | if (nr - bits_to_clear > 0) { |
256 | old_bits = atomic_fetch_and(p, ~mask_to_clear); |
257 | dirty |= old_bits & mask_to_clear; |
258 | nr -= bits_to_clear; |
259 | bits_to_clear = BITS_PER_LONG; |
260 | mask_to_clear = ~0UL; |
261 | p++; |
262 | } |
263 | |
264 | /* Full words */ |
265 | if (bits_to_clear == BITS_PER_LONG) { |
266 | while (nr >= BITS_PER_LONG) { |
267 | if (*p) { |
268 | old_bits = atomic_xchg(p, 0); |
269 | dirty |= old_bits; |
270 | } |
271 | nr -= BITS_PER_LONG; |
272 | p++; |
273 | } |
274 | } |
275 | |
276 | /* Last word */ |
277 | if (nr) { |
278 | mask_to_clear &= BITMAP_LAST_WORD_MASK(size); |
279 | old_bits = atomic_fetch_and(p, ~mask_to_clear); |
280 | dirty |= old_bits & mask_to_clear; |
281 | } else { |
282 | if (!dirty) { |
283 | smp_mb(); |
284 | } |
285 | } |
286 | |
287 | return dirty != 0; |
288 | } |
289 | |
290 | void bitmap_copy_and_clear_atomic(unsigned long *dst, unsigned long *src, |
291 | long nr) |
292 | { |
293 | while (nr > 0) { |
294 | *dst = atomic_xchg(src, 0); |
295 | dst++; |
296 | src++; |
297 | nr -= BITS_PER_LONG; |
298 | } |
299 | } |
300 | |
301 | #define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) |
302 | |
303 | /** |
304 | * bitmap_find_next_zero_area - find a contiguous aligned zero area |
305 | * @map: The address to base the search on |
306 | * @size: The bitmap size in bits |
307 | * @start: The bitnumber to start searching at |
308 | * @nr: The number of zeroed bits we're looking for |
309 | * @align_mask: Alignment mask for zero area |
310 | * |
311 | * The @align_mask should be one less than a power of 2; the effect is that |
312 | * the bit offset of all zero areas this function finds is multiples of that |
313 | * power of 2. A @align_mask of 0 means no alignment is required. |
314 | */ |
315 | unsigned long bitmap_find_next_zero_area(unsigned long *map, |
316 | unsigned long size, |
317 | unsigned long start, |
318 | unsigned long nr, |
319 | unsigned long align_mask) |
320 | { |
321 | unsigned long index, end, i; |
322 | again: |
323 | index = find_next_zero_bit(map, size, start); |
324 | |
325 | /* Align allocation */ |
326 | index = ALIGN_MASK(index, align_mask); |
327 | |
328 | end = index + nr; |
329 | if (end > size) { |
330 | return end; |
331 | } |
332 | i = find_next_bit(map, end, index); |
333 | if (i < end) { |
334 | start = i + 1; |
335 | goto again; |
336 | } |
337 | return index; |
338 | } |
339 | |
340 | int slow_bitmap_intersects(const unsigned long *bitmap1, |
341 | const unsigned long *bitmap2, long bits) |
342 | { |
343 | long k, lim = bits/BITS_PER_LONG; |
344 | |
345 | for (k = 0; k < lim; ++k) { |
346 | if (bitmap1[k] & bitmap2[k]) { |
347 | return 1; |
348 | } |
349 | } |
350 | |
351 | if (bits % BITS_PER_LONG) { |
352 | if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) { |
353 | return 1; |
354 | } |
355 | } |
356 | return 0; |
357 | } |
358 | |
359 | long slow_bitmap_count_one(const unsigned long *bitmap, long nbits) |
360 | { |
361 | long k, lim = nbits / BITS_PER_LONG, result = 0; |
362 | |
363 | for (k = 0; k < lim; k++) { |
364 | result += ctpopl(bitmap[k]); |
365 | } |
366 | |
367 | if (nbits % BITS_PER_LONG) { |
368 | result += ctpopl(bitmap[k] & BITMAP_LAST_WORD_MASK(nbits)); |
369 | } |
370 | |
371 | return result; |
372 | } |
373 | |
374 | static void bitmap_to_from_le(unsigned long *dst, |
375 | const unsigned long *src, long nbits) |
376 | { |
377 | long len = BITS_TO_LONGS(nbits); |
378 | |
379 | #ifdef HOST_WORDS_BIGENDIAN |
380 | long index; |
381 | |
382 | for (index = 0; index < len; index++) { |
383 | # if HOST_LONG_BITS == 64 |
384 | dst[index] = bswap64(src[index]); |
385 | # else |
386 | dst[index] = bswap32(src[index]); |
387 | # endif |
388 | } |
389 | #else |
390 | memcpy(dst, src, len * sizeof(unsigned long)); |
391 | #endif |
392 | } |
393 | |
394 | void bitmap_from_le(unsigned long *dst, const unsigned long *src, |
395 | long nbits) |
396 | { |
397 | bitmap_to_from_le(dst, src, nbits); |
398 | } |
399 | |
400 | void bitmap_to_le(unsigned long *dst, const unsigned long *src, |
401 | long nbits) |
402 | { |
403 | bitmap_to_from_le(dst, src, nbits); |
404 | } |
405 | |
406 | /* |
407 | * Copy "src" bitmap with a positive offset and put it into the "dst" |
408 | * bitmap. The caller needs to make sure the bitmap size of "src" |
409 | * is bigger than (shift + nbits). |
410 | */ |
411 | void bitmap_copy_with_src_offset(unsigned long *dst, const unsigned long *src, |
412 | unsigned long shift, unsigned long nbits) |
413 | { |
414 | unsigned long left_mask, right_mask, last_mask; |
415 | |
416 | /* Proper shift src pointer to the first word to copy from */ |
417 | src += BIT_WORD(shift); |
418 | shift %= BITS_PER_LONG; |
419 | |
420 | if (!shift) { |
421 | /* Fast path */ |
422 | bitmap_copy(dst, src, nbits); |
423 | return; |
424 | } |
425 | |
426 | right_mask = (1ul << shift) - 1; |
427 | left_mask = ~right_mask; |
428 | |
429 | while (nbits >= BITS_PER_LONG) { |
430 | *dst = (*src & left_mask) >> shift; |
431 | *dst |= (src[1] & right_mask) << (BITS_PER_LONG - shift); |
432 | dst++; |
433 | src++; |
434 | nbits -= BITS_PER_LONG; |
435 | } |
436 | |
437 | if (nbits > BITS_PER_LONG - shift) { |
438 | *dst = (*src & left_mask) >> shift; |
439 | nbits -= BITS_PER_LONG - shift; |
440 | last_mask = (1ul << nbits) - 1; |
441 | *dst |= (src[1] & last_mask) << (BITS_PER_LONG - shift); |
442 | } else if (nbits) { |
443 | last_mask = (1ul << nbits) - 1; |
444 | *dst = (*src >> shift) & last_mask; |
445 | } |
446 | } |
447 | |
448 | /* |
449 | * Copy "src" bitmap into the "dst" bitmap with an offset in the |
450 | * "dst". The caller needs to make sure the bitmap size of "dst" is |
451 | * bigger than (shift + nbits). |
452 | */ |
453 | void bitmap_copy_with_dst_offset(unsigned long *dst, const unsigned long *src, |
454 | unsigned long shift, unsigned long nbits) |
455 | { |
456 | unsigned long left_mask, right_mask, last_mask; |
457 | |
458 | /* Proper shift dst pointer to the first word to copy from */ |
459 | dst += BIT_WORD(shift); |
460 | shift %= BITS_PER_LONG; |
461 | |
462 | if (!shift) { |
463 | /* Fast path */ |
464 | bitmap_copy(dst, src, nbits); |
465 | return; |
466 | } |
467 | |
468 | right_mask = (1ul << (BITS_PER_LONG - shift)) - 1; |
469 | left_mask = ~right_mask; |
470 | |
471 | *dst &= (1ul << shift) - 1; |
472 | while (nbits >= BITS_PER_LONG) { |
473 | *dst |= (*src & right_mask) << shift; |
474 | dst[1] = (*src & left_mask) >> (BITS_PER_LONG - shift); |
475 | dst++; |
476 | src++; |
477 | nbits -= BITS_PER_LONG; |
478 | } |
479 | |
480 | if (nbits > BITS_PER_LONG - shift) { |
481 | *dst |= (*src & right_mask) << shift; |
482 | nbits -= BITS_PER_LONG - shift; |
483 | last_mask = ((1ul << nbits) - 1) << (BITS_PER_LONG - shift); |
484 | dst[1] = (*src & last_mask) >> (BITS_PER_LONG - shift); |
485 | } else if (nbits) { |
486 | last_mask = (1ul << nbits) - 1; |
487 | *dst |= (*src & last_mask) << shift; |
488 | } |
489 | } |
490 | |