/*
 * Copyright © 2012,2017  Google, Inc.
 * Copyright © 2021 Behdad Esfahbod
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
27 | |
28 | #ifndef HB_BIT_SET_HH |
29 | #define HB_BIT_SET_HH |
30 | |
#include "hb.hh"
#include "hb-bit-page.hh"
33 | |
34 | |
35 | struct hb_bit_set_t |
36 | { |
37 | hb_bit_set_t () = default; |
38 | ~hb_bit_set_t () = default; |
39 | |
40 | hb_bit_set_t (const hb_bit_set_t& other) : hb_bit_set_t () { set (other, true); } |
41 | hb_bit_set_t ( hb_bit_set_t&& other) : hb_bit_set_t () { hb_swap (*this, other); } |
42 | hb_bit_set_t& operator= (const hb_bit_set_t& other) { set (other); return *this; } |
43 | hb_bit_set_t& operator= (hb_bit_set_t&& other) { hb_swap (*this, other); return *this; } |
44 | friend void swap (hb_bit_set_t &a, hb_bit_set_t &b) |
45 | { |
46 | if (likely (!a.successful || !b.successful)) |
47 | return; |
48 | hb_swap (a.population, b.population); |
49 | hb_swap (a.last_page_lookup, b.last_page_lookup); |
50 | hb_swap (a.page_map, b.page_map); |
51 | hb_swap (a.pages, b.pages); |
52 | } |
53 | |
54 | void init () |
55 | { |
56 | successful = true; |
57 | population = 0; |
58 | last_page_lookup = 0; |
59 | page_map.init (); |
60 | pages.init (); |
61 | } |
62 | void fini () |
63 | { |
64 | page_map.fini (); |
65 | pages.fini (); |
66 | } |
67 | |
68 | using page_t = hb_bit_page_t; |
69 | struct page_map_t |
70 | { |
71 | int cmp (const page_map_t &o) const { return cmp (o.major); } |
72 | int cmp (uint32_t o_major) const { return (int) o_major - (int) major; } |
73 | |
74 | uint32_t major; |
75 | uint32_t index; |
76 | }; |
77 | |
78 | bool successful = true; /* Allocations successful */ |
79 | mutable unsigned int population = 0; |
80 | mutable hb_atomic_int_t last_page_lookup = 0; |
81 | hb_sorted_vector_t<page_map_t> page_map; |
82 | hb_vector_t<page_t> pages; |
83 | |
84 | void err () { if (successful) successful = false; } /* TODO Remove */ |
85 | bool in_error () const { return !successful; } |
86 | |
87 | bool resize (unsigned int count, bool clear = true, bool exact_size = false) |
88 | { |
89 | if (unlikely (!successful)) return false; |
90 | |
91 | if (pages.length == 0 && count == 1) |
92 | exact_size = true; // Most sets are small and local |
93 | |
94 | if (unlikely (!pages.resize (count, clear, exact_size) || !page_map.resize (count, clear, exact_size))) |
95 | { |
96 | pages.resize (page_map.length, clear, exact_size); |
97 | successful = false; |
98 | return false; |
99 | } |
100 | return true; |
101 | } |
102 | |
103 | void alloc (unsigned sz) |
104 | { |
105 | sz >>= (page_t::PAGE_BITS_LOG_2 - 1); |
106 | pages.alloc (sz); |
107 | page_map.alloc (sz); |
108 | } |
109 | |
110 | void reset () |
111 | { |
112 | successful = true; |
113 | clear (); |
114 | } |
115 | |
116 | void clear () |
117 | { |
118 | resize (0); |
119 | if (likely (successful)) |
120 | population = 0; |
121 | } |
122 | bool is_empty () const |
123 | { |
124 | unsigned int count = pages.length; |
125 | for (unsigned int i = 0; i < count; i++) |
126 | if (!pages[i].is_empty ()) |
127 | return false; |
128 | return true; |
129 | } |
130 | explicit operator bool () const { return !is_empty (); } |
131 | |
132 | uint32_t hash () const |
133 | { |
134 | uint32_t h = 0; |
135 | for (auto &map : page_map) |
136 | { |
137 | auto &page = pages.arrayZ[map.index]; |
138 | if (unlikely (page.is_empty ())) continue; |
139 | h = h * 31 + hb_hash (map.major) + hb_hash (page); |
140 | } |
141 | return h; |
142 | } |
143 | |
144 | private: |
145 | void dirty () { population = UINT_MAX; } |
146 | public: |
147 | |
148 | void add (hb_codepoint_t g) |
149 | { |
150 | if (unlikely (!successful)) return; |
151 | if (unlikely (g == INVALID)) return; |
152 | dirty (); |
153 | page_t *page = page_for (g, true); if (unlikely (!page)) return; |
154 | page->add (g); |
155 | } |
156 | bool add_range (hb_codepoint_t a, hb_codepoint_t b) |
157 | { |
158 | if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ |
159 | if (unlikely (a > b || a == INVALID || b == INVALID)) return false; |
160 | dirty (); |
161 | unsigned int ma = get_major (a); |
162 | unsigned int mb = get_major (b); |
163 | if (ma == mb) |
164 | { |
165 | page_t *page = page_for (a, true); if (unlikely (!page)) return false; |
166 | page->add_range (a, b); |
167 | } |
168 | else |
169 | { |
170 | page_t *page = page_for (a, true); if (unlikely (!page)) return false; |
171 | page->add_range (a, major_start (ma + 1) - 1); |
172 | |
173 | for (unsigned int m = ma + 1; m < mb; m++) |
174 | { |
175 | page = page_for (major_start (m), true); if (unlikely (!page)) return false; |
176 | page->init1 (); |
177 | } |
178 | |
179 | page = page_for (b, true); if (unlikely (!page)) return false; |
180 | page->add_range (major_start (mb), b); |
181 | } |
182 | return true; |
183 | } |
184 | |
185 | /* Duplicated here from hb-machinery.hh to avoid including it. */ |
186 | template<typename Type> |
187 | static inline const Type& StructAtOffsetUnaligned(const void *P, unsigned int offset) |
188 | { |
189 | #pragma GCC diagnostic push |
190 | #pragma GCC diagnostic ignored "-Wcast-align" |
191 | return * reinterpret_cast<const Type*> ((const char *) P + offset); |
192 | #pragma GCC diagnostic pop |
193 | } |
194 | |
195 | template <typename T> |
196 | void set_array (bool v, const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
197 | { |
198 | if (unlikely (!successful)) return; |
199 | if (!count) return; |
200 | dirty (); |
201 | hb_codepoint_t g = *array; |
202 | while (count) |
203 | { |
204 | unsigned int m = get_major (g); |
205 | page_t *page = page_for (g, v); if (unlikely (v && !page)) return; |
206 | unsigned int start = major_start (m); |
207 | unsigned int end = major_start (m + 1); |
208 | do |
209 | { |
210 | if (g != INVALID && (v || page)) /* The v check is to optimize out the page check if v is true. */ |
211 | page->set (g, v); |
212 | |
213 | array = &StructAtOffsetUnaligned<T> (array, stride); |
214 | count--; |
215 | } |
216 | while (count && (g = *array, start <= g && g < end)); |
217 | } |
218 | } |
219 | |
220 | template <typename T> |
221 | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
222 | { set_array (true, array, count, stride); } |
223 | template <typename T> |
224 | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } |
225 | |
226 | template <typename T> |
227 | void del_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
228 | { set_array (false, array, count, stride); } |
229 | template <typename T> |
230 | void del_array (const hb_array_t<const T>& arr) { del_array (&arr, arr.len ()); } |
231 | |
232 | /* Might return false if array looks unsorted. |
233 | * Used for faster rejection of corrupt data. */ |
234 | template <typename T> |
235 | bool set_sorted_array (bool v, const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
236 | { |
237 | if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */ |
238 | if (unlikely (!count)) return true; |
239 | dirty (); |
240 | hb_codepoint_t g = *array; |
241 | hb_codepoint_t last_g = g; |
242 | while (count) |
243 | { |
244 | unsigned int m = get_major (g); |
245 | page_t *page = page_for (g, v); if (unlikely (v && !page)) return false; |
246 | unsigned int end = major_start (m + 1); |
247 | do |
248 | { |
249 | /* If we try harder we can change the following comparison to <=; |
250 | * Not sure if it's worth it. */ |
251 | if (g < last_g) return false; |
252 | last_g = g; |
253 | |
254 | if (g != INVALID && (v || page)) /* The v check is to optimize out the page check if v is true. */ |
255 | page->add (g); |
256 | |
257 | array = &StructAtOffsetUnaligned<T> (array, stride); |
258 | count--; |
259 | } |
260 | while (count && (g = *array, g < end)); |
261 | } |
262 | return true; |
263 | } |
264 | |
265 | template <typename T> |
266 | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
267 | { return set_sorted_array (true, array, count, stride); } |
268 | template <typename T> |
269 | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } |
270 | |
271 | template <typename T> |
272 | bool del_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) |
273 | { return set_sorted_array (false, array, count, stride); } |
274 | template <typename T> |
275 | bool del_sorted_array (const hb_sorted_array_t<const T>& arr) { return del_sorted_array (&arr, arr.len ()); } |
276 | |
277 | void del (hb_codepoint_t g) |
278 | { |
279 | if (unlikely (!successful)) return; |
280 | page_t *page = page_for (g); |
281 | if (!page) |
282 | return; |
283 | dirty (); |
284 | page->del (g); |
285 | } |
286 | |
287 | private: |
288 | void del_pages (int ds, int de) |
289 | { |
290 | if (ds <= de) |
291 | { |
292 | // Pre-allocate the workspace that compact() will need so we can bail on allocation failure |
293 | // before attempting to rewrite the page map. |
294 | hb_vector_t<unsigned> compact_workspace; |
295 | if (unlikely (!allocate_compact_workspace (compact_workspace))) return; |
296 | |
297 | unsigned int write_index = 0; |
298 | for (unsigned int i = 0; i < page_map.length; i++) |
299 | { |
300 | int m = (int) page_map[i].major; |
301 | if (m < ds || de < m) |
302 | page_map[write_index++] = page_map[i]; |
303 | } |
304 | compact (compact_workspace, write_index); |
305 | resize (write_index); |
306 | } |
307 | } |
308 | |
309 | |
310 | public: |
311 | void del_range (hb_codepoint_t a, hb_codepoint_t b) |
312 | { |
313 | if (unlikely (!successful)) return; |
314 | if (unlikely (a > b || a == INVALID)) return; |
315 | dirty (); |
316 | unsigned int ma = get_major (a); |
317 | unsigned int mb = get_major (b); |
318 | /* Delete pages from ds through de if ds <= de. */ |
319 | int ds = (a == major_start (ma))? (int) ma: (int) (ma + 1); |
320 | int de = (b + 1 == major_start (mb + 1))? (int) mb: ((int) mb - 1); |
321 | if (ds > de || (int) ma < ds) |
322 | { |
323 | page_t *page = page_for (a); |
324 | if (page) |
325 | { |
326 | if (ma == mb) |
327 | page->del_range (a, b); |
328 | else |
329 | page->del_range (a, major_start (ma + 1) - 1); |
330 | } |
331 | } |
332 | if (de < (int) mb && ma != mb) |
333 | { |
334 | page_t *page = page_for (b); |
335 | if (page) |
336 | page->del_range (major_start (mb), b); |
337 | } |
338 | del_pages (ds, de); |
339 | } |
340 | |
341 | bool get (hb_codepoint_t g) const |
342 | { |
343 | const page_t *page = page_for (g); |
344 | if (!page) |
345 | return false; |
346 | return page->get (g); |
347 | } |
348 | |
349 | /* Has interface. */ |
350 | bool operator [] (hb_codepoint_t k) const { return get (k); } |
351 | bool has (hb_codepoint_t k) const { return (*this)[k]; } |
352 | /* Predicate. */ |
353 | bool operator () (hb_codepoint_t k) const { return has (k); } |
354 | |
355 | /* Sink interface. */ |
356 | hb_bit_set_t& operator << (hb_codepoint_t v) |
357 | { add (v); return *this; } |
358 | hb_bit_set_t& operator << (const hb_codepoint_pair_t& range) |
359 | { add_range (range.first, range.second); return *this; } |
360 | |
361 | bool intersects (hb_codepoint_t first, hb_codepoint_t last) const |
362 | { |
363 | hb_codepoint_t c = first - 1; |
364 | return next (&c) && c <= last; |
365 | } |
366 | void set (const hb_bit_set_t &other, bool exact_size = false) |
367 | { |
368 | if (unlikely (!successful)) return; |
369 | unsigned int count = other.pages.length; |
370 | if (unlikely (!resize (count, false, exact_size))) |
371 | return; |
372 | population = other.population; |
373 | |
374 | page_map = other.page_map; |
375 | pages = other.pages; |
376 | } |
377 | |
378 | bool is_equal (const hb_bit_set_t &other) const |
379 | { |
380 | if (has_population () && other.has_population () && |
381 | population != other.population) |
382 | return false; |
383 | |
384 | unsigned int na = pages.length; |
385 | unsigned int nb = other.pages.length; |
386 | |
387 | unsigned int a = 0, b = 0; |
388 | for (; a < na && b < nb; ) |
389 | { |
390 | if (page_at (a).is_empty ()) { a++; continue; } |
391 | if (other.page_at (b).is_empty ()) { b++; continue; } |
392 | if (page_map[a].major != other.page_map[b].major || |
393 | !page_at (a).is_equal (other.page_at (b))) |
394 | return false; |
395 | a++; |
396 | b++; |
397 | } |
398 | for (; a < na; a++) |
399 | if (!page_at (a).is_empty ()) { return false; } |
400 | for (; b < nb; b++) |
401 | if (!other.page_at (b).is_empty ()) { return false; } |
402 | |
403 | return true; |
404 | } |
405 | |
406 | bool is_subset (const hb_bit_set_t &larger_set) const |
407 | { |
408 | if (has_population () && larger_set.has_population () && |
409 | population > larger_set.population) |
410 | return false; |
411 | |
412 | uint32_t spi = 0; |
413 | for (uint32_t lpi = 0; spi < page_map.length && lpi < larger_set.page_map.length; lpi++) |
414 | { |
415 | uint32_t spm = page_map[spi].major; |
416 | uint32_t lpm = larger_set.page_map[lpi].major; |
417 | auto sp = page_at (spi); |
418 | |
419 | if (spm < lpm && !sp.is_empty ()) |
420 | return false; |
421 | |
422 | if (lpm < spm) |
423 | continue; |
424 | |
425 | auto lp = larger_set.page_at (lpi); |
426 | if (!sp.is_subset (lp)) |
427 | return false; |
428 | |
429 | spi++; |
430 | } |
431 | |
432 | while (spi < page_map.length) |
433 | if (!page_at (spi++).is_empty ()) |
434 | return false; |
435 | |
436 | return true; |
437 | } |
438 | |
439 | private: |
440 | bool allocate_compact_workspace (hb_vector_t<unsigned>& workspace) |
441 | { |
442 | if (unlikely (!workspace.resize_exact (pages.length))) |
443 | { |
444 | successful = false; |
445 | return false; |
446 | } |
447 | |
448 | return true; |
449 | } |
450 | |
451 | /* |
452 | * workspace should be a pre-sized vector allocated to hold at exactly pages.length |
453 | * elements. |
454 | */ |
455 | void compact (hb_vector_t<unsigned>& workspace, |
456 | unsigned int length) |
457 | { |
458 | assert(workspace.length == pages.length); |
459 | hb_vector_t<unsigned>& old_index_to_page_map_index = workspace; |
460 | |
461 | hb_fill (old_index_to_page_map_index.writer(), 0xFFFFFFFF); |
462 | for (unsigned i = 0; i < length; i++) |
463 | old_index_to_page_map_index[page_map[i].index] = i; |
464 | |
465 | compact_pages (old_index_to_page_map_index); |
466 | } |
467 | void compact_pages (const hb_vector_t<unsigned>& old_index_to_page_map_index) |
468 | { |
469 | unsigned int write_index = 0; |
470 | for (unsigned int i = 0; i < pages.length; i++) |
471 | { |
472 | if (old_index_to_page_map_index[i] == 0xFFFFFFFF) continue; |
473 | |
474 | if (write_index < i) |
475 | pages[write_index] = pages[i]; |
476 | |
477 | page_map[old_index_to_page_map_index[i]].index = write_index; |
478 | write_index++; |
479 | } |
480 | } |
481 | public: |
482 | |
483 | void process_ (hb_bit_page_t::vector_t (*op) (const hb_bit_page_t::vector_t &, const hb_bit_page_t::vector_t &), |
484 | bool passthru_left, bool passthru_right, |
485 | const hb_bit_set_t &other) |
486 | { |
487 | if (unlikely (!successful)) return; |
488 | |
489 | dirty (); |
490 | |
491 | unsigned int na = pages.length; |
492 | unsigned int nb = other.pages.length; |
493 | unsigned int next_page = na; |
494 | |
495 | unsigned int count = 0, newCount = 0; |
496 | unsigned int a = 0, b = 0; |
497 | unsigned int write_index = 0; |
498 | |
499 | // Pre-allocate the workspace that compact() will need so we can bail on allocation failure |
500 | // before attempting to rewrite the page map. |
501 | hb_vector_t<unsigned> compact_workspace; |
502 | if (!passthru_left && unlikely (!allocate_compact_workspace (compact_workspace))) return; |
503 | |
504 | for (; a < na && b < nb; ) |
505 | { |
506 | if (page_map[a].major == other.page_map[b].major) |
507 | { |
508 | if (!passthru_left) |
509 | { |
510 | // Move page_map entries that we're keeping from the left side set |
511 | // to the front of the page_map vector. This isn't necessary if |
512 | // passthru_left is set since no left side pages will be removed |
513 | // in that case. |
514 | if (write_index < a) |
515 | page_map[write_index] = page_map[a]; |
516 | write_index++; |
517 | } |
518 | |
519 | count++; |
520 | a++; |
521 | b++; |
522 | } |
523 | else if (page_map[a].major < other.page_map[b].major) |
524 | { |
525 | if (passthru_left) |
526 | count++; |
527 | a++; |
528 | } |
529 | else |
530 | { |
531 | if (passthru_right) |
532 | count++; |
533 | b++; |
534 | } |
535 | } |
536 | if (passthru_left) |
537 | count += na - a; |
538 | if (passthru_right) |
539 | count += nb - b; |
540 | |
541 | if (!passthru_left) |
542 | { |
543 | na = write_index; |
544 | next_page = write_index; |
545 | compact (compact_workspace, write_index); |
546 | } |
547 | |
548 | if (unlikely (!resize (count))) |
549 | return; |
550 | |
551 | newCount = count; |
552 | |
553 | /* Process in-place backward. */ |
554 | a = na; |
555 | b = nb; |
556 | for (; a && b; ) |
557 | { |
558 | if (page_map.arrayZ[a - 1].major == other.page_map.arrayZ[b - 1].major) |
559 | { |
560 | a--; |
561 | b--; |
562 | count--; |
563 | page_map.arrayZ[count] = page_map.arrayZ[a]; |
564 | page_at (count).v = op (page_at (a).v, other.page_at (b).v); |
565 | page_at (count).dirty (); |
566 | } |
567 | else if (page_map.arrayZ[a - 1].major > other.page_map.arrayZ[b - 1].major) |
568 | { |
569 | a--; |
570 | if (passthru_left) |
571 | { |
572 | count--; |
573 | page_map.arrayZ[count] = page_map.arrayZ[a]; |
574 | } |
575 | } |
576 | else |
577 | { |
578 | b--; |
579 | if (passthru_right) |
580 | { |
581 | count--; |
582 | page_map.arrayZ[count].major = other.page_map.arrayZ[b].major; |
583 | page_map.arrayZ[count].index = next_page++; |
584 | page_at (count) = other.page_at (b); |
585 | } |
586 | } |
587 | } |
588 | if (passthru_left) |
589 | while (a) |
590 | { |
591 | a--; |
592 | count--; |
593 | page_map.arrayZ[count] = page_map.arrayZ[a]; |
594 | } |
595 | if (passthru_right) |
596 | while (b) |
597 | { |
598 | b--; |
599 | count--; |
600 | page_map.arrayZ[count].major = other.page_map.arrayZ[b].major; |
601 | page_map.arrayZ[count].index = next_page++; |
602 | page_at (count) = other.page_at (b); |
603 | } |
604 | assert (!count); |
605 | resize (newCount); |
606 | } |
607 | template <typename Op> |
608 | static hb_bit_page_t::vector_t |
609 | op_ (const hb_bit_page_t::vector_t &a, const hb_bit_page_t::vector_t &b) |
610 | { return Op{} (a, b); } |
611 | template <typename Op> |
612 | void process (const Op& op, const hb_bit_set_t &other) |
613 | { |
614 | process_ (op_<Op>, op (1, 0), op (0, 1), other); |
615 | } |
616 | |
617 | void union_ (const hb_bit_set_t &other) { process (hb_bitwise_or, other); } |
618 | void intersect (const hb_bit_set_t &other) { process (hb_bitwise_and, other); } |
619 | void subtract (const hb_bit_set_t &other) { process (hb_bitwise_gt, other); } |
620 | void symmetric_difference (const hb_bit_set_t &other) { process (hb_bitwise_xor, other); } |
621 | |
622 | bool next (hb_codepoint_t *codepoint) const |
623 | { |
624 | if (unlikely (*codepoint == INVALID)) { |
625 | *codepoint = get_min (); |
626 | return *codepoint != INVALID; |
627 | } |
628 | |
629 | const auto* page_map_array = page_map.arrayZ; |
630 | unsigned int major = get_major (*codepoint); |
631 | unsigned int i = last_page_lookup; |
632 | |
633 | if (unlikely (i >= page_map.length || page_map_array[i].major != major)) |
634 | { |
635 | page_map.bfind (major, &i, HB_NOT_FOUND_STORE_CLOSEST); |
636 | if (i >= page_map.length) { |
637 | *codepoint = INVALID; |
638 | return false; |
639 | } |
640 | last_page_lookup = i; |
641 | } |
642 | |
643 | const auto* pages_array = pages.arrayZ; |
644 | const page_map_t ¤t = page_map_array[i]; |
645 | if (likely (current.major == major)) |
646 | { |
647 | if (pages_array[current.index].next (codepoint)) |
648 | { |
649 | *codepoint += current.major * page_t::PAGE_BITS; |
650 | return true; |
651 | } |
652 | i++; |
653 | } |
654 | |
655 | for (; i < page_map.length; i++) |
656 | { |
657 | const page_map_t ¤t = page_map_array[i]; |
658 | hb_codepoint_t m = pages_array[current.index].get_min (); |
659 | if (m != INVALID) |
660 | { |
661 | *codepoint = current.major * page_t::PAGE_BITS + m; |
662 | last_page_lookup = i; |
663 | return true; |
664 | } |
665 | } |
666 | *codepoint = INVALID; |
667 | return false; |
668 | } |
669 | bool previous (hb_codepoint_t *codepoint) const |
670 | { |
671 | if (unlikely (*codepoint == INVALID)) { |
672 | *codepoint = get_max (); |
673 | return *codepoint != INVALID; |
674 | } |
675 | |
676 | page_map_t map = {get_major (*codepoint), 0}; |
677 | unsigned int i; |
678 | page_map.bfind (map, &i, HB_NOT_FOUND_STORE_CLOSEST); |
679 | if (i < page_map.length && page_map.arrayZ[i].major == map.major) |
680 | { |
681 | if (pages[page_map.arrayZ[i].index].previous (codepoint)) |
682 | { |
683 | *codepoint += page_map.arrayZ[i].major * page_t::PAGE_BITS; |
684 | return true; |
685 | } |
686 | } |
687 | i--; |
688 | for (; (int) i >= 0; i--) |
689 | { |
690 | hb_codepoint_t m = pages.arrayZ[page_map.arrayZ[i].index].get_max (); |
691 | if (m != INVALID) |
692 | { |
693 | *codepoint = page_map.arrayZ[i].major * page_t::PAGE_BITS + m; |
694 | return true; |
695 | } |
696 | } |
697 | *codepoint = INVALID; |
698 | return false; |
699 | } |
700 | bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const |
701 | { |
702 | hb_codepoint_t i; |
703 | |
704 | i = *last; |
705 | if (!next (&i)) |
706 | { |
707 | *last = *first = INVALID; |
708 | return false; |
709 | } |
710 | |
711 | /* TODO Speed up. */ |
712 | *last = *first = i; |
713 | while (next (&i) && i == *last + 1) |
714 | (*last)++; |
715 | |
716 | return true; |
717 | } |
718 | bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const |
719 | { |
720 | hb_codepoint_t i; |
721 | |
722 | i = *first; |
723 | if (!previous (&i)) |
724 | { |
725 | *last = *first = INVALID; |
726 | return false; |
727 | } |
728 | |
729 | /* TODO Speed up. */ |
730 | *last = *first = i; |
731 | while (previous (&i) && i == *first - 1) |
732 | (*first)--; |
733 | |
734 | return true; |
735 | } |
736 | |
737 | unsigned int next_many (hb_codepoint_t codepoint, |
738 | hb_codepoint_t *out, |
739 | unsigned int size) const |
740 | { |
741 | // By default, start at the first bit of the first page of values. |
742 | unsigned int start_page = 0; |
743 | unsigned int start_page_value = 0; |
744 | if (unlikely (codepoint != INVALID)) |
745 | { |
746 | const auto* page_map_array = page_map.arrayZ; |
747 | unsigned int major = get_major (codepoint); |
748 | unsigned int i = last_page_lookup; |
749 | if (unlikely (i >= page_map.length || page_map_array[i].major != major)) |
750 | { |
751 | page_map.bfind (major, &i, HB_NOT_FOUND_STORE_CLOSEST); |
752 | if (i >= page_map.length) |
753 | return 0; // codepoint is greater than our max element. |
754 | } |
755 | start_page = i; |
756 | start_page_value = page_remainder (codepoint + 1); |
757 | if (unlikely (start_page_value == 0)) |
758 | { |
759 | // The export-after value was last in the page. Start on next page. |
760 | start_page++; |
761 | start_page_value = 0; |
762 | } |
763 | } |
764 | |
765 | unsigned int initial_size = size; |
766 | for (unsigned int i = start_page; i < page_map.length && size; i++) |
767 | { |
768 | uint32_t base = major_start (page_map[i].major); |
769 | unsigned int n = pages[page_map[i].index].write (base, start_page_value, out, size); |
770 | out += n; |
771 | size -= n; |
772 | start_page_value = 0; |
773 | } |
774 | return initial_size - size; |
775 | } |
776 | |
777 | unsigned int next_many_inverted (hb_codepoint_t codepoint, |
778 | hb_codepoint_t *out, |
779 | unsigned int size) const |
780 | { |
781 | unsigned int initial_size = size; |
782 | // By default, start at the first bit of the first page of values. |
783 | unsigned int start_page = 0; |
784 | unsigned int start_page_value = 0; |
785 | if (unlikely (codepoint != INVALID)) |
786 | { |
787 | const auto* page_map_array = page_map.arrayZ; |
788 | unsigned int major = get_major (codepoint); |
789 | unsigned int i = last_page_lookup; |
790 | if (unlikely (i >= page_map.length || page_map_array[i].major != major)) |
791 | { |
792 | page_map.bfind(major, &i, HB_NOT_FOUND_STORE_CLOSEST); |
793 | if (unlikely (i >= page_map.length)) |
794 | { |
795 | // codepoint is greater than our max element. |
796 | while (++codepoint != INVALID && size) |
797 | { |
798 | *out++ = codepoint; |
799 | size--; |
800 | } |
801 | return initial_size - size; |
802 | } |
803 | } |
804 | start_page = i; |
805 | start_page_value = page_remainder (codepoint + 1); |
806 | if (unlikely (start_page_value == 0)) |
807 | { |
808 | // The export-after value was last in the page. Start on next page. |
809 | start_page++; |
810 | start_page_value = 0; |
811 | } |
812 | } |
813 | |
814 | hb_codepoint_t next_value = codepoint + 1; |
815 | for (unsigned int i=start_page; i<page_map.length && size; i++) |
816 | { |
817 | uint32_t base = major_start (page_map[i].major); |
818 | unsigned int n = pages[page_map[i].index].write_inverted (base, start_page_value, out, size, &next_value); |
819 | out += n; |
820 | size -= n; |
821 | start_page_value = 0; |
822 | } |
823 | while (next_value < HB_SET_VALUE_INVALID && size) { |
824 | *out++ = next_value++; |
825 | size--; |
826 | } |
827 | return initial_size - size; |
828 | } |
829 | |
830 | bool has_population () const { return population != UINT_MAX; } |
831 | unsigned int get_population () const |
832 | { |
833 | if (has_population ()) |
834 | return population; |
835 | |
836 | unsigned int pop = 0; |
837 | unsigned int count = pages.length; |
838 | for (unsigned int i = 0; i < count; i++) |
839 | pop += pages[i].get_population (); |
840 | |
841 | population = pop; |
842 | return pop; |
843 | } |
844 | hb_codepoint_t get_min () const |
845 | { |
846 | unsigned count = pages.length; |
847 | for (unsigned i = 0; i < count; i++) |
848 | { |
849 | const auto& map = page_map[i]; |
850 | const auto& page = pages[map.index]; |
851 | |
852 | if (!page.is_empty ()) |
853 | return map.major * page_t::PAGE_BITS + page.get_min (); |
854 | } |
855 | return INVALID; |
856 | } |
857 | hb_codepoint_t get_max () const |
858 | { |
859 | unsigned count = pages.length; |
860 | for (signed i = count - 1; i >= 0; i--) |
861 | { |
862 | const auto& map = page_map[(unsigned) i]; |
863 | const auto& page = pages[map.index]; |
864 | |
865 | if (!page.is_empty ()) |
866 | return map.major * page_t::PAGE_BITS + page.get_max (); |
867 | } |
868 | return INVALID; |
869 | } |
870 | |
871 | static constexpr hb_codepoint_t INVALID = page_t::INVALID; |
872 | |
873 | /* |
874 | * Iterator implementation. |
875 | */ |
876 | struct iter_t : hb_iter_with_fallback_t<iter_t, hb_codepoint_t> |
877 | { |
878 | static constexpr bool is_sorted_iterator = true; |
879 | static constexpr bool has_fast_len = true; |
880 | iter_t (const hb_bit_set_t &s_ = Null (hb_bit_set_t), |
881 | bool init = true) : s (&s_), v (INVALID), l(0) |
882 | { |
883 | if (init) |
884 | { |
885 | l = s->get_population () + 1; |
886 | __next__ (); |
887 | } |
888 | } |
889 | |
890 | typedef hb_codepoint_t __item_t__; |
891 | hb_codepoint_t __item__ () const { return v; } |
892 | bool __more__ () const { return v != INVALID; } |
893 | void __next__ () { s->next (&v); if (l) l--; } |
894 | void __prev__ () { s->previous (&v); } |
895 | unsigned __len__ () const { return l; } |
896 | iter_t end () const { return iter_t (*s, false); } |
897 | bool operator != (const iter_t& o) const |
898 | { return s != o.s || v != o.v; } |
899 | |
900 | protected: |
901 | const hb_bit_set_t *s; |
902 | hb_codepoint_t v; |
903 | unsigned l; |
904 | }; |
905 | iter_t iter () const { return iter_t (*this); } |
906 | operator iter_t () const { return iter (); } |
907 | |
908 | protected: |
909 | |
910 | page_t *page_for (hb_codepoint_t g, bool insert = false) |
911 | { |
912 | unsigned major = get_major (g); |
913 | |
914 | /* The extra page_map length is necessary; can't just rely on vector here, |
915 | * since the next check would be tricked because a null page also has |
916 | * major==0, which we can't distinguish from an actualy major==0 page... */ |
917 | unsigned i = last_page_lookup; |
918 | if (likely (i < page_map.length)) |
919 | { |
920 | auto &cached_page = page_map.arrayZ[i]; |
921 | if (cached_page.major == major) |
922 | return &pages.arrayZ[cached_page.index]; |
923 | } |
924 | |
925 | page_map_t map = {major, pages.length}; |
926 | if (!page_map.bfind (map, &i, HB_NOT_FOUND_STORE_CLOSEST)) |
927 | { |
928 | if (!insert) |
929 | return nullptr; |
930 | |
931 | if (unlikely (!resize (pages.length + 1))) |
932 | return nullptr; |
933 | |
934 | pages.arrayZ[map.index].init0 (); |
935 | memmove (page_map.arrayZ + i + 1, |
936 | page_map.arrayZ + i, |
937 | (page_map.length - 1 - i) * page_map.item_size); |
938 | page_map.arrayZ[i] = map; |
939 | } |
940 | |
941 | last_page_lookup = i; |
942 | return &pages.arrayZ[page_map.arrayZ[i].index]; |
943 | } |
944 | const page_t *page_for (hb_codepoint_t g) const |
945 | { |
946 | unsigned major = get_major (g); |
947 | |
948 | /* The extra page_map length is necessary; can't just rely on vector here, |
949 | * since the next check would be tricked because a null page also has |
950 | * major==0, which we can't distinguish from an actualy major==0 page... */ |
951 | unsigned i = last_page_lookup; |
952 | if (likely (i < page_map.length)) |
953 | { |
954 | auto &cached_page = page_map.arrayZ[i]; |
955 | if (cached_page.major == major) |
956 | return &pages.arrayZ[cached_page.index]; |
957 | } |
958 | |
959 | page_map_t key = {major}; |
960 | if (!page_map.bfind (key, &i)) |
961 | return nullptr; |
962 | |
963 | last_page_lookup = i; |
964 | return &pages.arrayZ[page_map[i].index]; |
965 | } |
966 | page_t &page_at (unsigned int i) |
967 | { |
968 | assert (i < page_map.length); |
969 | return pages.arrayZ[page_map.arrayZ[i].index]; |
970 | } |
971 | const page_t &page_at (unsigned int i) const |
972 | { |
973 | assert (i < page_map.length); |
974 | return pages.arrayZ[page_map.arrayZ[i].index]; |
975 | } |
976 | unsigned int get_major (hb_codepoint_t g) const { return g >> page_t::PAGE_BITS_LOG_2; } |
977 | unsigned int page_remainder (hb_codepoint_t g) const { return g & page_t::PAGE_BITMASK; } |
978 | hb_codepoint_t major_start (unsigned int major) const { return major << page_t::PAGE_BITS_LOG_2; } |
979 | }; |
980 | |
981 | |
982 | #endif /* HB_BIT_SET_HH */ |
983 | |