1/*
2 * Copyright © 2012,2017 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_SET_HH
28#define HB_SET_HH
29
30#include "hb.hh"
31
32
33/*
34 * hb_set_t
35 */
36
37/* TODO Keep a free-list so we can free pages that are completely zeroed. At that
38 * point maybe also use a sentinel value for "all-1" pages? */
39
40struct hb_set_t
41{
42 struct page_map_t
43 {
44 inline int cmp (const page_map_t *o) const { return (int) o->major - (int) major; }
45
46 uint32_t major;
47 uint32_t index;
48 };
49
50 struct page_t
51 {
52 inline void init0 (void) { memset (&v, 0, sizeof (v)); }
53 inline void init1 (void) { memset (&v, 0xff, sizeof (v)); }
54
55 inline unsigned int len (void) const
56 { return ARRAY_LENGTH_CONST (v); }
57
58 inline bool is_empty (void) const
59 {
60 for (unsigned int i = 0; i < len (); i++)
61 if (v[i])
62 return false;
63 return true;
64 }
65
66 inline void add (hb_codepoint_t g) { elt (g) |= mask (g); }
67 inline void del (hb_codepoint_t g) { elt (g) &= ~mask (g); }
68 inline bool has (hb_codepoint_t g) const { return !!(elt (g) & mask (g)); }
69
70 inline void add_range (hb_codepoint_t a, hb_codepoint_t b)
71 {
72 elt_t *la = &elt (a);
73 elt_t *lb = &elt (b);
74 if (la == lb)
75 *la |= (mask (b) << 1) - mask(a);
76 else
77 {
78 *la |= ~(mask (a) - 1);
79 la++;
80
81 memset (la, 0xff, (char *) lb - (char *) la);
82
83 *lb |= ((mask (b) << 1) - 1);
84 }
85 }
86
87 inline bool is_equal (const page_t *other) const
88 {
89 return 0 == memcmp (&v, &other->v, sizeof (v));
90 }
91
92 inline unsigned int get_population (void) const
93 {
94 unsigned int pop = 0;
95 for (unsigned int i = 0; i < len (); i++)
96 pop += hb_popcount (v[i]);
97 return pop;
98 }
99
100 inline bool next (hb_codepoint_t *codepoint) const
101 {
102 unsigned int m = (*codepoint + 1) & MASK;
103 if (!m)
104 {
105 *codepoint = INVALID;
106 return false;
107 }
108 unsigned int i = m / ELT_BITS;
109 unsigned int j = m & ELT_MASK;
110
111 const elt_t vv = v[i] & ~((elt_t (1) << j) - 1);
112 for (const elt_t *p = &vv; i < len (); p = &v[++i])
113 if (*p)
114 {
115 *codepoint = i * ELT_BITS + elt_get_min (*p);
116 return true;
117 }
118
119 *codepoint = INVALID;
120 return false;
121 }
122 inline bool previous (hb_codepoint_t *codepoint) const
123 {
124 unsigned int m = (*codepoint - 1) & MASK;
125 if (m == MASK)
126 {
127 *codepoint = INVALID;
128 return false;
129 }
130 unsigned int i = m / ELT_BITS;
131 unsigned int j = m & ELT_MASK;
132
133 const elt_t vv = v[i] & ((elt_t (1) << (j + 1)) - 1);
134 for (const elt_t *p = &vv; (int) i >= 0; p = &v[--i])
135 if (*p)
136 {
137 *codepoint = i * ELT_BITS + elt_get_max (*p);
138 return true;
139 }
140
141 *codepoint = INVALID;
142 return false;
143 }
144 inline hb_codepoint_t get_min (void) const
145 {
146 for (unsigned int i = 0; i < len (); i++)
147 if (v[i])
148 return i * ELT_BITS + elt_get_min (v[i]);
149 return INVALID;
150 }
151 inline hb_codepoint_t get_max (void) const
152 {
153 for (int i = len () - 1; i >= 0; i--)
154 if (v[i])
155 return i * ELT_BITS + elt_get_max (v[i]);
156 return 0;
157 }
158
159 typedef unsigned long long elt_t;
160 enum { PAGE_BITS = 512 };
161 static_assert ((PAGE_BITS & ((PAGE_BITS) - 1)) == 0, "");
162
163 static inline unsigned int elt_get_min (const elt_t &elt) { return hb_ctz (elt); }
164 static inline unsigned int elt_get_max (const elt_t &elt) { return hb_bit_storage (elt) - 1; }
165
166 typedef hb_vector_size_t<elt_t, PAGE_BITS / 8> vector_t;
167
168 enum { ELT_BITS = sizeof (elt_t) * 8 };
169 enum { ELT_MASK = ELT_BITS - 1 };
170 enum { BITS = sizeof (vector_t) * 8 };
171 enum { MASK = BITS - 1 };
172 static_assert ((unsigned) PAGE_BITS == (unsigned) BITS, "");
173
174 elt_t &elt (hb_codepoint_t g) { return v[(g & MASK) / ELT_BITS]; }
175 elt_t const &elt (hb_codepoint_t g) const { return v[(g & MASK) / ELT_BITS]; }
176 elt_t mask (hb_codepoint_t g) const { return elt_t (1) << (g & ELT_MASK); }
177
178 vector_t v;
179 };
180 static_assert (page_t::PAGE_BITS == sizeof (page_t) * 8, "");
181
182 hb_object_header_t header;
183 bool successful; /* Allocations successful */
184 mutable unsigned int population;
185 hb_vector_t<page_map_t, 1> page_map;
186 hb_vector_t<page_t, 1> pages;
187
188 inline void init_shallow (void)
189 {
190 successful = true;
191 population = 0;
192 page_map.init ();
193 pages.init ();
194 }
195 inline void init (void)
196 {
197 hb_object_init (this);
198 init_shallow ();
199 }
200 inline void fini_shallow (void)
201 {
202 page_map.fini ();
203 pages.fini ();
204 }
205 inline void fini (void)
206 {
207 hb_object_fini (this);
208 fini_shallow ();
209 }
210
211 inline bool resize (unsigned int count)
212 {
213 if (unlikely (!successful)) return false;
214 if (!pages.resize (count) || !page_map.resize (count))
215 {
216 pages.resize (page_map.len);
217 successful = false;
218 return false;
219 }
220 return true;
221 }
222
223 inline void clear (void) {
224 if (unlikely (hb_object_is_inert (this)))
225 return;
226 successful = true;
227 population = 0;
228 page_map.resize (0);
229 pages.resize (0);
230 }
231 inline bool is_empty (void) const {
232 unsigned int count = pages.len;
233 for (unsigned int i = 0; i < count; i++)
234 if (!pages[i].is_empty ())
235 return false;
236 return true;
237 }
238
239 inline void dirty (void) { population = (unsigned int) -1; }
240
241 inline void add (hb_codepoint_t g)
242 {
243 if (unlikely (!successful)) return;
244 if (unlikely (g == INVALID)) return;
245 dirty ();
246 page_t *page = page_for_insert (g); if (unlikely (!page)) return;
247 page->add (g);
248 }
249 inline bool add_range (hb_codepoint_t a, hb_codepoint_t b)
250 {
251 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
252 if (unlikely (a > b || a == INVALID || b == INVALID)) return false;
253 dirty ();
254 unsigned int ma = get_major (a);
255 unsigned int mb = get_major (b);
256 if (ma == mb)
257 {
258 page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
259 page->add_range (a, b);
260 }
261 else
262 {
263 page_t *page = page_for_insert (a); if (unlikely (!page)) return false;
264 page->add_range (a, major_start (ma + 1) - 1);
265
266 for (unsigned int m = ma + 1; m < mb; m++)
267 {
268 page = page_for_insert (major_start (m)); if (unlikely (!page)) return false;
269 page->init1 ();
270 }
271
272 page = page_for_insert (b); if (unlikely (!page)) return false;
273 page->add_range (major_start (mb), b);
274 }
275 return true;
276 }
277
278 template <typename T>
279 inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
280 {
281 if (unlikely (!successful)) return;
282 if (!count) return;
283 dirty ();
284 hb_codepoint_t g = *array;
285 while (count)
286 {
287 unsigned int m = get_major (g);
288 page_t *page = page_for_insert (g); if (unlikely (!page)) return;
289 unsigned int start = major_start (m);
290 unsigned int end = major_start (m + 1);
291 do
292 {
293 page->add (g);
294
295 array = (const T *) ((const char *) array + stride);
296 count--;
297 }
298 while (count && (g = *array, start <= g && g < end));
299 }
300 }
301
302 /* Might return false if array looks unsorted.
303 * Used for faster rejection of corrupt data. */
304 template <typename T>
305 inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
306 {
307 if (unlikely (!successful)) return true; /* https://github.com/harfbuzz/harfbuzz/issues/657 */
308 if (!count) return true;
309 dirty ();
310 hb_codepoint_t g = *array;
311 hb_codepoint_t last_g = g;
312 while (count)
313 {
314 unsigned int m = get_major (g);
315 page_t *page = page_for_insert (g); if (unlikely (!page)) return false;
316 unsigned int end = major_start (m + 1);
317 do
318 {
319 /* If we try harder we can change the following comparison to <=;
320 * Not sure if it's worth it. */
321 if (g < last_g) return false;
322 last_g = g;
323 page->add (g);
324
325 array = (const T *) ((const char *) array + stride);
326 count--;
327 }
328 while (count && (g = *array, g < end));
329 }
330 return true;
331 }
332
333 inline void del (hb_codepoint_t g)
334 {
335 /* TODO perform op even if !successful. */
336 if (unlikely (!successful)) return;
337 page_t *p = page_for (g);
338 if (!p)
339 return;
340 dirty ();
341 p->del (g);
342 }
343 inline void del_range (hb_codepoint_t a, hb_codepoint_t b)
344 {
345 /* TODO perform op even if !successful. */
346 /* TODO Optimize, like add_range(). */
347 if (unlikely (!successful)) return;
348 for (unsigned int i = a; i < b + 1; i++)
349 del (i);
350 }
351 inline bool has (hb_codepoint_t g) const
352 {
353 const page_t *p = page_for (g);
354 if (!p)
355 return false;
356 return p->has (g);
357 }
358 inline bool intersects (hb_codepoint_t first,
359 hb_codepoint_t last) const
360 {
361 hb_codepoint_t c = first - 1;
362 return next (&c) && c <= last;
363 }
364 inline void set (const hb_set_t *other)
365 {
366 if (unlikely (!successful)) return;
367 unsigned int count = other->pages.len;
368 if (!resize (count))
369 return;
370 population = other->population;
371 memcpy (pages.arrayZ, other->pages.arrayZ, count * sizeof (pages.arrayZ[0]));
372 memcpy (page_map.arrayZ, other->page_map.arrayZ, count * sizeof (page_map.arrayZ[0]));
373 }
374
375 inline bool is_equal (const hb_set_t *other) const
376 {
377 if (get_population () != other->get_population ())
378 return false;
379
380 unsigned int na = pages.len;
381 unsigned int nb = other->pages.len;
382
383 unsigned int a = 0, b = 0;
384 for (; a < na && b < nb; )
385 {
386 if (page_at (a).is_empty ()) { a++; continue; }
387 if (other->page_at (b).is_empty ()) { b++; continue; }
388 if (page_map[a].major != other->page_map[b].major ||
389 !page_at (a).is_equal (&other->page_at (b)))
390 return false;
391 a++;
392 b++;
393 }
394 for (; a < na; a++)
395 if (!page_at (a).is_empty ()) { return false; }
396 for (; b < nb; b++)
397 if (!other->page_at (b).is_empty ()) { return false; }
398
399 return true;
400 }
401
402 inline bool is_subset (const hb_set_t *larger_set) const
403 {
404 if (get_population () > larger_set->get_population ())
405 return false;
406
407 /* TODO Optimize to use pages. */
408 hb_codepoint_t c = INVALID;
409 while (next (&c))
410 if (!larger_set->has (c))
411 return false;
412
413 return true;
414 }
415
416 template <class Op>
417 inline void process (const hb_set_t *other)
418 {
419 if (unlikely (!successful)) return;
420
421 dirty ();
422
423 unsigned int na = pages.len;
424 unsigned int nb = other->pages.len;
425 unsigned int next_page = na;
426
427 unsigned int count = 0, newCount = 0;
428 unsigned int a = 0, b = 0;
429 for (; a < na && b < nb; )
430 {
431 if (page_map[a].major == other->page_map[b].major)
432 {
433 count++;
434 a++;
435 b++;
436 }
437 else if (page_map[a].major < other->page_map[b].major)
438 {
439 if (Op::passthru_left)
440 count++;
441 a++;
442 }
443 else
444 {
445 if (Op::passthru_right)
446 count++;
447 b++;
448 }
449 }
450 if (Op::passthru_left)
451 count += na - a;
452 if (Op::passthru_right)
453 count += nb - b;
454
455 if (count > pages.len)
456 if (!resize (count))
457 return;
458 newCount = count;
459
460 /* Process in-place backward. */
461 a = na;
462 b = nb;
463 for (; a && b; )
464 {
465 if (page_map[a - 1].major == other->page_map[b - 1].major)
466 {
467 a--;
468 b--;
469 count--;
470 page_map[count] = page_map[a];
471 Op::process (page_at (count).v, page_at (a).v, other->page_at (b).v);
472 }
473 else if (page_map[a - 1].major > other->page_map[b - 1].major)
474 {
475 a--;
476 if (Op::passthru_left)
477 {
478 count--;
479 page_map[count] = page_map[a];
480 }
481 }
482 else
483 {
484 b--;
485 if (Op::passthru_right)
486 {
487 count--;
488 page_map[count].major = other->page_map[b].major;
489 page_map[count].index = next_page++;
490 page_at (count).v = other->page_at (b).v;
491 }
492 }
493 }
494 if (Op::passthru_left)
495 while (a)
496 {
497 a--;
498 count--;
499 page_map[count] = page_map [a];
500 }
501 if (Op::passthru_right)
502 while (b)
503 {
504 b--;
505 count--;
506 page_map[count].major = other->page_map[b].major;
507 page_map[count].index = next_page++;
508 page_at (count).v = other->page_at (b).v;
509 }
510 assert (!count);
511 if (pages.len > newCount)
512 resize (newCount);
513 }
514
515 inline void union_ (const hb_set_t *other)
516 {
517 process<HbOpOr> (other);
518 }
519 inline void intersect (const hb_set_t *other)
520 {
521 process<HbOpAnd> (other);
522 }
523 inline void subtract (const hb_set_t *other)
524 {
525 process<HbOpMinus> (other);
526 }
527 inline void symmetric_difference (const hb_set_t *other)
528 {
529 process<HbOpXor> (other);
530 }
531 inline bool next (hb_codepoint_t *codepoint) const
532 {
533 if (unlikely (*codepoint == INVALID)) {
534 *codepoint = get_min ();
535 return *codepoint != INVALID;
536 }
537
538 page_map_t map = {get_major (*codepoint), 0};
539 unsigned int i;
540 page_map.bfind (map, &i);
541 if (i < page_map.len && page_map[i].major == map.major)
542 {
543 if (pages[page_map[i].index].next (codepoint))
544 {
545 *codepoint += page_map[i].major * page_t::PAGE_BITS;
546 return true;
547 }
548 i++;
549 }
550 for (; i < page_map.len; i++)
551 {
552 hb_codepoint_t m = pages[page_map[i].index].get_min ();
553 if (m != INVALID)
554 {
555 *codepoint = page_map[i].major * page_t::PAGE_BITS + m;
556 return true;
557 }
558 }
559 *codepoint = INVALID;
560 return false;
561 }
562 inline bool previous (hb_codepoint_t *codepoint) const
563 {
564 if (unlikely (*codepoint == INVALID)) {
565 *codepoint = get_max ();
566 return *codepoint != INVALID;
567 }
568
569 page_map_t map = {get_major (*codepoint), 0};
570 unsigned int i;
571 page_map.bfind (map, &i);
572 if (i < page_map.len && page_map[i].major == map.major)
573 {
574 if (pages[page_map[i].index].previous (codepoint))
575 {
576 *codepoint += page_map[i].major * page_t::PAGE_BITS;
577 return true;
578 }
579 }
580 i--;
581 for (; (int) i >= 0; i--)
582 {
583 hb_codepoint_t m = pages[page_map[i].index].get_max ();
584 if (m != INVALID)
585 {
586 *codepoint = page_map[i].major * page_t::PAGE_BITS + m;
587 return true;
588 }
589 }
590 *codepoint = INVALID;
591 return false;
592 }
593 inline bool next_range (hb_codepoint_t *first, hb_codepoint_t *last) const
594 {
595 hb_codepoint_t i;
596
597 i = *last;
598 if (!next (&i))
599 {
600 *last = *first = INVALID;
601 return false;
602 }
603
604 /* TODO Speed up. */
605 *last = *first = i;
606 while (next (&i) && i == *last + 1)
607 (*last)++;
608
609 return true;
610 }
611 inline bool previous_range (hb_codepoint_t *first, hb_codepoint_t *last) const
612 {
613 hb_codepoint_t i;
614
615 i = *first;
616 if (!previous (&i))
617 {
618 *last = *first = INVALID;
619 return false;
620 }
621
622 /* TODO Speed up. */
623 *last = *first = i;
624 while (previous (&i) && i == *first - 1)
625 (*first)--;
626
627 return true;
628 }
629
630 inline unsigned int get_population (void) const
631 {
632 if (population != (unsigned int) -1)
633 return population;
634
635 unsigned int pop = 0;
636 unsigned int count = pages.len;
637 for (unsigned int i = 0; i < count; i++)
638 pop += pages[i].get_population ();
639
640 population = pop;
641 return pop;
642 }
643 inline hb_codepoint_t get_min (void) const
644 {
645 unsigned int count = pages.len;
646 for (unsigned int i = 0; i < count; i++)
647 if (!page_at (i).is_empty ())
648 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_min ();
649 return INVALID;
650 }
651 inline hb_codepoint_t get_max (void) const
652 {
653 unsigned int count = pages.len;
654 for (int i = count - 1; i >= 0; i++)
655 if (!page_at (i).is_empty ())
656 return page_map[i].major * page_t::PAGE_BITS + page_at (i).get_max ();
657 return INVALID;
658 }
659
660 static const hb_codepoint_t INVALID = HB_SET_VALUE_INVALID;
661
662 inline page_t *page_for_insert (hb_codepoint_t g)
663 {
664 page_map_t map = {get_major (g), pages.len};
665 unsigned int i;
666 if (!page_map.bfind (map, &i))
667 {
668 if (!resize (pages.len + 1))
669 return nullptr;
670
671 pages[map.index].init0 ();
672 memmove (&page_map[i + 1], &page_map[i], (page_map.len - 1 - i) * sizeof (page_map[0]));
673 page_map[i] = map;
674 }
675 return &pages[page_map[i].index];
676 }
677 inline page_t *page_for (hb_codepoint_t g)
678 {
679 page_map_t key = {get_major (g)};
680 const page_map_t *found = page_map.bsearch (key);
681 if (found)
682 return &pages[found->index];
683 return nullptr;
684 }
685 inline const page_t *page_for (hb_codepoint_t g) const
686 {
687 page_map_t key = {get_major (g)};
688 const page_map_t *found = page_map.bsearch (key);
689 if (found)
690 return &pages[found->index];
691 return nullptr;
692 }
693 inline page_t &page_at (unsigned int i) { return pages[page_map[i].index]; }
694 inline const page_t &page_at (unsigned int i) const { return pages[page_map[i].index]; }
695 inline unsigned int get_major (hb_codepoint_t g) const { return g / page_t::PAGE_BITS; }
696 inline hb_codepoint_t major_start (unsigned int major) const { return major * page_t::PAGE_BITS; }
697};
698
699
700#endif /* HB_SET_HH */
701