/*
 * Copyright © 2012 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"
#include "hb-machinery.hh"

/*
 * The set-digests here implement various "filters" that support
 * "approximate member query". Conceptually these are like a Bloom
 * filter or quotient filter, but much smaller, faster, and
 * designed to fit the requirements of our use for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly
 * local set of glyphs, but fully flooded and ineffective if the
 * coverage is spread all over the place.
 *
 * In use, the filter is first populated from a lookup's or
 * subtable's Coverage table(s). Then, before applying the lookup
 * or subtable to a glyph, we ask the filter whether the glyph may
 * be covered; if it may not be, we return early. A digest can also
 * be matched against another digest.
 *
 * We use these filters at three levels:
 * - If the digest for all the glyphs in the buffer as a whole
 *   does not match the digest for the lookup, skip the lookup.
 * - For each glyph, if it doesn't match the lookup digest,
 *   skip it.
 * - For each glyph, if it doesn't match the subtable digest,
 *   skip it.
 *
 * The main filter we use is a combination of three bits-pattern
 * filters. A bits-pattern filter extracts a few bits (5 or 6) of
 * the input number (the glyph-id in this case) and tests whether
 * that bit pattern is amongst the patterns of the accepted values.
 * The accepted patterns are represented as set bits in a "long"
 * integer mask. The query is done using only four bitwise
 * operations.
 */
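
/* A sketch of typical use (hb_set_digest_t is the combined digest defined
 * at the bottom of this file; the glyph values are made up for
 * illustration):
 *
 *   hb_set_digest_t digest;
 *   digest.init ();
 *   digest.add (42);             // populate from a Coverage table
 *   digest.add_range (100, 200);
 *
 *   if (!digest.may_have (57))
 *     return;                    // definitely not covered: skip the lookup
 *   // otherwise fall through to the exact coverage check
 */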

template <typename mask_t, unsigned int shift>
struct hb_set_digest_bits_pattern_t
{
  static constexpr unsigned mask_bytes = sizeof (mask_t);
  static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
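  /* num_bits is log2 (mask_bits): e.g. 5 for a 32-bit mask_t, 6 for a
   * 64-bit one. */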
  static constexpr unsigned num_bits = 0
    + (mask_bytes >= 1 ? 3 : 0)
    + (mask_bytes >= 2 ? 1 : 0)
    + (mask_bytes >= 4 ? 1 : 0)
    + (mask_bytes >= 8 ? 1 : 0)
    + (mask_bytes >= 16 ? 1 : 0)
    + 0;

  static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
  static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");

  void init () { mask = 0; }

  void add (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }

  void add (hb_codepoint_t g) { mask |= mask_for (g); }

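  /* Returns false if the filter is (or just became) fully flooded, i.e.
   * carries no information anymore; returns true otherwise. */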
  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
    if (mask == (mask_t) -1) return false;
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
    {
      mask = (mask_t) -1;
      return false;
    }
    else
    {
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
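      /* ma and mb each have exactly one bit set. When mb >= ma,
       * "mb + (mb - ma)" == 2*mb - ma sets every bit from ma's bit through
       * mb's bit inclusive; the "- (mb < ma)" term corrects the wrap-around
       * case, where the range covers the top and the bottom of the mask. */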
      mask |= mb + (mb - ma) - (mb < ma);
      return true;
    }
  }

  template <typename T>
  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
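    /* Walks count elements placed stride bytes apart; a stride larger than
     * sizeof (T) lets us feed in one member out of an array of records. */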
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
    }
  }
  template <typename T>
  void add_array (const hb_array_t<const T>& arr) { add_array (arr.arrayZ, arr.len ()); }
  template <typename T>
  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    add_array (array, count, stride);
    return true;
  }
  template <typename T>
  bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (arr.arrayZ, arr.len ()); }

  bool may_have (const hb_set_digest_bits_pattern_t &o) const
  { return mask & o.mask; }

  bool may_have (hb_codepoint_t g) const
  { return mask & mask_for (g); }

  private:

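  /* Worked example (assuming a 64-bit mask_t and shift == 0, so
   * mask_bits == 64): for g == 100, (g >> 0) & 63 == 36, so mask_for ()
   * returns 1 << 36. Two glyphs map to the same bit iff they agree in
   * bits [shift, shift + num_bits) of their glyph-ids. */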
  static mask_t mask_for (hb_codepoint_t g)
  { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
  mask_t mask;
};

template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
  void init ()
  {
    head.init ();
    tail.init ();
  }

  void add (const hb_set_digest_combiner_t &o)
  {
    head.add (o.head);
    tail.add (o.tail);
  }

  void add (hb_codepoint_t g)
  {
    head.add (g);
    tail.add (g);
  }

  bool add_range (hb_codepoint_t a, hb_codepoint_t b)
  {
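    /* Bitwise | rather than ||: head and tail must both always be updated,
     * even when the first add_range () returns false. */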
    return (int) head.add_range (a, b) | (int) tail.add_range (a, b);
  }
  template <typename T>
  void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    head.add_array (array, count, stride);
    tail.add_array (array, count, stride);
  }
  template <typename T>
  void add_array (const hb_array_t<const T>& arr) { add_array (arr.arrayZ, arr.len ()); }
  template <typename T>
  bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    /* Bitwise | again, so that tail is updated even if head returns false. */
    return (int) head.add_sorted_array (array, count, stride) |
           (int) tail.add_sorted_array (array, count, stride);
  }
  template <typename T>
  bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (arr.arrayZ, arr.len ()); }

  bool may_have (const hb_set_digest_combiner_t &o) const
  {
    return head.may_have (o.head) && tail.may_have (o.tail);
  }

  bool may_have (hb_codepoint_t g) const
  {
    return head.may_have (g) && tail.may_have (g);
  }

  private:
  head_t head;
  tail_t tail;
};


/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
using hb_set_digest_t =
  hb_set_digest_combiner_t
  <
    hb_set_digest_bits_pattern_t<unsigned long, 4>,
    hb_set_digest_combiner_t
    <
      hb_set_digest_bits_pattern_t<unsigned long, 0>,
      hb_set_digest_bits_pattern_t<unsigned long, 9>
    >
  >
  ;
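
/* Note: when unsigned long is 8 bytes, this is three 64-bit masks (24 bytes
 * in total), and the three filters key on glyph-id bit ranges [4, 10),
 * [0, 6), and [9, 15) respectively. */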

#endif /* HB_SET_DIGEST_HH */