1/*
2 * Copyright © 2012 Google, Inc.
3 *
4 * This is part of HarfBuzz, a text shaping library.
5 *
6 * Permission is hereby granted, without written agreement and without
7 * license or royalty fees, to use, copy, modify, and distribute this
8 * software and its documentation for any purpose, provided that the
9 * above copyright notice and the following two paragraphs appear in
10 * all copies of this software.
11 *
12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
16 * DAMAGE.
17 *
18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
23 *
24 * Google Author(s): Behdad Esfahbod
25 */
26
27#ifndef HB_SET_DIGEST_HH
28#define HB_SET_DIGEST_HH
29
30#include "hb.hh"
31#include "hb-machinery.hh"
32
33/*
34 * The set-digests here implement various "filters" that support
35 * "approximate member query". Conceptually these are like Bloom
36 * Filter and Quotient Filter, however, much smaller, faster, and
37 * designed to fit the requirements of our uses for glyph coverage
38 * queries.
39 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
43 *
44 * The way these are used is that the filter is first populated by
45 * a lookup's or subtable's Coverage table(s), and then when we
46 * want to apply the lookup or subtable to a glyph, before trying
47 * to apply, we ask the filter if the glyph may be covered. If it's
48 * not, we return early. We can also match a digest against another
49 * digest.
50 *
51 * We use these filters at three levels:
52 * - If the digest for all the glyphs in the buffer as a whole
53 * does not match the digest for the lookup, skip the lookup.
54 * - For each glyph, if it doesn't match the lookup digest,
55 * skip it.
56 * - For each glyph, if it doesn't match the subtable digest,
57 * skip it.
58 *
59 * The main filter we use is a combination of three bits-pattern
60 * filters. A bits-pattern filter checks a number of bits (5 or 6)
61 * of the input number (glyph-id in this case) and checks whether
62 * its pattern is amongst the patterns of any of the accepted values.
63 * The accepted patterns are represented as a "long" integer. The
64 * check is done using four bitwise operations only.
65 */
66
67template <typename mask_t, unsigned int shift>
68struct hb_set_digest_bits_pattern_t
69{
70 static constexpr unsigned mask_bytes = sizeof (mask_t);
71 static constexpr unsigned mask_bits = sizeof (mask_t) * 8;
72 static constexpr unsigned num_bits = 0
73 + (mask_bytes >= 1 ? 3 : 0)
74 + (mask_bytes >= 2 ? 1 : 0)
75 + (mask_bytes >= 4 ? 1 : 0)
76 + (mask_bytes >= 8 ? 1 : 0)
77 + (mask_bytes >= 16? 1 : 0)
78 + 0;
79
80 static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
81 static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");
82
83 void init () { mask = 0; }
84
85 void add (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; }
86
87 void add (hb_codepoint_t g) { mask |= mask_for (g); }
88
89 bool add_range (hb_codepoint_t a, hb_codepoint_t b)
90 {
91 if (mask == (mask_t) -1) return false;
92 if ((b >> shift) - (a >> shift) >= mask_bits - 1)
93 {
94 mask = (mask_t) -1;
95 return false;
96 }
97 else
98 {
99 mask_t ma = mask_for (a);
100 mask_t mb = mask_for (b);
101 mask |= mb + (mb - ma) - (mb < ma);
102 return true;
103 }
104 }
105
106 template <typename T>
107 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
108 {
109 for (unsigned int i = 0; i < count; i++)
110 {
111 add (*array);
112 array = &StructAtOffsetUnaligned<T> ((const void *) array, stride);
113 }
114 }
115 template <typename T>
116 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
117 template <typename T>
118 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
119 {
120 add_array (array, count, stride);
121 return true;
122 }
123 template <typename T>
124 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
125
126 bool may_have (const hb_set_digest_bits_pattern_t &o) const
127 { return mask & o.mask; }
128
129 bool may_have (hb_codepoint_t g) const
130 { return mask & mask_for (g); }
131
132 private:
133
134 static mask_t mask_for (hb_codepoint_t g)
135 { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); }
136 mask_t mask;
137};
138
139template <typename head_t, typename tail_t>
140struct hb_set_digest_combiner_t
141{
142 void init ()
143 {
144 head.init ();
145 tail.init ();
146 }
147
148 void add (const hb_set_digest_combiner_t &o)
149 {
150 head.add (o.head);
151 tail.add (o.tail);
152 }
153
154 void add (hb_codepoint_t g)
155 {
156 head.add (g);
157 tail.add (g);
158 }
159
160 bool add_range (hb_codepoint_t a, hb_codepoint_t b)
161 {
162 return (int) head.add_range (a, b) | (int) tail.add_range (a, b);
163 }
164 template <typename T>
165 void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
166 {
167 head.add_array (array, count, stride);
168 tail.add_array (array, count, stride);
169 }
170 template <typename T>
171 void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); }
172 template <typename T>
173 bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
174 {
175 return head.add_sorted_array (array, count, stride) &&
176 tail.add_sorted_array (array, count, stride);
177 }
178 template <typename T>
179 bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); }
180
181 bool may_have (const hb_set_digest_combiner_t &o) const
182 {
183 return head.may_have (o.head) && tail.may_have (o.tail);
184 }
185
186 bool may_have (hb_codepoint_t g) const
187 {
188 return head.may_have (g) && tail.may_have (g);
189 }
190
191 private:
192 head_t head;
193 tail_t tail;
194};
195
196
197/*
198 * hb_set_digest_t
199 *
200 * This is a combination of digests that performs "best".
201 * There is not much science to this: it's a result of intuition
202 * and testing.
203 */
204using hb_set_digest_t =
205 hb_set_digest_combiner_t
206 <
207 hb_set_digest_bits_pattern_t<unsigned long, 4>,
208 hb_set_digest_combiner_t
209 <
210 hb_set_digest_bits_pattern_t<unsigned long, 0>,
211 hb_set_digest_bits_pattern_t<unsigned long, 9>
212 >
213 >
214;
215
216
217#endif /* HB_SET_DIGEST_HH */
218