/*
 * Copyright © 2012 Google, Inc.
 *
 * This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */

#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"

/*
 * The set digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like a Bloom
 * filter or a quotient filter, but much smaller, faster, and designed
 * to fit the requirements of our uses for glyph coverage queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but they become fully flooded and ineffective if the
 * coverage is all over the place.
 *
 * A frozen set can be used instead of a digest, to trade more memory
 * for 100% accuracy, but in practice that does not look like an
 * attractive trade-off.
 */
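
/*
 * A minimal usage sketch (hb_set_digest_t is the combined type defined
 * at the bottom of this file; the codepoint values are made up for
 * illustration):
 *
 *   hb_set_digest_t digest;
 *   digest.init ();
 *   digest.add (42);
 *   digest.add_range (100, 200);
 *
 *   if (digest.may_have (150))
 *     ; // Possibly in the set; fall back to an exact check.
 *   // A false from may_have() is definitive: the value was never added.
 */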

template <typename mask_t, unsigned int shift>
struct hb_set_digest_lowest_bits_t
{
  ASSERT_POD ();

  enum { mask_bytes = sizeof (mask_t) };
  enum { mask_bits = sizeof (mask_t) * 8 };
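  /* num_bits works out to log2 (mask_bits): 3 for the 8 bits in a byte,
   * plus one for each doubling of mask_bytes. */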
  static const unsigned int num_bits = 0
    + (mask_bytes >= 1 ? 3 : 0)
    + (mask_bytes >= 2 ? 1 : 0)
    + (mask_bytes >= 4 ? 1 : 0)
    + (mask_bytes >= 8 ? 1 : 0)
    + (mask_bytes >= 16 ? 1 : 0)
    + 0;

  static_assert ((shift < sizeof (hb_codepoint_t) * 8), "");
  static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "");

  inline void init (void) {
    mask = 0;
  }

  inline void add (hb_codepoint_t g) {
    mask |= mask_for (g);
  }

  inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) {
    if ((b >> shift) - (a >> shift) >= mask_bits - 1)
      mask = (mask_t) -1;
    else {
      mask_t ma = mask_for (a);
      mask_t mb = mask_for (b);
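      /* Set every bit from ma's bit position through mb's, inclusive:
       * mb + (mb - ma) is 2*mb - ma, which covers ma..mb when mb >= ma;
       * subtracting (mb < ma) makes the unsigned wrap-around also cover
       * ranges whose bit positions wrap past the top of the mask. */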
      mask |= mb + (mb - ma) - (mb < ma);
    }
    return true;
  }

  template <typename T>
  inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = (const T *) (stride + (const char *) array);
    }
  }
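  /* This digest cannot exploit sortedness, so the sorted variant below
   * is the same loop as add_array; it returns true because adding to a
   * fixed-size mask cannot fail. */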
  template <typename T>
  inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    for (unsigned int i = 0; i < count; i++)
    {
      add (*array);
      array = (const T *) (stride + (const char *) array);
    }
    return true;
  }

  inline bool may_have (hb_codepoint_t g) const {
    return !!(mask & mask_for (g));
  }

  private:

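  /* Map a codepoint to a single bit: drop the low 'shift' bits, then
   * take the result modulo mask_bits. */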
  static inline mask_t mask_for (hb_codepoint_t g) {
    return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1));
  }
  mask_t mask;
};

template <typename head_t, typename tail_t>
struct hb_set_digest_combiner_t
{
  ASSERT_POD ();

  inline void init (void) {
    head.init ();
    tail.init ();
  }

  inline void add (hb_codepoint_t g) {
    head.add (g);
    tail.add (g);
  }

  inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) {
    head.add_range (a, b);
    tail.add_range (a, b);
    return true;
  }
  template <typename T>
  inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    head.add_array (array, count, stride);
    tail.add_array (array, count, stride);
  }
  template <typename T>
  inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T))
  {
    head.add_sorted_array (array, count, stride);
    tail.add_sorted_array (array, count, stride);
    return true;
  }

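  /* A value may be present only if both sub-digests say so; a false
   * positive requires a simultaneous collision in every filter. */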
  inline bool may_have (hb_codepoint_t g) const {
    return head.may_have (g) && tail.may_have (g);
  }

  private:
  head_t head;
  tail_t tail;
};

/*
 * hb_set_digest_t
 *
 * This is a combination of digests that performs "best".
 * There is not much science to this: it's a result of intuition
 * and testing.
 */
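/*
 * One way to read the combination below, assuming a 64-bit unsigned
 * long (mask_bits == 64): the shift-4 digest tracks 16-codepoint
 * blocks modulo 64, the shift-0 digest tracks codepoints modulo 64,
 * and the shift-9 digest tracks 512-codepoint pages modulo 64.
 * A query passes only if all three filters pass.
 */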
typedef hb_set_digest_combiner_t
<
  hb_set_digest_lowest_bits_t<unsigned long, 4>,
  hb_set_digest_combiner_t
  <
    hb_set_digest_lowest_bits_t<unsigned long, 0>,
    hb_set_digest_lowest_bits_t<unsigned long, 9>
  >
> hb_set_digest_t;


#endif /* HB_SET_DIGEST_HH */