/*
 * Copyright © 2012  Google, Inc.
 *
 *  This is part of HarfBuzz, a text shaping library.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and its documentation for any purpose, provided that the
 * above copyright notice and the following two paragraphs appear in
 * all copies of this software.
 *
 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
 * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 *
 * Google Author(s): Behdad Esfahbod
 */
|---|
| 26 |  | 
|---|
#ifndef HB_SET_DIGEST_HH
#define HB_SET_DIGEST_HH

#include "hb.hh"
#include "hb-machinery.hh"
|---|
| 32 |  | 
|---|
/*
 * The set-digests here implement various "filters" that support
 * "approximate member query".  Conceptually these are like Bloom
 * Filter and Quotient Filter, however, much smaller, faster, and
 * designed to fit the requirements of our uses for glyph coverage
 * queries.
 *
 * Our filters are highly accurate if the lookup covers a fairly local
 * set of glyphs, but fully flooded and ineffective if coverage is
 * all over the place.
 *
 * The way these are used is that the filter is first populated by
 * a lookup's or subtable's Coverage table(s), and then when we
 * want to apply the lookup or subtable to a glyph, before trying
 * to apply, we ask the filter if the glyph may be covered. If it's
 * not, we return early.  We can also match a digest against another
 * digest.
 *
 * We use these filters at three levels:
 *   - If the digest for all the glyphs in the buffer as a whole
 *     does not match the digest for the lookup, skip the lookup.
 *   - For each glyph, if it doesn't match the lookup digest,
 *     skip it.
 *   - For each glyph, if it doesn't match the subtable digest,
 *     skip it.
 *
 * The main filter we use is a combination of three bits-pattern
 * filters. A bits-pattern filter checks a number of bits (5 or 6)
 * of the input number (glyph-id in this case) and checks whether
 * its pattern is amongst the patterns of any of the accepted values.
 * The accepted patterns are represented as a "long" integer. The
 * check is done using four bitwise operations only.
 */
|---|
| 66 |  | 
|---|
| 67 | template <typename mask_t, unsigned int shift> | 
|---|
| 68 | struct hb_set_digest_bits_pattern_t | 
|---|
| 69 | { | 
|---|
| 70 | static constexpr unsigned mask_bytes = sizeof (mask_t); | 
|---|
| 71 | static constexpr unsigned mask_bits = sizeof (mask_t) * 8; | 
|---|
| 72 | static constexpr unsigned num_bits = 0 | 
|---|
| 73 | + (mask_bytes >= 1 ? 3 : 0) | 
|---|
| 74 | + (mask_bytes >= 2 ? 1 : 0) | 
|---|
| 75 | + (mask_bytes >= 4 ? 1 : 0) | 
|---|
| 76 | + (mask_bytes >= 8 ? 1 : 0) | 
|---|
| 77 | + (mask_bytes >= 16? 1 : 0) | 
|---|
| 78 | + 0; | 
|---|
| 79 |  | 
|---|
| 80 | static_assert ((shift < sizeof (hb_codepoint_t) * 8), ""); | 
|---|
| 81 | static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), ""); | 
|---|
| 82 |  | 
|---|
| 83 | void init () { mask = 0; } | 
|---|
| 84 |  | 
|---|
| 85 | void add (const hb_set_digest_bits_pattern_t &o) { mask |= o.mask; } | 
|---|
| 86 |  | 
|---|
| 87 | void add (hb_codepoint_t g) { mask |= mask_for (g); } | 
|---|
| 88 |  | 
|---|
| 89 | bool add_range (hb_codepoint_t a, hb_codepoint_t b) | 
|---|
| 90 | { | 
|---|
| 91 | if (mask == (mask_t) -1) return false; | 
|---|
| 92 | if ((b >> shift) - (a >> shift) >= mask_bits - 1) | 
|---|
| 93 | { | 
|---|
| 94 | mask = (mask_t) -1; | 
|---|
| 95 | return false; | 
|---|
| 96 | } | 
|---|
| 97 | else | 
|---|
| 98 | { | 
|---|
| 99 | mask_t ma = mask_for (a); | 
|---|
| 100 | mask_t mb = mask_for (b); | 
|---|
| 101 | mask |= mb + (mb - ma) - (mb < ma); | 
|---|
| 102 | return true; | 
|---|
| 103 | } | 
|---|
| 104 | } | 
|---|
| 105 |  | 
|---|
| 106 | template <typename T> | 
|---|
| 107 | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
|---|
| 108 | { | 
|---|
| 109 | for (unsigned int i = 0; i < count; i++) | 
|---|
| 110 | { | 
|---|
| 111 | add (*array); | 
|---|
| 112 | array = &StructAtOffsetUnaligned<T> ((const void *) array, stride); | 
|---|
| 113 | } | 
|---|
| 114 | } | 
|---|
| 115 | template <typename T> | 
|---|
| 116 | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } | 
|---|
| 117 | template <typename T> | 
|---|
| 118 | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
|---|
| 119 | { | 
|---|
| 120 | add_array (array, count, stride); | 
|---|
| 121 | return true; | 
|---|
| 122 | } | 
|---|
| 123 | template <typename T> | 
|---|
| 124 | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } | 
|---|
| 125 |  | 
|---|
| 126 | bool may_have (const hb_set_digest_bits_pattern_t &o) const | 
|---|
| 127 | { return mask & o.mask; } | 
|---|
| 128 |  | 
|---|
| 129 | bool may_have (hb_codepoint_t g) const | 
|---|
| 130 | { return mask & mask_for (g); } | 
|---|
| 131 |  | 
|---|
| 132 | private: | 
|---|
| 133 |  | 
|---|
| 134 | static mask_t mask_for (hb_codepoint_t g) | 
|---|
| 135 | { return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); } | 
|---|
| 136 | mask_t mask; | 
|---|
| 137 | }; | 
|---|
| 138 |  | 
|---|
| 139 | template <typename head_t, typename tail_t> | 
|---|
| 140 | struct hb_set_digest_combiner_t | 
|---|
| 141 | { | 
|---|
| 142 | void init () | 
|---|
| 143 | { | 
|---|
| 144 | head.init (); | 
|---|
| 145 | tail.init (); | 
|---|
| 146 | } | 
|---|
| 147 |  | 
|---|
| 148 | void add (const hb_set_digest_combiner_t &o) | 
|---|
| 149 | { | 
|---|
| 150 | head.add (o.head); | 
|---|
| 151 | tail.add (o.tail); | 
|---|
| 152 | } | 
|---|
| 153 |  | 
|---|
| 154 | void add (hb_codepoint_t g) | 
|---|
| 155 | { | 
|---|
| 156 | head.add (g); | 
|---|
| 157 | tail.add (g); | 
|---|
| 158 | } | 
|---|
| 159 |  | 
|---|
| 160 | bool add_range (hb_codepoint_t a, hb_codepoint_t b) | 
|---|
| 161 | { | 
|---|
| 162 | return (int) head.add_range (a, b) | (int) tail.add_range (a, b); | 
|---|
| 163 | } | 
|---|
| 164 | template <typename T> | 
|---|
| 165 | void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
|---|
| 166 | { | 
|---|
| 167 | head.add_array (array, count, stride); | 
|---|
| 168 | tail.add_array (array, count, stride); | 
|---|
| 169 | } | 
|---|
| 170 | template <typename T> | 
|---|
| 171 | void add_array (const hb_array_t<const T>& arr) { add_array (&arr, arr.len ()); } | 
|---|
| 172 | template <typename T> | 
|---|
| 173 | bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
|---|
| 174 | { | 
|---|
| 175 | return head.add_sorted_array (array, count, stride) && | 
|---|
| 176 | tail.add_sorted_array (array, count, stride); | 
|---|
| 177 | } | 
|---|
| 178 | template <typename T> | 
|---|
| 179 | bool add_sorted_array (const hb_sorted_array_t<const T>& arr) { return add_sorted_array (&arr, arr.len ()); } | 
|---|
| 180 |  | 
|---|
| 181 | bool may_have (const hb_set_digest_combiner_t &o) const | 
|---|
| 182 | { | 
|---|
| 183 | return head.may_have (o.head) && tail.may_have (o.tail); | 
|---|
| 184 | } | 
|---|
| 185 |  | 
|---|
| 186 | bool may_have (hb_codepoint_t g) const | 
|---|
| 187 | { | 
|---|
| 188 | return head.may_have (g) && tail.may_have (g); | 
|---|
| 189 | } | 
|---|
| 190 |  | 
|---|
| 191 | private: | 
|---|
| 192 | head_t head; | 
|---|
| 193 | tail_t tail; | 
|---|
| 194 | }; | 
|---|
| 195 |  | 
|---|
| 196 |  | 
|---|
| 197 | /* | 
|---|
| 198 | * hb_set_digest_t | 
|---|
| 199 | * | 
|---|
| 200 | * This is a combination of digests that performs "best". | 
|---|
| 201 | * There is not much science to this: it's a result of intuition | 
|---|
| 202 | * and testing. | 
|---|
| 203 | */ | 
|---|
| 204 | using hb_set_digest_t = | 
|---|
| 205 | hb_set_digest_combiner_t | 
|---|
| 206 | < | 
|---|
| 207 | hb_set_digest_bits_pattern_t<unsigned long, 4>, | 
|---|
| 208 | hb_set_digest_combiner_t | 
|---|
| 209 | < | 
|---|
| 210 | hb_set_digest_bits_pattern_t<unsigned long, 0>, | 
|---|
| 211 | hb_set_digest_bits_pattern_t<unsigned long, 9> | 
|---|
| 212 | > | 
|---|
| 213 | > | 
|---|
| 214 | ; | 
|---|
| 215 |  | 
|---|
| 216 |  | 
|---|
#endif /* HB_SET_DIGEST_HH */
|---|
| 218 |  | 
|---|