| 1 | /* | 
| 2 |  * Copyright © 2012  Google, Inc. | 
| 3 |  * | 
| 4 |  *  This is part of HarfBuzz, a text shaping library. | 
| 5 |  * | 
| 6 |  * Permission is hereby granted, without written agreement and without | 
| 7 |  * license or royalty fees, to use, copy, modify, and distribute this | 
| 8 |  * software and its documentation for any purpose, provided that the | 
| 9 |  * above copyright notice and the following two paragraphs appear in | 
| 10 |  * all copies of this software. | 
| 11 |  * | 
| 12 |  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR | 
| 13 |  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES | 
| 14 |  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN | 
| 15 |  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH | 
| 16 |  * DAMAGE. | 
| 17 |  * | 
| 18 |  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, | 
| 19 |  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND | 
| 20 |  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS | 
| 21 |  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO | 
| 22 |  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. | 
| 23 |  * | 
| 24 |  * Google Author(s): Behdad Esfahbod | 
| 25 |  */ | 
| 26 |  | 
| 27 | #ifndef HB_SET_DIGEST_HH | 
| 28 | #define HB_SET_DIGEST_HH | 
| 29 |  | 
| 30 | #include "hb.hh" | 
| 31 |  | 
| 32 | /* | 
| 33 |  * The set digests here implement various "filters" that support | 
| 34 |  * "approximate member query".  Conceptually these are like Bloom | 
| 35 |  * Filter and Quotient Filter, however, much smaller, faster, and | 
| 36 |  * designed to fit the requirements of our uses for glyph coverage | 
| 37 |  * queries. | 
| 38 |  * | 
| 39 |  * Our filters are highly accurate if the lookup covers fairly local | 
| 40 |  * set of glyphs, but fully flooded and ineffective if coverage is | 
| 41 |  * all over the place. | 
| 42 |  * | 
| 43 |  * The frozen-set can be used instead of a digest, to trade more | 
| 44 |  * memory for 100% accuracy, but in practice, that doesn't look like | 
| 45 |  * an attractive trade-off. | 
| 46 |  */ | 
| 47 |  | 
| 48 | template <typename mask_t, unsigned int shift> | 
| 49 | struct hb_set_digest_lowest_bits_t | 
| 50 | { | 
| 51 |   ASSERT_POD (); | 
| 52 |  | 
| 53 |   enum { mask_bytes = sizeof (mask_t) }; | 
| 54 |   enum { mask_bits = sizeof (mask_t) * 8 }; | 
| 55 |   static const unsigned int num_bits = 0 | 
| 56 | 				     + (mask_bytes >= 1 ? 3 : 0) | 
| 57 | 				     + (mask_bytes >= 2 ? 1 : 0) | 
| 58 | 				     + (mask_bytes >= 4 ? 1 : 0) | 
| 59 | 				     + (mask_bytes >= 8 ? 1 : 0) | 
| 60 | 				     + (mask_bytes >= 16? 1 : 0) | 
| 61 | 				     + 0; | 
| 62 |  | 
| 63 |   static_assert ((shift < sizeof (hb_codepoint_t) * 8), "" ); | 
| 64 |   static_assert ((shift + num_bits <= sizeof (hb_codepoint_t) * 8), "" ); | 
| 65 |  | 
| 66 |   inline void init (void) { | 
| 67 |     mask = 0; | 
| 68 |   } | 
| 69 |  | 
| 70 |   inline void add (hb_codepoint_t g) { | 
| 71 |     mask |= mask_for (g); | 
| 72 |   } | 
| 73 |  | 
| 74 |   inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) { | 
| 75 |     if ((b >> shift) - (a >> shift) >= mask_bits - 1) | 
| 76 |       mask = (mask_t) -1; | 
| 77 |     else { | 
| 78 |       mask_t ma = mask_for (a); | 
| 79 |       mask_t mb = mask_for (b); | 
| 80 |       mask |= mb + (mb - ma) - (mb < ma); | 
| 81 |     } | 
| 82 |     return true; | 
| 83 |   } | 
| 84 |  | 
| 85 |   template <typename T> | 
| 86 |   inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
| 87 |   { | 
| 88 |     for (unsigned int i = 0; i < count; i++) | 
| 89 |     { | 
| 90 |       add (*array); | 
| 91 |       array = (const T *) (stride + (const char *) array); | 
| 92 |     } | 
| 93 |   } | 
| 94 |   template <typename T> | 
| 95 |   inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
| 96 |   { | 
| 97 |     for (unsigned int i = 0; i < count; i++) | 
| 98 |     { | 
| 99 |       add (*array); | 
| 100 |       array = (const T *) (stride + (const char *) array); | 
| 101 |     } | 
| 102 |     return true; | 
| 103 |   } | 
| 104 |  | 
| 105 |   inline bool may_have (hb_codepoint_t g) const { | 
| 106 |     return !!(mask & mask_for (g)); | 
| 107 |   } | 
| 108 |  | 
| 109 |   private: | 
| 110 |  | 
| 111 |   static inline mask_t mask_for (hb_codepoint_t g) { | 
| 112 |     return ((mask_t) 1) << ((g >> shift) & (mask_bits - 1)); | 
| 113 |   } | 
| 114 |   mask_t mask; | 
| 115 | }; | 
| 116 |  | 
| 117 | template <typename head_t, typename tail_t> | 
| 118 | struct hb_set_digest_combiner_t | 
| 119 | { | 
| 120 |   ASSERT_POD (); | 
| 121 |  | 
| 122 |   inline void init (void) { | 
| 123 |     head.init (); | 
| 124 |     tail.init (); | 
| 125 |   } | 
| 126 |  | 
| 127 |   inline void add (hb_codepoint_t g) { | 
| 128 |     head.add (g); | 
| 129 |     tail.add (g); | 
| 130 |   } | 
| 131 |  | 
| 132 |   inline bool add_range (hb_codepoint_t a, hb_codepoint_t b) { | 
| 133 |     head.add_range (a, b); | 
| 134 |     tail.add_range (a, b); | 
| 135 |     return true; | 
| 136 |   } | 
| 137 |   template <typename T> | 
| 138 |   inline void add_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
| 139 |   { | 
| 140 |     head.add_array (array, count, stride); | 
| 141 |     tail.add_array (array, count, stride); | 
| 142 |   } | 
| 143 |   template <typename T> | 
| 144 |   inline bool add_sorted_array (const T *array, unsigned int count, unsigned int stride=sizeof(T)) | 
| 145 |   { | 
| 146 |     head.add_sorted_array (array, count, stride); | 
| 147 |     tail.add_sorted_array (array, count, stride); | 
| 148 |     return true; | 
| 149 |   } | 
| 150 |  | 
| 151 |   inline bool may_have (hb_codepoint_t g) const { | 
| 152 |     return head.may_have (g) && tail.may_have (g); | 
| 153 |   } | 
| 154 |  | 
| 155 |   private: | 
| 156 |   head_t head; | 
| 157 |   tail_t tail; | 
| 158 | }; | 
| 159 |  | 
| 160 |  | 
| 161 | /* | 
| 162 |  * hb_set_digest_t | 
| 163 |  * | 
| 164 |  * This is a combination of digests that performs "best". | 
| 165 |  * There is not much science to this: it's a result of intuition | 
| 166 |  * and testing. | 
| 167 |  */ | 
| 168 | typedef hb_set_digest_combiner_t | 
| 169 | < | 
| 170 |   hb_set_digest_lowest_bits_t<unsigned long, 4>, | 
| 171 |   hb_set_digest_combiner_t | 
| 172 |   < | 
| 173 |     hb_set_digest_lowest_bits_t<unsigned long, 0>, | 
| 174 |     hb_set_digest_lowest_bits_t<unsigned long, 9> | 
| 175 |   > | 
| 176 | > hb_set_digest_t; | 
| 177 |  | 
| 178 |  | 
| 179 | #endif /* HB_SET_DIGEST_HH */ | 
| 180 |  |