| 1 | /* | 
|---|
| 2 | * Copyright (C) 2012 Grigori Goronzy <greg@kinoho.net> | 
|---|
| 3 | * | 
|---|
| 4 | * Permission to use, copy, modify, and/or distribute this software for any | 
|---|
| 5 | * purpose with or without fee is hereby granted, provided that the above | 
|---|
| 6 | * copyright notice and this permission notice appear in all copies. | 
|---|
| 7 | * | 
|---|
| 8 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 
|---|
| 9 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 
|---|
| 10 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 
|---|
| 11 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
|---|
| 12 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 
|---|
| 13 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | 
|---|
| 14 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 
|---|
| 15 | */ | 
|---|
| 16 |  | 
|---|
| 17 | #include "hb.hh" | 
|---|
| 18 | #include "hb-unicode.hh" | 
|---|
| 19 | #include "hb-machinery.hh" | 
|---|
| 20 |  | 
|---|
| 21 | #include "hb-ucd-table.hh" | 
|---|
| 22 |  | 
|---|
| 23 | static hb_unicode_combining_class_t | 
|---|
| 24 | hb_ucd_combining_class (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 
|---|
| 25 | hb_codepoint_t unicode, | 
|---|
| 26 | void *user_data HB_UNUSED) | 
|---|
| 27 | { | 
|---|
| 28 | return (hb_unicode_combining_class_t) _hb_ucd_ccc (unicode); | 
|---|
| 29 | } | 
|---|
| 30 |  | 
|---|
| 31 | static hb_unicode_general_category_t | 
|---|
| 32 | hb_ucd_general_category (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 
|---|
| 33 | hb_codepoint_t unicode, | 
|---|
| 34 | void *user_data HB_UNUSED) | 
|---|
| 35 | { | 
|---|
| 36 | return (hb_unicode_general_category_t) _hb_ucd_gc (unicode); | 
|---|
| 37 | } | 
|---|
| 38 |  | 
|---|
| 39 | static hb_codepoint_t | 
|---|
| 40 | hb_ucd_mirroring (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 
|---|
| 41 | hb_codepoint_t unicode, | 
|---|
| 42 | void *user_data HB_UNUSED) | 
|---|
| 43 | { | 
|---|
| 44 | return unicode + _hb_ucd_bmg (unicode); | 
|---|
| 45 | } | 
|---|
| 46 |  | 
|---|
| 47 | static hb_script_t | 
|---|
| 48 | hb_ucd_script (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 
|---|
| 49 | hb_codepoint_t unicode, | 
|---|
| 50 | void *user_data HB_UNUSED) | 
|---|
| 51 | { | 
|---|
| 52 | return _hb_ucd_sc_map[_hb_ucd_sc (unicode)]; | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 |  | 
|---|
/* Constants for the arithmetic Hangul (de)composition implemented by
 * _hb_ucd_decompose_hangul() and _hb_ucd_compose_hangul().  The names follow
 * the Hangul syllable algorithm of the Unicode Standard. */

/* First code point of each range. */
#define SBASE 0xAC00u /* precomposed syllables */
#define LBASE 0x1100u /* leading consonants */
#define VBASE 0x1161u /* vowels */
#define TBASE 0x11A7u /* one below the first trailing consonant: index 0 means "no trailing" */

/* Number of entries in each range. */
#define LCOUNT 19u
#define VCOUNT 21u
#define TCOUNT 28u
#define NCOUNT (VCOUNT * TCOUNT) /* syllables per leading consonant: 588 */
#define SCOUNT (LCOUNT * NCOUNT) /* total precomposed syllables: 11172 */
|---|
| 65 |  | 
|---|
| 66 | static inline bool | 
|---|
| 67 | _hb_ucd_decompose_hangul (hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b) | 
|---|
| 68 | { | 
|---|
| 69 | unsigned si = ab - SBASE; | 
|---|
| 70 |  | 
|---|
| 71 | if (si >= SCOUNT) | 
|---|
| 72 | return false; | 
|---|
| 73 |  | 
|---|
| 74 | if (si % TCOUNT) | 
|---|
| 75 | { | 
|---|
| 76 | /* LV,T */ | 
|---|
| 77 | *a = SBASE + (si / TCOUNT) * TCOUNT; | 
|---|
| 78 | *b = TBASE + (si % TCOUNT); | 
|---|
| 79 | return true; | 
|---|
| 80 | } else { | 
|---|
| 81 | /* L,V */ | 
|---|
| 82 | *a = LBASE + (si / NCOUNT); | 
|---|
| 83 | *b = VBASE + (si % NCOUNT) / TCOUNT; | 
|---|
| 84 | return true; | 
|---|
| 85 | } | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | static inline bool | 
|---|
| 89 | _hb_ucd_compose_hangul (hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab) | 
|---|
| 90 | { | 
|---|
| 91 | if (a >= SBASE && a < (SBASE + SCOUNT) && b > TBASE && b < (TBASE + TCOUNT) && | 
|---|
| 92 | !((a - SBASE) % TCOUNT)) | 
|---|
| 93 | { | 
|---|
| 94 | /* LV,T */ | 
|---|
| 95 | *ab = a + (b - TBASE); | 
|---|
| 96 | return true; | 
|---|
| 97 | } | 
|---|
| 98 | else if (a >= LBASE && a < (LBASE + LCOUNT) && b >= VBASE && b < (VBASE + VCOUNT)) | 
|---|
| 99 | { | 
|---|
| 100 | /* L,V */ | 
|---|
| 101 | int li = a - LBASE; | 
|---|
| 102 | int vi = b - VBASE; | 
|---|
| 103 | *ab = SBASE + li * NCOUNT + vi * TCOUNT; | 
|---|
| 104 | return true; | 
|---|
| 105 | } | 
|---|
| 106 | else | 
|---|
| 107 | return false; | 
|---|
| 108 | } | 
|---|
| 109 |  | 
|---|
| 110 | static int | 
|---|
| 111 | _cmp_pair (const void *_key, const void *_item) | 
|---|
| 112 | { | 
|---|
| 113 | uint64_t& a = * (uint64_t*) _key; | 
|---|
| 114 | uint64_t b = (* (uint64_t*) _item) & HB_CODEPOINT_ENCODE3(0x1FFFFFu, 0x1FFFFFu, 0); | 
|---|
| 115 |  | 
|---|
| 116 | return a < b ? -1 : a > b ? +1 : 0; | 
|---|
| 117 | } | 
|---|
| 118 | static int | 
|---|
| 119 | _cmp_pair_11_7_14 (const void *_key, const void *_item) | 
|---|
| 120 | { | 
|---|
| 121 | uint32_t& a = * (uint32_t*) _key; | 
|---|
| 122 | uint32_t b = (* (uint32_t*) _item) & HB_CODEPOINT_ENCODE3_11_7_14(0x1FFFFFu, 0x1FFFFFu, 0); | 
|---|
| 123 |  | 
|---|
| 124 | return a < b ? -1 : a > b ? +1 : 0; | 
|---|
| 125 | } | 
|---|
| 126 |  | 
|---|
| 127 | static hb_bool_t | 
|---|
| 128 | hb_ucd_compose (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 
|---|
| 129 | hb_codepoint_t a, hb_codepoint_t b, hb_codepoint_t *ab, | 
|---|
| 130 | void *user_data HB_UNUSED) | 
|---|
| 131 | { | 
|---|
| 132 | // Hangul is handled algorithmically. | 
|---|
| 133 | if (_hb_ucd_compose_hangul (a, b, ab)) return true; | 
|---|
| 134 |  | 
|---|
| 135 | hb_codepoint_t u = 0; | 
|---|
| 136 |  | 
|---|
| 137 | if ((a & 0xFFFFF800u) == 0x0000u && (b & 0xFFFFFF80) == 0x0300u) | 
|---|
| 138 | { | 
|---|
| 139 | /* If "a" is small enough and "b" is in the U+0300 range, | 
|---|
| 140 | * the composition data is encoded in a 32bit array sorted | 
|---|
| 141 | * by "a,b" pair. */ | 
|---|
| 142 | uint32_t k = HB_CODEPOINT_ENCODE3_11_7_14 (a, b, 0); | 
|---|
| 143 | const uint32_t *v = hb_bsearch (k, | 
|---|
| 144 | _hb_ucd_dm2_u32_map, | 
|---|
| 145 | ARRAY_LENGTH (_hb_ucd_dm2_u32_map), | 
|---|
| 146 | sizeof (*_hb_ucd_dm2_u32_map), | 
|---|
| 147 | _cmp_pair_11_7_14); | 
|---|
| 148 | if (likely (!v)) return false; | 
|---|
| 149 | u = HB_CODEPOINT_DECODE3_11_7_14_3 (*v); | 
|---|
| 150 | } | 
|---|
| 151 | else | 
|---|
| 152 | { | 
|---|
| 153 | /* Otherwise it is stored in a 64bit array sorted by | 
|---|
| 154 | * "a,b" pair. */ | 
|---|
| 155 | uint64_t k = HB_CODEPOINT_ENCODE3 (a, b, 0); | 
|---|
| 156 | const uint64_t *v = hb_bsearch (k, | 
|---|
| 157 | _hb_ucd_dm2_u64_map, | 
|---|
| 158 | ARRAY_LENGTH (_hb_ucd_dm2_u64_map), | 
|---|
| 159 | sizeof (*_hb_ucd_dm2_u64_map), | 
|---|
| 160 | _cmp_pair); | 
|---|
| 161 | if (likely (!v)) return false; | 
|---|
| 162 | u = HB_CODEPOINT_DECODE3_3 (*v); | 
|---|
| 163 | } | 
|---|
| 164 |  | 
|---|
| 165 | if (unlikely (!u)) return false; | 
|---|
| 166 | *ab = u; | 
|---|
| 167 | return true; | 
|---|
| 168 | } | 
|---|
| 169 |  | 
|---|
| 170 | static hb_bool_t | 
|---|
| 171 | hb_ucd_decompose (hb_unicode_funcs_t *ufuncs HB_UNUSED, | 
|---|
| 172 | hb_codepoint_t ab, hb_codepoint_t *a, hb_codepoint_t *b, | 
|---|
| 173 | void *user_data HB_UNUSED) | 
|---|
| 174 | { | 
|---|
| 175 | if (_hb_ucd_decompose_hangul (ab, a, b)) return true; | 
|---|
| 176 |  | 
|---|
| 177 | unsigned i = _hb_ucd_dm (ab); | 
|---|
| 178 |  | 
|---|
| 179 | /* If no data, there's no decomposition. */ | 
|---|
| 180 | if (likely (!i)) return false; | 
|---|
| 181 | i--; | 
|---|
| 182 |  | 
|---|
| 183 | /* Check if it's a single-character decomposition. */ | 
|---|
| 184 | if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map)) | 
|---|
| 185 | { | 
|---|
| 186 | /* Single-character decompositions currently are only in plane 0 or plane 2. */ | 
|---|
| 187 | if (i < ARRAY_LENGTH (_hb_ucd_dm1_p0_map)) | 
|---|
| 188 | { | 
|---|
| 189 | /* Plane 0. */ | 
|---|
| 190 | *a = _hb_ucd_dm1_p0_map[i]; | 
|---|
| 191 | } | 
|---|
| 192 | else | 
|---|
| 193 | { | 
|---|
| 194 | /* Plane 2. */ | 
|---|
| 195 | i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map); | 
|---|
| 196 | *a = 0x20000 | _hb_ucd_dm1_p2_map[i]; | 
|---|
| 197 | } | 
|---|
| 198 | *b = 0; | 
|---|
| 199 | return true; | 
|---|
| 200 | } | 
|---|
| 201 | i -= ARRAY_LENGTH (_hb_ucd_dm1_p0_map) + ARRAY_LENGTH (_hb_ucd_dm1_p2_map); | 
|---|
| 202 |  | 
|---|
| 203 | /* Otherwise they are encoded either in a 32bit array or a 64bit array. */ | 
|---|
| 204 | if (i < ARRAY_LENGTH (_hb_ucd_dm2_u32_map)) | 
|---|
| 205 | { | 
|---|
| 206 | /* 32bit array. */ | 
|---|
| 207 | uint32_t v = _hb_ucd_dm2_u32_map[i]; | 
|---|
| 208 | *a = HB_CODEPOINT_DECODE3_11_7_14_1 (v); | 
|---|
| 209 | *b = HB_CODEPOINT_DECODE3_11_7_14_2 (v); | 
|---|
| 210 | return true; | 
|---|
| 211 | } | 
|---|
| 212 | i -= ARRAY_LENGTH (_hb_ucd_dm2_u32_map); | 
|---|
| 213 |  | 
|---|
| 214 | /* 64bit array. */ | 
|---|
| 215 | uint64_t v = _hb_ucd_dm2_u64_map[i]; | 
|---|
| 216 | *a = HB_CODEPOINT_DECODE3_1 (v); | 
|---|
| 217 | *b = HB_CODEPOINT_DECODE3_2 (v); | 
|---|
| 218 | return true; | 
|---|
| 219 | } | 
|---|
| 220 |  | 
|---|
| 221 |  | 
|---|
| 222 | static void free_static_ucd_funcs (); | 
|---|
| 223 |  | 
|---|
| 224 | static struct hb_ucd_unicode_funcs_lazy_loader_t : hb_unicode_funcs_lazy_loader_t<hb_ucd_unicode_funcs_lazy_loader_t> | 
|---|
| 225 | { | 
|---|
| 226 | static hb_unicode_funcs_t *create () | 
|---|
| 227 | { | 
|---|
| 228 | hb_unicode_funcs_t *funcs = hb_unicode_funcs_create (nullptr); | 
|---|
| 229 |  | 
|---|
| 230 | hb_unicode_funcs_set_combining_class_func (funcs, hb_ucd_combining_class, nullptr, nullptr); | 
|---|
| 231 | hb_unicode_funcs_set_general_category_func (funcs, hb_ucd_general_category, nullptr, nullptr); | 
|---|
| 232 | hb_unicode_funcs_set_mirroring_func (funcs, hb_ucd_mirroring, nullptr, nullptr); | 
|---|
| 233 | hb_unicode_funcs_set_script_func (funcs, hb_ucd_script, nullptr, nullptr); | 
|---|
| 234 | hb_unicode_funcs_set_compose_func (funcs, hb_ucd_compose, nullptr, nullptr); | 
|---|
| 235 | hb_unicode_funcs_set_decompose_func (funcs, hb_ucd_decompose, nullptr, nullptr); | 
|---|
| 236 |  | 
|---|
| 237 | hb_unicode_funcs_make_immutable (funcs); | 
|---|
| 238 |  | 
|---|
| 239 | hb_atexit (free_static_ucd_funcs); | 
|---|
| 240 |  | 
|---|
| 241 | return funcs; | 
|---|
| 242 | } | 
|---|
| 243 | } static_ucd_funcs; | 
|---|
| 244 |  | 
|---|
| 245 | static inline | 
|---|
| 246 | void free_static_ucd_funcs () | 
|---|
| 247 | { | 
|---|
| 248 | static_ucd_funcs.free_instance (); | 
|---|
| 249 | } | 
|---|
| 250 |  | 
|---|
| 251 | hb_unicode_funcs_t * | 
|---|
| 252 | hb_ucd_get_unicode_funcs () | 
|---|
| 253 | { | 
|---|
| 254 | #ifdef HB_NO_UCD | 
|---|
| 255 | return hb_unicode_funcs_get_empty (); | 
|---|
| 256 | #endif | 
|---|
| 257 | return static_ucd_funcs.get_unconst (); | 
|---|
| 258 | } | 
|---|
| 259 |  | 
|---|