1 | /* |
2 | * Copyright © 2010,2012 Google, Inc. |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #include "hb-ot-shape-complex.hh" |
28 | |
29 | |
30 | static bool |
31 | compose_hebrew (const hb_ot_shape_normalize_context_t *c, |
32 | hb_codepoint_t a, |
33 | hb_codepoint_t b, |
34 | hb_codepoint_t *ab) |
35 | { |
36 | /* Hebrew presentation-form shaping. |
37 | * https://bugzilla.mozilla.org/show_bug.cgi?id=728866 |
38 | * Hebrew presentation forms with dagesh, for characters U+05D0..05EA; |
39 | * Note that some letters do not have a dagesh presForm encoded. |
40 | */ |
41 | static const hb_codepoint_t sDageshForms[0x05EAu - 0x05D0u + 1] = { |
42 | 0xFB30u, /* ALEF */ |
43 | 0xFB31u, /* BET */ |
44 | 0xFB32u, /* GIMEL */ |
45 | 0xFB33u, /* DALET */ |
46 | 0xFB34u, /* HE */ |
47 | 0xFB35u, /* VAV */ |
48 | 0xFB36u, /* ZAYIN */ |
49 | 0x0000u, /* HET */ |
50 | 0xFB38u, /* TET */ |
51 | 0xFB39u, /* YOD */ |
52 | 0xFB3Au, /* FINAL KAF */ |
53 | 0xFB3Bu, /* KAF */ |
54 | 0xFB3Cu, /* LAMED */ |
55 | 0x0000u, /* FINAL MEM */ |
56 | 0xFB3Eu, /* MEM */ |
57 | 0x0000u, /* FINAL NUN */ |
58 | 0xFB40u, /* NUN */ |
59 | 0xFB41u, /* SAMEKH */ |
60 | 0x0000u, /* AYIN */ |
61 | 0xFB43u, /* FINAL PE */ |
62 | 0xFB44u, /* PE */ |
63 | 0x0000u, /* FINAL TSADI */ |
64 | 0xFB46u, /* TSADI */ |
65 | 0xFB47u, /* QOF */ |
66 | 0xFB48u, /* RESH */ |
67 | 0xFB49u, /* SHIN */ |
68 | 0xFB4Au /* TAV */ |
69 | }; |
70 | |
71 | bool found = (bool) c->unicode->compose (a, b, ab); |
72 | |
73 | if (!found && !c->plan->has_mark) |
74 | { |
75 | /* Special-case Hebrew presentation forms that are excluded from |
76 | * standard normalization, but wanted for old fonts. */ |
77 | switch (b) { |
78 | case 0x05B4u: /* HIRIQ */ |
79 | if (a == 0x05D9u) { /* YOD */ |
80 | *ab = 0xFB1Du; |
81 | found = true; |
82 | } |
83 | break; |
84 | case 0x05B7u: /* patah */ |
85 | if (a == 0x05F2u) { /* YIDDISH YOD YOD */ |
86 | *ab = 0xFB1Fu; |
87 | found = true; |
88 | } else if (a == 0x05D0u) { /* ALEF */ |
89 | *ab = 0xFB2Eu; |
90 | found = true; |
91 | } |
92 | break; |
93 | case 0x05B8u: /* QAMATS */ |
94 | if (a == 0x05D0u) { /* ALEF */ |
95 | *ab = 0xFB2Fu; |
96 | found = true; |
97 | } |
98 | break; |
99 | case 0x05B9u: /* HOLAM */ |
100 | if (a == 0x05D5u) { /* VAV */ |
101 | *ab = 0xFB4Bu; |
102 | found = true; |
103 | } |
104 | break; |
105 | case 0x05BCu: /* DAGESH */ |
106 | if (a >= 0x05D0u && a <= 0x05EAu) { |
107 | *ab = sDageshForms[a - 0x05D0u]; |
108 | found = (*ab != 0); |
109 | } else if (a == 0xFB2Au) { /* SHIN WITH SHIN DOT */ |
110 | *ab = 0xFB2Cu; |
111 | found = true; |
112 | } else if (a == 0xFB2Bu) { /* SHIN WITH SIN DOT */ |
113 | *ab = 0xFB2Du; |
114 | found = true; |
115 | } |
116 | break; |
117 | case 0x05BFu: /* RAFE */ |
118 | switch (a) { |
119 | case 0x05D1u: /* BET */ |
120 | *ab = 0xFB4Cu; |
121 | found = true; |
122 | break; |
123 | case 0x05DBu: /* KAF */ |
124 | *ab = 0xFB4Du; |
125 | found = true; |
126 | break; |
127 | case 0x05E4u: /* PE */ |
128 | *ab = 0xFB4Eu; |
129 | found = true; |
130 | break; |
131 | } |
132 | break; |
133 | case 0x05C1u: /* SHIN DOT */ |
134 | if (a == 0x05E9u) { /* SHIN */ |
135 | *ab = 0xFB2Au; |
136 | found = true; |
137 | } else if (a == 0xFB49u) { /* SHIN WITH DAGESH */ |
138 | *ab = 0xFB2Cu; |
139 | found = true; |
140 | } |
141 | break; |
142 | case 0x05C2u: /* SIN DOT */ |
143 | if (a == 0x05E9u) { /* SHIN */ |
144 | *ab = 0xFB2Bu; |
145 | found = true; |
146 | } else if (a == 0xFB49u) { /* SHIN WITH DAGESH */ |
147 | *ab = 0xFB2Du; |
148 | found = true; |
149 | } |
150 | break; |
151 | } |
152 | } |
153 | |
154 | return found; |
155 | } |
156 | |
157 | static bool |
158 | disable_otl_hebrew (const hb_ot_shape_plan_t *plan) |
159 | { |
160 | /* For Hebrew shaper, use fallback if GPOS does not have 'hebr' |
161 | * script. This matches Uniscribe better, and makes fonts like |
162 | * Arial that have GSUB/GPOS/GDEF but no data for Hebrew work. |
163 | * See: |
164 | * https://github.com/harfbuzz/harfbuzz/issues/347#issuecomment-267838368 |
165 | */ |
166 | return plan->map.chosen_script[1] != HB_TAG ('h','e','b','r'); |
167 | } |
168 | |
169 | |
/* Shaper descriptor for Hebrew.
 *
 * The entries are positional.  Only two callback slots are populated,
 * with compose_hebrew and disable_otl_hebrew (both defined earlier in
 * this file); every other callback slot is left null.
 * NOTE(review): the struct declaration is not visible from here, so the
 * slot labels below follow the existing comments and the names of the
 * values -- confirm against hb_ot_complex_shaper_t.
 */
const hb_ot_complex_shaper_t _hb_ot_complex_shaper_hebrew =
{
  nullptr, /* collect_features */
  nullptr, /* override_features */
  nullptr, /* data_create */
  nullptr, /* data_destroy */
  nullptr, /* preprocess_text */
  nullptr, /* postprocess_glyphs */
  HB_OT_SHAPE_NORMALIZATION_MODE_DEFAULT, /* default normalization mode */
  nullptr, /* decompose */
  compose_hebrew, /* Hebrew-specific composition callback */
  nullptr, /* setup_masks */
  disable_otl_hebrew, /* decides whether OpenType layout is disabled */
  nullptr, /* reorder_marks */
  HB_OT_SHAPE_ZERO_WIDTH_MARKS_BY_GDEF_LATE, /* zero-width-marks policy */
  true, /* fallback_position */
};
187 | |