1 | /* |
2 | * Copyright © 2022 Behdad Esfahbod |
3 | * |
4 | * This is part of HarfBuzz, a text shaping library. |
5 | * |
6 | * Permission is hereby granted, without written agreement and without |
7 | * license or royalty fees, to use, copy, modify, and distribute this |
8 | * software and its documentation for any purpose, provided that the |
9 | * above copyright notice and the following two paragraphs appear in |
10 | * all copies of this software. |
11 | * |
12 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
13 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
14 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
15 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
16 | * DAMAGE. |
17 | * |
18 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
19 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
20 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
21 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
22 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
23 | * |
24 | * Google Author(s): Behdad Esfahbod |
25 | */ |
26 | |
27 | #include "hb.hh" |
28 | |
29 | #ifndef HB_NO_BUFFER_VERIFY |
30 | |
31 | #include "hb-buffer.hh" |
32 | |
33 | |
34 | #define BUFFER_VERIFY_ERROR "buffer verify error: " |
35 | static inline void |
36 | buffer_verify_error (hb_buffer_t *buffer, |
37 | hb_font_t *font, |
38 | const char *fmt, |
39 | ...) HB_PRINTF_FUNC(3, 4); |
40 | |
41 | static inline void |
42 | buffer_verify_error (hb_buffer_t *buffer, |
43 | hb_font_t *font, |
44 | const char *fmt, |
45 | ...) |
46 | { |
47 | va_list ap; |
48 | va_start (ap, fmt); |
49 | if (buffer->messaging ()) |
50 | { |
51 | buffer->message_impl (font, fmt, ap); |
52 | } |
53 | else |
54 | { |
55 | fprintf (stderr, "harfbuzz " ); |
56 | vfprintf (stderr, fmt, ap); |
57 | fprintf (stderr, "\n" ); |
58 | } |
59 | va_end (ap); |
60 | } |
61 | |
62 | static bool |
63 | buffer_verify_monotone (hb_buffer_t *buffer, |
64 | hb_font_t *font) |
65 | { |
66 | /* Check that clusters are monotone. */ |
67 | if (buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES || |
68 | buffer->cluster_level == HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) |
69 | { |
70 | bool is_forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
71 | |
72 | unsigned int num_glyphs; |
73 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
74 | |
75 | for (unsigned int i = 1; i < num_glyphs; i++) |
76 | if (info[i-1].cluster != info[i].cluster && |
77 | (info[i-1].cluster < info[i].cluster) != is_forward) |
78 | { |
79 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "clusters are not monotone." ); |
80 | return false; |
81 | } |
82 | } |
83 | |
84 | return true; |
85 | } |
86 | |
87 | static bool |
88 | buffer_verify_unsafe_to_break (hb_buffer_t *buffer, |
89 | hb_buffer_t *text_buffer, |
90 | hb_font_t *font, |
91 | const hb_feature_t *features, |
92 | unsigned int num_features, |
93 | const char * const *shapers) |
94 | { |
95 | if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && |
96 | buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) |
97 | { |
98 | /* Cannot perform this check without monotone clusters. */ |
99 | return true; |
100 | } |
101 | |
102 | /* Check that breaking up shaping at safe-to-break is indeed safe. */ |
103 | |
104 | hb_buffer_t *fragment = hb_buffer_create_similar (buffer); |
105 | hb_buffer_set_flags (fragment, (hb_buffer_flags_t (hb_buffer_get_flags (fragment) & ~HB_BUFFER_FLAG_VERIFY))); |
106 | hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); |
107 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
108 | |
109 | unsigned int num_glyphs; |
110 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
111 | |
112 | unsigned int num_chars; |
113 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
114 | |
115 | /* Chop text and shape fragments. */ |
116 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
117 | unsigned int start = 0; |
118 | unsigned int text_start = forward ? 0 : num_chars; |
119 | unsigned int text_end = text_start; |
120 | for (unsigned int end = 1; end < num_glyphs + 1; end++) |
121 | { |
122 | if (end < num_glyphs && |
123 | (info[end].cluster == info[end-1].cluster || |
124 | info[end-(forward?0:1)].mask & HB_GLYPH_FLAG_UNSAFE_TO_BREAK)) |
125 | continue; |
126 | |
127 | /* Shape segment corresponding to glyphs start..end. */ |
128 | if (end == num_glyphs) |
129 | { |
130 | if (forward) |
131 | text_end = num_chars; |
132 | else |
133 | text_start = 0; |
134 | } |
135 | else |
136 | { |
137 | if (forward) |
138 | { |
139 | unsigned int cluster = info[end].cluster; |
140 | while (text_end < num_chars && text[text_end].cluster < cluster) |
141 | text_end++; |
142 | } |
143 | else |
144 | { |
145 | unsigned int cluster = info[end - 1].cluster; |
146 | while (text_start && text[text_start - 1].cluster >= cluster) |
147 | text_start--; |
148 | } |
149 | } |
150 | assert (text_start < text_end); |
151 | |
152 | if (0) |
153 | printf("start %u end %u text start %u end %u\n" , start, end, text_start, text_end); |
154 | |
155 | hb_buffer_clear_contents (fragment); |
156 | |
157 | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
158 | if (0 < text_start) |
159 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
160 | if (text_end < num_chars) |
161 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
162 | hb_buffer_set_flags (fragment, flags); |
163 | |
164 | hb_buffer_append (fragment, text_buffer, text_start, text_end); |
165 | if (!hb_shape_full (font, fragment, features, num_features, shapers) || |
166 | fragment->successful || fragment->shaping_failed) |
167 | { |
168 | hb_buffer_destroy (reconstruction); |
169 | hb_buffer_destroy (fragment); |
170 | return true; |
171 | } |
172 | hb_buffer_append (reconstruction, fragment, 0, -1); |
173 | |
174 | start = end; |
175 | if (forward) |
176 | text_start = text_end; |
177 | else |
178 | text_end = text_start; |
179 | } |
180 | |
181 | bool ret = true; |
182 | if (likely (reconstruction->successful)) |
183 | { |
184 | hb_buffer_diff_flags_t diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
185 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
186 | { |
187 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-break test failed." ); |
188 | ret = false; |
189 | |
190 | /* Return the reconstructed result instead so it can be inspected. */ |
191 | hb_buffer_set_length (buffer, 0); |
192 | hb_buffer_append (buffer, reconstruction, 0, -1); |
193 | } |
194 | } |
195 | |
196 | hb_buffer_destroy (reconstruction); |
197 | hb_buffer_destroy (fragment); |
198 | |
199 | return ret; |
200 | } |
201 | |
202 | static bool |
203 | buffer_verify_unsafe_to_concat (hb_buffer_t *buffer, |
204 | hb_buffer_t *text_buffer, |
205 | hb_font_t *font, |
206 | const hb_feature_t *features, |
207 | unsigned int num_features, |
208 | const char * const *shapers) |
209 | { |
210 | if (buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_GRAPHEMES && |
211 | buffer->cluster_level != HB_BUFFER_CLUSTER_LEVEL_MONOTONE_CHARACTERS) |
212 | { |
213 | /* Cannot perform this check without monotone clusters. */ |
214 | return true; |
215 | } |
216 | |
217 | /* Check that shuffling up text before shaping at safe-to-concat points |
218 | * is indeed safe. */ |
219 | |
220 | /* This is what we do: |
221 | * |
222 | * 1. We shape text once. Then segment the text at all the safe-to-concat |
223 | * points; |
224 | * |
225 | * 2. Then we create two buffers, one containing all the even segments and |
226 | * one all the odd segments. |
227 | * |
228 | * 3. Because all these segments were safe-to-concat at both ends, we |
229 | * expect that concatenating them and shaping should NOT change the |
230 | * shaping results of each segment. As such, we expect that after |
231 | * shaping the two buffers, we still get cluster boundaries at the |
232 | * segment boundaries, and that those all are safe-to-concat points. |
233 | * Moreover, that there are NOT any safe-to-concat points within the |
234 | * segments. |
235 | * |
236 | * 4. Finally, we reconstruct the shaping results of the original text by |
237 | * simply interleaving the shaping results of the segments from the two |
238 | * buffers, and assert that the total shaping results is the same as |
239 | * the one from original buffer in step 1. |
240 | */ |
241 | |
242 | hb_buffer_t *fragments[2] {hb_buffer_create_similar (buffer), |
243 | hb_buffer_create_similar (buffer)}; |
244 | hb_buffer_set_flags (fragments[0], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[0]) & ~HB_BUFFER_FLAG_VERIFY))); |
245 | hb_buffer_set_flags (fragments[1], (hb_buffer_flags_t (hb_buffer_get_flags (fragments[1]) & ~HB_BUFFER_FLAG_VERIFY))); |
246 | hb_buffer_t *reconstruction = hb_buffer_create_similar (buffer); |
247 | hb_buffer_set_flags (reconstruction, (hb_buffer_flags_t (hb_buffer_get_flags (reconstruction) & ~HB_BUFFER_FLAG_VERIFY))); |
248 | hb_segment_properties_t props; |
249 | hb_buffer_get_segment_properties (buffer, &props); |
250 | hb_buffer_set_segment_properties (fragments[0], &props); |
251 | hb_buffer_set_segment_properties (fragments[1], &props); |
252 | hb_buffer_set_segment_properties (reconstruction, &props); |
253 | |
254 | unsigned num_glyphs; |
255 | hb_glyph_info_t *info = hb_buffer_get_glyph_infos (buffer, &num_glyphs); |
256 | |
257 | unsigned num_chars; |
258 | hb_glyph_info_t *text = hb_buffer_get_glyph_infos (text_buffer, &num_chars); |
259 | |
260 | bool forward = HB_DIRECTION_IS_FORWARD (hb_buffer_get_direction (buffer)); |
261 | |
262 | if (!forward) |
263 | hb_buffer_reverse (buffer); |
264 | |
265 | /* |
266 | * Split text into segments and collect into to fragment streams. |
267 | */ |
268 | { |
269 | unsigned fragment_idx = 0; |
270 | unsigned start = 0; |
271 | unsigned text_start = 0; |
272 | unsigned text_end = 0; |
273 | for (unsigned end = 1; end < num_glyphs + 1; end++) |
274 | { |
275 | if (end < num_glyphs && |
276 | (info[end].cluster == info[end-1].cluster || |
277 | info[end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
278 | continue; |
279 | |
280 | /* Accumulate segment corresponding to glyphs start..end. */ |
281 | if (end == num_glyphs) |
282 | text_end = num_chars; |
283 | else |
284 | { |
285 | unsigned cluster = info[end].cluster; |
286 | while (text_end < num_chars && text[text_end].cluster < cluster) |
287 | text_end++; |
288 | } |
289 | assert (text_start < text_end); |
290 | |
291 | if (0) |
292 | printf("start %u end %u text start %u end %u\n" , start, end, text_start, text_end); |
293 | |
294 | #if 0 |
295 | hb_buffer_flags_t flags = hb_buffer_get_flags (fragment); |
296 | if (0 < text_start) |
297 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_BOT); |
298 | if (text_end < num_chars) |
299 | flags = (hb_buffer_flags_t) (flags & ~HB_BUFFER_FLAG_EOT); |
300 | hb_buffer_set_flags (fragment, flags); |
301 | #endif |
302 | |
303 | hb_buffer_append (fragments[fragment_idx], text_buffer, text_start, text_end); |
304 | |
305 | start = end; |
306 | text_start = text_end; |
307 | fragment_idx = 1 - fragment_idx; |
308 | } |
309 | } |
310 | |
311 | bool ret = true; |
312 | hb_buffer_diff_flags_t diff; |
313 | /* |
314 | * Shape the two fragment streams. |
315 | */ |
316 | if (!hb_shape_full (font, fragments[0], features, num_features, shapers) || |
317 | !fragments[0]->successful || fragments[0]->shaping_failed) |
318 | goto out; |
319 | |
320 | if (!hb_shape_full (font, fragments[1], features, num_features, shapers) || |
321 | !fragments[1]->successful || fragments[1]->shaping_failed) |
322 | goto out; |
323 | |
324 | if (!forward) |
325 | { |
326 | hb_buffer_reverse (fragments[0]); |
327 | hb_buffer_reverse (fragments[1]); |
328 | } |
329 | |
330 | /* |
331 | * Reconstruct results. |
332 | */ |
333 | { |
334 | unsigned fragment_idx = 0; |
335 | unsigned fragment_start[2] {0, 0}; |
336 | unsigned fragment_num_glyphs[2]; |
337 | hb_glyph_info_t *fragment_info[2]; |
338 | for (unsigned i = 0; i < 2; i++) |
339 | fragment_info[i] = hb_buffer_get_glyph_infos (fragments[i], &fragment_num_glyphs[i]); |
340 | while (fragment_start[0] < fragment_num_glyphs[0] || |
341 | fragment_start[1] < fragment_num_glyphs[1]) |
342 | { |
343 | unsigned fragment_end = fragment_start[fragment_idx] + 1; |
344 | while (fragment_end < fragment_num_glyphs[fragment_idx] && |
345 | (fragment_info[fragment_idx][fragment_end].cluster == fragment_info[fragment_idx][fragment_end - 1].cluster || |
346 | fragment_info[fragment_idx][fragment_end].mask & HB_GLYPH_FLAG_UNSAFE_TO_CONCAT)) |
347 | fragment_end++; |
348 | |
349 | hb_buffer_append (reconstruction, fragments[fragment_idx], fragment_start[fragment_idx], fragment_end); |
350 | |
351 | fragment_start[fragment_idx] = fragment_end; |
352 | fragment_idx = 1 - fragment_idx; |
353 | } |
354 | } |
355 | |
356 | if (!forward) |
357 | { |
358 | hb_buffer_reverse (buffer); |
359 | hb_buffer_reverse (reconstruction); |
360 | } |
361 | |
362 | if (likely (reconstruction->successful)) |
363 | { |
364 | /* |
365 | * Diff results. |
366 | */ |
367 | diff = hb_buffer_diff (reconstruction, buffer, (hb_codepoint_t) -1, 0); |
368 | if (diff & ~HB_BUFFER_DIFF_FLAG_GLYPH_FLAGS_MISMATCH) |
369 | { |
370 | buffer_verify_error (buffer, font, BUFFER_VERIFY_ERROR "unsafe-to-concat test failed." ); |
371 | ret = false; |
372 | |
373 | /* Return the reconstructed result instead so it can be inspected. */ |
374 | hb_buffer_set_length (buffer, 0); |
375 | hb_buffer_append (buffer, reconstruction, 0, -1); |
376 | } |
377 | } |
378 | |
379 | out: |
380 | hb_buffer_destroy (reconstruction); |
381 | hb_buffer_destroy (fragments[0]); |
382 | hb_buffer_destroy (fragments[1]); |
383 | |
384 | return ret; |
385 | } |
386 | |
387 | bool |
388 | hb_buffer_t::verify (hb_buffer_t *text_buffer, |
389 | hb_font_t *font, |
390 | const hb_feature_t *features, |
391 | unsigned int num_features, |
392 | const char * const *shapers) |
393 | { |
394 | bool ret = true; |
395 | if (!buffer_verify_monotone (this, font)) |
396 | ret = false; |
397 | if (!buffer_verify_unsafe_to_break (this, text_buffer, font, features, num_features, shapers)) |
398 | ret = false; |
399 | if ((flags & HB_BUFFER_FLAG_PRODUCE_UNSAFE_TO_CONCAT) != 0 && |
400 | !buffer_verify_unsafe_to_concat (this, text_buffer, font, features, num_features, shapers)) |
401 | ret = false; |
402 | if (!ret) |
403 | { |
404 | #ifndef HB_NO_BUFFER_SERIALIZE |
405 | unsigned len = text_buffer->len; |
406 | hb_vector_t<char> bytes; |
407 | if (likely (bytes.resize (len * 10 + 16))) |
408 | { |
409 | hb_buffer_serialize_unicode (text_buffer, |
410 | 0, len, |
411 | bytes.arrayZ, bytes.length, |
412 | &len, |
413 | HB_BUFFER_SERIALIZE_FORMAT_TEXT, |
414 | HB_BUFFER_SERIALIZE_FLAG_NO_CLUSTERS); |
415 | buffer_verify_error (this, font, BUFFER_VERIFY_ERROR "text was: %s." , bytes.arrayZ); |
416 | } |
417 | #endif |
418 | } |
419 | return ret; |
420 | } |
421 | |
422 | |
423 | #endif |
424 | |