1/*
2 * Copyright © 2007,2008,2009,2010 Red Hat, Inc.
3 * Copyright © 2012,2018 Google, Inc.
4 *
5 * This is part of HarfBuzz, a text shaping library.
6 *
7 * Permission is hereby granted, without written agreement and without
8 * license or royalty fees, to use, copy, modify, and distribute this
9 * software and its documentation for any purpose, provided that the
10 * above copyright notice and the following two paragraphs appear in
11 * all copies of this software.
12 *
13 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
14 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
15 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
16 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
17 * DAMAGE.
18 *
19 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
20 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
21 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
22 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
23 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
24 *
25 * Red Hat Author(s): Behdad Esfahbod
26 * Google Author(s): Behdad Esfahbod
27 */
28
29#ifndef HB_SANITIZE_HH
30#define HB_SANITIZE_HH
31
32#include "hb.hh"
33#include "hb-blob.hh"
34#include "hb-dispatch.hh"
35
36
37/*
38 * Sanitize
39 *
40 *
41 * === Introduction ===
42 *
43 * The sanitize machinery is at the core of our zero-cost font loading. We
44 * mmap() font file into memory and create a blob out of it. Font subtables
45 * are returned as a readonly sub-blob of the main font blob. These table
46 * blobs are then sanitized before use, to ensure invalid memory access does
47 * not happen. A typical top-level use of the sanitize API, e.g. to load the
48 * 'head' table, looks like:
49 *
50 * hb_blob_t *head_blob = hb_sanitize_context_t ().reference_table<OT::head> (face);
51 *
52 * The blob then can be converted to a head table struct with:
53 *
54 * const head *head_table = head_blob->as<head> ();
55 *
56 * What reference_table() does is call hb_face_reference_table() to load
57 * the table blob, sanitize it and return either the sanitized blob, or empty
58 * blob if sanitization failed. The blob->as() function returns the null
59 * object of its template type argument if the blob is empty. Otherwise, it
60 * just casts the blob contents to the desired type.
61 *
62 * Sanitizing a blob of data with a type T works as follows (with minor
63 * simplification):
64 *
65 * - Cast blob content to T*, call sanitize() method of it,
66 * - If sanitize succeeded, return blob.
67 * - Otherwise, if blob is not writable, try making it writable,
68 * or copy if cannot be made writable in-place,
69 * - Call sanitize() again. Return blob if sanitize succeeded.
70 * - Return empty blob otherwise.
71 *
72 *
73 * === The sanitize() contract ===
74 *
75 * The sanitize() method of each object type shall return true if it's safe to
76 * call other methods of the object, and %false otherwise.
77 *
78 * Note that what sanitize() checks for might align with what the specification
79 * describes as valid table data, but does not have to be. In particular, we
80 * do NOT want to be pedantic and concern ourselves with validity checks that
81 * are irrelevant to our use of the table. On the contrary, we want to be
82 * lenient with error handling and accept invalid data to the extent that it
83 * does not impose extra burden on us.
84 *
85 * Based on the sanitize contract, one can see that what we check for depends
86 * on how we use the data in other table methods. Ie. if other table methods
87 * assume that offsets do NOT point out of the table data block, then that's
88 * something sanitize() must check for (GSUB/GPOS/GDEF/etc work this way). On
89 * the other hand, if other methods do such checks themselves, then sanitize()
90 * does not have to bother with them (glyf/loca work this way). The choice
91 * depends on the table structure and sanitize() performance. For example, to
92 * check glyf/loca offsets in sanitize() would cost O(num-glyphs). We try hard
93 * to avoid such costs during font loading. By postponing such checks to the
94 * actual glyph loading, we reduce the sanitize cost to O(1) and total runtime
95 * cost to O(used-glyphs). As such, this is preferred.
96 *
97 * The same argument can be made re GSUB/GPOS/GDEF, but there, the table
98 * structure is so complicated that by checking all offsets at sanitize() time,
99 * we make the code much simpler in other methods, as offsets and referenced
100 * objects do not need to be validated at each use site.
101 */
102
/* These limits bound sanitizing time on really broken fonts.  Every one of
 * them may be overridden by defining the macro before this point. */

/* Number of edit requests after which may_edit() refuses further edits. */
#if !defined(HB_SANITIZE_MAX_EDITS)
#  define HB_SANITIZE_MAX_EDITS 32
#endif

/* The operation budget is the blob length multiplied by this factor... */
#if !defined(HB_SANITIZE_MAX_OPS_FACTOR)
#  define HB_SANITIZE_MAX_OPS_FACTOR 64
#endif

/* ...clamped from below by this value... */
#if !defined(HB_SANITIZE_MAX_OPS_MIN)
#  define HB_SANITIZE_MAX_OPS_MIN 16384
#endif

/* ...and from above by this one (also used when the product overflows). */
#if !defined(HB_SANITIZE_MAX_OPS_MAX)
#  define HB_SANITIZE_MAX_OPS_MAX 0x3FFFFFFF
#endif

/* visit_subtables() fails once the running subtable count reaches this. */
#if !defined(HB_SANITIZE_MAX_SUBTABLES)
#  define HB_SANITIZE_MAX_SUBTABLES 0x4000
#endif
119
120struct hb_sanitize_context_t :
121 hb_dispatch_context_t<hb_sanitize_context_t, bool, HB_DEBUG_SANITIZE>
122{
123 hb_sanitize_context_t () :
124 start (nullptr), end (nullptr),
125 length (0),
126 max_ops (0), max_subtables (0),
127 recursion_depth (0),
128 writable (false), edit_count (0),
129 blob (nullptr),
130 num_glyphs (65536),
131 num_glyphs_set (false),
132 lazy_some_gpos (false) {}
133
134 const char *get_name () { return "SANITIZE"; }
135 template <typename T, typename F>
136 bool may_dispatch (const T *obj HB_UNUSED, const F *format)
137 { return format->sanitize (this); }
138 static return_t default_return_value () { return true; }
139 static return_t no_dispatch_return_value () { return false; }
140 bool stop_sublookup_iteration (const return_t r) const { return !r; }
141
142 bool visit_subtables (unsigned count)
143 {
144 max_subtables += count;
145 return max_subtables < HB_SANITIZE_MAX_SUBTABLES;
146 }
147
148 private:
149 template <typename T, typename ...Ts> auto
150 _dispatch (const T &obj, hb_priority<1>, Ts&&... ds) HB_AUTO_RETURN
151 ( obj.sanitize (this, std::forward<Ts> (ds)...) )
152 template <typename T, typename ...Ts> auto
153 _dispatch (const T &obj, hb_priority<0>, Ts&&... ds) HB_AUTO_RETURN
154 ( obj.dispatch (this, std::forward<Ts> (ds)...) )
155 public:
156 template <typename T, typename ...Ts> auto
157 dispatch (const T &obj, Ts&&... ds) HB_AUTO_RETURN
158 ( _dispatch (obj, hb_prioritize, std::forward<Ts> (ds)...) )
159
160 hb_sanitize_context_t (hb_blob_t *b) : hb_sanitize_context_t ()
161 {
162 init (b);
163
164 if (blob)
165 start_processing ();
166 }
167
168 ~hb_sanitize_context_t ()
169 {
170 if (blob)
171 end_processing ();
172 }
173
174 void init (hb_blob_t *b)
175 {
176 this->blob = hb_blob_reference (b);
177 this->writable = false;
178 }
179
180 void set_num_glyphs (unsigned int num_glyphs_)
181 {
182 num_glyphs = num_glyphs_;
183 num_glyphs_set = true;
184 }
185 unsigned int get_num_glyphs () { return num_glyphs; }
186
187 void set_max_ops (int max_ops_) { max_ops = max_ops_; }
188
189 template <typename T>
190 void set_object (const T *obj)
191 {
192 reset_object ();
193
194 if (!obj) return;
195
196 const char *obj_start = (const char *) obj;
197 if (unlikely (obj_start < this->start || this->end <= obj_start))
198 {
199 this->start = this->end = nullptr;
200 this->length = 0;
201 }
202 else
203 {
204 this->start = obj_start;
205 this->end = obj_start + hb_min (size_t (this->end - obj_start), obj->get_size ());
206 this->length = this->end - this->start;
207 }
208 }
209
210 void reset_object ()
211 {
212 this->start = this->blob->data;
213 this->end = this->start + this->blob->length;
214 this->length = this->end - this->start;
215 assert (this->start <= this->end); /* Must not overflow. */
216 }
217
218 void start_processing ()
219 {
220 reset_object ();
221 unsigned m;
222 if (unlikely (hb_unsigned_mul_overflows (this->end - this->start, HB_SANITIZE_MAX_OPS_FACTOR, &m)))
223 this->max_ops = HB_SANITIZE_MAX_OPS_MAX;
224 else
225 this->max_ops = hb_clamp (m,
226 (unsigned) HB_SANITIZE_MAX_OPS_MIN,
227 (unsigned) HB_SANITIZE_MAX_OPS_MAX);
228 this->edit_count = 0;
229 this->debug_depth = 0;
230 this->recursion_depth = 0;
231
232 DEBUG_MSG_LEVEL (SANITIZE, start, 0, +1,
233 "start [%p..%p] (%lu bytes)",
234 this->start, this->end,
235 (unsigned long) (this->end - this->start));
236 }
237
238 void end_processing ()
239 {
240 DEBUG_MSG_LEVEL (SANITIZE, this->start, 0, -1,
241 "end [%p..%p] %u edit requests",
242 this->start, this->end, this->edit_count);
243
244 hb_blob_destroy (this->blob);
245 this->blob = nullptr;
246 this->start = this->end = nullptr;
247 this->length = 0;
248 }
249
250 unsigned get_edit_count () { return edit_count; }
251
252
253 bool check_ops(unsigned count)
254 {
255 /* Avoid underflow */
256 if (unlikely (this->max_ops < 0 || count >= (unsigned) this->max_ops))
257 {
258 this->max_ops = -1;
259 return false;
260 }
261 this->max_ops -= (int) count;
262 return true;
263 }
264
265#ifndef HB_OPTIMIZE_SIZE
266 HB_ALWAYS_INLINE
267#endif
268 bool check_range (const void *base,
269 unsigned int len) const
270 {
271 const char *p = (const char *) base;
272 bool ok = (uintptr_t) (p - this->start) <= this->length &&
273 (unsigned int) (this->end - p) >= len &&
274 ((this->max_ops -= len) > 0);
275
276 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
277 "check_range [%p..%p]"
278 " (%u bytes) in [%p..%p] -> %s",
279 p, p + len, len,
280 this->start, this->end,
281 ok ? "OK" : "OUT-OF-RANGE");
282
283 return likely (ok);
284 }
285#ifndef HB_OPTIMIZE_SIZE
286 HB_ALWAYS_INLINE
287#endif
288 bool check_range_fast (const void *base,
289 unsigned int len) const
290 {
291 const char *p = (const char *) base;
292 bool ok = ((uintptr_t) (p - this->start) <= this->length &&
293 (unsigned int) (this->end - p) >= len);
294
295 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
296 "check_range_fast [%p..%p]"
297 " (%u bytes) in [%p..%p] -> %s",
298 p, p + len, len,
299 this->start, this->end,
300 ok ? "OK" : "OUT-OF-RANGE");
301
302 return likely (ok);
303 }
304
305#ifndef HB_OPTIMIZE_SIZE
306 HB_ALWAYS_INLINE
307#endif
308 bool check_point (const void *base) const
309 {
310 const char *p = (const char *) base;
311 bool ok = (uintptr_t) (p - this->start) <= this->length;
312
313 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
314 "check_point [%p]"
315 " in [%p..%p] -> %s",
316 p,
317 this->start, this->end,
318 ok ? "OK" : "OUT-OF-RANGE");
319
320 return likely (ok);
321 }
322
323 template <typename T>
324 bool check_range (const T *base,
325 unsigned int a,
326 unsigned int b) const
327 {
328 unsigned m;
329 return !hb_unsigned_mul_overflows (a, b, &m) &&
330 this->check_range (base, m);
331 }
332
333 template <typename T>
334 bool check_range (const T *base,
335 unsigned int a,
336 unsigned int b,
337 unsigned int c) const
338 {
339 unsigned m;
340 return !hb_unsigned_mul_overflows (a, b, &m) &&
341 this->check_range (base, m, c);
342 }
343
344 template <typename T>
345 HB_ALWAYS_INLINE
346 bool check_array_sized (const T *base, unsigned int len, unsigned len_size) const
347 {
348 if (len_size >= 4)
349 {
350 if (unlikely (hb_unsigned_mul_overflows (len, hb_static_size (T), &len)))
351 return false;
352 }
353 else
354 len = len * hb_static_size (T);
355 return this->check_range (base, len);
356 }
357
358 template <typename T>
359 bool check_array (const T *base, unsigned int len) const
360 {
361 return this->check_range (base, len, hb_static_size (T));
362 }
363
364 template <typename T>
365 bool check_array (const T *base,
366 unsigned int a,
367 unsigned int b) const
368 {
369 return this->check_range (base, hb_static_size (T), a, b);
370 }
371
372 bool check_start_recursion (int max_depth)
373 {
374 if (unlikely (recursion_depth >= max_depth)) return false;
375 return ++recursion_depth;
376 }
377
378 bool end_recursion (bool result)
379 {
380 recursion_depth--;
381 return result;
382 }
383
384 template <typename Type>
385#ifndef HB_OPTIMIZE_SIZE
386 HB_ALWAYS_INLINE
387#endif
388 bool check_struct (const Type *obj) const
389 {
390 if (sizeof (uintptr_t) == sizeof (uint32_t))
391 return likely (this->check_range_fast (obj, obj->min_size));
392 else
393 return likely (this->check_point ((const char *) obj + obj->min_size));
394 }
395
396 bool may_edit (const void *base, unsigned int len)
397 {
398 if (this->edit_count >= HB_SANITIZE_MAX_EDITS)
399 return false;
400
401 const char *p = (const char *) base;
402 this->edit_count++;
403
404 DEBUG_MSG_LEVEL (SANITIZE, p, this->debug_depth+1, 0,
405 "may_edit(%u) [%p..%p] (%u bytes) in [%p..%p] -> %s",
406 this->edit_count,
407 p, p + len, len,
408 this->start, this->end,
409 this->writable ? "GRANTED" : "DENIED");
410
411 return this->writable;
412 }
413
414 template <typename Type, typename ValueType>
415 bool try_set (const Type *obj, const ValueType &v)
416 {
417 if (this->may_edit (obj, hb_static_size (Type)))
418 {
419 * const_cast<Type *> (obj) = v;
420 return true;
421 }
422 return false;
423 }
424
425 template <typename Type>
426 hb_blob_t *sanitize_blob (hb_blob_t *blob)
427 {
428 bool sane;
429
430 init (blob);
431
432 retry:
433 DEBUG_MSG_FUNC (SANITIZE, start, "start");
434
435 start_processing ();
436
437 if (unlikely (!start))
438 {
439 end_processing ();
440 return blob;
441 }
442
443 Type *t = reinterpret_cast<Type *> (const_cast<char *> (start));
444
445 sane = t->sanitize (this);
446 if (sane)
447 {
448 if (edit_count)
449 {
450 DEBUG_MSG_FUNC (SANITIZE, start, "passed first round with %u edits; going for second round", edit_count);
451
452 /* sanitize again to ensure no toe-stepping */
453 edit_count = 0;
454 sane = t->sanitize (this);
455 if (edit_count) {
456 DEBUG_MSG_FUNC (SANITIZE, start, "requested %u edits in second round; FAILLING", edit_count);
457 sane = false;
458 }
459 }
460 }
461 else
462 {
463 if (edit_count && !writable) {
464 start = hb_blob_get_data_writable (blob, nullptr);
465 end = start + blob->length;
466
467 if (start)
468 {
469 writable = true;
470 /* ok, we made it writable by relocating. try again */
471 DEBUG_MSG_FUNC (SANITIZE, start, "retry");
472 goto retry;
473 }
474 }
475 }
476
477 end_processing ();
478
479 DEBUG_MSG_FUNC (SANITIZE, start, sane ? "PASSED" : "FAILED");
480 if (sane)
481 {
482 hb_blob_make_immutable (blob);
483 return blob;
484 }
485 else
486 {
487 hb_blob_destroy (blob);
488 return hb_blob_get_empty ();
489 }
490 }
491
492 template <typename Type>
493 hb_blob_t *reference_table (const hb_face_t *face, hb_tag_t tableTag = Type::tableTag)
494 {
495 if (!num_glyphs_set)
496 set_num_glyphs (hb_face_get_glyph_count (face));
497 return sanitize_blob<Type> (hb_face_reference_table (face, tableTag));
498 }
499
500 const char *start, *end;
501 unsigned length;
502 mutable int max_ops, max_subtables;
503 private:
504 int recursion_depth;
505 bool writable;
506 unsigned int edit_count;
507 hb_blob_t *blob;
508 unsigned int num_glyphs;
509 bool num_glyphs_set;
510 public:
511 bool lazy_some_gpos;
512};
513
514struct hb_sanitize_with_object_t
515{
516 template <typename T>
517 hb_sanitize_with_object_t (hb_sanitize_context_t *c, const T& obj) : c (c)
518 { c->set_object (obj); }
519 ~hb_sanitize_with_object_t ()
520 { c->reset_object (); }
521
522 private:
523 hb_sanitize_context_t *c;
524};
525
526
527#endif /* HB_SANITIZE_HH */
528