1/*
2 * Copyright © 2009 Red Hat, Inc.
3 * Copyright © 2011 Codethink Limited
4 * Copyright © 2010,2011,2012 Google, Inc.
5 *
6 * This is part of HarfBuzz, a text shaping library.
7 *
8 * Permission is hereby granted, without written agreement and without
9 * license or royalty fees, to use, copy, modify, and distribute this
10 * software and its documentation for any purpose, provided that the
11 * above copyright notice and the following two paragraphs appear in
12 * all copies of this software.
13 *
14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * DAMAGE.
19 *
20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 *
26 * Red Hat Author(s): Behdad Esfahbod
27 * Codethink Author(s): Ryan Lortie
28 * Google Author(s): Behdad Esfahbod
29 */
30
31#include "hb.hh"
32
33#include "hb-unicode.hh"
34
35
36/**
37 * SECTION: hb-unicode
38 * @title: hb-unicode
39 * @short_description: Unicode character property access
40 * @include: hb.h
41 *
42 * Unicode functions are used to access Unicode character properties.
43 * With these functions, client programs can query various properties from
44 * the Unicode Character Database for any code point, such as General
45 * Category (gc), Script (sc), Canonical Combining Class (ccc), etc.
46 *
47 * Client programs can optionally pass in their own Unicode functions
48 * that implement the same queries. The set of functions available is
49 * defined by the virtual methods in #hb_unicode_funcs_t.
50 *
51 * HarfBuzz provides built-in default functions for each method in
52 * #hb_unicode_funcs_t.
53 **/
54
55
56/*
57 * hb_unicode_funcs_t
58 */
59
60static hb_unicode_combining_class_t
61hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
62 hb_codepoint_t unicode HB_UNUSED,
63 void *user_data HB_UNUSED)
64{
65 return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
66}
67
68#ifndef HB_DISABLE_DEPRECATED
69static unsigned int
70hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
71 hb_codepoint_t unicode HB_UNUSED,
72 void *user_data HB_UNUSED)
73{
74 return 1;
75}
76#endif
77
78static hb_unicode_general_category_t
79hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
80 hb_codepoint_t unicode HB_UNUSED,
81 void *user_data HB_UNUSED)
82{
83 return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
84}
85
86static hb_codepoint_t
87hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
88 hb_codepoint_t unicode,
89 void *user_data HB_UNUSED)
90{
91 return unicode;
92}
93
94static hb_script_t
95hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
96 hb_codepoint_t unicode HB_UNUSED,
97 void *user_data HB_UNUSED)
98{
99 return HB_SCRIPT_UNKNOWN;
100}
101
102static hb_bool_t
103hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
104 hb_codepoint_t a HB_UNUSED,
105 hb_codepoint_t b HB_UNUSED,
106 hb_codepoint_t *ab HB_UNUSED,
107 void *user_data HB_UNUSED)
108{
109 return false;
110}
111
112static hb_bool_t
113hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
114 hb_codepoint_t ab HB_UNUSED,
115 hb_codepoint_t *a HB_UNUSED,
116 hb_codepoint_t *b HB_UNUSED,
117 void *user_data HB_UNUSED)
118{
119 return false;
120}
121
122
123#ifndef HB_DISABLE_DEPRECATED
124static unsigned int
125hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
126 hb_codepoint_t u HB_UNUSED,
127 hb_codepoint_t *decomposed HB_UNUSED,
128 void *user_data HB_UNUSED)
129{
130 return 0;
131}
132#endif
133
134#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
135#include "hb-glib.h"
136#endif
137#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
138#include "hb-icu.h"
139#endif
140
141/**
142 * hb_unicode_funcs_get_default:
143 *
144 * Fetches a pointer to the default Unicode-functions structure that is used
145 * when no functions are explicitly set on #hb_buffer_t.
146 *
147 * Return value: (transfer none): a pointer to the #hb_unicode_funcs_t Unicode-functions structure
148 *
149 * Since: 0.9.2
150 **/
151hb_unicode_funcs_t *
152hb_unicode_funcs_get_default ()
153{
154#if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
155 return hb_ucd_get_unicode_funcs ();
156#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
157 return hb_glib_get_unicode_funcs ();
158#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
159 return hb_icu_get_unicode_funcs ();
160#else
161#define HB_UNICODE_FUNCS_NIL 1
162 return hb_unicode_funcs_get_empty ();
163#endif
164}
165
166#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
167#error "Could not find any Unicode functions implementation, you have to provide your own"
168#error "Consider building hb-ucd.cc. If you absolutely want to build without any, define HB_NO_UNICODE_FUNCS."
169#endif
170
171/**
172 * hb_unicode_funcs_create:
173 * @parent: (nullable): Parent Unicode-functions structure
174 *
175 * Creates a new #hb_unicode_funcs_t structure of Unicode functions.
176 *
177 * Return value: (transfer full): The Unicode-functions structure
178 *
179 * Since: 0.9.2
180 **/
181hb_unicode_funcs_t *
182hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
183{
184 hb_unicode_funcs_t *ufuncs;
185
186 if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
187 return hb_unicode_funcs_get_empty ();
188
189 if (!parent)
190 parent = hb_unicode_funcs_get_empty ();
191
192 hb_unicode_funcs_make_immutable (parent);
193 ufuncs->parent = hb_unicode_funcs_reference (parent);
194
195 ufuncs->func = parent->func;
196
197 /* We can safely copy user_data from parent since we hold a reference
198 * onto it and it's immutable. We should not copy the destroy notifiers
199 * though. */
200 ufuncs->user_data = parent->user_data;
201
202 return ufuncs;
203}
204
205
206DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
207{
208 HB_OBJECT_HEADER_STATIC,
209
210 nullptr, /* parent */
211 {
212#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
213 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
214#undef HB_UNICODE_FUNC_IMPLEMENT
215 }
216};
217
218/**
219 * hb_unicode_funcs_get_empty:
220 *
221 * Fetches the singleton empty Unicode-functions structure.
222 *
223 * Return value: (transfer full): The empty Unicode-functions structure
224 *
225 * Since: 0.9.2
226 **/
227hb_unicode_funcs_t *
228hb_unicode_funcs_get_empty ()
229{
230 return const_cast<hb_unicode_funcs_t *> (&Null (hb_unicode_funcs_t));
231}
232
233/**
234 * hb_unicode_funcs_reference: (skip)
235 * @ufuncs: The Unicode-functions structure
236 *
237 * Increases the reference count on a Unicode-functions structure.
238 *
239 * Return value: (transfer full): The Unicode-functions structure
240 *
241 * Since: 0.9.2
242 **/
243hb_unicode_funcs_t *
244hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
245{
246 return hb_object_reference (ufuncs);
247}
248
249/**
250 * hb_unicode_funcs_destroy: (skip)
251 * @ufuncs: The Unicode-functions structure
252 *
253 * Decreases the reference count on a Unicode-functions structure. When
254 * the reference count reaches zero, the Unicode-functions structure is
255 * destroyed, freeing all memory.
256 *
257 * Since: 0.9.2
258 **/
259void
260hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
261{
262 if (!hb_object_destroy (ufuncs)) return;
263
264#define HB_UNICODE_FUNC_IMPLEMENT(name) \
265 if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
266 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
267#undef HB_UNICODE_FUNC_IMPLEMENT
268
269 hb_unicode_funcs_destroy (ufuncs->parent);
270
271 hb_free (ufuncs);
272}
273
274/**
275 * hb_unicode_funcs_set_user_data: (skip)
276 * @ufuncs: The Unicode-functions structure
277 * @key: The user-data key
278 * @data: A pointer to the user data
279 * @destroy: (nullable): A callback to call when @data is not needed anymore
280 * @replace: Whether to replace an existing data with the same key
281 *
282 * Attaches a user-data key/data pair to the specified Unicode-functions structure.
283 *
284 * Return value: `true` if success, `false` otherwise
285 *
286 * Since: 0.9.2
287 **/
288hb_bool_t
289hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
290 hb_user_data_key_t *key,
291 void * data,
292 hb_destroy_func_t destroy,
293 hb_bool_t replace)
294{
295 return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
296}
297
298/**
299 * hb_unicode_funcs_get_user_data: (skip)
300 * @ufuncs: The Unicode-functions structure
301 * @key: The user-data key to query
302 *
303 * Fetches the user-data associated with the specified key,
304 * attached to the specified Unicode-functions structure.
305 *
306 * Return value: (transfer none): A pointer to the user data
307 *
308 * Since: 0.9.2
309 **/
310void *
311hb_unicode_funcs_get_user_data (const hb_unicode_funcs_t *ufuncs,
312 hb_user_data_key_t *key)
313{
314 return hb_object_get_user_data (ufuncs, key);
315}
316
317
318/**
319 * hb_unicode_funcs_make_immutable:
320 * @ufuncs: The Unicode-functions structure
321 *
322 * Makes the specified Unicode-functions structure
323 * immutable.
324 *
325 * Since: 0.9.2
326 **/
327void
328hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
329{
330 if (hb_object_is_immutable (ufuncs))
331 return;
332
333 hb_object_make_immutable (ufuncs);
334}
335
336/**
337 * hb_unicode_funcs_is_immutable:
338 * @ufuncs: The Unicode-functions structure
339 *
340 * Tests whether the specified Unicode-functions structure
341 * is immutable.
342 *
343 * Return value: `true` if @ufuncs is immutable, `false` otherwise
344 *
345 * Since: 0.9.2
346 **/
347hb_bool_t
348hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
349{
350 return hb_object_is_immutable (ufuncs);
351}
352
353/**
354 * hb_unicode_funcs_get_parent:
355 * @ufuncs: The Unicode-functions structure
356 *
357 * Fetches the parent of the Unicode-functions structure
358 * @ufuncs.
359 *
360 * Return value: The parent Unicode-functions structure
361 *
362 * Since: 0.9.2
363 **/
364hb_unicode_funcs_t *
365hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
366{
367 return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
368}
369
370
371#define HB_UNICODE_FUNC_IMPLEMENT(name) \
372 \
373void \
374hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
375 hb_unicode_##name##_func_t func, \
376 void *user_data, \
377 hb_destroy_func_t destroy) \
378{ \
379 if (hb_object_is_immutable (ufuncs)) \
380 goto fail; \
381 \
382 if (!func) \
383 { \
384 if (destroy) \
385 destroy (user_data); \
386 destroy = nullptr; \
387 user_data = ufuncs->parent->user_data.name; \
388 } \
389 \
390 if (ufuncs->destroy.name) \
391 ufuncs->destroy.name (ufuncs->user_data.name); \
392 \
393 if (func) \
394 ufuncs->func.name = func; \
395 else \
396 ufuncs->func.name = ufuncs->parent->func.name; \
397 ufuncs->user_data.name = user_data; \
398 ufuncs->destroy.name = destroy; \
399 return; \
400 \
401fail: \
402 if (destroy) \
403 destroy (user_data); \
404}
405
406HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
407#undef HB_UNICODE_FUNC_IMPLEMENT
408
409
410#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
411 \
412return_type \
413hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
414 hb_codepoint_t unicode) \
415{ \
416 return ufuncs->name (unicode); \
417}
418HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
419#undef HB_UNICODE_FUNC_IMPLEMENT
420
421/**
422 * hb_unicode_compose:
423 * @ufuncs: The Unicode-functions structure
424 * @a: The first Unicode code point to compose
425 * @b: The second Unicode code point to compose
426 * @ab: (out): The composition of @a, @b
427 *
428 * Fetches the composition of a sequence of two Unicode
429 * code points.
430 *
431 * Calls the composition function of the specified
432 * Unicode-functions structure @ufuncs.
433 *
434 * Return value: `true` if @a and @b composed, `false` otherwise
435 *
436 * Since: 0.9.2
437 **/
438hb_bool_t
439hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
440 hb_codepoint_t a,
441 hb_codepoint_t b,
442 hb_codepoint_t *ab)
443{
444 return ufuncs->compose (a, b, ab);
445}
446
447/**
448 * hb_unicode_decompose:
449 * @ufuncs: The Unicode-functions structure
450 * @ab: Unicode code point to decompose
451 * @a: (out): The first code point of the decomposition of @ab
452 * @b: (out): The second code point of the decomposition of @ab
453 *
454 * Fetches the decomposition of a Unicode code point.
455 *
456 * Calls the decomposition function of the specified
457 * Unicode-functions structure @ufuncs.
458 *
459 * Return value: `true` if @ab was decomposed, `false` otherwise
460 *
461 * Since: 0.9.2
462 **/
463hb_bool_t
464hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
465 hb_codepoint_t ab,
466 hb_codepoint_t *a,
467 hb_codepoint_t *b)
468{
469 return ufuncs->decompose (ab, a, b);
470}
471
472#ifndef HB_DISABLE_DEPRECATED
473/**
474 * hb_unicode_decompose_compatibility:
475 * @ufuncs: The Unicode-functions structure
476 * @u: Code point to decompose
477 * @decomposed: (out): Compatibility decomposition of @u
478 *
479 * Fetches the compatibility decomposition of a Unicode
480 * code point. Deprecated.
481 *
482 * Return value: length of @decomposed.
483 *
484 * Since: 0.9.2
485 * Deprecated: 2.0.0
486 **/
487unsigned int
488hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
489 hb_codepoint_t u,
490 hb_codepoint_t *decomposed)
491{
492 return ufuncs->decompose_compatibility (u, decomposed);
493}
494#endif
495
496
497#ifndef HB_NO_OT_SHAPE
498/* See hb-unicode.hh for details. */
499const uint8_t
500_hb_modified_combining_class[256] =
501{
502 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
503 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
504 2, 3, 4, 5, 6,
505 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
506 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
507 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
508
509 /* Hebrew */
510 HB_MODIFIED_COMBINING_CLASS_CCC10,
511 HB_MODIFIED_COMBINING_CLASS_CCC11,
512 HB_MODIFIED_COMBINING_CLASS_CCC12,
513 HB_MODIFIED_COMBINING_CLASS_CCC13,
514 HB_MODIFIED_COMBINING_CLASS_CCC14,
515 HB_MODIFIED_COMBINING_CLASS_CCC15,
516 HB_MODIFIED_COMBINING_CLASS_CCC16,
517 HB_MODIFIED_COMBINING_CLASS_CCC17,
518 HB_MODIFIED_COMBINING_CLASS_CCC18,
519 HB_MODIFIED_COMBINING_CLASS_CCC19,
520 HB_MODIFIED_COMBINING_CLASS_CCC20,
521 HB_MODIFIED_COMBINING_CLASS_CCC21,
522 HB_MODIFIED_COMBINING_CLASS_CCC22,
523 HB_MODIFIED_COMBINING_CLASS_CCC23,
524 HB_MODIFIED_COMBINING_CLASS_CCC24,
525 HB_MODIFIED_COMBINING_CLASS_CCC25,
526 HB_MODIFIED_COMBINING_CLASS_CCC26,
527
528 /* Arabic */
529 HB_MODIFIED_COMBINING_CLASS_CCC27,
530 HB_MODIFIED_COMBINING_CLASS_CCC28,
531 HB_MODIFIED_COMBINING_CLASS_CCC29,
532 HB_MODIFIED_COMBINING_CLASS_CCC30,
533 HB_MODIFIED_COMBINING_CLASS_CCC31,
534 HB_MODIFIED_COMBINING_CLASS_CCC32,
535 HB_MODIFIED_COMBINING_CLASS_CCC33,
536 HB_MODIFIED_COMBINING_CLASS_CCC34,
537 HB_MODIFIED_COMBINING_CLASS_CCC35,
538
539 /* Syriac */
540 HB_MODIFIED_COMBINING_CLASS_CCC36,
541
542 37, 38, 39,
543 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
544 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
545 80, 81, 82, 83,
546
547 /* Telugu */
548 HB_MODIFIED_COMBINING_CLASS_CCC84,
549 85, 86, 87, 88, 89, 90,
550 HB_MODIFIED_COMBINING_CLASS_CCC91,
551 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
552
553 /* Thai */
554 HB_MODIFIED_COMBINING_CLASS_CCC103,
555 104, 105, 106,
556 HB_MODIFIED_COMBINING_CLASS_CCC107,
557 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
558
559 /* Lao */
560 HB_MODIFIED_COMBINING_CLASS_CCC118,
561 119, 120, 121,
562 HB_MODIFIED_COMBINING_CLASS_CCC122,
563 123, 124, 125, 126, 127, 128,
564
565 /* Tibetan */
566 HB_MODIFIED_COMBINING_CLASS_CCC129,
567 HB_MODIFIED_COMBINING_CLASS_CCC130,
568 131,
569 HB_MODIFIED_COMBINING_CLASS_CCC132,
570 133, 134, 135, 136, 137, 138, 139,
571
572
573 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
574 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
575 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
576 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
577 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
578 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
579
580 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
581 201,
582 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
583 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
584 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
585 215,
586 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
587 217,
588 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
589 219,
590 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
591 221,
592 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
593 223,
594 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
595 225,
596 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
597 227,
598 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
599 229,
600 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
601 231,
602 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
603 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
604 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
605 235, 236, 237, 238, 239,
606 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
607 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
608 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
609};
610#endif
611
612
613/*
614 * Emoji
615 */
616#ifndef HB_NO_EMOJI_SEQUENCES
617
618#include "hb-unicode-emoji-table.hh"
619
620bool
621_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
622{
623 return _hb_emoji_is_Extended_Pictographic (cp);
624}
625#endif
626