1/*
2 * Copyright © 2009 Red Hat, Inc.
3 * Copyright © 2011 Codethink Limited
4 * Copyright © 2010,2011,2012 Google, Inc.
5 *
6 * This is part of HarfBuzz, a text shaping library.
7 *
8 * Permission is hereby granted, without written agreement and without
9 * license or royalty fees, to use, copy, modify, and distribute this
10 * software and its documentation for any purpose, provided that the
11 * above copyright notice and the following two paragraphs appear in
12 * all copies of this software.
13 *
14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * DAMAGE.
19 *
20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 *
26 * Red Hat Author(s): Behdad Esfahbod
27 * Codethink Author(s): Ryan Lortie
28 * Google Author(s): Behdad Esfahbod
29 */
30
31#include "hb.hh"
32
33#include "hb-unicode.hh"
34
35
36/**
37 * SECTION: hb-unicode
38 * @title: hb-unicode
39 * @short_description: Unicode character property access
40 * @include: hb.h
41 *
 * Unicode functions are used to access Unicode character properties.
 * Clients can pass their own Unicode functions to HarfBuzz, or access
 * the built-in Unicode functions that come with HarfBuzz.
 *
 * With the Unicode functions, one can query various Unicode character
 * properties, such as General Category, Script, Combining Class, etc.
48 **/
49
50
51/*
52 * hb_unicode_funcs_t
53 */
54
55static hb_unicode_combining_class_t
56hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
57 hb_codepoint_t unicode HB_UNUSED,
58 void *user_data HB_UNUSED)
59{
60 return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
61}
62
63#ifndef HB_DISABLE_DEPRECATED
64static unsigned int
65hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
66 hb_codepoint_t unicode HB_UNUSED,
67 void *user_data HB_UNUSED)
68{
69 return 1;
70}
71#endif
72
73static hb_unicode_general_category_t
74hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
75 hb_codepoint_t unicode HB_UNUSED,
76 void *user_data HB_UNUSED)
77{
78 return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
79}
80
81static hb_codepoint_t
82hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
83 hb_codepoint_t unicode,
84 void *user_data HB_UNUSED)
85{
86 return unicode;
87}
88
89static hb_script_t
90hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
91 hb_codepoint_t unicode HB_UNUSED,
92 void *user_data HB_UNUSED)
93{
94 return HB_SCRIPT_UNKNOWN;
95}
96
97static hb_bool_t
98hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
99 hb_codepoint_t a HB_UNUSED,
100 hb_codepoint_t b HB_UNUSED,
101 hb_codepoint_t *ab HB_UNUSED,
102 void *user_data HB_UNUSED)
103{
104 return false;
105}
106
107static hb_bool_t
108hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
109 hb_codepoint_t ab HB_UNUSED,
110 hb_codepoint_t *a HB_UNUSED,
111 hb_codepoint_t *b HB_UNUSED,
112 void *user_data HB_UNUSED)
113{
114 return false;
115}
116
117
118#ifndef HB_DISABLE_DEPRECATED
119static unsigned int
120hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
121 hb_codepoint_t u HB_UNUSED,
122 hb_codepoint_t *decomposed HB_UNUSED,
123 void *user_data HB_UNUSED)
124{
125 return 0;
126}
127#endif
128
129#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
130#include "hb-glib.h"
131#endif
132#if !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
133#include "hb-icu.h"
134#endif
135
136hb_unicode_funcs_t *
137hb_unicode_funcs_get_default ()
138{
139#if !defined(HB_NO_UNICODE_FUNCS) && !defined(HB_NO_UCD)
140 return hb_ucd_get_unicode_funcs ();
141#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_GLIB)
142 return hb_glib_get_unicode_funcs ();
143#elif !defined(HB_NO_UNICODE_FUNCS) && defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
144 return hb_icu_get_unicode_funcs ();
145#else
146#define HB_UNICODE_FUNCS_NIL 1
147 return hb_unicode_funcs_get_empty ();
148#endif
149}
150
151#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
152#error "Could not find any Unicode functions implementation, you have to provide your own"
153#error "Consider building hb-ucd.cc. If you absolutely want to build without any, check the code."
154#endif
155
156/**
157 * hb_unicode_funcs_create: (Xconstructor)
158 * @parent: (nullable):
159 *
160 *
161 *
162 * Return value: (transfer full):
163 *
164 * Since: 0.9.2
165 **/
166hb_unicode_funcs_t *
167hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
168{
169 hb_unicode_funcs_t *ufuncs;
170
171 if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
172 return hb_unicode_funcs_get_empty ();
173
174 if (!parent)
175 parent = hb_unicode_funcs_get_empty ();
176
177 hb_unicode_funcs_make_immutable (parent);
178 ufuncs->parent = hb_unicode_funcs_reference (parent);
179
180 ufuncs->func = parent->func;
181
182 /* We can safely copy user_data from parent since we hold a reference
183 * onto it and it's immutable. We should not copy the destroy notifiers
184 * though. */
185 ufuncs->user_data = parent->user_data;
186
187 return ufuncs;
188}
189
190
191DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
192{
193 HB_OBJECT_HEADER_STATIC,
194
195 nullptr, /* parent */
196 {
197#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
198 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
199#undef HB_UNICODE_FUNC_IMPLEMENT
200 }
201};
202
203/**
204 * hb_unicode_funcs_get_empty:
205 *
206 *
207 *
208 * Return value: (transfer full):
209 *
210 * Since: 0.9.2
211 **/
212hb_unicode_funcs_t *
213hb_unicode_funcs_get_empty ()
214{
215 return const_cast<hb_unicode_funcs_t *> (&Null (hb_unicode_funcs_t));
216}
217
218/**
219 * hb_unicode_funcs_reference: (skip)
220 * @ufuncs: Unicode functions.
221 *
222 *
223 *
224 * Return value: (transfer full):
225 *
226 * Since: 0.9.2
227 **/
228hb_unicode_funcs_t *
229hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
230{
231 return hb_object_reference (ufuncs);
232}
233
234/**
235 * hb_unicode_funcs_destroy: (skip)
236 * @ufuncs: Unicode functions.
237 *
238 *
239 *
240 * Since: 0.9.2
241 **/
242void
243hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
244{
245 if (!hb_object_destroy (ufuncs)) return;
246
247#define HB_UNICODE_FUNC_IMPLEMENT(name) \
248 if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
249 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
250#undef HB_UNICODE_FUNC_IMPLEMENT
251
252 hb_unicode_funcs_destroy (ufuncs->parent);
253
254 free (ufuncs);
255}
256
257/**
258 * hb_unicode_funcs_set_user_data: (skip)
259 * @ufuncs: Unicode functions.
260 * @key:
261 * @data:
262 * @destroy:
263 * @replace:
264 *
265 *
266 *
267 * Return value:
268 *
269 * Since: 0.9.2
270 **/
271hb_bool_t
272hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
273 hb_user_data_key_t *key,
274 void * data,
275 hb_destroy_func_t destroy,
276 hb_bool_t replace)
277{
278 return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
279}
280
281/**
282 * hb_unicode_funcs_get_user_data: (skip)
283 * @ufuncs: Unicode functions.
284 * @key:
285 *
286 *
287 *
288 * Return value: (transfer none):
289 *
290 * Since: 0.9.2
291 **/
292void *
293hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
294 hb_user_data_key_t *key)
295{
296 return hb_object_get_user_data (ufuncs, key);
297}
298
299
300/**
301 * hb_unicode_funcs_make_immutable:
302 * @ufuncs: Unicode functions.
303 *
304 *
305 *
306 * Since: 0.9.2
307 **/
308void
309hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
310{
311 if (hb_object_is_immutable (ufuncs))
312 return;
313
314 hb_object_make_immutable (ufuncs);
315}
316
317/**
318 * hb_unicode_funcs_is_immutable:
319 * @ufuncs: Unicode functions.
320 *
321 *
322 *
323 * Return value:
324 *
325 * Since: 0.9.2
326 **/
327hb_bool_t
328hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
329{
330 return hb_object_is_immutable (ufuncs);
331}
332
333/**
334 * hb_unicode_funcs_get_parent:
335 * @ufuncs: Unicode functions.
336 *
337 *
338 *
339 * Return value:
340 *
341 * Since: 0.9.2
342 **/
343hb_unicode_funcs_t *
344hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
345{
346 return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
347}
348
349
350#define HB_UNICODE_FUNC_IMPLEMENT(name) \
351 \
352void \
353hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
354 hb_unicode_##name##_func_t func, \
355 void *user_data, \
356 hb_destroy_func_t destroy) \
357{ \
358 if (hb_object_is_immutable (ufuncs)) \
359 return; \
360 \
361 if (ufuncs->destroy.name) \
362 ufuncs->destroy.name (ufuncs->user_data.name); \
363 \
364 if (func) { \
365 ufuncs->func.name = func; \
366 ufuncs->user_data.name = user_data; \
367 ufuncs->destroy.name = destroy; \
368 } else { \
369 ufuncs->func.name = ufuncs->parent->func.name; \
370 ufuncs->user_data.name = ufuncs->parent->user_data.name; \
371 ufuncs->destroy.name = nullptr; \
372 } \
373}
374
375HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
376#undef HB_UNICODE_FUNC_IMPLEMENT
377
378
379#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
380 \
381return_type \
382hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
383 hb_codepoint_t unicode) \
384{ \
385 return ufuncs->name (unicode); \
386}
387HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
388#undef HB_UNICODE_FUNC_IMPLEMENT
389
390/**
391 * hb_unicode_compose:
392 * @ufuncs: Unicode functions.
393 * @a:
394 * @b:
395 * @ab: (out):
396 *
397 *
398 *
399 * Return value:
400 *
401 * Since: 0.9.2
402 **/
403hb_bool_t
404hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
405 hb_codepoint_t a,
406 hb_codepoint_t b,
407 hb_codepoint_t *ab)
408{
409 return ufuncs->compose (a, b, ab);
410}
411
412/**
413 * hb_unicode_decompose:
414 * @ufuncs: Unicode functions.
415 * @ab:
416 * @a: (out):
417 * @b: (out):
418 *
419 *
420 *
421 * Return value:
422 *
423 * Since: 0.9.2
424 **/
425hb_bool_t
426hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
427 hb_codepoint_t ab,
428 hb_codepoint_t *a,
429 hb_codepoint_t *b)
430{
431 return ufuncs->decompose (ab, a, b);
432}
433
434#ifndef HB_DISABLE_DEPRECATED
435/**
436 * hb_unicode_decompose_compatibility:
437 * @ufuncs: Unicode functions.
438 * @u:
439 * @decomposed: (out):
440 *
441 *
442 *
443 * Return value:
444 *
445 * Since: 0.9.2
446 * Deprecated: 2.0.0
447 **/
448unsigned int
449hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
450 hb_codepoint_t u,
451 hb_codepoint_t *decomposed)
452{
453 return ufuncs->decompose_compatibility (u, decomposed);
454}
455#endif
456
457
458#ifndef HB_NO_OT_SHAPE
459/* See hb-unicode.hh for details. */
460const uint8_t
461_hb_modified_combining_class[256] =
462{
463 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
464 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
465 2, 3, 4, 5, 6,
466 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
467 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
468 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
469
470 /* Hebrew */
471 HB_MODIFIED_COMBINING_CLASS_CCC10,
472 HB_MODIFIED_COMBINING_CLASS_CCC11,
473 HB_MODIFIED_COMBINING_CLASS_CCC12,
474 HB_MODIFIED_COMBINING_CLASS_CCC13,
475 HB_MODIFIED_COMBINING_CLASS_CCC14,
476 HB_MODIFIED_COMBINING_CLASS_CCC15,
477 HB_MODIFIED_COMBINING_CLASS_CCC16,
478 HB_MODIFIED_COMBINING_CLASS_CCC17,
479 HB_MODIFIED_COMBINING_CLASS_CCC18,
480 HB_MODIFIED_COMBINING_CLASS_CCC19,
481 HB_MODIFIED_COMBINING_CLASS_CCC20,
482 HB_MODIFIED_COMBINING_CLASS_CCC21,
483 HB_MODIFIED_COMBINING_CLASS_CCC22,
484 HB_MODIFIED_COMBINING_CLASS_CCC23,
485 HB_MODIFIED_COMBINING_CLASS_CCC24,
486 HB_MODIFIED_COMBINING_CLASS_CCC25,
487 HB_MODIFIED_COMBINING_CLASS_CCC26,
488
489 /* Arabic */
490 HB_MODIFIED_COMBINING_CLASS_CCC27,
491 HB_MODIFIED_COMBINING_CLASS_CCC28,
492 HB_MODIFIED_COMBINING_CLASS_CCC29,
493 HB_MODIFIED_COMBINING_CLASS_CCC30,
494 HB_MODIFIED_COMBINING_CLASS_CCC31,
495 HB_MODIFIED_COMBINING_CLASS_CCC32,
496 HB_MODIFIED_COMBINING_CLASS_CCC33,
497 HB_MODIFIED_COMBINING_CLASS_CCC34,
498 HB_MODIFIED_COMBINING_CLASS_CCC35,
499
500 /* Syriac */
501 HB_MODIFIED_COMBINING_CLASS_CCC36,
502
503 37, 38, 39,
504 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
505 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
506 80, 81, 82, 83,
507
508 /* Telugu */
509 HB_MODIFIED_COMBINING_CLASS_CCC84,
510 85, 86, 87, 88, 89, 90,
511 HB_MODIFIED_COMBINING_CLASS_CCC91,
512 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
513
514 /* Thai */
515 HB_MODIFIED_COMBINING_CLASS_CCC103,
516 104, 105, 106,
517 HB_MODIFIED_COMBINING_CLASS_CCC107,
518 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
519
520 /* Lao */
521 HB_MODIFIED_COMBINING_CLASS_CCC118,
522 119, 120, 121,
523 HB_MODIFIED_COMBINING_CLASS_CCC122,
524 123, 124, 125, 126, 127, 128,
525
526 /* Tibetan */
527 HB_MODIFIED_COMBINING_CLASS_CCC129,
528 HB_MODIFIED_COMBINING_CLASS_CCC130,
529 131,
530 HB_MODIFIED_COMBINING_CLASS_CCC132,
531 133, 134, 135, 136, 137, 138, 139,
532
533
534 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
535 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
536 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
537 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
538 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
539 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
540
541 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
542 201,
543 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
544 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
545 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
546 215,
547 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
548 217,
549 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
550 219,
551 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
552 221,
553 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
554 223,
555 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
556 225,
557 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
558 227,
559 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
560 229,
561 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
562 231,
563 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
564 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
565 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
566 235, 236, 237, 238, 239,
567 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
568 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
569 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
570};
571#endif
572
573
574/*
575 * Emoji
576 */
577#ifndef HB_NO_EMOJI_SEQUENCES
578
579#include "hb-unicode-emoji-table.hh"
580
581bool
582_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
583{
584 return _hb_emoji_is_Extended_Pictographic (cp);
585}
586#endif
587