1/*
2 * Copyright © 2009 Red Hat, Inc.
3 * Copyright © 2011 Codethink Limited
4 * Copyright © 2010,2011,2012 Google, Inc.
5 *
6 * This is part of HarfBuzz, a text shaping library.
7 *
8 * Permission is hereby granted, without written agreement and without
9 * license or royalty fees, to use, copy, modify, and distribute this
10 * software and its documentation for any purpose, provided that the
11 * above copyright notice and the following two paragraphs appear in
12 * all copies of this software.
13 *
14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * DAMAGE.
19 *
20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 *
26 * Red Hat Author(s): Behdad Esfahbod
27 * Codethink Author(s): Ryan Lortie
28 * Google Author(s): Behdad Esfahbod
29 */
30
31#include "hb.hh"
32
33#include "hb-unicode.hh"
34
35
36/**
37 * SECTION: hb-unicode
38 * @title: hb-unicode
39 * @short_description: Unicode character property access
40 * @include: hb.h
41 *
42 * Unicode functions are used to access Unicode character properties.
43 * Client can pass its own Unicode functions to HarfBuzz, or access
44 * the built-in Unicode functions that come with HarfBuzz.
45 *
 * With the Unicode functions, one can query various Unicode character
47 * properties, such as General Category, Script, Combining Class, etc.
48 **/
49
50
51/*
52 * hb_unicode_funcs_t
53 */
54
55static hb_unicode_combining_class_t
56hb_unicode_combining_class_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
57 hb_codepoint_t unicode HB_UNUSED,
58 void *user_data HB_UNUSED)
59{
60 return HB_UNICODE_COMBINING_CLASS_NOT_REORDERED;
61}
62
63static unsigned int
64hb_unicode_eastasian_width_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
65 hb_codepoint_t unicode HB_UNUSED,
66 void *user_data HB_UNUSED)
67{
68 return 1;
69}
70
71static hb_unicode_general_category_t
72hb_unicode_general_category_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
73 hb_codepoint_t unicode HB_UNUSED,
74 void *user_data HB_UNUSED)
75{
76 return HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER;
77}
78
79static hb_codepoint_t
80hb_unicode_mirroring_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
81 hb_codepoint_t unicode,
82 void *user_data HB_UNUSED)
83{
84 return unicode;
85}
86
87static hb_script_t
88hb_unicode_script_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
89 hb_codepoint_t unicode HB_UNUSED,
90 void *user_data HB_UNUSED)
91{
92 return HB_SCRIPT_UNKNOWN;
93}
94
95static hb_bool_t
96hb_unicode_compose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
97 hb_codepoint_t a HB_UNUSED,
98 hb_codepoint_t b HB_UNUSED,
99 hb_codepoint_t *ab HB_UNUSED,
100 void *user_data HB_UNUSED)
101{
102 return false;
103}
104
105static hb_bool_t
106hb_unicode_decompose_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
107 hb_codepoint_t ab HB_UNUSED,
108 hb_codepoint_t *a HB_UNUSED,
109 hb_codepoint_t *b HB_UNUSED,
110 void *user_data HB_UNUSED)
111{
112 return false;
113}
114
115
116static unsigned int
117hb_unicode_decompose_compatibility_nil (hb_unicode_funcs_t *ufuncs HB_UNUSED,
118 hb_codepoint_t u HB_UNUSED,
119 hb_codepoint_t *decomposed HB_UNUSED,
120 void *user_data HB_UNUSED)
121{
122 return 0;
123}
124
125
126extern "C" hb_unicode_funcs_t *hb_glib_get_unicode_funcs ();
127extern "C" hb_unicode_funcs_t *hb_icu_get_unicode_funcs ();
128extern "C" hb_unicode_funcs_t *hb_ucdn_get_unicode_funcs ();
129
130hb_unicode_funcs_t *
131hb_unicode_funcs_get_default ()
132{
133#if defined(HAVE_UCDN)
134 return hb_ucdn_get_unicode_funcs ();
135#elif defined(HAVE_GLIB)
136 return hb_glib_get_unicode_funcs ();
137#elif defined(HAVE_ICU) && defined(HAVE_ICU_BUILTIN)
138 return hb_icu_get_unicode_funcs ();
139#else
140#define HB_UNICODE_FUNCS_NIL 1
141 return hb_unicode_funcs_get_empty ();
142#endif
143}
144
145#if !defined(HB_NO_UNICODE_FUNCS) && defined(HB_UNICODE_FUNCS_NIL)
146#error "Could not find any Unicode functions implementation, you have to provide your own"
147#error "Consider building hb-ucdn.c. If you absolutely want to build without any, check the code."
148#endif
149
150/**
151 * hb_unicode_funcs_create: (Xconstructor)
152 * @parent: (nullable):
153 *
154 *
155 *
156 * Return value: (transfer full):
157 *
158 * Since: 0.9.2
159 **/
160hb_unicode_funcs_t *
161hb_unicode_funcs_create (hb_unicode_funcs_t *parent)
162{
163 hb_unicode_funcs_t *ufuncs;
164
165 if (!(ufuncs = hb_object_create<hb_unicode_funcs_t> ()))
166 return hb_unicode_funcs_get_empty ();
167
168 if (!parent)
169 parent = hb_unicode_funcs_get_empty ();
170
171 hb_unicode_funcs_make_immutable (parent);
172 ufuncs->parent = hb_unicode_funcs_reference (parent);
173
174 ufuncs->func = parent->func;
175
176 /* We can safely copy user_data from parent since we hold a reference
177 * onto it and it's immutable. We should not copy the destroy notifiers
178 * though. */
179 ufuncs->user_data = parent->user_data;
180
181 return ufuncs;
182}
183
184
185DEFINE_NULL_INSTANCE (hb_unicode_funcs_t) =
186{
187 HB_OBJECT_HEADER_STATIC,
188
189 nullptr, /* parent */
190 {
191#define HB_UNICODE_FUNC_IMPLEMENT(name) hb_unicode_##name##_nil,
192 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
193#undef HB_UNICODE_FUNC_IMPLEMENT
194 }
195};
196
197/**
198 * hb_unicode_funcs_get_empty:
199 *
200 *
201 *
202 * Return value: (transfer full):
203 *
204 * Since: 0.9.2
205 **/
206hb_unicode_funcs_t *
207hb_unicode_funcs_get_empty ()
208{
209 return const_cast<hb_unicode_funcs_t *> (&Null(hb_unicode_funcs_t));
210}
211
212/**
213 * hb_unicode_funcs_reference: (skip)
214 * @ufuncs: Unicode functions.
215 *
216 *
217 *
218 * Return value: (transfer full):
219 *
220 * Since: 0.9.2
221 **/
222hb_unicode_funcs_t *
223hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs)
224{
225 return hb_object_reference (ufuncs);
226}
227
228/**
229 * hb_unicode_funcs_destroy: (skip)
230 * @ufuncs: Unicode functions.
231 *
232 *
233 *
234 * Since: 0.9.2
235 **/
236void
237hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs)
238{
239 if (!hb_object_destroy (ufuncs)) return;
240
241#define HB_UNICODE_FUNC_IMPLEMENT(name) \
242 if (ufuncs->destroy.name) ufuncs->destroy.name (ufuncs->user_data.name);
243 HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
244#undef HB_UNICODE_FUNC_IMPLEMENT
245
246 hb_unicode_funcs_destroy (ufuncs->parent);
247
248 free (ufuncs);
249}
250
251/**
252 * hb_unicode_funcs_set_user_data: (skip)
253 * @ufuncs: Unicode functions.
254 * @key:
255 * @data:
256 * @destroy:
257 * @replace:
258 *
259 *
260 *
261 * Return value:
262 *
263 * Since: 0.9.2
264 **/
265hb_bool_t
266hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
267 hb_user_data_key_t *key,
268 void * data,
269 hb_destroy_func_t destroy,
270 hb_bool_t replace)
271{
272 return hb_object_set_user_data (ufuncs, key, data, destroy, replace);
273}
274
275/**
276 * hb_unicode_funcs_get_user_data: (skip)
277 * @ufuncs: Unicode functions.
278 * @key:
279 *
280 *
281 *
282 * Return value: (transfer none):
283 *
284 * Since: 0.9.2
285 **/
286void *
287hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
288 hb_user_data_key_t *key)
289{
290 return hb_object_get_user_data (ufuncs, key);
291}
292
293
294/**
295 * hb_unicode_funcs_make_immutable:
296 * @ufuncs: Unicode functions.
297 *
298 *
299 *
300 * Since: 0.9.2
301 **/
302void
303hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs)
304{
305 if (hb_object_is_immutable (ufuncs))
306 return;
307
308 hb_object_make_immutable (ufuncs);
309}
310
311/**
312 * hb_unicode_funcs_is_immutable:
313 * @ufuncs: Unicode functions.
314 *
315 *
316 *
317 * Return value:
318 *
319 * Since: 0.9.2
320 **/
321hb_bool_t
322hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs)
323{
324 return hb_object_is_immutable (ufuncs);
325}
326
327/**
328 * hb_unicode_funcs_get_parent:
329 * @ufuncs: Unicode functions.
330 *
331 *
332 *
333 * Return value:
334 *
335 * Since: 0.9.2
336 **/
337hb_unicode_funcs_t *
338hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs)
339{
340 return ufuncs->parent ? ufuncs->parent : hb_unicode_funcs_get_empty ();
341}
342
343
344#define HB_UNICODE_FUNC_IMPLEMENT(name) \
345 \
346void \
347hb_unicode_funcs_set_##name##_func (hb_unicode_funcs_t *ufuncs, \
348 hb_unicode_##name##_func_t func, \
349 void *user_data, \
350 hb_destroy_func_t destroy) \
351{ \
352 if (hb_object_is_immutable (ufuncs)) \
353 return; \
354 \
355 if (ufuncs->destroy.name) \
356 ufuncs->destroy.name (ufuncs->user_data.name); \
357 \
358 if (func) { \
359 ufuncs->func.name = func; \
360 ufuncs->user_data.name = user_data; \
361 ufuncs->destroy.name = destroy; \
362 } else { \
363 ufuncs->func.name = ufuncs->parent->func.name; \
364 ufuncs->user_data.name = ufuncs->parent->user_data.name; \
365 ufuncs->destroy.name = nullptr; \
366 } \
367}
368
369HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS
370#undef HB_UNICODE_FUNC_IMPLEMENT
371
372
373#define HB_UNICODE_FUNC_IMPLEMENT(return_type, name) \
374 \
375return_type \
376hb_unicode_##name (hb_unicode_funcs_t *ufuncs, \
377 hb_codepoint_t unicode) \
378{ \
379 return ufuncs->name (unicode); \
380}
381HB_UNICODE_FUNCS_IMPLEMENT_CALLBACKS_SIMPLE
382#undef HB_UNICODE_FUNC_IMPLEMENT
383
384/**
385 * hb_unicode_compose:
386 * @ufuncs: Unicode functions.
387 * @a:
388 * @b:
389 * @ab: (out):
390 *
391 *
392 *
393 * Return value:
394 *
395 * Since: 0.9.2
396 **/
397hb_bool_t
398hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
399 hb_codepoint_t a,
400 hb_codepoint_t b,
401 hb_codepoint_t *ab)
402{
403 return ufuncs->compose (a, b, ab);
404}
405
406/**
407 * hb_unicode_decompose:
408 * @ufuncs: Unicode functions.
409 * @ab:
410 * @a: (out):
411 * @b: (out):
412 *
413 *
414 *
415 * Return value:
416 *
417 * Since: 0.9.2
418 **/
419hb_bool_t
420hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
421 hb_codepoint_t ab,
422 hb_codepoint_t *a,
423 hb_codepoint_t *b)
424{
425 return ufuncs->decompose (ab, a, b);
426}
427
428/**
429 * hb_unicode_decompose_compatibility:
430 * @ufuncs: Unicode functions.
431 * @u:
432 * @decomposed: (out):
433 *
434 *
435 *
436 * Return value:
437 *
438 * Since: 0.9.2
439 * Deprecated: 2.0.0
440 **/
441unsigned int
442hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
443 hb_codepoint_t u,
444 hb_codepoint_t *decomposed)
445{
446 return ufuncs->decompose_compatibility (u, decomposed);
447}
448
449
450/* See hb-unicode.hh for details. */
451const uint8_t
452_hb_modified_combining_class[256] =
453{
454 0, /* HB_UNICODE_COMBINING_CLASS_NOT_REORDERED */
455 1, /* HB_UNICODE_COMBINING_CLASS_OVERLAY */
456 2, 3, 4, 5, 6,
457 7, /* HB_UNICODE_COMBINING_CLASS_NUKTA */
458 8, /* HB_UNICODE_COMBINING_CLASS_KANA_VOICING */
459 9, /* HB_UNICODE_COMBINING_CLASS_VIRAMA */
460
461 /* Hebrew */
462 HB_MODIFIED_COMBINING_CLASS_CCC10,
463 HB_MODIFIED_COMBINING_CLASS_CCC11,
464 HB_MODIFIED_COMBINING_CLASS_CCC12,
465 HB_MODIFIED_COMBINING_CLASS_CCC13,
466 HB_MODIFIED_COMBINING_CLASS_CCC14,
467 HB_MODIFIED_COMBINING_CLASS_CCC15,
468 HB_MODIFIED_COMBINING_CLASS_CCC16,
469 HB_MODIFIED_COMBINING_CLASS_CCC17,
470 HB_MODIFIED_COMBINING_CLASS_CCC18,
471 HB_MODIFIED_COMBINING_CLASS_CCC19,
472 HB_MODIFIED_COMBINING_CLASS_CCC20,
473 HB_MODIFIED_COMBINING_CLASS_CCC21,
474 HB_MODIFIED_COMBINING_CLASS_CCC22,
475 HB_MODIFIED_COMBINING_CLASS_CCC23,
476 HB_MODIFIED_COMBINING_CLASS_CCC24,
477 HB_MODIFIED_COMBINING_CLASS_CCC25,
478 HB_MODIFIED_COMBINING_CLASS_CCC26,
479
480 /* Arabic */
481 HB_MODIFIED_COMBINING_CLASS_CCC27,
482 HB_MODIFIED_COMBINING_CLASS_CCC28,
483 HB_MODIFIED_COMBINING_CLASS_CCC29,
484 HB_MODIFIED_COMBINING_CLASS_CCC30,
485 HB_MODIFIED_COMBINING_CLASS_CCC31,
486 HB_MODIFIED_COMBINING_CLASS_CCC32,
487 HB_MODIFIED_COMBINING_CLASS_CCC33,
488 HB_MODIFIED_COMBINING_CLASS_CCC34,
489 HB_MODIFIED_COMBINING_CLASS_CCC35,
490
491 /* Syriac */
492 HB_MODIFIED_COMBINING_CLASS_CCC36,
493
494 37, 38, 39,
495 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
496 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
497 80, 81, 82, 83,
498
499 /* Telugu */
500 HB_MODIFIED_COMBINING_CLASS_CCC84,
501 85, 86, 87, 88, 89, 90,
502 HB_MODIFIED_COMBINING_CLASS_CCC91,
503 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102,
504
505 /* Thai */
506 HB_MODIFIED_COMBINING_CLASS_CCC103,
507 104, 105, 106,
508 HB_MODIFIED_COMBINING_CLASS_CCC107,
509 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
510
511 /* Lao */
512 HB_MODIFIED_COMBINING_CLASS_CCC118,
513 119, 120, 121,
514 HB_MODIFIED_COMBINING_CLASS_CCC122,
515 123, 124, 125, 126, 127, 128,
516
517 /* Tibetan */
518 HB_MODIFIED_COMBINING_CLASS_CCC129,
519 HB_MODIFIED_COMBINING_CLASS_CCC130,
520 131,
521 HB_MODIFIED_COMBINING_CLASS_CCC132,
522 133, 134, 135, 136, 137, 138, 139,
523
524
525 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
526 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
527 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
528 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
529 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
530 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
531
532 200, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT */
533 201,
534 202, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW */
535 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213,
536 214, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE */
537 215,
538 216, /* HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT */
539 217,
540 218, /* HB_UNICODE_COMBINING_CLASS_BELOW_LEFT */
541 219,
542 220, /* HB_UNICODE_COMBINING_CLASS_BELOW */
543 221,
544 222, /* HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT */
545 223,
546 224, /* HB_UNICODE_COMBINING_CLASS_LEFT */
547 225,
548 226, /* HB_UNICODE_COMBINING_CLASS_RIGHT */
549 227,
550 228, /* HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT */
551 229,
552 230, /* HB_UNICODE_COMBINING_CLASS_ABOVE */
553 231,
554 232, /* HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT */
555 233, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW */
556 234, /* HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE */
557 235, 236, 237, 238, 239,
558 240, /* HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT */
559 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
560 255, /* HB_UNICODE_COMBINING_CLASS_INVALID */
561};
562
563
564/*
565 * Emoji
566 */
567
568#include "hb-unicode-emoji-table.hh"
569
570bool
571_hb_unicode_is_emoji_Extended_Pictographic (hb_codepoint_t cp)
572{
573 return hb_bsearch (&cp, _hb_unicode_emoji_Extended_Pictographic_table,
574 ARRAY_LENGTH (_hb_unicode_emoji_Extended_Pictographic_table),
575 sizeof (hb_unicode_range_t),
576 hb_unicode_range_t::cmp);
577}
578