1/*
2 * Copyright © 2009 Red Hat, Inc.
3 * Copyright © 2011 Codethink Limited
4 * Copyright © 2011,2012 Google, Inc.
5 *
6 * This is part of HarfBuzz, a text shaping library.
7 *
8 * Permission is hereby granted, without written agreement and without
9 * license or royalty fees, to use, copy, modify, and distribute this
10 * software and its documentation for any purpose, provided that the
11 * above copyright notice and the following two paragraphs appear in
12 * all copies of this software.
13 *
14 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
15 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
16 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
17 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
18 * DAMAGE.
19 *
20 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
21 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
22 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
23 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
24 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
25 *
26 * Red Hat Author(s): Behdad Esfahbod
27 * Codethink Author(s): Ryan Lortie
28 * Google Author(s): Behdad Esfahbod
29 */
30
31#ifndef HB_H_IN
32#error "Include <hb.h> instead."
33#endif
34
35#ifndef HB_UNICODE_H
36#define HB_UNICODE_H
37
38#include "hb-common.h"
39
40HB_BEGIN_DECLS
41
42
/**
 * HB_UNICODE_MAX:
 *
 * Upper bound of the Unicode code space (U+10FFFF), spelled with a `u`
 * suffix so the constant has an unsigned type.
 *
 * Since: 1.9.0
 */
#define HB_UNICODE_MAX 0x10FFFFu
49
50
/* hb_unicode_general_category_t */

/*
 * Unicode Character Database property: General_Category (gc).
 *
 * The trailing comment on each enumerator is its two-letter gc abbreviation.
 * Enumerators are numbered consecutively from 0 (Cc) to 29 (Zs); the values
 * are written out explicitly so the numbering is visible at a glance.
 */
typedef enum
{
  /* C*: Other */
  HB_UNICODE_GENERAL_CATEGORY_CONTROL             =  0, /* Cc */
  HB_UNICODE_GENERAL_CATEGORY_FORMAT              =  1, /* Cf */
  HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED          =  2, /* Cn */
  HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE         =  3, /* Co */
  HB_UNICODE_GENERAL_CATEGORY_SURROGATE           =  4, /* Cs */

  /* L*: Letter */
  HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER    =  5, /* Ll */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER     =  6, /* Lm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER        =  7, /* Lo */
  HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER    =  8, /* Lt */
  HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER    =  9, /* Lu */

  /* M*: Mark */
  HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK        = 10, /* Mc */
  HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK      = 11, /* Me */
  HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK    = 12, /* Mn */

  /* N*: Number */
  HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER      = 13, /* Nd */
  HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER       = 14, /* Nl */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER        = 15, /* No */

  /* P*: Punctuation */
  HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION = 16, /* Pc */
  HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION    = 17, /* Pd */
  HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION   = 18, /* Pe */
  HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION   = 19, /* Pf */
  HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION = 20, /* Pi */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION   = 21, /* Po */
  HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION    = 22, /* Ps */

  /* S*: Symbol */
  HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL     = 23, /* Sc */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL     = 24, /* Sk */
  HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL         = 25, /* Sm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL        = 26, /* So */

  /* Z*: Separator */
  HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR      = 27, /* Zl */
  HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR = 28, /* Zp */
  HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR     = 29  /* Zs */
} hb_unicode_general_category_t;
87
/* hb_unicode_combining_class_t */

/* Note: newer versions of Unicode may add new values.  Clients should be ready to handle
 * any value in the 0..254 range being returned from hb_unicode_combining_class().
 */

/*
 * Unicode Character Database property: Canonical_Combining_Class (ccc).
 *
 * Enumerators named CCC<n> carry the numeric class <n>.  The single exception
 * is HB_UNICODE_COMBINING_CLASS_CCC133, whose value is 132; see the comment
 * next to it.
 */
typedef enum
{
  HB_UNICODE_COMBINING_CLASS_NOT_REORDERED        = 0,
  HB_UNICODE_COMBINING_CLASS_OVERLAY              = 1,
  HB_UNICODE_COMBINING_CLASS_NUKTA                = 7,
  HB_UNICODE_COMBINING_CLASS_KANA_VOICING         = 8,
  HB_UNICODE_COMBINING_CLASS_VIRAMA               = 9,

  /* Hebrew */
  HB_UNICODE_COMBINING_CLASS_CCC10                = 10,
  HB_UNICODE_COMBINING_CLASS_CCC11                = 11,
  HB_UNICODE_COMBINING_CLASS_CCC12                = 12,
  HB_UNICODE_COMBINING_CLASS_CCC13                = 13,
  HB_UNICODE_COMBINING_CLASS_CCC14                = 14,
  HB_UNICODE_COMBINING_CLASS_CCC15                = 15,
  HB_UNICODE_COMBINING_CLASS_CCC16                = 16,
  HB_UNICODE_COMBINING_CLASS_CCC17                = 17,
  HB_UNICODE_COMBINING_CLASS_CCC18                = 18,
  HB_UNICODE_COMBINING_CLASS_CCC19                = 19,
  HB_UNICODE_COMBINING_CLASS_CCC20                = 20,
  HB_UNICODE_COMBINING_CLASS_CCC21                = 21,
  HB_UNICODE_COMBINING_CLASS_CCC22                = 22,
  HB_UNICODE_COMBINING_CLASS_CCC23                = 23,
  HB_UNICODE_COMBINING_CLASS_CCC24                = 24,
  HB_UNICODE_COMBINING_CLASS_CCC25                = 25,
  HB_UNICODE_COMBINING_CLASS_CCC26                = 26,

  /* Arabic */
  HB_UNICODE_COMBINING_CLASS_CCC27                = 27,
  HB_UNICODE_COMBINING_CLASS_CCC28                = 28,
  HB_UNICODE_COMBINING_CLASS_CCC29                = 29,
  HB_UNICODE_COMBINING_CLASS_CCC30                = 30,
  HB_UNICODE_COMBINING_CLASS_CCC31                = 31,
  HB_UNICODE_COMBINING_CLASS_CCC32                = 32,
  HB_UNICODE_COMBINING_CLASS_CCC33                = 33,
  HB_UNICODE_COMBINING_CLASS_CCC34                = 34,
  HB_UNICODE_COMBINING_CLASS_CCC35                = 35,

  /* Syriac */
  HB_UNICODE_COMBINING_CLASS_CCC36                = 36,

  /* Telugu */
  HB_UNICODE_COMBINING_CLASS_CCC84                = 84,
  HB_UNICODE_COMBINING_CLASS_CCC91                = 91,

  /* Thai */
  HB_UNICODE_COMBINING_CLASS_CCC103               = 103,
  HB_UNICODE_COMBINING_CLASS_CCC107               = 107,

  /* Lao */
  HB_UNICODE_COMBINING_CLASS_CCC118               = 118,
  HB_UNICODE_COMBINING_CLASS_CCC122               = 122,

  /* Tibetan */
  HB_UNICODE_COMBINING_CLASS_CCC129               = 129,
  HB_UNICODE_COMBINING_CLASS_CCC130               = 130,
  HB_UNICODE_COMBINING_CLASS_CCC132               = 132,
  /* Misnamed legacy alias: the identifier says 133 but the value is 132.
   * The value is deliberately left untouched so that existing callers keep
   * compiling and keep comparing equal to what the callbacks return.
   * New code should spell this HB_UNICODE_COMBINING_CLASS_CCC132. */
  HB_UNICODE_COMBINING_CLASS_CCC133               = 132,


  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT  = 200,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW       = 202,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE       = 214,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216,
  HB_UNICODE_COMBINING_CLASS_BELOW_LEFT           = 218,
  HB_UNICODE_COMBINING_CLASS_BELOW                = 220,
  HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT          = 222,
  HB_UNICODE_COMBINING_CLASS_LEFT                 = 224,
  HB_UNICODE_COMBINING_CLASS_RIGHT                = 226,
  HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT           = 228,
  HB_UNICODE_COMBINING_CLASS_ABOVE                = 230,
  HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT          = 232,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW         = 233,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE         = 234,

  HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT       = 240,

  /* Lies outside the 0..254 range mentioned in the note above. */
  HB_UNICODE_COMBINING_CLASS_INVALID              = 255
} hb_unicode_combining_class_t;
173
174
175/*
176 * hb_unicode_funcs_t
177 */
178
179typedef struct hb_unicode_funcs_t hb_unicode_funcs_t;
180
181
182/*
183 * just give me the best implementation you've got there.
184 */
185HB_EXTERN hb_unicode_funcs_t *
186hb_unicode_funcs_get_default (void);
187
188
189HB_EXTERN hb_unicode_funcs_t *
190hb_unicode_funcs_create (hb_unicode_funcs_t *parent);
191
192HB_EXTERN hb_unicode_funcs_t *
193hb_unicode_funcs_get_empty (void);
194
195HB_EXTERN hb_unicode_funcs_t *
196hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs);
197
198HB_EXTERN void
199hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs);
200
201HB_EXTERN hb_bool_t
202hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs,
203 hb_user_data_key_t *key,
204 void * data,
205 hb_destroy_func_t destroy,
206 hb_bool_t replace);
207
208
209HB_EXTERN void *
210hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs,
211 hb_user_data_key_t *key);
212
213
214HB_EXTERN void
215hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs);
216
217HB_EXTERN hb_bool_t
218hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs);
219
220HB_EXTERN hb_unicode_funcs_t *
221hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs);
222
223
224/*
225 * funcs
226 */
227
228/* typedefs */
229
230typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs,
231 hb_codepoint_t unicode,
232 void *user_data);
233typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs,
234 hb_codepoint_t unicode,
235 void *user_data);
236typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs,
237 hb_codepoint_t unicode,
238 void *user_data);
239typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs,
240 hb_codepoint_t unicode,
241 void *user_data);
242typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs,
243 hb_codepoint_t unicode,
244 void *user_data);
245
246typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs,
247 hb_codepoint_t a,
248 hb_codepoint_t b,
249 hb_codepoint_t *ab,
250 void *user_data);
251typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs,
252 hb_codepoint_t ab,
253 hb_codepoint_t *a,
254 hb_codepoint_t *b,
255 void *user_data);
256
257/**
258 * hb_unicode_decompose_compatibility_func_t:
259 * @ufuncs: a Unicode function structure
260 * @u: codepoint to decompose
261 * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into
262 * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func()
263 *
264 * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed.
265 * The complete length of the decomposition will be returned.
266 *
267 * If @u has no compatibility decomposition, zero should be returned.
268 *
269 * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any
270 * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations
271 * of this function type must ensure that they do not write past the provided array.
272 *
273 * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available.
274 */
275typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs,
276 hb_codepoint_t u,
277 hb_codepoint_t *decomposed,
278 void *user_data);
279
/* Buffer length, in codepoints, for a compatibility decomposition:
 * 18 for the decomposition itself plus 1 for a terminating 0.
 * See Unicode 6.1 for details on the maximum decomposition length. */
#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18 + 1) /* codepoints */
282
283/* setters */
284
285/**
286 * hb_unicode_funcs_set_combining_class_func:
287 * @ufuncs: a Unicode function structure
288 * @func: (closure user_data) (destroy destroy) (scope notified):
289 * @user_data:
290 * @destroy:
291 *
292 *
293 *
294 * Since: 0.9.2
295 **/
296HB_EXTERN void
297hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs,
298 hb_unicode_combining_class_func_t func,
299 void *user_data, hb_destroy_func_t destroy);
300
301/**
302 * hb_unicode_funcs_set_eastasian_width_func:
303 * @ufuncs: a Unicode function structure
304 * @func: (closure user_data) (destroy destroy) (scope notified):
305 * @user_data:
306 * @destroy:
307 *
308 *
309 *
310 * Since: 0.9.2
311 **/
312HB_EXTERN void
313hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs,
314 hb_unicode_eastasian_width_func_t func,
315 void *user_data, hb_destroy_func_t destroy);
316
317/**
318 * hb_unicode_funcs_set_general_category_func:
319 * @ufuncs: a Unicode function structure
320 * @func: (closure user_data) (destroy destroy) (scope notified):
321 * @user_data:
322 * @destroy:
323 *
324 *
325 *
326 * Since: 0.9.2
327 **/
328HB_EXTERN void
329hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs,
330 hb_unicode_general_category_func_t func,
331 void *user_data, hb_destroy_func_t destroy);
332
333/**
334 * hb_unicode_funcs_set_mirroring_func:
335 * @ufuncs: a Unicode function structure
336 * @func: (closure user_data) (destroy destroy) (scope notified):
337 * @user_data:
338 * @destroy:
339 *
340 *
341 *
342 * Since: 0.9.2
343 **/
344HB_EXTERN void
345hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs,
346 hb_unicode_mirroring_func_t func,
347 void *user_data, hb_destroy_func_t destroy);
348
349/**
350 * hb_unicode_funcs_set_script_func:
351 * @ufuncs: a Unicode function structure
352 * @func: (closure user_data) (destroy destroy) (scope notified):
353 * @user_data:
354 * @destroy:
355 *
356 *
357 *
358 * Since: 0.9.2
359 **/
360HB_EXTERN void
361hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs,
362 hb_unicode_script_func_t func,
363 void *user_data, hb_destroy_func_t destroy);
364
365/**
366 * hb_unicode_funcs_set_compose_func:
367 * @ufuncs: a Unicode function structure
368 * @func: (closure user_data) (destroy destroy) (scope notified):
369 * @user_data:
370 * @destroy:
371 *
372 *
373 *
374 * Since: 0.9.2
375 **/
376HB_EXTERN void
377hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs,
378 hb_unicode_compose_func_t func,
379 void *user_data, hb_destroy_func_t destroy);
380
381/**
382 * hb_unicode_funcs_set_decompose_func:
383 * @ufuncs: a Unicode function structure
384 * @func: (closure user_data) (destroy destroy) (scope notified):
385 * @user_data:
386 * @destroy:
387 *
388 *
389 *
390 * Since: 0.9.2
391 **/
392HB_EXTERN void
393hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs,
394 hb_unicode_decompose_func_t func,
395 void *user_data, hb_destroy_func_t destroy);
396
397/**
398 * hb_unicode_funcs_set_decompose_compatibility_func:
399 * @ufuncs: a Unicode function structure
400 * @func: (closure user_data) (destroy destroy) (scope notified):
401 * @user_data:
402 * @destroy:
403 *
404 *
405 *
406 * Since: 0.9.2
407 **/
408HB_EXTERN void
409hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs,
410 hb_unicode_decompose_compatibility_func_t func,
411 void *user_data, hb_destroy_func_t destroy);
412
413/* accessors */
414
415/**
416 * hb_unicode_combining_class:
417 *
418 * Since: 0.9.2
419 **/
420HB_EXTERN hb_unicode_combining_class_t
421hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs,
422 hb_codepoint_t unicode);
423
424/**
425 * hb_unicode_eastasian_width:
426 *
427 * Since: 0.9.2
428 **/
429HB_EXTERN unsigned int
430hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs,
431 hb_codepoint_t unicode);
432
433/**
434 * hb_unicode_general_category:
435 *
436 * Since: 0.9.2
437 **/
438HB_EXTERN hb_unicode_general_category_t
439hb_unicode_general_category (hb_unicode_funcs_t *ufuncs,
440 hb_codepoint_t unicode);
441
442/**
443 * hb_unicode_mirroring:
444 *
445 * Since: 0.9.2
446 **/
447HB_EXTERN hb_codepoint_t
448hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs,
449 hb_codepoint_t unicode);
450
451/**
452 * hb_unicode_script:
453 *
454 * Since: 0.9.2
455 **/
456HB_EXTERN hb_script_t
457hb_unicode_script (hb_unicode_funcs_t *ufuncs,
458 hb_codepoint_t unicode);
459
460HB_EXTERN hb_bool_t
461hb_unicode_compose (hb_unicode_funcs_t *ufuncs,
462 hb_codepoint_t a,
463 hb_codepoint_t b,
464 hb_codepoint_t *ab);
465
466HB_EXTERN hb_bool_t
467hb_unicode_decompose (hb_unicode_funcs_t *ufuncs,
468 hb_codepoint_t ab,
469 hb_codepoint_t *a,
470 hb_codepoint_t *b);
471
472HB_EXTERN unsigned int
473hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs,
474 hb_codepoint_t u,
475 hb_codepoint_t *decomposed);
476
477HB_END_DECLS
478
479#endif /* HB_UNICODE_H */
480