1 | /* |
2 | * Copyright © 2009 Red Hat, Inc. |
3 | * Copyright © 2011 Codethink Limited |
4 | * Copyright © 2011,2012 Google, Inc. |
5 | * |
6 | * This is part of HarfBuzz, a text shaping library. |
7 | * |
8 | * Permission is hereby granted, without written agreement and without |
9 | * license or royalty fees, to use, copy, modify, and distribute this |
10 | * software and its documentation for any purpose, provided that the |
11 | * above copyright notice and the following two paragraphs appear in |
12 | * all copies of this software. |
13 | * |
14 | * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR |
15 | * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES |
16 | * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN |
17 | * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH |
18 | * DAMAGE. |
19 | * |
20 | * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, |
21 | * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND |
22 | * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS |
23 | * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO |
24 | * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. |
25 | * |
26 | * Red Hat Author(s): Behdad Esfahbod |
27 | * Codethink Author(s): Ryan Lortie |
28 | * Google Author(s): Behdad Esfahbod |
29 | */ |
30 | |
31 | #ifndef HB_H_IN |
32 | #error "Include <hb.h> instead." |
33 | #endif |
34 | |
35 | #ifndef HB_UNICODE_H |
36 | #define HB_UNICODE_H |
37 | |
38 | #include "hb-common.h" |
39 | |
40 | HB_BEGIN_DECLS |
41 | |
42 | |
/**
 * HB_UNICODE_MAX:
 *
 * Upper bound of the Unicode codespace (U+10FFFF), expressed as an
 * unsigned integer constant.
 *
 * Since: 1.9.0
 */
#define HB_UNICODE_MAX 0x10FFFFu
49 | |
50 | |
/* hb_unicode_general_category_t */

/*
 * Values of the Unicode Character Database property General_Category (gc).
 *
 * The two-letter abbreviation of each category is given next to its
 * enumerator.  Numeric values are spelled out explicitly; they are the same
 * consecutive values (starting at zero) an unannotated enum would produce.
 */
typedef enum
{
  HB_UNICODE_GENERAL_CATEGORY_CONTROL             =  0, /* Cc */
  HB_UNICODE_GENERAL_CATEGORY_FORMAT              =  1, /* Cf */
  HB_UNICODE_GENERAL_CATEGORY_UNASSIGNED          =  2, /* Cn */
  HB_UNICODE_GENERAL_CATEGORY_PRIVATE_USE         =  3, /* Co */
  HB_UNICODE_GENERAL_CATEGORY_SURROGATE           =  4, /* Cs */

  HB_UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER    =  5, /* Ll */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_LETTER     =  6, /* Lm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_LETTER        =  7, /* Lo */
  HB_UNICODE_GENERAL_CATEGORY_TITLECASE_LETTER    =  8, /* Lt */
  HB_UNICODE_GENERAL_CATEGORY_UPPERCASE_LETTER    =  9, /* Lu */

  HB_UNICODE_GENERAL_CATEGORY_SPACING_MARK        = 10, /* Mc */
  HB_UNICODE_GENERAL_CATEGORY_ENCLOSING_MARK      = 11, /* Me */
  HB_UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK    = 12, /* Mn */

  HB_UNICODE_GENERAL_CATEGORY_DECIMAL_NUMBER      = 13, /* Nd */
  HB_UNICODE_GENERAL_CATEGORY_LETTER_NUMBER       = 14, /* Nl */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_NUMBER        = 15, /* No */

  HB_UNICODE_GENERAL_CATEGORY_CONNECT_PUNCTUATION = 16, /* Pc */
  HB_UNICODE_GENERAL_CATEGORY_DASH_PUNCTUATION    = 17, /* Pd */
  HB_UNICODE_GENERAL_CATEGORY_CLOSE_PUNCTUATION   = 18, /* Pe */
  HB_UNICODE_GENERAL_CATEGORY_FINAL_PUNCTUATION   = 19, /* Pf */
  HB_UNICODE_GENERAL_CATEGORY_INITIAL_PUNCTUATION = 20, /* Pi */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_PUNCTUATION   = 21, /* Po */
  HB_UNICODE_GENERAL_CATEGORY_OPEN_PUNCTUATION    = 22, /* Ps */

  HB_UNICODE_GENERAL_CATEGORY_CURRENCY_SYMBOL     = 23, /* Sc */
  HB_UNICODE_GENERAL_CATEGORY_MODIFIER_SYMBOL     = 24, /* Sk */
  HB_UNICODE_GENERAL_CATEGORY_MATH_SYMBOL         = 25, /* Sm */
  HB_UNICODE_GENERAL_CATEGORY_OTHER_SYMBOL        = 26, /* So */

  HB_UNICODE_GENERAL_CATEGORY_LINE_SEPARATOR      = 27, /* Zl */
  HB_UNICODE_GENERAL_CATEGORY_PARAGRAPH_SEPARATOR = 28, /* Zp */
  HB_UNICODE_GENERAL_CATEGORY_SPACE_SEPARATOR     = 29  /* Zs */
} hb_unicode_general_category_t;
87 | |
/* hb_unicode_combining_class_t */

/* Note: newer versions of Unicode may add new values.  Clients should be ready to handle
 * any value in the 0..254 range being returned from hb_unicode_combining_class().
 */

/*
 * Values of the Unicode Character Database property
 * Canonical_Combining_Class (ccc).
 *
 * Enumerators named CCC<n> carry the numeric class <n>, with one historical
 * exception: HB_UNICODE_COMBINING_CLASS_CCC133 has the value 132.  That name
 * is kept so existing callers keep compiling, and
 * HB_UNICODE_COMBINING_CLASS_CCC132 is provided as the correctly named alias
 * for the same value.
 */
typedef enum
{
  HB_UNICODE_COMBINING_CLASS_NOT_REORDERED        = 0,
  HB_UNICODE_COMBINING_CLASS_OVERLAY              = 1,
  HB_UNICODE_COMBINING_CLASS_NUKTA                = 7,
  HB_UNICODE_COMBINING_CLASS_KANA_VOICING         = 8,
  HB_UNICODE_COMBINING_CLASS_VIRAMA               = 9,

  /* Hebrew */
  HB_UNICODE_COMBINING_CLASS_CCC10                = 10,
  HB_UNICODE_COMBINING_CLASS_CCC11                = 11,
  HB_UNICODE_COMBINING_CLASS_CCC12                = 12,
  HB_UNICODE_COMBINING_CLASS_CCC13                = 13,
  HB_UNICODE_COMBINING_CLASS_CCC14                = 14,
  HB_UNICODE_COMBINING_CLASS_CCC15                = 15,
  HB_UNICODE_COMBINING_CLASS_CCC16                = 16,
  HB_UNICODE_COMBINING_CLASS_CCC17                = 17,
  HB_UNICODE_COMBINING_CLASS_CCC18                = 18,
  HB_UNICODE_COMBINING_CLASS_CCC19                = 19,
  HB_UNICODE_COMBINING_CLASS_CCC20                = 20,
  HB_UNICODE_COMBINING_CLASS_CCC21                = 21,
  HB_UNICODE_COMBINING_CLASS_CCC22                = 22,
  HB_UNICODE_COMBINING_CLASS_CCC23                = 23,
  HB_UNICODE_COMBINING_CLASS_CCC24                = 24,
  HB_UNICODE_COMBINING_CLASS_CCC25                = 25,
  HB_UNICODE_COMBINING_CLASS_CCC26                = 26,

  /* Arabic */
  HB_UNICODE_COMBINING_CLASS_CCC27                = 27,
  HB_UNICODE_COMBINING_CLASS_CCC28                = 28,
  HB_UNICODE_COMBINING_CLASS_CCC29                = 29,
  HB_UNICODE_COMBINING_CLASS_CCC30                = 30,
  HB_UNICODE_COMBINING_CLASS_CCC31                = 31,
  HB_UNICODE_COMBINING_CLASS_CCC32                = 32,
  HB_UNICODE_COMBINING_CLASS_CCC33                = 33,
  HB_UNICODE_COMBINING_CLASS_CCC34                = 34,
  HB_UNICODE_COMBINING_CLASS_CCC35                = 35,

  /* Syriac */
  HB_UNICODE_COMBINING_CLASS_CCC36                = 36,

  /* Telugu */
  HB_UNICODE_COMBINING_CLASS_CCC84                = 84,
  HB_UNICODE_COMBINING_CLASS_CCC91                = 91,

  /* Thai */
  HB_UNICODE_COMBINING_CLASS_CCC103               = 103,
  HB_UNICODE_COMBINING_CLASS_CCC107               = 107,

  /* Lao */
  HB_UNICODE_COMBINING_CLASS_CCC118               = 118,
  HB_UNICODE_COMBINING_CLASS_CCC122               = 122,

  /* Tibetan */
  HB_UNICODE_COMBINING_CLASS_CCC129               = 129,
  HB_UNICODE_COMBINING_CLASS_CCC130               = 130,
  /* Correctly named alias for class 132. */
  HB_UNICODE_COMBINING_CLASS_CCC132               = 132,
  /* Historical name whose suffix does not match its value (132, not 133);
   * retained for source compatibility.  Prefer ..._CCC132 in new code. */
  HB_UNICODE_COMBINING_CLASS_CCC133               = 132,


  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW_LEFT  = 200,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_BELOW       = 202,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE       = 214,
  HB_UNICODE_COMBINING_CLASS_ATTACHED_ABOVE_RIGHT = 216,
  HB_UNICODE_COMBINING_CLASS_BELOW_LEFT           = 218,
  HB_UNICODE_COMBINING_CLASS_BELOW                = 220,
  HB_UNICODE_COMBINING_CLASS_BELOW_RIGHT          = 222,
  HB_UNICODE_COMBINING_CLASS_LEFT                 = 224,
  HB_UNICODE_COMBINING_CLASS_RIGHT                = 226,
  HB_UNICODE_COMBINING_CLASS_ABOVE_LEFT           = 228,
  HB_UNICODE_COMBINING_CLASS_ABOVE                = 230,
  HB_UNICODE_COMBINING_CLASS_ABOVE_RIGHT          = 232,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_BELOW         = 233,
  HB_UNICODE_COMBINING_CLASS_DOUBLE_ABOVE         = 234,

  HB_UNICODE_COMBINING_CLASS_IOTA_SUBSCRIPT       = 240,

  /* Lies outside the 0..254 range of values clients are told to expect
   * from hb_unicode_combining_class(). */
  HB_UNICODE_COMBINING_CLASS_INVALID              = 255
} hb_unicode_combining_class_t;
173 | |
174 | |
175 | /* |
176 | * hb_unicode_funcs_t |
177 | */ |
178 | |
179 | typedef struct hb_unicode_funcs_t hb_unicode_funcs_t; |
180 | |
181 | |
182 | /* |
183 | * just give me the best implementation you've got there. |
184 | */ |
185 | HB_EXTERN hb_unicode_funcs_t * |
186 | hb_unicode_funcs_get_default (void); |
187 | |
188 | |
189 | HB_EXTERN hb_unicode_funcs_t * |
190 | hb_unicode_funcs_create (hb_unicode_funcs_t *parent); |
191 | |
192 | HB_EXTERN hb_unicode_funcs_t * |
193 | hb_unicode_funcs_get_empty (void); |
194 | |
195 | HB_EXTERN hb_unicode_funcs_t * |
196 | hb_unicode_funcs_reference (hb_unicode_funcs_t *ufuncs); |
197 | |
198 | HB_EXTERN void |
199 | hb_unicode_funcs_destroy (hb_unicode_funcs_t *ufuncs); |
200 | |
201 | HB_EXTERN hb_bool_t |
202 | hb_unicode_funcs_set_user_data (hb_unicode_funcs_t *ufuncs, |
203 | hb_user_data_key_t *key, |
204 | void * data, |
205 | hb_destroy_func_t destroy, |
206 | hb_bool_t replace); |
207 | |
208 | |
209 | HB_EXTERN void * |
210 | hb_unicode_funcs_get_user_data (hb_unicode_funcs_t *ufuncs, |
211 | hb_user_data_key_t *key); |
212 | |
213 | |
214 | HB_EXTERN void |
215 | hb_unicode_funcs_make_immutable (hb_unicode_funcs_t *ufuncs); |
216 | |
217 | HB_EXTERN hb_bool_t |
218 | hb_unicode_funcs_is_immutable (hb_unicode_funcs_t *ufuncs); |
219 | |
220 | HB_EXTERN hb_unicode_funcs_t * |
221 | hb_unicode_funcs_get_parent (hb_unicode_funcs_t *ufuncs); |
222 | |
223 | |
224 | /* |
225 | * funcs |
226 | */ |
227 | |
228 | /* typedefs */ |
229 | |
230 | typedef hb_unicode_combining_class_t (*hb_unicode_combining_class_func_t) (hb_unicode_funcs_t *ufuncs, |
231 | hb_codepoint_t unicode, |
232 | void *user_data); |
233 | typedef unsigned int (*hb_unicode_eastasian_width_func_t) (hb_unicode_funcs_t *ufuncs, |
234 | hb_codepoint_t unicode, |
235 | void *user_data); |
236 | typedef hb_unicode_general_category_t (*hb_unicode_general_category_func_t) (hb_unicode_funcs_t *ufuncs, |
237 | hb_codepoint_t unicode, |
238 | void *user_data); |
239 | typedef hb_codepoint_t (*hb_unicode_mirroring_func_t) (hb_unicode_funcs_t *ufuncs, |
240 | hb_codepoint_t unicode, |
241 | void *user_data); |
242 | typedef hb_script_t (*hb_unicode_script_func_t) (hb_unicode_funcs_t *ufuncs, |
243 | hb_codepoint_t unicode, |
244 | void *user_data); |
245 | |
246 | typedef hb_bool_t (*hb_unicode_compose_func_t) (hb_unicode_funcs_t *ufuncs, |
247 | hb_codepoint_t a, |
248 | hb_codepoint_t b, |
249 | hb_codepoint_t *ab, |
250 | void *user_data); |
251 | typedef hb_bool_t (*hb_unicode_decompose_func_t) (hb_unicode_funcs_t *ufuncs, |
252 | hb_codepoint_t ab, |
253 | hb_codepoint_t *a, |
254 | hb_codepoint_t *b, |
255 | void *user_data); |
256 | |
257 | /** |
258 | * hb_unicode_decompose_compatibility_func_t: |
259 | * @ufuncs: a Unicode function structure |
260 | * @u: codepoint to decompose |
261 | * @decomposed: address of codepoint array (of length %HB_UNICODE_MAX_DECOMPOSITION_LEN) to write decomposition into |
262 | * @user_data: user data pointer as passed to hb_unicode_funcs_set_decompose_compatibility_func() |
263 | * |
264 | * Fully decompose @u to its Unicode compatibility decomposition. The codepoints of the decomposition will be written to @decomposed. |
265 | * The complete length of the decomposition will be returned. |
266 | * |
267 | * If @u has no compatibility decomposition, zero should be returned. |
268 | * |
269 | * The Unicode standard guarantees that a buffer of length %HB_UNICODE_MAX_DECOMPOSITION_LEN codepoints will always be sufficient for any |
270 | * compatibility decomposition plus an terminating value of 0. Consequently, @decompose must be allocated by the caller to be at least this length. Implementations |
271 | * of this function type must ensure that they do not write past the provided array. |
272 | * |
273 | * Return value: number of codepoints in the full compatibility decomposition of @u, or 0 if no decomposition available. |
274 | */ |
275 | typedef unsigned int (*hb_unicode_decompose_compatibility_func_t) (hb_unicode_funcs_t *ufuncs, |
276 | hb_codepoint_t u, |
277 | hb_codepoint_t *decomposed, |
278 | void *user_data); |
279 | |
/* Required length, in codepoints, of the buffer handed to compatibility
 * decomposition callbacks: the longest decomposition plus one slot for the
 * terminating 0.  See Unicode 6.1 for details on the maximum decomposition
 * length. */
#define HB_UNICODE_MAX_DECOMPOSITION_LEN (18+1) /* codepoints */
282 | |
283 | /* setters */ |
284 | |
285 | /** |
286 | * hb_unicode_funcs_set_combining_class_func: |
287 | * @ufuncs: a Unicode function structure |
288 | * @func: (closure user_data) (destroy destroy) (scope notified): |
289 | * @user_data: |
290 | * @destroy: |
291 | * |
292 | * |
293 | * |
294 | * Since: 0.9.2 |
295 | **/ |
296 | HB_EXTERN void |
297 | hb_unicode_funcs_set_combining_class_func (hb_unicode_funcs_t *ufuncs, |
298 | hb_unicode_combining_class_func_t func, |
299 | void *user_data, hb_destroy_func_t destroy); |
300 | |
301 | /** |
302 | * hb_unicode_funcs_set_eastasian_width_func: |
303 | * @ufuncs: a Unicode function structure |
304 | * @func: (closure user_data) (destroy destroy) (scope notified): |
305 | * @user_data: |
306 | * @destroy: |
307 | * |
308 | * |
309 | * |
310 | * Since: 0.9.2 |
311 | **/ |
312 | HB_EXTERN void |
313 | hb_unicode_funcs_set_eastasian_width_func (hb_unicode_funcs_t *ufuncs, |
314 | hb_unicode_eastasian_width_func_t func, |
315 | void *user_data, hb_destroy_func_t destroy); |
316 | |
317 | /** |
318 | * hb_unicode_funcs_set_general_category_func: |
319 | * @ufuncs: a Unicode function structure |
320 | * @func: (closure user_data) (destroy destroy) (scope notified): |
321 | * @user_data: |
322 | * @destroy: |
323 | * |
324 | * |
325 | * |
326 | * Since: 0.9.2 |
327 | **/ |
328 | HB_EXTERN void |
329 | hb_unicode_funcs_set_general_category_func (hb_unicode_funcs_t *ufuncs, |
330 | hb_unicode_general_category_func_t func, |
331 | void *user_data, hb_destroy_func_t destroy); |
332 | |
333 | /** |
334 | * hb_unicode_funcs_set_mirroring_func: |
335 | * @ufuncs: a Unicode function structure |
336 | * @func: (closure user_data) (destroy destroy) (scope notified): |
337 | * @user_data: |
338 | * @destroy: |
339 | * |
340 | * |
341 | * |
342 | * Since: 0.9.2 |
343 | **/ |
344 | HB_EXTERN void |
345 | hb_unicode_funcs_set_mirroring_func (hb_unicode_funcs_t *ufuncs, |
346 | hb_unicode_mirroring_func_t func, |
347 | void *user_data, hb_destroy_func_t destroy); |
348 | |
349 | /** |
350 | * hb_unicode_funcs_set_script_func: |
351 | * @ufuncs: a Unicode function structure |
352 | * @func: (closure user_data) (destroy destroy) (scope notified): |
353 | * @user_data: |
354 | * @destroy: |
355 | * |
356 | * |
357 | * |
358 | * Since: 0.9.2 |
359 | **/ |
360 | HB_EXTERN void |
361 | hb_unicode_funcs_set_script_func (hb_unicode_funcs_t *ufuncs, |
362 | hb_unicode_script_func_t func, |
363 | void *user_data, hb_destroy_func_t destroy); |
364 | |
365 | /** |
366 | * hb_unicode_funcs_set_compose_func: |
367 | * @ufuncs: a Unicode function structure |
368 | * @func: (closure user_data) (destroy destroy) (scope notified): |
369 | * @user_data: |
370 | * @destroy: |
371 | * |
372 | * |
373 | * |
374 | * Since: 0.9.2 |
375 | **/ |
376 | HB_EXTERN void |
377 | hb_unicode_funcs_set_compose_func (hb_unicode_funcs_t *ufuncs, |
378 | hb_unicode_compose_func_t func, |
379 | void *user_data, hb_destroy_func_t destroy); |
380 | |
381 | /** |
382 | * hb_unicode_funcs_set_decompose_func: |
383 | * @ufuncs: a Unicode function structure |
384 | * @func: (closure user_data) (destroy destroy) (scope notified): |
385 | * @user_data: |
386 | * @destroy: |
387 | * |
388 | * |
389 | * |
390 | * Since: 0.9.2 |
391 | **/ |
392 | HB_EXTERN void |
393 | hb_unicode_funcs_set_decompose_func (hb_unicode_funcs_t *ufuncs, |
394 | hb_unicode_decompose_func_t func, |
395 | void *user_data, hb_destroy_func_t destroy); |
396 | |
397 | /** |
398 | * hb_unicode_funcs_set_decompose_compatibility_func: |
399 | * @ufuncs: a Unicode function structure |
400 | * @func: (closure user_data) (destroy destroy) (scope notified): |
401 | * @user_data: |
402 | * @destroy: |
403 | * |
404 | * |
405 | * |
406 | * Since: 0.9.2 |
407 | **/ |
408 | HB_EXTERN void |
409 | hb_unicode_funcs_set_decompose_compatibility_func (hb_unicode_funcs_t *ufuncs, |
410 | hb_unicode_decompose_compatibility_func_t func, |
411 | void *user_data, hb_destroy_func_t destroy); |
412 | |
413 | /* accessors */ |
414 | |
415 | /** |
416 | * hb_unicode_combining_class: |
417 | * |
418 | * Since: 0.9.2 |
419 | **/ |
420 | HB_EXTERN hb_unicode_combining_class_t |
421 | hb_unicode_combining_class (hb_unicode_funcs_t *ufuncs, |
422 | hb_codepoint_t unicode); |
423 | |
424 | /** |
425 | * hb_unicode_eastasian_width: |
426 | * |
427 | * Since: 0.9.2 |
428 | **/ |
429 | HB_EXTERN unsigned int |
430 | hb_unicode_eastasian_width (hb_unicode_funcs_t *ufuncs, |
431 | hb_codepoint_t unicode); |
432 | |
433 | /** |
434 | * hb_unicode_general_category: |
435 | * |
436 | * Since: 0.9.2 |
437 | **/ |
438 | HB_EXTERN hb_unicode_general_category_t |
439 | hb_unicode_general_category (hb_unicode_funcs_t *ufuncs, |
440 | hb_codepoint_t unicode); |
441 | |
442 | /** |
443 | * hb_unicode_mirroring: |
444 | * |
445 | * Since: 0.9.2 |
446 | **/ |
447 | HB_EXTERN hb_codepoint_t |
448 | hb_unicode_mirroring (hb_unicode_funcs_t *ufuncs, |
449 | hb_codepoint_t unicode); |
450 | |
451 | /** |
452 | * hb_unicode_script: |
453 | * |
454 | * Since: 0.9.2 |
455 | **/ |
456 | HB_EXTERN hb_script_t |
457 | hb_unicode_script (hb_unicode_funcs_t *ufuncs, |
458 | hb_codepoint_t unicode); |
459 | |
460 | HB_EXTERN hb_bool_t |
461 | hb_unicode_compose (hb_unicode_funcs_t *ufuncs, |
462 | hb_codepoint_t a, |
463 | hb_codepoint_t b, |
464 | hb_codepoint_t *ab); |
465 | |
466 | HB_EXTERN hb_bool_t |
467 | hb_unicode_decompose (hb_unicode_funcs_t *ufuncs, |
468 | hb_codepoint_t ab, |
469 | hb_codepoint_t *a, |
470 | hb_codepoint_t *b); |
471 | |
472 | HB_EXTERN unsigned int |
473 | hb_unicode_decompose_compatibility (hb_unicode_funcs_t *ufuncs, |
474 | hb_codepoint_t u, |
475 | hb_codepoint_t *decomposed); |
476 | |
477 | HB_END_DECLS |
478 | |
479 | #endif /* HB_UNICODE_H */ |
480 | |