| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ****************************************************************************** | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2000-2012, International Business Machines | 
|---|
| 7 | *   Corporation and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ****************************************************************************** | 
|---|
| 10 | *   file name:  ushape.h | 
|---|
| 11 | *   encoding:   UTF-8 | 
|---|
| 12 | *   tab size:   8 (not used) | 
|---|
| 13 | *   indentation:4 | 
|---|
| 14 | * | 
|---|
| 15 | *   created on: 2000jun29 | 
|---|
| 16 | *   created by: Markus W. Scherer | 
|---|
| 17 | */ | 
|---|
| 18 |  | 
|---|
| 19 | #ifndef __USHAPE_H__ | 
|---|
| 20 | #define __USHAPE_H__ | 
|---|
| 21 |  | 
|---|
| 22 | #include "unicode/utypes.h" | 
|---|
| 23 |  | 
|---|
| 24 | /** | 
|---|
| 25 | * \file | 
|---|
| 26 | * \brief C API:  Arabic shaping | 
|---|
| 27 | * | 
|---|
| 28 | */ | 
|---|
| 29 |  | 
|---|
| 30 | /** | 
|---|
| 31 | * Shape Arabic text on a character basis. | 
|---|
| 32 | * | 
|---|
| 33 | * <p>This function performs basic operations for "shaping" Arabic text. It is most | 
|---|
| 34 | * useful for use with legacy data formats and legacy display technology | 
|---|
| 35 | * (simple terminals). All operations are performed on Unicode characters.</p> | 
|---|
| 36 | * | 
|---|
| 37 | * <p>Text-based shaping means that some character code points in the text are | 
|---|
| 38 | * replaced by others depending on the context. It transforms one kind of text | 
|---|
| 39 | * into another. In comparison, modern displays for Arabic text select | 
|---|
| 40 | * appropriate, context-dependent font glyphs for each text element, which means | 
|---|
| 41 | * that they transform text into a glyph vector.</p> | 
|---|
| 42 | * | 
|---|
| 43 | * <p>Text transformations are necessary when modern display technology is not | 
|---|
| 44 | * available or when text needs to be transformed to or from legacy formats that | 
|---|
| 45 | * use "shaped" characters. Since the Arabic script is cursive, connecting | 
|---|
| 46 | * adjacent letters to each other, computers select images for each letter based | 
|---|
| 47 | * on the surrounding letters. This usually results in four images per Arabic | 
|---|
| 48 | * letter: initial, middle, final, and isolated forms. In Unicode, on the other | 
|---|
| 49 | * hand, letters are normally stored abstract, and a display system is expected | 
|---|
| 50 | * to select the necessary glyphs. (This makes searching and other text | 
|---|
| 51 | * processing easier because the same letter has only one code.) It is possible | 
|---|
| 52 | * to mimic this with text transformations because there are characters in | 
|---|
| 53 | * Unicode that are rendered as letters with a specific shape | 
|---|
| 54 | * (or cursive connectivity). They were included for interoperability with | 
|---|
| 55 | * legacy systems and codepages, and for unsophisticated display systems.</p> | 
|---|
| 56 | * | 
|---|
| 57 | * <p>A second kind of text transformation is supported for Arabic digits: | 
|---|
| 58 | * For compatibility with legacy codepages that only include European digits, | 
|---|
| 59 | * it is possible to replace one set of digits by another, changing the | 
|---|
| 60 | * character code points. These operations can be performed for either | 
|---|
| 61 | * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic | 
|---|
| 62 | * digits (U+06f0...U+06f9).</p> | 
|---|
| 63 | * | 
|---|
| 64 | * <p>Some replacements may result in more or fewer characters (code points). | 
|---|
| 65 | * By default, this means that the destination buffer may receive text with a | 
|---|
| 66 | * length different from the source length. Some legacy systems rely on the | 
|---|
| 67 | * length of the text to be constant. They expect extra spaces to be added | 
|---|
| 68 | * or consumed either next to the affected character or at the end of the | 
|---|
| 69 | * text.</p> | 
|---|
| 70 | * | 
|---|
| 71 | * <p>For details about the available operations, see the description of the | 
|---|
| 72 | * <code>U_SHAPE_...</code> options.</p> | 
|---|
| 73 | * | 
|---|
| 74 | * @param source The input text. | 
|---|
| 75 | * | 
|---|
| 76 | * @param sourceLength The number of UChars in <code>source</code>. | 
|---|
| 77 | * | 
|---|
| 78 | * @param dest The destination buffer that will receive the results of the | 
|---|
| 79 | *             requested operations. It may be <code>NULL</code> only if | 
|---|
| 80 | *             <code>destSize</code> is 0. The source and destination must not | 
|---|
| 81 | *             overlap. | 
|---|
| 82 | * | 
|---|
| 83 | * @param destSize The size (capacity) of the destination buffer in UChars. | 
|---|
| 84 | *                 If <code>destSize</code> is 0, then no output is produced, | 
|---|
| 85 | *                 but the necessary buffer size is returned ("preflighting"). | 
|---|
| 86 | * | 
|---|
| 87 | * @param options This is a 32-bit set of flags that specify the operations | 
|---|
| 88 | *                that are performed on the input text. If no error occurs, | 
|---|
| 89 | *                then the result will always be written to the destination | 
|---|
| 90 | *                buffer. | 
|---|
| 91 | * | 
|---|
| 92 | * @param pErrorCode must be a valid pointer to an error code value, | 
|---|
| 93 | *        which must not indicate a failure before the function call. | 
|---|
| 94 | * | 
|---|
| 95 | * @return The number of UChars written to the destination buffer. | 
|---|
| 96 | *         If an error occurred, then no output was written, or it may be | 
|---|
| 97 | *         incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then | 
|---|
| 98 | *         the return value indicates the necessary destination buffer size. | 
|---|
| 99 | * @stable ICU 2.0 | 
|---|
| 100 | */ | 
|---|
| 101 | U_CAPI int32_t U_EXPORT2 | 
|---|
| 102 | u_shapeArabic(const UChar *source, int32_t sourceLength, | 
|---|
| 103 | UChar *dest, int32_t destSize, | 
|---|
| 104 | uint32_t options, | 
|---|
| 105 | UErrorCode *pErrorCode); | 
|---|
| 106 |  | 
|---|
| 107 | /** | 
|---|
| 108 | * Memory option: allow the result to have a different length than the source. | 
|---|
| 109 | * Affects: LamAlef options | 
|---|
| 110 | * @stable ICU 2.0 | 
|---|
| 111 | */ | 
|---|
| 112 | #define U_SHAPE_LENGTH_GROW_SHRINK              0 | 
|---|
| 113 |  | 
|---|
| 114 | /** | 
|---|
| 115 | * Memory option: allow the result to have a different length than the source. | 
|---|
| 116 | * Affects: LamAlef options | 
|---|
| 117 | * This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK | 
|---|
| 118 | * @stable ICU 4.2 | 
|---|
| 119 | */ | 
|---|
| 120 | #define U_SHAPE_LAMALEF_RESIZE                  0 | 
|---|
| 121 |  | 
|---|
| 122 | /** | 
|---|
| 123 | * Memory option: the result must have the same length as the source. | 
|---|
| 124 | * If more room is necessary, then try to consume spaces next to modified characters. | 
|---|
| 125 | * @stable ICU 2.0 | 
|---|
| 126 | */ | 
|---|
| 127 | #define U_SHAPE_LENGTH_FIXED_SPACES_NEAR        1 | 
|---|
| 128 |  | 
|---|
| 129 | /** | 
|---|
| 130 | * Memory option: the result must have the same length as the source. | 
|---|
| 131 | * If more room is necessary, then try to consume spaces next to modified characters. | 
|---|
| 132 | * Affects: LamAlef options | 
|---|
| 133 | * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR | 
|---|
| 134 | * @stable ICU 4.2 | 
|---|
| 135 | */ | 
|---|
| 136 | #define U_SHAPE_LAMALEF_NEAR                    1 | 
|---|
| 137 |  | 
|---|
| 138 | /** | 
|---|
| 139 | * Memory option: the result must have the same length as the source. | 
|---|
| 140 | * If more room is necessary, then try to consume spaces at the end of the text. | 
|---|
| 141 | * @stable ICU 2.0 | 
|---|
| 142 | */ | 
|---|
| 143 | #define U_SHAPE_LENGTH_FIXED_SPACES_AT_END      2 | 
|---|
| 144 |  | 
|---|
| 145 | /** | 
|---|
| 146 | * Memory option: the result must have the same length as the source. | 
|---|
| 147 | * If more room is necessary, then try to consume spaces at the end of the text. | 
|---|
| 148 | * Affects: LamAlef options | 
|---|
| 149 | * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END | 
|---|
| 150 | * @stable ICU 4.2 | 
|---|
| 151 | */ | 
|---|
| 152 | #define U_SHAPE_LAMALEF_END                     2 | 
|---|
| 153 |  | 
|---|
| 154 | /** | 
|---|
| 155 | * Memory option: the result must have the same length as the source. | 
|---|
| 156 | * If more room is necessary, then try to consume spaces at the beginning of the text. | 
|---|
| 157 | * @stable ICU 2.0 | 
|---|
| 158 | */ | 
|---|
| 159 | #define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3 | 
|---|
| 160 |  | 
|---|
| 161 | /** | 
|---|
| 162 | * Memory option: the result must have the same length as the source. | 
|---|
| 163 | * If more room is necessary, then try to consume spaces at the beginning of the text. | 
|---|
| 164 | * Affects: LamAlef options | 
|---|
| 165 | * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING | 
|---|
| 166 | * @stable ICU 4.2 | 
|---|
| 167 | */ | 
|---|
| 168 | #define U_SHAPE_LAMALEF_BEGIN                    3 | 
|---|
| 169 |  | 
|---|
| 170 |  | 
|---|
| 171 | /** | 
|---|
| 172 | * Memory option: the result must have the same length as the source. | 
|---|
| 173 | * Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end. | 
|---|
| 174 | *               If there is no space at end, use spaces at beginning of the buffer. If there | 
|---|
| 175 | *               is no space at beginning of the buffer, use spaces at the near (i.e. the space | 
|---|
| 176 | *               after the LAMALEF character). | 
|---|
| 177 | *               If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) | 
|---|
| 178 | *               will be set in pErrorCode | 
|---|
| 179 | * | 
|---|
| 180 | * Deshaping Mode: Behaves the same as the U_SHAPE_LAMALEF_END flag. | 
|---|
| 181 | * Affects: LamAlef options | 
|---|
| 182 | * @stable ICU 4.2 | 
|---|
| 183 | */ | 
|---|
| 184 | #define U_SHAPE_LAMALEF_AUTO                     0x10000 | 
|---|
| 185 |  | 
|---|
| 186 | /** Bit mask for memory options. @stable ICU 2.0 */ | 
|---|
| 187 | #define U_SHAPE_LENGTH_MASK                      0x10003 /* Changed old value 3 */ | 
|---|
| 188 |  | 
|---|
| 189 |  | 
|---|
| 190 | /** | 
|---|
| 191 | * Bit mask for LamAlef memory options. | 
|---|
| 192 | * @stable ICU 4.2 | 
|---|
| 193 | */ | 
|---|
| 194 | #define U_SHAPE_LAMALEF_MASK                     0x10003 /* updated */ | 
|---|
| 195 |  | 
|---|
| 196 | /** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */ | 
|---|
| 197 | #define U_SHAPE_TEXT_DIRECTION_LOGICAL          0 | 
|---|
| 198 |  | 
|---|
| 199 | /** | 
|---|
| 200 | * Direction indicator: | 
|---|
| 201 | * the source is in visual RTL order, | 
|---|
| 202 | * the rightmost displayed character stored first. | 
|---|
| 203 | * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL | 
|---|
| 204 | * @stable ICU 4.2 | 
|---|
| 205 | */ | 
|---|
| 206 | #define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL       0 | 
|---|
| 207 |  | 
|---|
| 208 | /** | 
|---|
| 209 | * Direction indicator: | 
|---|
| 210 | * the source is in visual LTR order, | 
|---|
| 211 | * the leftmost displayed character stored first. | 
|---|
| 212 | * @stable ICU 2.0 | 
|---|
| 213 | */ | 
|---|
| 214 | #define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR       4 | 
|---|
| 215 |  | 
|---|
| 216 | /** Bit mask for direction indicators. @stable ICU 2.0 */ | 
|---|
| 217 | #define U_SHAPE_TEXT_DIRECTION_MASK             4 | 
|---|
| 218 |  | 
|---|
| 219 |  | 
|---|
| 220 | /** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */ | 
|---|
| 221 | #define U_SHAPE_LETTERS_NOOP                    0 | 
|---|
| 222 |  | 
|---|
| 223 | /** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */ | 
|---|
| 224 | #define U_SHAPE_LETTERS_SHAPE                   8 | 
|---|
| 225 |  | 
|---|
| 226 | /** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */ | 
|---|
| 227 | #define U_SHAPE_LETTERS_UNSHAPE                 0x10 | 
|---|
| 228 |  | 
|---|
| 229 | /** | 
|---|
| 230 | * Letter shaping option: replace abstract letter characters by "shaped" ones. | 
|---|
| 231 | * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters | 
|---|
| 232 | * are always "shaped" into the isolated form instead of the medial form | 
|---|
| 233 | * (selecting code points from the Arabic Presentation Forms-B block). | 
|---|
| 234 | * @stable ICU 2.0 | 
|---|
| 235 | */ | 
|---|
| 236 | #define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18 | 
|---|
| 237 |  | 
|---|
| 238 |  | 
|---|
| 239 | /** Bit mask for letter shaping options. @stable ICU 2.0 */ | 
|---|
| 240 | #define U_SHAPE_LETTERS_MASK                        0x18 | 
|---|
| 241 |  | 
|---|
| 242 |  | 
|---|
| 243 | /** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */ | 
|---|
| 244 | #define U_SHAPE_DIGITS_NOOP                     0 | 
|---|
| 245 |  | 
|---|
| 246 | /** | 
|---|
| 247 | * Digit shaping option: | 
|---|
| 248 | * Replace European digits (U+0030...) by Arabic-Indic digits. | 
|---|
| 249 | * @stable ICU 2.0 | 
|---|
| 250 | */ | 
|---|
| 251 | #define U_SHAPE_DIGITS_EN2AN                    0x20 | 
|---|
| 252 |  | 
|---|
| 253 | /** | 
|---|
| 254 | * Digit shaping option: | 
|---|
| 255 | * Replace Arabic-Indic digits by European digits (U+0030...). | 
|---|
| 256 | * @stable ICU 2.0 | 
|---|
| 257 | */ | 
|---|
| 258 | #define U_SHAPE_DIGITS_AN2EN                    0x40 | 
|---|
| 259 |  | 
|---|
| 260 | /** | 
|---|
| 261 | * Digit shaping option: | 
|---|
| 262 | * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent | 
|---|
| 263 | * strongly directional character is an Arabic letter | 
|---|
| 264 | * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br> | 
|---|
| 265 | * The direction of "preceding" depends on the direction indicator option. | 
|---|
| 266 | * For the first characters, the preceding strongly directional character | 
|---|
| 267 | * (initial state) is assumed to be not an Arabic letter | 
|---|
| 268 | * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]). | 
|---|
| 269 | * @stable ICU 2.0 | 
|---|
| 270 | */ | 
|---|
| 271 | #define U_SHAPE_DIGITS_ALEN2AN_INIT_LR          0x60 | 
|---|
| 272 |  | 
|---|
| 273 | /** | 
|---|
| 274 | * Digit shaping option: | 
|---|
| 275 | * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent | 
|---|
| 276 | * strongly directional character is an Arabic letter | 
|---|
| 277 | * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br> | 
|---|
| 278 | * The direction of "preceding" depends on the direction indicator option. | 
|---|
| 279 | * For the first characters, the preceding strongly directional character | 
|---|
| 280 | * (initial state) is assumed to be an Arabic letter. | 
|---|
| 281 | * @stable ICU 2.0 | 
|---|
| 282 | */ | 
|---|
| 283 | #define U_SHAPE_DIGITS_ALEN2AN_INIT_AL          0x80 | 
|---|
| 284 |  | 
|---|
| 285 | /** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ | 
|---|
| 286 | #define U_SHAPE_DIGITS_RESERVED                 0xa0 | 
|---|
| 287 |  | 
|---|
| 288 | /** Bit mask for digit shaping options. @stable ICU 2.0 */ | 
|---|
| 289 | #define U_SHAPE_DIGITS_MASK                     0xe0 | 
|---|
| 290 |  | 
|---|
| 291 |  | 
|---|
| 292 | /** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */ | 
|---|
| 293 | #define U_SHAPE_DIGIT_TYPE_AN                   0 | 
|---|
| 294 |  | 
|---|
| 295 | /** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */ | 
|---|
| 296 | #define U_SHAPE_DIGIT_TYPE_AN_EXTENDED          0x100 | 
|---|
| 297 |  | 
|---|
| 298 | /** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */ | 
|---|
| 299 | #define U_SHAPE_DIGIT_TYPE_RESERVED             0x200 | 
|---|
| 300 |  | 
|---|
| 301 | /** Bit mask for digit type options. @stable ICU 2.0 */ | 
|---|
| 302 | #define U_SHAPE_DIGIT_TYPE_MASK                 0x300 /* I need to change this from 0x3f00 to 0x300 */ | 
|---|
| 303 |  | 
|---|
| 304 | /** | 
|---|
| 305 | * Tashkeel aggregation option: | 
|---|
| 306 | * Replaces any combination of U+0651 with one of | 
|---|
| 307 | * U+064C, U+064D, U+064E, U+064F, U+0650 with | 
|---|
| 308 | * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively. | 
|---|
| 309 | * @stable ICU 3.6 | 
|---|
| 310 | */ | 
|---|
| 311 | #define U_SHAPE_AGGREGATE_TASHKEEL              0x4000 | 
|---|
| 312 | /** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */ | 
|---|
| 313 | #define U_SHAPE_AGGREGATE_TASHKEEL_NOOP         0 | 
|---|
| 314 | /** Bit mask for tashkeel aggregation. @stable ICU 3.6 */ | 
|---|
| 315 | #define U_SHAPE_AGGREGATE_TASHKEEL_MASK         0x4000 | 
|---|
| 316 |  | 
|---|
| 317 | /** | 
|---|
| 318 | * Presentation form option: | 
|---|
| 319 | * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B | 
|---|
| 320 | * characters with U+06xx characters, before shaping. | 
|---|
| 321 | * @stable ICU 3.6 | 
|---|
| 322 | */ | 
|---|
| 323 | #define U_SHAPE_PRESERVE_PRESENTATION           0x8000 | 
|---|
| 324 | /** Presentation form option: | 
|---|
| 325 | * Replace Arabic Presentation Forms-A and Arabic Presentation Forms-B with | 
|---|
| 326 | * their unshaped correspondents in range U+06xx, before shaping. | 
|---|
| 327 | * @stable ICU 3.6 | 
|---|
| 328 | */ | 
|---|
| 329 | #define U_SHAPE_PRESERVE_PRESENTATION_NOOP      0 | 
|---|
| 330 | /** Bit mask for preserve presentation form. @stable ICU 3.6 */ | 
|---|
| 331 | #define U_SHAPE_PRESERVE_PRESENTATION_MASK      0x8000 | 
|---|
| 332 |  | 
|---|
| 333 | /* Seen Tail option */ | 
|---|
| 334 | /** | 
|---|
| 335 | * Memory option: the result must have the same length as the source. | 
|---|
| 336 | * Shaping mode: The SEEN family character will expand into two characters using space near | 
|---|
| 337 | *               the SEEN family character (i.e. the space after the character). | 
|---|
| 338 | *               If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) | 
|---|
| 339 | *               will be set in pErrorCode | 
|---|
| 340 | * | 
|---|
| 341 | * De-shaping mode: Any Seen character followed by Tail character will be | 
|---|
| 342 | *                  replaced by one cell Seen and a space will replace the Tail. | 
|---|
| 343 | * Affects: Seen options | 
|---|
| 344 | * @stable ICU 4.2 | 
|---|
| 345 | */ | 
|---|
| 346 | #define U_SHAPE_SEEN_TWOCELL_NEAR     0x200000 | 
|---|
| 347 |  | 
|---|
| 348 | /** | 
|---|
| 349 | * Bit mask for Seen memory options. | 
|---|
| 350 | * @stable ICU 4.2 | 
|---|
| 351 | */ | 
|---|
| 352 | #define U_SHAPE_SEEN_MASK             0x700000 | 
|---|
| 353 |  | 
|---|
| 354 | /* YehHamza option */ | 
|---|
| 355 | /** | 
|---|
| 356 | * Memory option: the result must have the same length as the source. | 
|---|
| 357 | * Shaping mode: The YEHHAMZA character will expand into two characters using space near it | 
|---|
| 358 | *              (i.e. the space after the character). | 
|---|
| 359 | *               If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h) | 
|---|
| 360 | *               will be set in pErrorCode | 
|---|
| 361 | * | 
|---|
| 362 | * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be | 
|---|
| 363 | *                  replaced by one cell YehHamza and space will replace the Hamza. | 
|---|
| 364 | * Affects: YehHamza options | 
|---|
| 365 | * @stable ICU 4.2 | 
|---|
| 366 | */ | 
|---|
| 367 | #define U_SHAPE_YEHHAMZA_TWOCELL_NEAR      0x1000000 | 
|---|
| 368 |  | 
|---|
| 369 |  | 
|---|
| 370 | /** | 
|---|
| 371 | * Bit mask for YehHamza memory options. | 
|---|
| 372 | * @stable ICU 4.2 | 
|---|
| 373 | */ | 
|---|
| 374 | #define U_SHAPE_YEHHAMZA_MASK              0x3800000 | 
|---|
| 375 |  | 
|---|
| 376 | /* New Tashkeel options */ | 
|---|
| 377 | /** | 
|---|
| 378 | * Memory option: the result must have the same length as the source. | 
|---|
| 379 | * Shaping mode: Tashkeel characters will be replaced by spaces. | 
|---|
| 380 | *               Spaces will be placed at beginning of the buffer | 
|---|
| 381 | * | 
|---|
| 382 | * De-shaping mode: N/A | 
|---|
| 383 | * Affects: Tashkeel options | 
|---|
| 384 | * @stable ICU 4.2 | 
|---|
| 385 | */ | 
|---|
| 386 | #define U_SHAPE_TASHKEEL_BEGIN                      0x40000 | 
|---|
| 387 |  | 
|---|
| 388 | /** | 
|---|
| 389 | * Memory option: the result must have the same length as the source. | 
|---|
| 390 | * Shaping mode: Tashkeel characters will be replaced by spaces. | 
|---|
| 391 | *               Spaces will be placed at end of the buffer | 
|---|
| 392 | * | 
|---|
| 393 | * De-shaping mode: N/A | 
|---|
| 394 | * Affects: Tashkeel options | 
|---|
| 395 | * @stable ICU 4.2 | 
|---|
| 396 | */ | 
|---|
| 397 | #define U_SHAPE_TASHKEEL_END                        0x60000 | 
|---|
| 398 |  | 
|---|
| 399 | /** | 
|---|
| 400 | * Memory option: allow the result to have a different length than the source. | 
|---|
| 401 | * Shaping mode: Tashkeel characters will be removed, buffer length will shrink. | 
|---|
| 402 | * De-shaping mode: N/A | 
|---|
| 403 | * | 
|---|
| 404 | * Affects: Tashkeel options | 
|---|
| 405 | * @stable ICU 4.2 | 
|---|
| 406 | */ | 
|---|
| 407 | #define U_SHAPE_TASHKEEL_RESIZE                     0x80000 | 
|---|
| 408 |  | 
|---|
| 409 | /** | 
|---|
| 410 | * Memory option: the result must have the same length as the source. | 
|---|
| 411 | * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent | 
|---|
| 412 | *               characters (i.e. shaped on Tatweel) or replaced by space if it is not connected. | 
|---|
| 413 | * | 
|---|
| 414 | * De-shaping mode: N/A | 
|---|
| 415 | * Affects: Tashkeel options | 
|---|
| 416 | * @stable ICU 4.2 | 
|---|
| 417 | */ | 
|---|
| 418 | #define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL         0xC0000 | 
|---|
| 419 |  | 
|---|
| 420 | /** | 
|---|
| 421 | * Bit mask for Tashkeel replacement with Space or Tatweel memory options. | 
|---|
| 422 | * @stable ICU 4.2 | 
|---|
| 423 | */ | 
|---|
| 424 | #define U_SHAPE_TASHKEEL_MASK                       0xE0000 | 
|---|
| 425 |  | 
|---|
| 426 |  | 
|---|
| 427 | /* Space location Control options */ | 
|---|
| 428 | /** | 
|---|
| 429 | * This option affects the meaning of the BEGIN and END options. If this option is not used, the default | 
|---|
| 430 | * for BEGIN and END will be as follows: | 
|---|
| 431 | * The Default (for both Visual LTR, Visual RTL and Logical Text) | 
|---|
| 432 | *           1. BEGIN always refers to the start address of physical memory. | 
|---|
| 433 | *           2. END always refers to the end address of physical memory. | 
|---|
| 434 | * | 
|---|
| 435 | * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text. | 
|---|
| 436 | * | 
|---|
| 437 | * The effect on BEGIN and END Memory Options will be as follows: | 
|---|
| 438 | *    A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text( | 
|---|
| 439 | *       corresponding to the physical memory address end for Visual LTR text, Same as END in | 
|---|
| 440 | *       default behavior) | 
|---|
| 441 | *    B. BEGIN For Logical text: Same as BEGIN in default behavior. | 
|---|
| 442 | *    C. END For Visual LTR text: This will be the end (left side) of the visual text (corresponding | 
|---|
| 443 | *       to the physical memory address beginning for Visual LTR text, Same as BEGIN in default behavior). | 
|---|
| 444 | *    D. END For Logical text: Same as END in default behavior. | 
|---|
| 445 | * Affects: All LamAlef BEGIN, END and AUTO options. | 
|---|
| 446 | * @stable ICU 4.2 | 
|---|
| 447 | */ | 
|---|
| 448 | #define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000 | 
|---|
| 449 |  | 
|---|
| 450 | /** | 
|---|
| 451 | * Bit mask for swapping BEGIN and END for Visual LTR text | 
|---|
| 452 | * @stable ICU 4.2 | 
|---|
| 453 | */ | 
|---|
| 454 | #define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK      0x4000000 | 
|---|
| 455 |  | 
|---|
| 456 | /** | 
|---|
| 457 | * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73). | 
|---|
| 458 | * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B) | 
|---|
| 459 | * De-shaping will not use this option as it will always search for both the new Unicode code point for the | 
|---|
| 460 | * TAIL (i.e. 0xFE73) and the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the | 
|---|
| 461 | * Seen-Family letter accordingly. | 
|---|
| 462 | * | 
|---|
| 463 | * Shaping Mode: Only shaping. | 
|---|
| 464 | * De-shaping Mode: N/A. | 
|---|
| 465 | * Affects: All Seen options | 
|---|
| 466 | * @stable ICU 4.8 | 
|---|
| 467 | */ | 
|---|
| 468 | #define U_SHAPE_TAIL_NEW_UNICODE        0x8000000 | 
|---|
| 469 |  | 
|---|
| 470 | /** | 
|---|
| 471 | * Bit mask for new Unicode Tail option | 
|---|
| 472 | * @stable ICU 4.8 | 
|---|
| 473 | */ | 
|---|
| 474 | #define U_SHAPE_TAIL_TYPE_MASK          0x8000000 | 
|---|
| 475 |  | 
|---|
| 476 | #endif | 
|---|
| 477 |  | 
|---|