| 1 | /* | 
|---|
| 2 | ****************************************************************************** | 
|---|
| 3 | * | 
|---|
| 4 | * © 2016 and later: Unicode, Inc. and others. | 
|---|
| 5 | * License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 6 | * | 
|---|
| 7 | ****************************************************************************** | 
|---|
| 8 | *   file name:  ubiditransform.h | 
|---|
| 9 | *   encoding:   UTF-8 | 
|---|
| 10 | *   tab size:   8 (not used) | 
|---|
| 11 | *   indentation:4 | 
|---|
| 12 | * | 
|---|
| 13 | *   created on: 2016jul24 | 
|---|
| 14 | *   created by: Lina Kemmel | 
|---|
| 15 | * | 
|---|
| 16 | */ | 
|---|
| 17 |  | 
|---|
| 18 | #ifndef UBIDITRANSFORM_H | 
|---|
| 19 | #define UBIDITRANSFORM_H | 
|---|
| 20 |  | 
|---|
| 21 | #include "unicode/utypes.h" | 
|---|
| 22 | #include "unicode/ubidi.h" | 
|---|
| 23 | #include "unicode/uchar.h" | 
|---|
| 24 |  | 
|---|
| 25 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 26 | #include "unicode/localpointer.h" | 
|---|
| 27 | #endif   // U_SHOW_CPLUSPLUS_API | 
|---|
| 28 |  | 
|---|
| 29 | /** | 
|---|
| 30 | * \file | 
|---|
| 31 | * \brief C API: Bidi Transformations | 
|---|
| 32 | */ | 
|---|
| 33 |  | 
|---|
| 34 | /** | 
|---|
| 35 | * `UBiDiOrder` indicates the order of text. | 
|---|
| 36 | * | 
|---|
| 37 | * This bidi transformation engine supports all possible combinations (4 in | 
|---|
| 38 | * total) of input and output text order: | 
|---|
| 39 | * | 
|---|
| 40 | *   - <logical input, visual output>: unless the output direction is RTL, this | 
|---|
| 41 | *     corresponds to a normal operation of the Bidi algorithm as described in the | 
|---|
| 42 | *     Unicode Technical Report and implemented by `UBiDi` when the | 
|---|
| 43 | *     reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL | 
|---|
| 44 | *     mode is not supported by `UBiDi` and is accomplished through | 
|---|
| 45 | *     reversing a visual LTR string, | 
|---|
| 46 | * | 
|---|
| 47 | *   - <visual input, logical output>: unless the input direction is RTL, this | 
|---|
| 48 | *     corresponds to an "inverse bidi algorithm" in `UBiDi` with the | 
|---|
| 49 | *     reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`. | 
|---|
| 50 | *     Visual RTL mode is not supported by `UBiDi` and is | 
|---|
| 51 | *     accomplished through reversing a visual LTR string, | 
|---|
| 52 | * | 
|---|
| 53 | *   - <logical input, logical output>: if the input and output base directions | 
|---|
| 54 | *     mismatch, this corresponds to the `UBiDi` implementation with the | 
|---|
| 55 | *     reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the | 
|---|
| 56 | *     input and output base directions are identical, the transformation engine | 
|---|
| 57 | *     will only handle character mirroring and Arabic shaping operations without | 
|---|
| 58 | *     reordering, | 
|---|
| 59 | * | 
|---|
| 60 | *   - <visual input, visual output>: this reordering mode is not supported by | 
|---|
| 61 | *     the `UBiDi` engine; it implies character mirroring, Arabic | 
|---|
| 62 | *     shaping, and - if the input/output base directions mismatch - string | 
|---|
| 63 | *     reverse operations. | 
|---|
| 64 | * @see ubidi_setInverse | 
|---|
| 65 | * @see ubidi_setReorderingMode | 
|---|
| 66 | * @see UBIDI_REORDER_DEFAULT | 
|---|
| 67 | * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT | 
|---|
| 68 | * @see UBIDI_REORDER_RUNS_ONLY | 
|---|
| 69 | * @stable ICU 58 | 
|---|
| 70 | */ | 
|---|
| 71 | typedef enum { | 
|---|
| 72 | /** 0: Constant indicating a logical order. | 
|---|
| 73 | * This is the default for input text. | 
|---|
| 74 | * @stable ICU 58 | 
|---|
| 75 | */ | 
|---|
| 76 | UBIDI_LOGICAL = 0, | 
|---|
| 77 | /** 1: Constant indicating a visual order. | 
|---|
| 78 | * This is the default for output text. | 
|---|
| 79 | * @stable ICU 58 | 
|---|
| 80 | */ | 
|---|
| 81 | UBIDI_VISUAL | 
|---|
| 82 | } UBiDiOrder; | 
|---|
| 83 |  | 
|---|
| 84 | /** | 
|---|
| 85 | * <code>UBiDiMirroring</code> indicates whether or not characters with the | 
|---|
| 86 | * "mirrored" property in RTL runs should be replaced with their mirror-image | 
|---|
| 87 | * counterparts. It is the type of the <code>doMirroring</code> argument of <code>ubiditransform_transform</code>. | 
|---|
| 88 | * @see UBIDI_DO_MIRRORING | 
|---|
| 89 | * @see ubidi_setReorderingOptions | 
|---|
| 90 | * @see ubidi_writeReordered | 
|---|
| 91 | * @see ubidi_writeReverse | 
|---|
| 92 | * @stable ICU 58 | 
|---|
| 93 | */ | 
|---|
| 94 | typedef enum { | 
|---|
| 95 | /** 0: Constant indicating that character mirroring should not be | 
|---|
| 96 | * performed (the counterpart of leaving the <code>UBIDI_DO_MIRRORING</code> option bit unset). | 
|---|
| 97 | * This is the default. | 
|---|
| 98 | * @stable ICU 58 | 
|---|
| 99 | */ | 
|---|
| 100 | UBIDI_MIRRORING_OFF = 0, | 
|---|
| 101 | /** 1: Constant indicating that character mirroring should be performed. | 
|---|
| 102 | * This corresponds to calling <code>ubidi_writeReordered</code> or | 
|---|
| 103 | * <code>ubidi_writeReverse</code> with the | 
|---|
| 104 | * <code>UBIDI_DO_MIRRORING</code> option bit set. | 
|---|
| 105 | * @stable ICU 58 | 
|---|
| 106 | */ | 
|---|
| 107 | UBIDI_MIRRORING_ON | 
|---|
| 108 | } UBiDiMirroring; | 
|---|
| 109 |  | 
|---|
| 110 | /** | 
|---|
| 111 | * Forward declaration of the <code>UBiDiTransform</code> structure that stores | 
|---|
| 112 | * information used by the layout transformation engine. Objects of this type are allocated with <code>ubiditransform_open()</code> and must be deallocated with <code>ubiditransform_close()</code>. | 
|---|
| 113 | * @stable ICU 58 | 
|---|
| 114 | */ | 
|---|
| 115 | typedef struct UBiDiTransform UBiDiTransform; | 
|---|
| 116 |  | 
|---|
| 117 | /** | 
|---|
| 118 | * Performs transformation of text from the bidi layout defined by the input | 
|---|
| 119 | * ordering scheme to the bidi layout defined by the output ordering scheme, | 
|---|
| 120 | * and applies character mirroring and Arabic shaping operations.<p> | 
|---|
| 121 | * In terms of <code>UBiDi</code>, such a transformation implies: | 
|---|
| 122 | * <ul> | 
|---|
| 123 | * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the | 
|---|
| 124 | * reordering mode is other than normal),</li> | 
|---|
| 125 | * <li>calling <code>ubidi_setInverse</code> as needed (when text should be | 
|---|
| 126 | * transformed from a visual to a logical form),</li> | 
|---|
| 127 | * <li>resolving embedding levels of each character in the input text by | 
|---|
| 128 | * calling <code>ubidi_setPara</code>,</li> | 
|---|
| 129 | * <li>reordering the characters based on the computed embedding levels, also | 
|---|
| 130 | * performing character mirroring as needed, and streaming the result to the | 
|---|
| 131 | * output, by calling <code>ubidi_writeReordered</code>,</li> | 
|---|
| 132 | * <li>performing Arabic digit and letter shaping on the output text by calling | 
|---|
| 133 | * <code>u_shapeArabic</code>.</li> | 
|---|
| 134 | * </ul> | 
|---|
| 135 | * An "ordering scheme" encompasses the base direction and the order of text, | 
|---|
| 136 | * and these characteristics must be defined by the caller for both input and | 
|---|
| 137 | * output explicitly.<p> | 
|---|
| 138 | * There are 36 possible combinations of <input, output> ordering schemes, | 
|---|
| 139 | * which are partially supported by <code>UBiDi</code> already. Examples of the | 
|---|
| 140 | * currently supported combinations: | 
|---|
| 141 | * <ul> | 
|---|
| 142 | * <li><Logical LTR, Visual LTR>: this is equivalent to calling | 
|---|
| 143 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> | 
|---|
| 144 | * <li><Logical RTL, Visual LTR>: this is equivalent to calling | 
|---|
| 145 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li> | 
|---|
| 146 | * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to | 
|---|
| 147 | * calling <code>ubidi_setPara</code> with | 
|---|
| 148 | * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li> | 
|---|
| 149 | * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to | 
|---|
| 150 | * calling <code>ubidi_setPara</code> with | 
|---|
| 151 | * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li> | 
|---|
| 152 | * <li><Visual LTR, Logical LTR>: this is equivalent to | 
|---|
| 153 | * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then | 
|---|
| 154 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> | 
|---|
| 155 | * <li><Visual LTR, Logical RTL>: this is equivalent to | 
|---|
| 156 | * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then | 
|---|
| 157 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li> | 
|---|
| 158 | * </ul> | 
|---|
| 159 | * All combinations that involve the Visual RTL scheme are unsupported by | 
|---|
| 160 | * <code>UBiDi</code>, for instance: | 
|---|
| 161 | * <ul> | 
|---|
| 162 | * <li><Logical LTR, Visual RTL>,</li> | 
|---|
| 163 | * <li><Visual RTL, Logical RTL>.</li> | 
|---|
| 164 | * </ul> | 
|---|
| 165 | * <p>Example of usage of the transformation engine:<br> | 
|---|
| 166 | * <pre> | 
|---|
| 167 | * \code | 
|---|
| 168 | * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0}; | 
|---|
| 169 | * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0}; | 
|---|
| 170 | * UErrorCode errorCode = U_ZERO_ERROR; | 
|---|
| 171 | * // Run a transformation. | 
|---|
| 172 | * ubiditransform_transform(pBiDiTransform, | 
|---|
| 173 | *          text1, -1, text2, -1, | 
|---|
| 174 | *          UBIDI_LTR, UBIDI_VISUAL, | 
|---|
| 175 | *          UBIDI_RTL, UBIDI_LOGICAL, | 
|---|
| 176 | *          UBIDI_MIRRORING_OFF, | 
|---|
| 177 | *          U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, | 
|---|
| 178 | *          &errorCode); | 
|---|
| 179 | * // Do something with text2. | 
|---|
| 180 | *  text2[4] = '2'; | 
|---|
| 181 | * // Run a reverse transformation. | 
|---|
| 182 | * ubiditransform_transform(pBiDiTransform, | 
|---|
| 183 | *          text2, -1, text1, -1, | 
|---|
| 184 | *          UBIDI_RTL, UBIDI_LOGICAL, | 
|---|
| 185 | *          UBIDI_LTR, UBIDI_VISUAL, | 
|---|
| 186 | *          UBIDI_MIRRORING_OFF, | 
|---|
| 187 | *          U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, | 
|---|
| 188 | *          &errorCode); | 
|---|
| 189 | *\endcode | 
|---|
| 190 | * </pre> | 
|---|
| 191 | * </p> | 
|---|
| 192 | * | 
|---|
| 193 | * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object | 
|---|
| 194 | *        allocated with <code>ubiditransform_open()</code> or | 
|---|
| 195 | *        <code>NULL</code>.<p> | 
|---|
| 196 | *        This object serves for one-time setup to amortize initialization | 
|---|
| 197 | *        overheads. Use of this object is not thread-safe. All other threads | 
|---|
| 198 | *        should allocate a new <code>UBiDiTransform</code> object by calling | 
|---|
| 199 | *        <code>ubiditransform_open()</code> before using it. Alternatively, | 
|---|
| 200 | *        a caller can set this parameter to <code>NULL</code>, in which case | 
|---|
| 201 | *        the object will be allocated by the engine on the fly.</p> | 
|---|
| 202 | * @param src A pointer to the text that the Bidi layout transformations will | 
|---|
| 203 | *        be performed on. | 
|---|
| 204 | *        <p><strong>Note:</strong> the text must be (at least) | 
|---|
| 205 | *        <code>srcLength</code> long.</p> | 
|---|
| 206 | * @param srcLength The length of the text, in number of UChars. If | 
|---|
| 207 | *        <code>srcLength == -1</code> then the text must be zero-terminated. | 
|---|
| 208 | * @param dest A pointer to where the processed text is to be copied. | 
|---|
| 209 | * @param destSize The size of the <code>dest</code> buffer, in number of | 
|---|
| 210 | *        UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set, | 
|---|
| 211 | *        then the destination length could be as large as | 
|---|
| 212 | *        <code>srcLength * 2</code>. Otherwise, the destination length will | 
|---|
| 213 | *        not exceed <code>srcLength</code>. If the caller reserves the last | 
|---|
| 214 | *        position for zero-termination, it should be excluded from | 
|---|
| 215 | *        <code>destSize</code>. | 
|---|
| 216 | *        <p><code>destSize == -1</code> is allowed and makes sense when | 
|---|
| 217 | *        <code>dest</code> already holds some meaningful value, e.g. that of | 
|---|
| 218 | *        <code>src</code>. In this case <code>dest</code> must be | 
|---|
| 219 | *        zero-terminated.</p> | 
|---|
| 220 | * @param inParaLevel A base embedding level of the input as defined in | 
|---|
| 221 | *        <code>ubidi_setPara</code> documentation for the | 
|---|
| 222 | *        <code>paraLevel</code> parameter. | 
|---|
| 223 | * @param inOrder An order of the input, which can be one of the | 
|---|
| 224 | *        <code>UBiDiOrder</code> values. | 
|---|
| 225 | * @param outParaLevel A base embedding level of the output as defined in | 
|---|
| 226 | *        <code>ubidi_setPara</code> documentation for the | 
|---|
| 227 | *        <code>paraLevel</code> parameter. | 
|---|
| 228 | * @param outOrder An order of the output, which can be one of the | 
|---|
| 229 | *        <code>UBiDiOrder</code> values. | 
|---|
| 230 | * @param doMirroring Indicates whether or not to perform character mirroring, | 
|---|
| 231 | *        and can accept one of the <code>UBiDiMirroring</code> values. | 
|---|
| 232 | * @param shapingOptions Arabic digit and letter shaping options defined in the | 
|---|
| 233 | *        ushape.h documentation. | 
|---|
| 234 | *        <p><strong>Note:</strong> Direction indicator options are computed by | 
|---|
| 235 | *        the transformation engine based on the effective ordering schemes, so | 
|---|
| 236 | *        user-defined direction indicators will be ignored.</p> | 
|---|
| 237 | * @param pErrorCode A pointer to an error code value. | 
|---|
| 238 | * | 
|---|
| 239 | * @return The destination length, i.e. the number of UChars written to | 
|---|
| 240 | *         <code>dest</code>. If the transformation fails, the return value | 
|---|
| 241 | *         will be 0 (and the error code will be written to | 
|---|
| 242 | *         <code>pErrorCode</code>). | 
|---|
| 243 | * | 
|---|
| 244 | * @see UBiDiLevel | 
|---|
| 245 | * @see UBiDiOrder | 
|---|
| 246 | * @see UBiDiMirroring | 
|---|
| 247 | * @see ubidi_setPara | 
|---|
| 248 | * @see u_shapeArabic | 
|---|
| 249 | * @stable ICU 58 | 
|---|
| 250 | */ | 
|---|
| 251 | U_CAPI uint32_t U_EXPORT2 | 
|---|
| 252 | ubiditransform_transform(UBiDiTransform *pBiDiTransform, | 
|---|
| 253 | const UChar *src, int32_t srcLength, | 
|---|
| 254 | UChar *dest, int32_t destSize, | 
|---|
| 255 | UBiDiLevel inParaLevel, UBiDiOrder inOrder, | 
|---|
| 256 | UBiDiLevel outParaLevel, UBiDiOrder outOrder, | 
|---|
| 257 | UBiDiMirroring doMirroring, uint32_t shapingOptions, | 
|---|
| 258 | UErrorCode *pErrorCode); | 
|---|
| 259 |  | 
|---|
| 260 | /** | 
|---|
| 261 | * Allocates a <code>UBiDiTransform</code> object. This object can be reused, | 
|---|
| 262 | * e.g. with different ordering schemes, mirroring or shaping options.<p> | 
|---|
| 263 | * <strong>Note:</strong> The object can only be reused in the same thread. | 
|---|
| 264 | * All other threads should allocate a new <code>UBiDiTransform</code> object | 
|---|
| 265 | * before using it.<p> | 
|---|
| 266 | * Example of usage:<p> | 
|---|
| 267 | * <pre> | 
|---|
| 268 | * \code | 
|---|
| 269 | * UErrorCode errorCode = U_ZERO_ERROR; | 
|---|
| 270 | * // Open a new UBiDiTransform. | 
|---|
| 271 | * UBiDiTransform* transform = ubiditransform_open(&errorCode); | 
|---|
| 272 | * // Run a transformation. | 
|---|
| 273 | * ubiditransform_transform(transform, | 
|---|
| 274 | *          text1, -1, text2, -1, | 
|---|
| 275 | *          UBIDI_RTL, UBIDI_LOGICAL, | 
|---|
| 276 | *          UBIDI_LTR, UBIDI_VISUAL, | 
|---|
| 277 | *          UBIDI_MIRRORING_ON, | 
|---|
| 278 | *          U_SHAPE_DIGITS_EN2AN, | 
|---|
| 279 | *          &errorCode); | 
|---|
| 280 | * // Do something with the output text and invoke another transformation using | 
|---|
| 281 | * //   that text as input. | 
|---|
| 282 | * ubiditransform_transform(transform, | 
|---|
| 283 | *          text2, -1, text3, -1, | 
|---|
| 284 | *          UBIDI_LTR, UBIDI_VISUAL, | 
|---|
| 285 | *          UBIDI_RTL, UBIDI_VISUAL, | 
|---|
| 286 | *          UBIDI_MIRRORING_ON, | 
|---|
| 287 | *          0, &errorCode); | 
|---|
| 288 | *\endcode | 
|---|
| 289 | * </pre> | 
|---|
| 290 | * <p> | 
|---|
| 291 | * The <code>UBiDiTransform</code> object must be deallocated by calling | 
|---|
| 292 | * <code>ubiditransform_close()</code>. | 
|---|
| 293 | * @param pErrorCode A pointer to an error code value. | 
|---|
| 294 | * @return An empty <code>UBiDiTransform</code> object. | 
|---|
| 295 | * @stable ICU 58 | 
|---|
| 296 | */ | 
|---|
| 297 | U_CAPI UBiDiTransform* U_EXPORT2 | 
|---|
| 298 | ubiditransform_open(UErrorCode *pErrorCode); | 
|---|
| 299 |  | 
|---|
| 300 | /** | 
|---|
| 301 | * Deallocates the given <code>UBiDiTransform</code> object, i.e. one that was allocated with <code>ubiditransform_open()</code>. | 
|---|
| 302 | * @stable ICU 58 | 
|---|
| 303 | */ | 
|---|
| 304 | U_CAPI void U_EXPORT2 | 
|---|
| 305 | ubiditransform_close(UBiDiTransform *pBidiTransform); | 
|---|
| 306 |  | 
|---|
| 307 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 308 |  | 
|---|
| 309 | U_NAMESPACE_BEGIN | 
|---|
| 310 |  | 
|---|
| 311 | /** | 
|---|
| 312 | * \class LocalUBiDiTransformPointer | 
|---|
| 313 | * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close(). | 
|---|
| 314 | * For most methods see the LocalPointerBase base class. | 
|---|
| 315 | * | 
|---|
| 316 | * @see LocalPointerBase | 
|---|
| 317 | * @see LocalPointer | 
|---|
| 318 | * @stable ICU 58 | 
|---|
| 319 | */ | 
|---|
| 320 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); | 
|---|
| 321 |  | 
|---|
| 322 | U_NAMESPACE_END | 
|---|
| 323 |  | 
|---|
| 324 | #endif   // U_SHOW_CPLUSPLUS_API | 
|---|
| 325 |  | 
|---|
| 326 | #endif | 
|---|
| 327 |  | 
|---|