| 1 | /* |
| 2 | ****************************************************************************** |
| 3 | * |
| 4 | * © 2016 and later: Unicode, Inc. and others. |
| 5 | * License & terms of use: http://www.unicode.org/copyright.html |
| 6 | * |
| 7 | ****************************************************************************** |
| 8 | * file name: ubiditransform.h |
| 9 | * encoding: UTF-8 |
| 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * created on: 2016jul24 |
| 14 | * created by: Lina Kemmel |
| 15 | * |
| 16 | */ |
| 17 | |
| 18 | #ifndef UBIDITRANSFORM_H |
| 19 | #define UBIDITRANSFORM_H |
| 20 | |
| 21 | #include "unicode/utypes.h" |
| 22 | #include "unicode/ubidi.h" |
| 23 | #include "unicode/uchar.h" |
| 24 | |
| 25 | #if U_SHOW_CPLUSPLUS_API |
| 26 | #include "unicode/localpointer.h" |
| 27 | #endif // U_SHOW_CPLUSPLUS_API |
| 28 | |
| 29 | /** |
| 30 | * \file |
| 31 | * \brief C API: Bidi Transformations |
| 32 | */ |
| 33 | |
| 34 | /** |
| 35 | * `UBiDiOrder` indicates the order of text. |
| 36 | * |
| 37 | * This bidi transformation engine supports all possible combinations (4 in |
| 38 | * total) of input and output text order: |
| 39 | * |
| 40 | * - <logical input, visual output>: unless the output direction is RTL, this |
| 41 | * corresponds to a normal operation of the Bidi algorithm as described in the |
| 42 | * Unicode Technical Report and implemented by `UBiDi` when the |
| 43 | * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL |
| 44 | * mode is not supported by `UBiDi` and is accomplished through |
| 45 | * reversing a visual LTR string, |
| 46 | * |
| 47 | * - <visual input, logical output>: unless the input direction is RTL, this |
| 48 | * corresponds to an "inverse bidi algorithm" in `UBiDi` with the |
| 49 | * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`. |
| 50 | * Visual RTL mode is not supported by `UBiDi` and is |
| 51 | * accomplished through reversing a visual LTR string, |
| 52 | * |
| 53 | * - <logical input, logical output>: if the input and output base directions |
| 54 | * mismatch, this corresponds to the `UBiDi` implementation with the |
| 55 | * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the |
| 56 | * input and output base directions are identical, the transformation engine |
| 57 | * will only handle character mirroring and Arabic shaping operations without |
| 58 | * reordering, |
| 59 | * |
| 60 | * - <visual input, visual output>: this reordering mode is not supported by |
| 61 | * the `UBiDi` engine; it implies character mirroring, Arabic |
| 62 | * shaping, and - if the input/output base directions mismatch - string |
| 63 | * reverse operations. |
| 64 | * @see ubidi_setInverse |
| 65 | * @see ubidi_setReorderingMode |
| 66 | * @see UBIDI_REORDER_DEFAULT |
| 67 | * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT |
| 68 | * @see UBIDI_REORDER_RUNS_ONLY |
| 69 | * @stable ICU 58 |
| 70 | */ |
| 71 | typedef enum { |
| 72 | /** 0: Constant indicating a logical order. |
| 73 | * This is the default for input text. |
| 74 | * @stable ICU 58 |
| 75 | */ |
| 76 | UBIDI_LOGICAL = 0, |
| 77 | /** 1: Constant indicating a visual order. |
| 78 | * This is the default for output text. |
| 79 | * @stable ICU 58 |
| 80 | */ |
| 81 | UBIDI_VISUAL |
| 82 | } UBiDiOrder; |
| 83 | |
| 84 | /** |
| 85 | * <code>UBiDiMirroring</code>, the type of the <code>doMirroring</code> argument of |
| 86 | * <code>ubiditransform_transform</code>, indicates whether or not characters with the |
| 87 | * "mirrored" property in RTL runs should be replaced with their mirror-image counterparts. |
| 88 | * @see UBIDI_DO_MIRRORING |
| 89 | * @see ubidi_setReorderingOptions |
| 90 | * @see ubidi_writeReordered |
| 91 | * @see ubidi_writeReverse |
| 92 | * @stable ICU 58 |
| 93 | */ |
| 94 | typedef enum { |
| 95 | /** 0: Constant indicating that character mirroring should not be |
| 96 | * performed. |
| 97 | * This is the default. |
| 98 | * @stable ICU 58 |
| 99 | */ |
| 100 | UBIDI_MIRRORING_OFF = 0, |
| 101 | /** 1: Constant indicating that character mirroring should be performed. |
| 102 | * This corresponds to calling <code>ubidi_writeReordered</code> or |
| 103 | * <code>ubidi_writeReverse</code> with the |
| 104 | * <code>UBIDI_DO_MIRRORING</code> option bit set. |
| 105 | * @stable ICU 58 |
| 106 | */ |
| 107 | UBIDI_MIRRORING_ON |
| 108 | } UBiDiMirroring; |
| 109 | |
| 110 | /** |
| 111 | * Forward declaration of the <code>UBiDiTransform</code> structure that stores |
| 112 | * information used by the layout transformation engine (only declared, not defined, in this header). |
| 113 | * @stable ICU 58 |
| 114 | */ |
| 115 | typedef struct UBiDiTransform UBiDiTransform; |
| 116 | |
| 117 | /** |
| 118 | * Performs transformation of text from the bidi layout defined by the input |
| 119 | * ordering scheme to the bidi layout defined by the output ordering scheme, |
| 120 | * and applies character mirroring and Arabic shaping operations.<p> |
| 121 | * In terms of <code>UBiDi</code>, such a transformation implies: |
| 122 | * <ul> |
| 123 | * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the |
| 124 | * reordering mode is other than normal),</li> |
| 125 | * <li>calling <code>ubidi_setInverse</code> as needed (when text should be |
| 126 | * transformed from a visual to a logical form),</li> |
| 127 | * <li>resolving embedding levels of each character in the input text by |
| 128 | * calling <code>ubidi_setPara</code>,</li> |
| 129 | * <li>reordering the characters based on the computed embedding levels, also |
| 130 | * performing character mirroring as needed, and streaming the result to the |
| 131 | * output, by calling <code>ubidi_writeReordered</code>,</li> |
| 132 | * <li>performing Arabic digit and letter shaping on the output text by calling |
| 133 | * <code>u_shapeArabic</code>.</li> |
| 134 | * </ul> |
| 135 | * An "ordering scheme" encompasses the base direction and the order of text, |
| 136 | * and these characteristics must be defined by the caller for both input and |
| 137 | * output explicitly.<p> |
| 138 | * There are 36 possible combinations of <input, output> ordering schemes, |
| 139 | * which are partially supported by <code>UBiDi</code> already. Examples of the |
| 140 | * currently supported combinations: |
| 141 | * <ul> |
| 142 | * <li><Logical LTR, Visual LTR>: this is equivalent to calling |
| 143 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> |
| 144 | * <li><Logical RTL, Visual LTR>: this is equivalent to calling |
| 145 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li> |
| 146 | * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to |
| 147 | * calling <code>ubidi_setPara</code> with |
| 148 | * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li> |
| 149 | * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to |
| 150 | * calling <code>ubidi_setPara</code> with |
| 151 | * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li> |
| 152 | * <li><Visual LTR, Logical LTR>: this is equivalent to |
| 153 | * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then |
| 154 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> |
| 155 | * <li><Visual LTR, Logical RTL>: this is equivalent to |
| 156 | * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then |
| 157 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li> |
| 158 | * </ul> |
| 159 | * All combinations that involve the Visual RTL scheme are unsupported by |
| 160 | * <code>UBiDi</code>, for instance: |
| 161 | * <ul> |
| 162 | * <li><Logical LTR, Visual RTL>,</li> |
| 163 | * <li><Visual RTL, Logical RTL>.</li> |
| 164 | * </ul> |
| 165 | * <p>Example of usage of the transformation engine:<br> |
| 166 | * <pre> |
| 167 | * \code |
| 168 | * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0}; |
| 169 | * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0}; |
| 170 | * UErrorCode errorCode = U_ZERO_ERROR; |
| 171 | * // Run a transformation. |
| 172 | * ubiditransform_transform(pBidiTransform, |
| 173 | * text1, -1, text2, -1, |
| 174 | * UBIDI_LTR, UBIDI_VISUAL, |
| 175 | * UBIDI_RTL, UBIDI_LOGICAL, |
| 176 | * UBIDI_MIRRORING_OFF, |
| 177 | * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, |
| 178 | * &errorCode); |
| 179 | * // Do something with text2. |
| 180 | * text2[4] = '2'; |
| 181 | * // Run a reverse transformation. |
| 182 | * ubiditransform_transform(pBidiTransform, |
| 183 | * text2, -1, text1, -1, |
| 184 | * UBIDI_RTL, UBIDI_LOGICAL, |
| 185 | * UBIDI_LTR, UBIDI_VISUAL, |
| 186 | * UBIDI_MIRRORING_OFF, |
| 187 | * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, |
| 188 | * &errorCode); |
| 189 | *\endcode |
| 190 | * </pre> |
| 191 | * </p> |
| 192 | * |
| 193 | * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object |
| 194 | * allocated with <code>ubiditransform_open()</code> or |
| 195 | * <code>NULL</code>.<p> |
| 196 | * This object serves for one-time setup to amortize initialization |
| 197 | * overheads. Use of this object is not thread-safe. All other threads |
| 198 | * should allocate a new <code>UBiDiTransform</code> object by calling |
| 199 | * <code>ubiditransform_open()</code> before using it. Alternatively, |
| 200 | * a caller can set this parameter to <code>NULL</code>, in which case |
| 201 | * the object will be allocated by the engine on the fly.</p> |
| 202 | * @param src A pointer to the text that the Bidi layout transformations will |
| 203 | * be performed on. |
| 204 | * <p><strong>Note:</strong> the text must be (at least) |
| 205 | * <code>srcLength</code> long.</p> |
| 206 | * @param srcLength The length of the text, in number of UChars. If |
| 207 | * <code>srcLength == -1</code> then the text must be zero-terminated. |
| 208 | * @param dest A pointer to where the processed text is to be copied. |
| 209 | * @param destSize The size of the <code>dest</code> buffer, in number of |
| 210 | * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set, |
| 211 | * then the destination length could be as large as |
| 212 | * <code>srcLength * 2</code>. Otherwise, the destination length will |
| 213 | * not exceed <code>srcLength</code>. If the caller reserves the last |
| 214 | * position for zero-termination, it should be excluded from |
| 215 | * <code>destSize</code>. |
| 216 | * <p><code>destSize == -1</code> is allowed and makes sense when |
| 217 | * <code>dest</code> holds some meaningful value, e.g. that of |
| 218 | * <code>src</code>. In this case <code>dest</code> must be |
| 219 | * zero-terminated.</p> |
| 220 | * @param inParaLevel A base embedding level of the input as defined in |
| 221 | * <code>ubidi_setPara</code> documentation for the |
| 222 | * <code>paraLevel</code> parameter. |
| 223 | * @param inOrder An order of the input, which can be one of the |
| 224 | * <code>UBiDiOrder</code> values. |
| 225 | * @param outParaLevel A base embedding level of the output as defined in |
| 226 | * <code>ubidi_setPara</code> documentation for the |
| 227 | * <code>paraLevel</code> parameter. |
| 228 | * @param outOrder An order of the output, which can be one of the |
| 229 | * <code>UBiDiOrder</code> values. |
| 230 | * @param doMirroring Indicates whether or not to perform character mirroring, |
| 231 | * and can accept one of the <code>UBiDiMirroring</code> values. |
| 232 | * @param shapingOptions Arabic digit and letter shaping options defined in the |
| 233 | * ushape.h documentation. |
| 234 | * <p><strong>Note:</strong> Direction indicator options are computed by |
| 235 | * the transformation engine based on the effective ordering schemes, so |
| 236 | * user-defined direction indicators will be ignored.</p> |
| 237 | * @param pErrorCode A pointer to an error code value. |
| 238 | * |
| 239 | * @return The destination length, i.e. the number of UChars written to |
| 240 | * <code>dest</code>. If the transformation fails, the return value |
| 241 | * will be 0 (and the error code will be written to |
| 242 | * <code>pErrorCode</code>). |
| 243 | * |
| 244 | * @see UBiDiLevel |
| 245 | * @see UBiDiOrder |
| 246 | * @see UBiDiMirroring |
| 247 | * @see ubidi_setPara |
| 248 | * @see u_shapeArabic |
| 249 | * @stable ICU 58 |
| 250 | */ |
| 251 | U_CAPI uint32_t U_EXPORT2 |
| 252 | ubiditransform_transform(UBiDiTransform *pBiDiTransform, |
| 253 | const UChar *src, int32_t srcLength, |
| 254 | UChar *dest, int32_t destSize, |
| 255 | UBiDiLevel inParaLevel, UBiDiOrder inOrder, |
| 256 | UBiDiLevel outParaLevel, UBiDiOrder outOrder, |
| 257 | UBiDiMirroring doMirroring, uint32_t shapingOptions, |
| 258 | UErrorCode *pErrorCode); |
| 259 | |
| 260 | /** |
| 261 | * Allocates a <code>UBiDiTransform</code> object. This object can be reused, |
| 262 | * e.g. with different ordering schemes, mirroring or shaping options.<p> |
| 263 | * <strong>Note:</strong> The object can only be reused in the same thread. |
| 264 | * All other threads should allocate a new <code>UBiDiTransform</code> object |
| 265 | * before using it.<p> |
| 266 | * Example of usage:<p> |
| 267 | * <pre> |
| 268 | * \code |
| 269 | * UErrorCode errorCode = U_ZERO_ERROR; |
| 270 | * // Open a new UBiDiTransform. |
| 271 | * UBiDiTransform* transform = ubiditransform_open(&errorCode); |
| 272 | * // Run a transformation. |
| 273 | * ubiditransform_transform(transform, |
| 274 | * text1, -1, text2, -1, |
| 275 | * UBIDI_RTL, UBIDI_LOGICAL, |
| 276 | * UBIDI_LTR, UBIDI_VISUAL, |
| 277 | * UBIDI_MIRRORING_ON, |
| 278 | * U_SHAPE_DIGITS_EN2AN, |
| 279 | * &errorCode); |
| 280 | * // Do something with the output text and invoke another transformation using |
| 281 | * // that text as input. |
| 282 | * ubiditransform_transform(transform, |
| 283 | * text2, -1, text3, -1, |
| 284 | * UBIDI_LTR, UBIDI_VISUAL, |
| 285 | * UBIDI_RTL, UBIDI_VISUAL, |
| 286 | * UBIDI_MIRRORING_ON, |
| 287 | * 0, &errorCode); |
| 288 | *\endcode |
| 289 | * </pre> |
| 290 | * <p> |
| 291 | * The <code>UBiDiTransform</code> object must be deallocated by calling |
| 292 | * <code>ubiditransform_close()</code>. |
| 293 | * @param pErrorCode A pointer to an error code value. |
| 294 | * @return An empty <code>UBiDiTransform</code> object. |
| 295 | * @stable ICU 58 |
| 296 | */ |
| 297 | U_CAPI UBiDiTransform* U_EXPORT2 |
| 298 | ubiditransform_open(UErrorCode *pErrorCode); |
| 299 | |
| 300 | /** |
| 301 | * Deallocates the <code>UBiDiTransform</code> object pointed to by <code>pBidiTransform</code>. |
| 302 | * @stable ICU 58 |
| 303 | */ |
| 304 | U_CAPI void U_EXPORT2 |
| 305 | ubiditransform_close(UBiDiTransform *pBidiTransform); |
| 306 | |
| 307 | #if U_SHOW_CPLUSPLUS_API |
| 308 | |
| 309 | U_NAMESPACE_BEGIN |
| 310 | |
| 311 | /** |
| 312 | * \class LocalUBiDiTransformPointer |
| 313 | * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close(). |
| 314 | * For most methods see the LocalPointerBase base class. |
| 315 | * |
| 316 | * @see LocalPointerBase |
| 317 | * @see LocalPointer |
| 318 | * @stable ICU 58 |
| 319 | */ |
| 320 | U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); |
| 321 | |
| 322 | U_NAMESPACE_END |
| 323 | |
| 324 | #endif // U_SHOW_CPLUSPLUS_API |
| 325 | |
| 326 | #endif |
| 327 | |