1/*
2******************************************************************************
3*
4* © 2016 and later: Unicode, Inc. and others.
5* License & terms of use: http://www.unicode.org/copyright.html
6*
7******************************************************************************
8* file name: ubiditransform.h
9* encoding: UTF-8
10* tab size: 8 (not used)
11* indentation:4
12*
13* created on: 2016jul24
14* created by: Lina Kemmel
15*
16*/
17
18#ifndef UBIDITRANSFORM_H
19#define UBIDITRANSFORM_H
20
21#include "unicode/utypes.h"
22#include "unicode/ubidi.h"
23#include "unicode/uchar.h"
24
25#if U_SHOW_CPLUSPLUS_API
26#include "unicode/localpointer.h"
27#endif // U_SHOW_CPLUSPLUS_API
28
29/**
30 * \file
31 * \brief C API: Bidi Transformations
32 */
33
34/**
35 * `UBiDiOrder` indicates the order of text.
36 *
37 * This bidi transformation engine supports all possible combinations (4 in
38 * total) of input and output text order:
39 *
40 * - <logical input, visual output>: unless the output direction is RTL, this
41 * corresponds to a normal operation of the Bidi algorithm as described in the
42 * Unicode Technical Report and implemented by `UBiDi` when the
43 * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
44 * mode is not supported by `UBiDi` and is accomplished through
45 * reversing a visual LTR string,
46 *
47 * - <visual input, logical output>: unless the input direction is RTL, this
48 * corresponds to an "inverse bidi algorithm" in `UBiDi` with the
49 * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
50 * Visual RTL mode is not not supported by `UBiDi` and is
51 * accomplished through reversing a visual LTR string,
52 *
53 * - <logical input, logical output>: if the input and output base directions
54 * mismatch, this corresponds to the `UBiDi` implementation with the
55 * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
56 * input and output base directions are identical, the transformation engine
57 * will only handle character mirroring and Arabic shaping operations without
58 * reordering,
59 *
60 * - <visual input, visual output>: this reordering mode is not supported by
61 * the `UBiDi` engine; it implies character mirroring, Arabic
62 * shaping, and - if the input/output base directions mismatch - string
63 * reverse operations.
64 * @see ubidi_setInverse
65 * @see ubidi_setReorderingMode
66 * @see UBIDI_REORDER_DEFAULT
67 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
68 * @see UBIDI_REORDER_RUNS_ONLY
69 * @stable ICU 58
70 */
71typedef enum {
72 /** 0: Constant indicating a logical order.
73 * This is the default for input text.
74 * @stable ICU 58
75 */
76 UBIDI_LOGICAL = 0,
77 /** 1: Constant indicating a visual order.
78 * This is a default for output text.
79 * @stable ICU 58
80 */
81 UBIDI_VISUAL
82} UBiDiOrder;
83
84/**
85 * <code>UBiDiMirroring</code> indicates whether or not characters with the
86 * "mirrored" property in RTL runs should be replaced with their mirror-image
87 * counterparts.
88 * @see UBIDI_DO_MIRRORING
89 * @see ubidi_setReorderingOptions
90 * @see ubidi_writeReordered
91 * @see ubidi_writeReverse
92 * @stable ICU 58
93 */
94typedef enum {
95 /** 0: Constant indicating that character mirroring should not be
96 * performed.
97 * This is the default.
98 * @stable ICU 58
99 */
100 UBIDI_MIRRORING_OFF = 0,
101 /** 1: Constant indicating that character mirroring should be performed.
102 * This corresponds to calling <code>ubidi_writeReordered</code> or
103 * <code>ubidi_writeReverse</code> with the
104 * <code>UBIDI_DO_MIRRORING</code> option bit set.
105 * @stable ICU 58
106 */
107 UBIDI_MIRRORING_ON
108} UBiDiMirroring;
109
110/**
111 * Forward declaration of the <code>UBiDiTransform</code> structure that stores
112 * information used by the layout transformation engine.
113 * @stable ICU 58
114 */
115typedef struct UBiDiTransform UBiDiTransform;
116
117/**
118 * Performs transformation of text from the bidi layout defined by the input
119 * ordering scheme to the bidi layout defined by the output ordering scheme,
120 * and applies character mirroring and Arabic shaping operations.<p>
121 * In terms of <code>UBiDi</code>, such a transformation implies:
122 * <ul>
123 * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
124 * reordering mode is other than normal),</li>
125 * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
126 * transformed from a visual to a logical form),</li>
127 * <li>resolving embedding levels of each character in the input text by
128 * calling <code>ubidi_setPara</code>,</li>
129 * <li>reordering the characters based on the computed embedding levels, also
130 * performing character mirroring as needed, and streaming the result to the
131 * output, by calling <code>ubidi_writeReordered</code>,</li>
132 * <li>performing Arabic digit and letter shaping on the output text by calling
133 * <code>u_shapeArabic</code>.</li>
134 * </ul>
135 * An "ordering scheme" encompasses the base direction and the order of text,
136 * and these characteristics must be defined by the caller for both input and
137 * output explicitly .<p>
138 * There are 36 possible combinations of <input, output> ordering schemes,
139 * which are partially supported by <code>UBiDi</code> already. Examples of the
140 * currently supported combinations:
141 * <ul>
142 * <li><Logical LTR, Visual LTR>: this is equivalent to calling
143 * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
144 * <li><Logical RTL, Visual LTR>: this is equivalent to calling
145 * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
146 * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
147 * calling <code>ubidi_setPara</code> with
148 * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
149 * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
150 * calling <code>ubidi_setPara</code> with
151 * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
152 * <li><Visual LTR, Logical LTR>: this is equivalent to
153 * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
154 * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
155 * <li><Visual LTR, Logical RTL>: this is equivalent to
156 * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
157 * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
158 * </ul>
159 * All combinations that involve the Visual RTL scheme are unsupported by
160 * <code>UBiDi</code>, for instance:
161 * <ul>
162 * <li><Logical LTR, Visual RTL>,</li>
163 * <li><Visual RTL, Logical RTL>.</li>
164 * </ul>
165 * <p>Example of usage of the transformation engine:<br>
166 * <pre>
167 * \code
168 * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
169 * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
170 * UErrorCode errorCode = U_ZERO_ERROR;
171 * // Run a transformation.
172 * ubiditransform_transform(pBidiTransform,
173 * text1, -1, text2, -1,
174 * UBIDI_LTR, UBIDI_VISUAL,
175 * UBIDI_RTL, UBIDI_LOGICAL,
176 * UBIDI_MIRRORING_OFF,
177 * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
178 * &errorCode);
179 * // Do something with text2.
180 * text2[4] = '2';
181 * // Run a reverse transformation.
182 * ubiditransform_transform(pBidiTransform,
183 * text2, -1, text1, -1,
184 * UBIDI_RTL, UBIDI_LOGICAL,
185 * UBIDI_LTR, UBIDI_VISUAL,
186 * UBIDI_MIRRORING_OFF,
187 * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
188 * &errorCode);
189 *\endcode
190 * </pre>
191 * </p>
192 *
193 * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
194 * allocated with <code>ubiditransform_open()</code> or
195 * <code>NULL</code>.<p>
196 * This object serves for one-time setup to amortize initialization
197 * overheads. Use of this object is not thread-safe. All other threads
198 * should allocate a new <code>UBiDiTransform</code> object by calling
199 * <code>ubiditransform_open()</code> before using it. Alternatively,
200 * a caller can set this parameter to <code>NULL</code>, in which case
201 * the object will be allocated by the engine on the fly.</p>
202 * @param src A pointer to the text that the Bidi layout transformations will
203 * be performed on.
204 * <p><strong>Note:</strong> the text must be (at least)
205 * <code>srcLength</code> long.</p>
206 * @param srcLength The length of the text, in number of UChars. If
207 * <code>length == -1</code> then the text must be zero-terminated.
208 * @param dest A pointer to where the processed text is to be copied.
209 * @param destSize The size of the <code>dest</code> buffer, in number of
210 * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
211 * then the destination length could be as large as
212 * <code>srcLength * 2</code>. Otherwise, the destination length will
213 * not exceed <code>srcLength</code>. If the caller reserves the last
214 * position for zero-termination, it should be excluded from
215 * <code>destSize</code>.
216 * <p><code>destSize == -1</code> is allowed and makes sense when
217 * <code>dest</code> was holds some meaningful value, e.g. that of
218 * <code>src</code>. In this case <code>dest</code> must be
219 * zero-terminated.</p>
220 * @param inParaLevel A base embedding level of the input as defined in
221 * <code>ubidi_setPara</code> documentation for the
222 * <code>paraLevel</code> parameter.
223 * @param inOrder An order of the input, which can be one of the
224 * <code>UBiDiOrder</code> values.
225 * @param outParaLevel A base embedding level of the output as defined in
226 * <code>ubidi_setPara</code> documentation for the
227 * <code>paraLevel</code> parameter.
228 * @param outOrder An order of the output, which can be one of the
229 * <code>UBiDiOrder</code> values.
230 * @param doMirroring Indicates whether or not to perform character mirroring,
231 * and can accept one of the <code>UBiDiMirroring</code> values.
232 * @param shapingOptions Arabic digit and letter shaping options defined in the
233 * ushape.h documentation.
234 * <p><strong>Note:</strong> Direction indicator options are computed by
235 * the transformation engine based on the effective ordering schemes, so
236 * user-defined direction indicators will be ignored.</p>
237 * @param pErrorCode A pointer to an error code value.
238 *
239 * @return The destination length, i.e. the number of UChars written to
240 * <code>dest</code>. If the transformation fails, the return value
241 * will be 0 (and the error code will be written to
242 * <code>pErrorCode</code>).
243 *
244 * @see UBiDiLevel
245 * @see UBiDiOrder
246 * @see UBiDiMirroring
247 * @see ubidi_setPara
248 * @see u_shapeArabic
249 * @stable ICU 58
250 */
251U_CAPI uint32_t U_EXPORT2
252ubiditransform_transform(UBiDiTransform *pBiDiTransform,
253 const UChar *src, int32_t srcLength,
254 UChar *dest, int32_t destSize,
255 UBiDiLevel inParaLevel, UBiDiOrder inOrder,
256 UBiDiLevel outParaLevel, UBiDiOrder outOrder,
257 UBiDiMirroring doMirroring, uint32_t shapingOptions,
258 UErrorCode *pErrorCode);
259
260/**
261 * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
262 * e.g. with different ordering schemes, mirroring or shaping options.<p>
263 * <strong>Note:</strong>The object can only be reused in the same thread.
264 * All other threads should allocate a new <code>UBiDiTransform</code> object
265 * before using it.<p>
266 * Example of usage:<p>
267 * <pre>
268 * \code
269 * UErrorCode errorCode = U_ZERO_ERROR;
270 * // Open a new UBiDiTransform.
271 * UBiDiTransform* transform = ubiditransform_open(&errorCode);
272 * // Run a transformation.
273 * ubiditransform_transform(transform,
274 * text1, -1, text2, -1,
275 * UBIDI_RTL, UBIDI_LOGICAL,
276 * UBIDI_LTR, UBIDI_VISUAL,
277 * UBIDI_MIRRORING_ON,
278 * U_SHAPE_DIGITS_EN2AN,
279 * &errorCode);
280 * // Do something with the output text and invoke another transformation using
281 * // that text as input.
282 * ubiditransform_transform(transform,
283 * text2, -1, text3, -1,
284 * UBIDI_LTR, UBIDI_VISUAL,
285 * UBIDI_RTL, UBIDI_VISUAL,
286 * UBIDI_MIRRORING_ON,
287 * 0, &errorCode);
288 *\endcode
289 * </pre>
290 * <p>
291 * The <code>UBiDiTransform</code> object must be deallocated by calling
292 * <code>ubiditransform_close()</code>.
293 *
294 * @return An empty <code>UBiDiTransform</code> object.
295 * @stable ICU 58
296 */
297U_CAPI UBiDiTransform* U_EXPORT2
298ubiditransform_open(UErrorCode *pErrorCode);
299
300/**
301 * Deallocates the given <code>UBiDiTransform</code> object.
302 * @stable ICU 58
303 */
304U_CAPI void U_EXPORT2
305ubiditransform_close(UBiDiTransform *pBidiTransform);
306
307#if U_SHOW_CPLUSPLUS_API
308
309U_NAMESPACE_BEGIN
310
311/**
312 * \class LocalUBiDiTransformPointer
313 * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
314 * For most methods see the LocalPointerBase base class.
315 *
316 * @see LocalPointerBase
317 * @see LocalPointer
318 * @stable ICU 58
319 */
320U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
321
322U_NAMESPACE_END
323
324#endif
325
326#endif
327