1 | /* |
2 | ****************************************************************************** |
3 | * |
4 | * © 2016 and later: Unicode, Inc. and others. |
5 | * License & terms of use: http://www.unicode.org/copyright.html |
6 | * |
7 | ****************************************************************************** |
8 | * file name: ubiditransform.h |
9 | * encoding: UTF-8 |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * created on: 2016jul24 |
14 | * created by: Lina Kemmel |
15 | * |
16 | */ |
17 | |
18 | #ifndef UBIDITRANSFORM_H |
19 | #define UBIDITRANSFORM_H |
20 | |
21 | #include "unicode/utypes.h" |
22 | #include "unicode/ubidi.h" |
23 | #include "unicode/uchar.h" |
24 | #include "unicode/localpointer.h" |
25 | |
26 | /** |
27 | * \file |
28 | * \brief Bidi Transformations |
29 | */ |
30 | |
/**
 * `UBiDiOrder` indicates the order of text.
 *
 * This bidi transformation engine supports all possible combinations (4 in
 * total) of input and output text order:
 *
 *   - <logical input, visual output>: unless the output direction is RTL, this
 *     corresponds to a normal operation of the Bidi algorithm as described in the
 *     Unicode Technical Report and implemented by `UBiDi` when the
 *     reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
 *     mode is not supported by `UBiDi` and is accomplished through
 *     reversing a visual LTR string,
 *
 *   - <visual input, logical output>: unless the input direction is RTL, this
 *     corresponds to an "inverse bidi algorithm" in `UBiDi` with the
 *     reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
 *     Visual RTL mode is not supported by `UBiDi` and is
 *     accomplished through reversing a visual LTR string,
 *
 *   - <logical input, logical output>: if the input and output base directions
 *     mismatch, this corresponds to the `UBiDi` implementation with the
 *     reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
 *     input and output base directions are identical, the transformation engine
 *     will only handle character mirroring and Arabic shaping operations without
 *     reordering,
 *
 *   - <visual input, visual output>: this reordering mode is not supported by
 *     the `UBiDi` engine; it implies character mirroring, Arabic
 *     shaping, and - if the input/output base directions mismatch - string
 *     reverse operations.
 * @see ubidi_setInverse
 * @see ubidi_setReorderingMode
 * @see UBIDI_REORDER_DEFAULT
 * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
 * @see UBIDI_REORDER_RUNS_ONLY
 * @stable ICU 58
 */
typedef enum {
    /** 0: Constant indicating a logical order.
      * This is the default for input text.
      * @stable ICU 58
      */
    UBIDI_LOGICAL = 0,
    /** 1: Constant indicating a visual order.
      * This is the default for output text.
      * @stable ICU 58
      */
    UBIDI_VISUAL
} UBiDiOrder;
80 | |
/**
 * <code>UBiDiMirroring</code> indicates whether or not characters with the
 * "mirrored" property in RTL runs should be replaced with their mirror-image
 * counterparts.
 * @see UBIDI_DO_MIRRORING
 * @see ubidi_setReorderingOptions
 * @see ubidi_writeReordered
 * @see ubidi_writeReverse
 * @stable ICU 58
 */
typedef enum {
    /** 0: Constant indicating that character mirroring should not be
      * performed.
      * This is the default.
      * @stable ICU 58
      */
    UBIDI_MIRRORING_OFF = 0,
    /** 1: Constant indicating that character mirroring should be performed.
      * This corresponds to calling <code>ubidi_writeReordered</code> or
      * <code>ubidi_writeReverse</code> with the
      * <code>UBIDI_DO_MIRRORING</code> option bit set.
      * @stable ICU 58
      */
    UBIDI_MIRRORING_ON
} UBiDiMirroring;
106 | |
/**
 * Forward declaration of the <code>UBiDiTransform</code> structure that stores
 * information used by the layout transformation engine.
 * The structure is only declared here, not defined, so this header exposes it
 * as an opaque type that is handled through pointers.
 * @stable ICU 58
 */
typedef struct UBiDiTransform UBiDiTransform;
113 | |
114 | /** |
115 | * Performs transformation of text from the bidi layout defined by the input |
116 | * ordering scheme to the bidi layout defined by the output ordering scheme, |
117 | * and applies character mirroring and Arabic shaping operations.<p> |
118 | * In terms of <code>UBiDi</code>, such a transformation implies: |
119 | * <ul> |
120 | * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the |
121 | * reordering mode is other than normal),</li> |
122 | * <li>calling <code>ubidi_setInverse</code> as needed (when text should be |
123 | * transformed from a visual to a logical form),</li> |
124 | * <li>resolving embedding levels of each character in the input text by |
125 | * calling <code>ubidi_setPara</code>,</li> |
126 | * <li>reordering the characters based on the computed embedding levels, also |
127 | * performing character mirroring as needed, and streaming the result to the |
128 | * output, by calling <code>ubidi_writeReordered</code>,</li> |
129 | * <li>performing Arabic digit and letter shaping on the output text by calling |
130 | * <code>u_shapeArabic</code>.</li> |
131 | * </ul> |
132 | * An "ordering scheme" encompasses the base direction and the order of text, |
133 | * and these characteristics must be defined by the caller for both input and |
134 | * output explicitly .<p> |
135 | * There are 36 possible combinations of <input, output> ordering schemes, |
136 | * which are partially supported by <code>UBiDi</code> already. Examples of the |
137 | * currently supported combinations: |
138 | * <ul> |
139 | * <li><Logical LTR, Visual LTR>: this is equivalent to calling |
140 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> |
141 | * <li><Logical RTL, Visual LTR>: this is equivalent to calling |
142 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li> |
143 | * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to |
144 | * calling <code>ubidi_setPara</code> with |
145 | * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li> |
146 | * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to |
147 | * calling <code>ubidi_setPara</code> with |
148 | * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li> |
149 | * <li><Visual LTR, Logical LTR>: this is equivalent to |
150 | * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then |
151 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> |
152 | * <li><Visual LTR, Logical RTL>: this is equivalent to |
153 | * calling <code>ubidi_setInverse(UBiDi*, TRUE)</code> and then |
154 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li> |
155 | * </ul> |
156 | * All combinations that involve the Visual RTL scheme are unsupported by |
157 | * <code>UBiDi</code>, for instance: |
158 | * <ul> |
159 | * <li><Logical LTR, Visual RTL>,</li> |
160 | * <li><Visual RTL, Logical RTL>.</li> |
161 | * </ul> |
162 | * <p>Example of usage of the transformation engine:<br> |
163 | * <pre> |
164 | * \code |
165 | * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0}; |
166 | * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0}; |
167 | * UErrorCode errorCode = U_ZERO_ERROR; |
168 | * // Run a transformation. |
169 | * ubiditransform_transform(pBidiTransform, |
170 | * text1, -1, text2, -1, |
171 | * UBIDI_LTR, UBIDI_VISUAL, |
172 | * UBIDI_RTL, UBIDI_LOGICAL, |
173 | * UBIDI_MIRRORING_OFF, |
174 | * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, |
175 | * &errorCode); |
176 | * // Do something with text2. |
177 | * text2[4] = '2'; |
178 | * // Run a reverse transformation. |
179 | * ubiditransform_transform(pBidiTransform, |
180 | * text2, -1, text1, -1, |
181 | * UBIDI_RTL, UBIDI_LOGICAL, |
182 | * UBIDI_LTR, UBIDI_VISUAL, |
183 | * UBIDI_MIRRORING_OFF, |
184 | * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, |
185 | * &errorCode); |
186 | *\endcode |
187 | * </pre> |
188 | * </p> |
189 | * |
190 | * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object |
191 | * allocated with <code>ubiditransform_open()</code> or |
192 | * <code>NULL</code>.<p> |
193 | * This object serves for one-time setup to amortize initialization |
194 | * overheads. Use of this object is not thread-safe. All other threads |
195 | * should allocate a new <code>UBiDiTransform</code> object by calling |
196 | * <code>ubiditransform_open()</code> before using it. Alternatively, |
197 | * a caller can set this parameter to <code>NULL</code>, in which case |
198 | * the object will be allocated by the engine on the fly.</p> |
199 | * @param src A pointer to the text that the Bidi layout transformations will |
200 | * be performed on. |
201 | * <p><strong>Note:</strong> the text must be (at least) |
202 | * <code>srcLength</code> long.</p> |
203 | * @param srcLength The length of the text, in number of UChars. If |
204 | * <code>length == -1</code> then the text must be zero-terminated. |
205 | * @param dest A pointer to where the processed text is to be copied. |
206 | * @param destSize The size of the <code>dest</code> buffer, in number of |
207 | * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set, |
208 | * then the destination length could be as large as |
209 | * <code>srcLength * 2</code>. Otherwise, the destination length will |
210 | * not exceed <code>srcLength</code>. If the caller reserves the last |
211 | * position for zero-termination, it should be excluded from |
212 | * <code>destSize</code>. |
213 | * <p><code>destSize == -1</code> is allowed and makes sense when |
214 | * <code>dest</code> was holds some meaningful value, e.g. that of |
215 | * <code>src</code>. In this case <code>dest</code> must be |
216 | * zero-terminated.</p> |
217 | * @param inParaLevel A base embedding level of the input as defined in |
218 | * <code>ubidi_setPara</code> documentation for the |
219 | * <code>paraLevel</code> parameter. |
220 | * @param inOrder An order of the input, which can be one of the |
221 | * <code>UBiDiOrder</code> values. |
222 | * @param outParaLevel A base embedding level of the output as defined in |
223 | * <code>ubidi_setPara</code> documentation for the |
224 | * <code>paraLevel</code> parameter. |
225 | * @param outOrder An order of the output, which can be one of the |
226 | * <code>UBiDiOrder</code> values. |
227 | * @param doMirroring Indicates whether or not to perform character mirroring, |
228 | * and can accept one of the <code>UBiDiMirroring</code> values. |
229 | * @param shapingOptions Arabic digit and letter shaping options defined in the |
230 | * ushape.h documentation. |
231 | * <p><strong>Note:</strong> Direction indicator options are computed by |
232 | * the transformation engine based on the effective ordering schemes, so |
233 | * user-defined direction indicators will be ignored.</p> |
234 | * @param pErrorCode A pointer to an error code value. |
235 | * |
236 | * @return The destination length, i.e. the number of UChars written to |
237 | * <code>dest</code>. If the transformation fails, the return value |
238 | * will be 0 (and the error code will be written to |
239 | * <code>pErrorCode</code>). |
240 | * |
241 | * @see UBiDiLevel |
242 | * @see UBiDiOrder |
243 | * @see UBiDiMirroring |
244 | * @see ubidi_setPara |
245 | * @see u_shapeArabic |
246 | * @stable ICU 58 |
247 | */ |
248 | U_STABLE uint32_t U_EXPORT2 |
249 | ubiditransform_transform(UBiDiTransform *pBiDiTransform, |
250 | const UChar *src, int32_t srcLength, |
251 | UChar *dest, int32_t destSize, |
252 | UBiDiLevel inParaLevel, UBiDiOrder inOrder, |
253 | UBiDiLevel outParaLevel, UBiDiOrder outOrder, |
254 | UBiDiMirroring doMirroring, uint32_t shapingOptions, |
255 | UErrorCode *pErrorCode); |
256 | |
257 | /** |
258 | * Allocates a <code>UBiDiTransform</code> object. This object can be reused, |
259 | * e.g. with different ordering schemes, mirroring or shaping options.<p> |
260 | * <strong>Note:</strong>The object can only be reused in the same thread. |
261 | * All other threads should allocate a new <code>UBiDiTransform</code> object |
262 | * before using it.<p> |
263 | * Example of usage:<p> |
264 | * <pre> |
265 | * \code |
266 | * UErrorCode errorCode = U_ZERO_ERROR; |
267 | * // Open a new UBiDiTransform. |
268 | * UBiDiTransform* transform = ubiditransform_open(&errorCode); |
269 | * // Run a transformation. |
270 | * ubiditransform_transform(transform, |
271 | * text1, -1, text2, -1, |
272 | * UBIDI_RTL, UBIDI_LOGICAL, |
273 | * UBIDI_LTR, UBIDI_VISUAL, |
274 | * UBIDI_MIRRORING_ON, |
275 | * U_SHAPE_DIGITS_EN2AN, |
276 | * &errorCode); |
277 | * // Do something with the output text and invoke another transformation using |
278 | * // that text as input. |
279 | * ubiditransform_transform(transform, |
280 | * text2, -1, text3, -1, |
281 | * UBIDI_LTR, UBIDI_VISUAL, |
282 | * UBIDI_RTL, UBIDI_VISUAL, |
283 | * UBIDI_MIRRORING_ON, |
284 | * 0, &errorCode); |
285 | *\endcode |
286 | * </pre> |
287 | * <p> |
288 | * The <code>UBiDiTransform</code> object must be deallocated by calling |
289 | * <code>ubiditransform_close()</code>. |
290 | * |
291 | * @return An empty <code>UBiDiTransform</code> object. |
292 | * @stable ICU 58 |
293 | */ |
294 | U_STABLE UBiDiTransform* U_EXPORT2 |
295 | ubiditransform_open(UErrorCode *pErrorCode); |
296 | |
297 | /** |
298 | * Deallocates the given <code>UBiDiTransform</code> object. |
299 | * @stable ICU 58 |
300 | */ |
301 | U_STABLE void U_EXPORT2 |
302 | ubiditransform_close(UBiDiTransform *pBidiTransform); |
303 | |
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUBiDiTransformPointer
 * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
 * For most methods see the LocalPointerBase base class.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 58
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);

U_NAMESPACE_END

#endif /* U_SHOW_CPLUSPLUS_API */
322 | |
323 | #endif |
324 | |