1 | /* |
2 | ****************************************************************************** |
3 | * |
4 | * © 2016 and later: Unicode, Inc. and others. |
5 | * License & terms of use: http://www.unicode.org/copyright.html |
6 | * |
7 | ****************************************************************************** |
8 | * file name: ubiditransform.h |
9 | * encoding: UTF-8 |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * created on: 2016jul24 |
14 | * created by: Lina Kemmel |
15 | * |
16 | */ |
17 | |
18 | #ifndef UBIDITRANSFORM_H |
19 | #define UBIDITRANSFORM_H |
20 | |
21 | #include "unicode/utypes.h" |
22 | #include "unicode/ubidi.h" |
23 | #include "unicode/uchar.h" |
24 | |
25 | #if U_SHOW_CPLUSPLUS_API |
26 | #include "unicode/localpointer.h" |
27 | #endif // U_SHOW_CPLUSPLUS_API |
28 | |
29 | /** |
30 | * \file |
31 | * \brief C API: Bidi Transformations |
32 | */ |
33 | |
34 | /** |
35 | * `UBiDiOrder` indicates the order of text. |
36 | * |
37 | * This bidi transformation engine supports all possible combinations (4 in |
38 | * total) of input and output text order: |
39 | * |
40 | * - <logical input, visual output>: unless the output direction is RTL, this |
41 | * corresponds to a normal operation of the Bidi algorithm as described in the |
42 | * Unicode Technical Report and implemented by `UBiDi` when the |
43 | * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL |
44 | * mode is not supported by `UBiDi` and is accomplished through |
45 | * reversing a visual LTR string, |
46 | * |
47 | * - <visual input, logical output>: unless the input direction is RTL, this |
48 | * corresponds to an "inverse bidi algorithm" in `UBiDi` with the |
49 | * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`. |
 * Visual RTL mode is not supported by `UBiDi` and is
51 | * accomplished through reversing a visual LTR string, |
52 | * |
53 | * - <logical input, logical output>: if the input and output base directions |
54 | * mismatch, this corresponds to the `UBiDi` implementation with the |
55 | * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the |
56 | * input and output base directions are identical, the transformation engine |
57 | * will only handle character mirroring and Arabic shaping operations without |
58 | * reordering, |
59 | * |
60 | * - <visual input, visual output>: this reordering mode is not supported by |
61 | * the `UBiDi` engine; it implies character mirroring, Arabic |
62 | * shaping, and - if the input/output base directions mismatch - string |
63 | * reverse operations. |
64 | * @see ubidi_setInverse |
65 | * @see ubidi_setReorderingMode |
66 | * @see UBIDI_REORDER_DEFAULT |
67 | * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT |
68 | * @see UBIDI_REORDER_RUNS_ONLY |
69 | * @stable ICU 58 |
70 | */ |
typedef enum {
    /**
     * 0: The text is in logical order.
     * Logical order is the default for input text.
     * @stable ICU 58
     */
    UBIDI_LOGICAL = 0,

    /**
     * 1: The text is in visual order.
     * Visual order is the default for output text.
     * @stable ICU 58
     */
    UBIDI_VISUAL = 1
} UBiDiOrder;
83 | |
84 | /** |
85 | * <code>UBiDiMirroring</code> indicates whether or not characters with the |
86 | * "mirrored" property in RTL runs should be replaced with their mirror-image |
87 | * counterparts. |
88 | * @see UBIDI_DO_MIRRORING |
89 | * @see ubidi_setReorderingOptions |
90 | * @see ubidi_writeReordered |
91 | * @see ubidi_writeReverse |
92 | * @stable ICU 58 |
93 | */ |
typedef enum {
    /**
     * 0: Character mirroring is not performed.
     * This is the default.
     * @stable ICU 58
     */
    UBIDI_MIRRORING_OFF = 0,

    /**
     * 1: Character mirroring is performed.
     * Equivalent to setting the <code>UBIDI_DO_MIRRORING</code> option bit
     * when calling <code>ubidi_writeReordered</code> or
     * <code>ubidi_writeReverse</code>.
     * @stable ICU 58
     */
    UBIDI_MIRRORING_ON = 1
} UBiDiMirroring;
109 | |
struct UBiDiTransform;

/**
 * The <code>UBiDiTransform</code> structure stores the information used by
 * the layout transformation engine. It is only forward-declared here; this
 * header does not declare its members, so callers handle it through
 * pointers.
 * @stable ICU 58
 */
typedef struct UBiDiTransform UBiDiTransform;
116 | |
117 | /** |
118 | * Performs transformation of text from the bidi layout defined by the input |
119 | * ordering scheme to the bidi layout defined by the output ordering scheme, |
120 | * and applies character mirroring and Arabic shaping operations.<p> |
121 | * In terms of <code>UBiDi</code>, such a transformation implies: |
122 | * <ul> |
123 | * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the |
124 | * reordering mode is other than normal),</li> |
125 | * <li>calling <code>ubidi_setInverse</code> as needed (when text should be |
126 | * transformed from a visual to a logical form),</li> |
127 | * <li>resolving embedding levels of each character in the input text by |
128 | * calling <code>ubidi_setPara</code>,</li> |
129 | * <li>reordering the characters based on the computed embedding levels, also |
130 | * performing character mirroring as needed, and streaming the result to the |
131 | * output, by calling <code>ubidi_writeReordered</code>,</li> |
132 | * <li>performing Arabic digit and letter shaping on the output text by calling |
133 | * <code>u_shapeArabic</code>.</li> |
134 | * </ul> |
135 | * An "ordering scheme" encompasses the base direction and the order of text, |
136 | * and these characteristics must be defined by the caller for both input and |
 * output explicitly.<p>
138 | * There are 36 possible combinations of <input, output> ordering schemes, |
139 | * which are partially supported by <code>UBiDi</code> already. Examples of the |
140 | * currently supported combinations: |
141 | * <ul> |
142 | * <li><Logical LTR, Visual LTR>: this is equivalent to calling |
143 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> |
144 | * <li><Logical RTL, Visual LTR>: this is equivalent to calling |
145 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li> |
146 | * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to |
147 | * calling <code>ubidi_setPara</code> with |
148 | * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li> |
149 | * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to |
150 | * calling <code>ubidi_setPara</code> with |
151 | * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li> |
152 | * <li><Visual LTR, Logical LTR>: this is equivalent to |
153 | * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then |
154 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li> |
155 | * <li><Visual LTR, Logical RTL>: this is equivalent to |
156 | * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then |
157 | * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li> |
158 | * </ul> |
159 | * All combinations that involve the Visual RTL scheme are unsupported by |
160 | * <code>UBiDi</code>, for instance: |
161 | * <ul> |
162 | * <li><Logical LTR, Visual RTL>,</li> |
163 | * <li><Visual RTL, Logical RTL>.</li> |
164 | * </ul> |
165 | * <p>Example of usage of the transformation engine:<br> |
166 | * <pre> |
167 | * \code |
168 | * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0}; |
169 | * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0}; |
170 | * UErrorCode errorCode = U_ZERO_ERROR; |
171 | * // Run a transformation. |
172 | * ubiditransform_transform(pBidiTransform, |
173 | * text1, -1, text2, -1, |
174 | * UBIDI_LTR, UBIDI_VISUAL, |
175 | * UBIDI_RTL, UBIDI_LOGICAL, |
176 | * UBIDI_MIRRORING_OFF, |
177 | * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, |
178 | * &errorCode); |
179 | * // Do something with text2. |
180 | * text2[4] = '2'; |
181 | * // Run a reverse transformation. |
182 | * ubiditransform_transform(pBidiTransform, |
183 | * text2, -1, text1, -1, |
184 | * UBIDI_RTL, UBIDI_LOGICAL, |
185 | * UBIDI_LTR, UBIDI_VISUAL, |
186 | * UBIDI_MIRRORING_OFF, |
187 | * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED, |
188 | * &errorCode); |
189 | *\endcode |
190 | * </pre> |
191 | * </p> |
192 | * |
193 | * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object |
194 | * allocated with <code>ubiditransform_open()</code> or |
195 | * <code>NULL</code>.<p> |
196 | * This object serves for one-time setup to amortize initialization |
197 | * overheads. Use of this object is not thread-safe. All other threads |
198 | * should allocate a new <code>UBiDiTransform</code> object by calling |
199 | * <code>ubiditransform_open()</code> before using it. Alternatively, |
200 | * a caller can set this parameter to <code>NULL</code>, in which case |
201 | * the object will be allocated by the engine on the fly.</p> |
202 | * @param src A pointer to the text that the Bidi layout transformations will |
203 | * be performed on. |
204 | * <p><strong>Note:</strong> the text must be (at least) |
205 | * <code>srcLength</code> long.</p> |
206 | * @param srcLength The length of the text, in number of UChars. If |
 *        <code>srcLength == -1</code> then the text must be zero-terminated.
208 | * @param dest A pointer to where the processed text is to be copied. |
209 | * @param destSize The size of the <code>dest</code> buffer, in number of |
210 | * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set, |
211 | * then the destination length could be as large as |
212 | * <code>srcLength * 2</code>. Otherwise, the destination length will |
213 | * not exceed <code>srcLength</code>. If the caller reserves the last |
214 | * position for zero-termination, it should be excluded from |
215 | * <code>destSize</code>. |
216 | * <p><code>destSize == -1</code> is allowed and makes sense when |
 *        <code>dest</code> already holds some meaningful value, e.g. that of
218 | * <code>src</code>. In this case <code>dest</code> must be |
219 | * zero-terminated.</p> |
220 | * @param inParaLevel A base embedding level of the input as defined in |
221 | * <code>ubidi_setPara</code> documentation for the |
222 | * <code>paraLevel</code> parameter. |
223 | * @param inOrder An order of the input, which can be one of the |
224 | * <code>UBiDiOrder</code> values. |
225 | * @param outParaLevel A base embedding level of the output as defined in |
226 | * <code>ubidi_setPara</code> documentation for the |
227 | * <code>paraLevel</code> parameter. |
228 | * @param outOrder An order of the output, which can be one of the |
229 | * <code>UBiDiOrder</code> values. |
230 | * @param doMirroring Indicates whether or not to perform character mirroring, |
231 | * and can accept one of the <code>UBiDiMirroring</code> values. |
232 | * @param shapingOptions Arabic digit and letter shaping options defined in the |
233 | * ushape.h documentation. |
234 | * <p><strong>Note:</strong> Direction indicator options are computed by |
235 | * the transformation engine based on the effective ordering schemes, so |
236 | * user-defined direction indicators will be ignored.</p> |
237 | * @param pErrorCode A pointer to an error code value. |
238 | * |
239 | * @return The destination length, i.e. the number of UChars written to |
240 | * <code>dest</code>. If the transformation fails, the return value |
241 | * will be 0 (and the error code will be written to |
242 | * <code>pErrorCode</code>). |
243 | * |
244 | * @see UBiDiLevel |
245 | * @see UBiDiOrder |
246 | * @see UBiDiMirroring |
247 | * @see ubidi_setPara |
248 | * @see u_shapeArabic |
249 | * @stable ICU 58 |
250 | */ |
251 | U_CAPI uint32_t U_EXPORT2 |
252 | ubiditransform_transform(UBiDiTransform *pBiDiTransform, |
253 | const UChar *src, int32_t srcLength, |
254 | UChar *dest, int32_t destSize, |
255 | UBiDiLevel inParaLevel, UBiDiOrder inOrder, |
256 | UBiDiLevel outParaLevel, UBiDiOrder outOrder, |
257 | UBiDiMirroring doMirroring, uint32_t shapingOptions, |
258 | UErrorCode *pErrorCode); |
259 | |
260 | /** |
261 | * Allocates a <code>UBiDiTransform</code> object. This object can be reused, |
262 | * e.g. with different ordering schemes, mirroring or shaping options.<p> |
263 | * <strong>Note:</strong>The object can only be reused in the same thread. |
264 | * All other threads should allocate a new <code>UBiDiTransform</code> object |
265 | * before using it.<p> |
266 | * Example of usage:<p> |
267 | * <pre> |
268 | * \code |
269 | * UErrorCode errorCode = U_ZERO_ERROR; |
270 | * // Open a new UBiDiTransform. |
271 | * UBiDiTransform* transform = ubiditransform_open(&errorCode); |
272 | * // Run a transformation. |
273 | * ubiditransform_transform(transform, |
274 | * text1, -1, text2, -1, |
275 | * UBIDI_RTL, UBIDI_LOGICAL, |
276 | * UBIDI_LTR, UBIDI_VISUAL, |
277 | * UBIDI_MIRRORING_ON, |
278 | * U_SHAPE_DIGITS_EN2AN, |
279 | * &errorCode); |
280 | * // Do something with the output text and invoke another transformation using |
281 | * // that text as input. |
282 | * ubiditransform_transform(transform, |
283 | * text2, -1, text3, -1, |
284 | * UBIDI_LTR, UBIDI_VISUAL, |
285 | * UBIDI_RTL, UBIDI_VISUAL, |
286 | * UBIDI_MIRRORING_ON, |
287 | * 0, &errorCode); |
288 | *\endcode |
289 | * </pre> |
290 | * <p> |
291 | * The <code>UBiDiTransform</code> object must be deallocated by calling |
292 | * <code>ubiditransform_close()</code>. |
293 | * |
294 | * @return An empty <code>UBiDiTransform</code> object. |
295 | * @stable ICU 58 |
296 | */ |
297 | U_CAPI UBiDiTransform* U_EXPORT2 |
298 | ubiditransform_open(UErrorCode *pErrorCode); |
299 | |
300 | /** |
301 | * Deallocates the given <code>UBiDiTransform</code> object. |
302 | * @stable ICU 58 |
303 | */ |
304 | U_CAPI void U_EXPORT2 |
305 | ubiditransform_close(UBiDiTransform *pBidiTransform); |
306 | |
#if U_SHOW_CPLUSPLUS_API

U_NAMESPACE_BEGIN

/**
 * \class LocalUBiDiTransformPointer
 * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
 * For most methods see the LocalPointerBase base class.
 * This class is declared only when U_SHOW_CPLUSPLUS_API is enabled.
 *
 * @see LocalPointerBase
 * @see LocalPointer
 * @stable ICU 58
 */
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);

U_NAMESPACE_END

#endif // U_SHOW_CPLUSPLUS_API
325 | |
326 | #endif |
327 | |