normlzr.h source code [ClickHouse/contrib/icu/icu4c/source/common/unicode/normlzr.h]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	********************************************************************
5	* COPYRIGHT:
6	* Copyright (c) 1996-2015, International Business Machines Corporation and
7	* others. All Rights Reserved.
8	********************************************************************
9	*/
10
11	#ifndef NORMLZR_H
12	#define NORMLZR_H
13
14	#include "unicode/utypes.h"
15
16	#if U_SHOW_CPLUSPLUS_API
17
18	/**
19	* \file
20	* \brief C++ API: Unicode Normalization
21	*/
22
23	#if !UCONFIG_NO_NORMALIZATION
24
25	#include "unicode/chariter.h"
26	#include "unicode/normalizer2.h"
27	#include "unicode/unistr.h"
28	#include "unicode/unorm.h"
29	#include "unicode/uobject.h"
30
31	U_NAMESPACE_BEGIN
32	/**
33	* Old Unicode normalization API.
34	*
35	* This API has been replaced by the Normalizer2 class and is only available
36	* for backward compatibility. This class simply delegates to the Normalizer2 class.
37	* There is one exception: The new API does not provide a replacement for Normalizer::compare().
38	*
39	* The Normalizer class supports the standard normalization forms described in
40	* <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
41	* Unicode Standard Annex #15: Unicode Normalization Forms</a>.
42	*
43	* The Normalizer class consists of two parts:
44	* - static functions that normalize strings or test if strings are normalized
45	* - a Normalizer object is an iterator that takes any kind of text and
46	* provides iteration over its normalized form
47	*
48	* The Normalizer class is not suitable for subclassing.
49	*
50	* For basic information about normalization forms and details about the C API
51	* please see the documentation in unorm.h.
52	*
53	* The iterator API with the Normalizer constructors and the non-static functions
54	* use a CharacterIterator as input. It is possible to pass a string which
55	* is then internally wrapped in a CharacterIterator.
56	* The input text is not normalized all at once, but incrementally where needed
57	* (providing efficient random access).
58	* This makes it possible to pass in a large text but spend only a small amount of time
59	* normalizing a small part of that text.
60	* However, if the entire text is normalized, then the iterator will be
61	* slower than normalizing the entire text at once and iterating over the result.
62	* A possible use of the Normalizer iterator is also to report an index into the
63	* original text that is close to where the normalized characters come from.
64	*
65	* <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
66	* The earlier implementation reported the getIndex() inconsistently,
67	* and previous() could not be used after setIndex(), next(), first(), and current().
68	*
69	* Normalizer makes it possible to start normalizing from anywhere in the input text by
70	* calling setIndexOnly(), first(), or last().
71	* Without calling any of these, the iterator will start at the beginning of the text.
72	*
73	* At any time, next() returns the next normalized code point (UChar32),
74	* with post-increment semantics (like CharacterIterator::next32PostInc()).
75	* previous() returns the previous normalized code point (UChar32),
76	* with pre-decrement semantics (like CharacterIterator::previous32()).
77	*
78	* current() returns the current code point
79	* (respectively the one at the newly set index) without moving
80	* the getIndex(). Note that if the text at the current position
81	* needs to be normalized, then these functions will do that.
82	* (This is why current() is not const.)
83	* It is more efficient to call setIndexOnly() instead, which does not
84	* normalize.
85	*
86	* getIndex() always refers to the position in the input text where the normalized
87	* code points are returned from. It does not always change with each returned
88	* code point.
89	* The code point that is returned from any of the functions
90	* corresponds to text at or after getIndex(), according to the
91	* function's iteration semantics (post-increment or pre-decrement).
92	*
93	* next() returns a code point from at or after the getIndex()
94	* from before the next() call. After the next() call, the getIndex()
95	* might have moved to where the next code point will be returned from
96	* (from a next() or current() call).
97	* This is semantically equivalent to array access with array[index++]
98	* (post-increment semantics).
99	*
100	* previous() returns a code point from at or after the getIndex()
101	* from after the previous() call.
102	* This is semantically equivalent to array access with array[--index]
103	* (pre-decrement semantics).
104	*
105	* Internally, the Normalizer iterator normalizes a small piece of text
106	* starting at the getIndex() and ending at a following "safe" index.
107	* The normalized result is stored in an internal string buffer, and
108	* the code points are iterated from there.
109	* With multiple iteration calls, this is repeated until the next piece
110	* of text needs to be normalized, and the getIndex() needs to be moved.
111	*
112	* The following "safe" index, the internal buffer, and the secondary
113	* iteration index into that buffer are not exposed on the API.
114	* This also means that it is currently not practical to return to
115	* a particular, arbitrary position in the text because one would need to
116	* know, and be able to set, in addition to the getIndex(), at least also the
117	* current index into the internal buffer.
118	* It is currently only possible to observe when getIndex() changes
119	* (with careful consideration of the iteration semantics),
120	* at which time the internal index will be 0.
121	* For example, if getIndex() is different after next() than before it,
122	* then the internal index is 0 and one can return to this getIndex()
123	* later with setIndexOnly().
124	*
125	* Note: While the setIndex() and getIndex() refer to indices in the
126	* underlying Unicode input text, the next() and previous() methods
127	* iterate through characters in the normalized output.
128	* This means that there is not necessarily a one-to-one correspondence
129	* between characters returned by next() and previous() and the indices
130	* passed to and returned from setIndex() and getIndex().
131	* It is for this reason that Normalizer does not implement the CharacterIterator interface.
132	*
133	* @author Laura Werner, Mark Davis, Markus Scherer
134	* @stable ICU 2.0
135	*/
136	class U_COMMON_API Normalizer : public UObject {
137	public:
138	#ifndef U_HIDE_DEPRECATED_API
139	/**
140	* If DONE is returned from an iteration function that returns a code point,
141	* then there are no more normalization results available.
142	* @deprecated ICU 56 Use Normalizer2 instead.
143	*/
144	enum {
145	DONE=`0xffff`
146	};
147
148	// Constructors
149
150	/**
151	* Creates a new <code>Normalizer</code> object for iterating over the
152	* normalized form of a given string.
153	* <p>
154	* @param str The string to be normalized. The normalization
155	* will start at the beginning of the string.
156	*
157	* @param mode The normalization mode.
158	* @deprecated ICU 56 Use Normalizer2 instead.
159	*/
160	Normalizer(const UnicodeString& str, UNormalizationMode mode);
161
162	/**
163	* Creates a new <code>Normalizer</code> object for iterating over the
164	* normalized form of a given string.
165	* <p>
166	* @param str The string to be normalized. The normalization
167	* will start at the beginning of the string.
168	*
169	* @param length Length of the string, or -1 if NUL-terminated.
170	* @param mode The normalization mode.
171	* @deprecated ICU 56 Use Normalizer2 instead.
172	*/
173	Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
174
175	/**
176	* Creates a new <code>Normalizer</code> object for iterating over the
177	* normalized form of the given text.
178	* <p>
179	* @param iter The input text to be normalized. The normalization
180	* will start at the beginning of the string.
181	*
182	* @param mode The normalization mode.
183	* @deprecated ICU 56 Use Normalizer2 instead.
184	*/
185	Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
186	#endif /* U_HIDE_DEPRECATED_API */
187
188	#ifndef U_FORCE_HIDE_DEPRECATED_API
189	/**
190	* Copy constructor.
191	* @param copy The object to be copied.
192	* @deprecated ICU 56 Use Normalizer2 instead.
193	*/
194	Normalizer(const Normalizer& copy);
195
196	/**
197	* Destructor
198	* @deprecated ICU 56 Use Normalizer2 instead.
199	*/
200	virtual ~Normalizer();
201	#endif // U_FORCE_HIDE_DEPRECATED_API
202
203	//-------------------------------------------------------------------------
204	// Static utility methods
205	//-------------------------------------------------------------------------
206
207	#ifndef U_HIDE_DEPRECATED_API
208	/**
209	* Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
210	* This is a wrapper for unorm_normalize(), using UnicodeString's.
211	*
212	* The <code>options</code> parameter specifies which optional
213	* <code>Normalizer</code> features are to be enabled for this operation.
214	*
215	* @param source the input string to be normalized.
216	* @param mode the normalization mode
217	* @param options the optional features to be enabled (0 for no options)
218	* @param result The normalized string (on output).
219	* @param status The error code.
220	* @deprecated ICU 56 Use Normalizer2 instead.
221	*/
222	static void U_EXPORT2 normalize(const UnicodeString& source,
223	UNormalizationMode mode, int32_t options,
224	UnicodeString& result,
225	UErrorCode &status);
226
227	/**
228	* Compose a <code>UnicodeString</code>.
229	* This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
230	* This is a wrapper for unorm_normalize(), using UnicodeString's.
231	*
232	* The <code>options</code> parameter specifies which optional
233	* <code>Normalizer</code> features are to be enabled for this operation.
234	*
235	* @param source the string to be composed.
236	* @param compat Perform compatibility decomposition before composition.
237	* If this argument is <code>FALSE</code>, only canonical
238	* decomposition will be performed.
239	* @param options the optional features to be enabled (0 for no options)
240	* @param result The composed string (on output).
241	* @param status The error code.
242	* @deprecated ICU 56 Use Normalizer2 instead.
243	*/
244	static void U_EXPORT2 compose(const UnicodeString& source,
245	UBool compat, int32_t options,
246	UnicodeString& result,
247	UErrorCode &status);
248
249	/**
250	* Static method to decompose a <code>UnicodeString</code>.
251	* This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
252	* This is a wrapper for unorm_normalize(), using UnicodeString's.
253	*
254	* The <code>options</code> parameter specifies which optional
255	* <code>Normalizer</code> features are to be enabled for this operation.
256	*
257	* @param source the string to be decomposed.
258	* @param compat Perform compatibility decomposition.
259	* If this argument is <code>FALSE</code>, only canonical
260	* decomposition will be performed.
261	* @param options the optional features to be enabled (0 for no options)
262	* @param result The decomposed string (on output).
263	* @param status The error code.
264	* @deprecated ICU 56 Use Normalizer2 instead.
265	*/
266	static void U_EXPORT2 decompose(const UnicodeString& source,
267	UBool compat, int32_t options,
268	UnicodeString& result,
269	UErrorCode &status);
270
271	/**
272	* Performing quick check on a string, to quickly determine if the string is
273	* in a particular normalization format.
274	* This is a wrapper for unorm_quickCheck(), using a UnicodeString.
275	*
276	* Three types of result can be returned UNORM_YES, UNORM_NO or
277	* UNORM_MAYBE. Result UNORM_YES indicates that the argument
278	* string is in the desired normalized format, UNORM_NO determines that
279	* argument string is not in the desired normalized format. A
280	* UNORM_MAYBE result indicates that a more thorough check is required,
281	* the user may have to put the string in its normalized form and compare the
282	* results.
283	* @param source string for determining if it is in a normalized format
284	* @param mode normalization format
285	* @param status A reference to a UErrorCode to receive any errors
286	* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
287	*
288	* @see isNormalized
289	* @deprecated ICU 56 Use Normalizer2 instead.
290	*/
291	static inline UNormalizationCheckResult
292	quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
293
294	/**
295	* Performing quick check on a string; same as the other version of quickCheck
296	* but takes an extra options parameter like most normalization functions.
297	*
298	* @param source string for determining if it is in a normalized format
299	* @param mode normalization format
300	* @param options the optional features to be enabled (0 for no options)
301	* @param status A reference to a UErrorCode to receive any errors
302	* @return UNORM_YES, UNORM_NO or UNORM_MAYBE
303	*
304	* @see isNormalized
305	* @deprecated ICU 56 Use Normalizer2 instead.
306	*/
307	static UNormalizationCheckResult
308	quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
309
310	/**
311	* Test if a string is in a given normalization form.
312	* This is semantically equivalent to source.equals(normalize(source, mode)) .
313	*
314	* Unlike unorm_quickCheck(), this function returns a definitive result,
315	* never a "maybe".
316	* For NFD, NFKD, and FCD, both functions work exactly the same.
317	* For NFC and NFKC where quickCheck may return "maybe", this function will
318	* perform further tests to arrive at a TRUE/FALSE result.
319	*
320	* @param src String that is to be tested if it is in a normalization format.
321	* @param mode Which normalization form to test for.
322	* @param errorCode ICU error code in/out parameter.
323	* Must fulfill U_SUCCESS before the function call.
324	* @return Boolean value indicating whether the source string is in the
325	* "mode" normalization form.
326	*
327	* @see quickCheck
328	* @deprecated ICU 56 Use Normalizer2 instead.
329	*/
330	static inline UBool
331	isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
332
333	/**
334	* Test if a string is in a given normalization form; same as the other version of isNormalized
335	* but takes an extra options parameter like most normalization functions.
336	*
337	* @param src String that is to be tested if it is in a normalization format.
338	* @param mode Which normalization form to test for.
339	* @param options the optional features to be enabled (0 for no options)
340	* @param errorCode ICU error code in/out parameter.
341	* Must fulfill U_SUCCESS before the function call.
342	* @return Boolean value indicating whether the source string is in the
343	* "mode" normalization form.
344	*
345	* @see quickCheck
346	* @deprecated ICU 56 Use Normalizer2 instead.
347	*/
348	static UBool
349	isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
350
351	/**
352	* Concatenate normalized strings, making sure that the result is normalized as well.
353	*
354	* If both the left and the right strings are in
355	* the normalization form according to "mode/options",
356	* then the result will be
357	*
358	* \code
359	* dest=normalize(left+right, mode, options)
360	* \endcode
361	*
362	* For details see unorm_concatenate in unorm.h.
363	*
364	* @param left Left source string.
365	* @param right Right source string.
366	* @param result The output string.
367	* @param mode The normalization mode.
368	* @param options A bit set of normalization options.
369	* @param errorCode ICU error code in/out parameter.
370	* Must fulfill U_SUCCESS before the function call.
371	* @return result
372	*
373	* @see unorm_concatenate
374	* @see normalize
375	* @see unorm_next
376	* @see unorm_previous
377	*
378	* @deprecated ICU 56 Use Normalizer2 instead.
379	*/
380	static UnicodeString &
381	U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
382	UnicodeString &result,
383	UNormalizationMode mode, int32_t options,
384	UErrorCode &errorCode);
385	#endif /* U_HIDE_DEPRECATED_API */
386
387	/**
388	* Compare two strings for canonical equivalence.
389	* Further options include case-insensitive comparison and
390	* code point order (as opposed to code unit order).
391	*
392	* Canonical equivalence between two strings is defined as their normalized
393	* forms (NFD or NFC) being identical.
394	* This function compares strings incrementally instead of normalizing
395	* (and optionally case-folding) both strings entirely,
396	* improving performance significantly.
397	*
398	* Bulk normalization is only necessary if the strings do not fulfill the FCD
399	* conditions. Only in this case, and only if the strings are relatively long,
400	* is memory allocated temporarily.
401	* For FCD strings and short non-FCD strings there is no memory allocation.
402	*
403	* Semantically, this is equivalent to
404	* strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
405	* where code point order and foldCase are all optional.
406	*
407	* UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
408	* the case folding must be performed first, then the normalization.
409	*
410	* @param s1 First source string.
411	* @param s2 Second source string.
412	*
413	* @param options A bit set of options:
414	* - U_FOLD_CASE_DEFAULT or 0 is used for default options:
415	* Case-sensitive comparison in code unit order, and the input strings
416	* are quick-checked for FCD.
417	*
418	* - UNORM_INPUT_IS_FCD
419	* Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
420	* If not set, the function will quickCheck for FCD
421	* and normalize if necessary.
422	*
423	* - U_COMPARE_CODE_POINT_ORDER
424	* Set to choose code point order instead of code unit order
425	* (see u_strCompare for details).
426	*
427	* - U_COMPARE_IGNORE_CASE
428	* Set to compare strings case-insensitively using case folding,
429	* instead of case-sensitively.
430	* If set, then the following case folding options are used.
431	*
432	* - Options as used with case-insensitive comparisons, currently:
433	*
434	* - U_FOLD_CASE_EXCLUDE_SPECIAL_I
435	* (see u_strCaseCompare for details)
436	*
437	* - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
438	*
439	* @param errorCode ICU error code in/out parameter.
440	* Must fulfill U_SUCCESS before the function call.
441	* @return <0 or 0 or >0 as usual for string comparisons
442	*
443	* @see unorm_compare
444	* @see normalize
445	* @see UNORM_FCD
446	* @see u_strCompare
447	* @see u_strCaseCompare
448	*
449	* @stable ICU 2.2
450	*/
451	static inline int32_t
452	compare(const UnicodeString &s1, const UnicodeString &s2,
453	uint32_t options,
454	UErrorCode &errorCode);
455
456	#ifndef U_HIDE_DEPRECATED_API
457	//-------------------------------------------------------------------------
458	// Iteration API
459	//-------------------------------------------------------------------------
460
461	/**
462	* Return the current character in the normalized text.
463	* current() may need to normalize some text at getIndex().
464	* The getIndex() is not changed.
465	*
466	* @return the current normalized code point
467	* @deprecated ICU 56 Use Normalizer2 instead.
468	*/
469	UChar32 current(void);
470
471	/**
472	* Return the first character in the normalized text.
473	* This is equivalent to setIndexOnly(startIndex()) followed by next().
474	* (Post-increment semantics.)
475	*
476	* @return the first normalized code point
477	* @deprecated ICU 56 Use Normalizer2 instead.
478	*/
479	UChar32 first(void);
480
481	/**
482	* Return the last character in the normalized text.
483	* This is equivalent to setIndexOnly(endIndex()) followed by previous().
484	* (Pre-decrement semantics.)
485	*
486	* @return the last normalized code point
487	* @deprecated ICU 56 Use Normalizer2 instead.
488	*/
489	UChar32 last(void);
490
491	/**
492	* Return the next character in the normalized text.
493	* (Post-increment semantics.)
494	* If the end of the text has already been reached, DONE is returned.
495	* The DONE value could be confused with a U+FFFF non-character code point
496	* in the text. If this is possible, you can test getIndex()<endIndex()
497	* before calling next(), or (getIndex()<endIndex() \|\| last()!=DONE)
498	* after calling next(). (Calling last() will change the iterator state!)
499	*
500	* The C API unorm_next() is more efficient and does not have this ambiguity.
501	*
502	* @return the next normalized code point
503	* @deprecated ICU 56 Use Normalizer2 instead.
504	*/
505	UChar32 next(void);
506
507	/**
508	* Return the previous character in the normalized text and decrement.
509	* (Pre-decrement semantics.)
510	* If the beginning of the text has already been reached, DONE is returned.
511	* The DONE value could be confused with a U+FFFF non-character code point
512	* in the text. If this is possible, you can test
513	* (getIndex()>startIndex() \|\| first()!=DONE). (Calling first() will change
514	* the iterator state!)
515	*
516	* The C API unorm_previous() is more efficient and does not have this ambiguity.
517	*
518	* @return the previous normalized code point
519	* @deprecated ICU 56 Use Normalizer2 instead.
520	*/
521	UChar32 previous(void);
522
523	/**
524	* Set the iteration position in the input text that is being normalized,
525	* without any immediate normalization.
526	* After setIndexOnly(), getIndex() will return the same index that is
527	* specified here.
528	*
529	* @param index the desired index in the input text.
530	* @deprecated ICU 56 Use Normalizer2 instead.
531	*/
532	void setIndexOnly(int32_t index);
533
534	/**
535	* Reset the index to the beginning of the text.
536	* This is equivalent to setIndexOnly(startIndex)).
537	* @deprecated ICU 56 Use Normalizer2 instead.
538	*/
539	void reset(void);
540
541	/**
542	* Retrieve the current iteration position in the input text that is
543	* being normalized.
544	*
545	* A following call to next() will return a normalized code point from
546	* the input text at or after this index.
547	*
548	* After a call to previous(), getIndex() will point at or before the
549	* position in the input text where the normalized code point
550	* was returned from with previous().
551	*
552	* @return the current index in the input text
553	* @deprecated ICU 56 Use Normalizer2 instead.
554	*/
555	int32_t getIndex(void) const;
556
557	/**
558	* Retrieve the index of the start of the input text. This is the begin index
559	* of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
560	* over which this <code>Normalizer</code> is iterating.
561	*
562	* @return the smallest index in the input text where the Normalizer operates
563	* @deprecated ICU 56 Use Normalizer2 instead.
564	*/
565	int32_t startIndex(void) const;
566
567	/**
568	* Retrieve the index of the end of the input text. This is the end index
569	* of the <code>CharacterIterator</code> or the length of the string
570	* over which this <code>Normalizer</code> is iterating.
571	* This end index is exclusive, i.e., the Normalizer operates only on characters
572	* before this index.
573	*
574	* @return the first index in the input text where the Normalizer does not operate
575	* @deprecated ICU 56 Use Normalizer2 instead.
576	*/
577	int32_t endIndex(void) const;
578
579	/**
580	* Returns TRUE when both iterators refer to the same character in the same
581	* input text.
582	*
583	* @param that a Normalizer object to compare this one to
584	* @return comparison result
585	* @deprecated ICU 56 Use Normalizer2 instead.
586	*/
587	UBool operator==(const Normalizer& that) const;
588
589	/**
590	* Returns FALSE when both iterators refer to the same character in the same
591	* input text.
592	*
593	* @param that a Normalizer object to compare this one to
594	* @return comparison result
595	* @deprecated ICU 56 Use Normalizer2 instead.
596	*/
597	inline UBool operator!=(const Normalizer& that) const;
598
599	/**
600	* Returns a pointer to a new Normalizer that is a clone of this one.
601	* The caller is responsible for deleting the new clone.
602	* @return a pointer to a new Normalizer
603	* @deprecated ICU 56 Use Normalizer2 instead.
604	*/
605	Normalizer* clone() const;
606
607	/**
608	* Generates a hash code for this iterator.
609	*
610	* @return the hash code
611	* @deprecated ICU 56 Use Normalizer2 instead.
612	*/
613	int32_t hashCode(void) const;
614
615	//-------------------------------------------------------------------------
616	// Property access methods
617	//-------------------------------------------------------------------------
618
619	/**
620	* Set the normalization mode for this object.
621	* <p>
622	* <b>Note:</b>If the normalization mode is changed while iterating
623	* over a string, calls to {@link #next() } and {@link #previous() } may
624	* return previously buffers characters in the old normalization mode
625	* until the iteration is able to re-sync at the next base character.
626	* It is safest to call {@link #setIndexOnly }, {@link #reset() },
627	* {@link #setText }, {@link #first() },
628	* {@link #last() }, etc. after calling <code>setMode</code>.
629	* <p>
630	* @param newMode the new mode for this <code>Normalizer</code>.
631	* @see #getUMode
632	* @deprecated ICU 56 Use Normalizer2 instead.
633	*/
634	void setMode(UNormalizationMode newMode);
635
636	/**
637	* Return the normalization mode for this object.
638	*
639	* This is an unusual name because there used to be a getMode() that
640	* returned a different type.
641	*
642	* @return the mode for this <code>Normalizer</code>
643	* @see #setMode
644	* @deprecated ICU 56 Use Normalizer2 instead.
645	*/
646	UNormalizationMode getUMode(void) const;
647
648	/**
649	* Set options that affect this <code>Normalizer</code>'s operation.
650	* Options do not change the basic composition or decomposition operation
651	* that is being performed, but they control whether
652	* certain optional portions of the operation are done.
653	* Currently the only available option is obsolete.
654	*
655	* It is possible to specify multiple options that are all turned on or off.
656	*
657	* @param option the option(s) whose value is/are to be set.
658	* @param value the new setting for the option. Use <code>TRUE</code> to
659	* turn the option(s) on and <code>FALSE</code> to turn it/them off.
660	*
661	* @see #getOption
662	* @deprecated ICU 56 Use Normalizer2 instead.
663	*/
664	void setOption(int32_t option,
665	UBool value);
666
667	/**
668	* Determine whether an option is turned on or off.
669	* If multiple options are specified, then the result is TRUE if any
670	* of them are set.
671	* <p>
672	* @param option the option(s) that are to be checked
673	* @return TRUE if any of the option(s) are set
674	* @see #setOption
675	* @deprecated ICU 56 Use Normalizer2 instead.
676	*/
677	UBool getOption(int32_t option) const;
678
679	/**
680	* Set the input text over which this <code>Normalizer</code> will iterate.
681	* The iteration position is set to the beginning.
682	*
683	* @param newText a string that replaces the current input text
684	* @param status a UErrorCode
685	* @deprecated ICU 56 Use Normalizer2 instead.
686	*/
687	void setText(const UnicodeString& newText,
688	UErrorCode &status);
689
690	/**
691	* Set the input text over which this <code>Normalizer</code> will iterate.
692	* The iteration position is set to the beginning.
693	*
694	* @param newText a CharacterIterator object that replaces the current input text
695	* @param status a UErrorCode
696	* @deprecated ICU 56 Use Normalizer2 instead.
697	*/
698	void setText(const CharacterIterator& newText,
699	UErrorCode &status);
700
701	/**
702	* Set the input text over which this <code>Normalizer</code> will iterate.
703	* The iteration position is set to the beginning.
704	*
705	* @param newText a string that replaces the current input text
706	* @param length the length of the string, or -1 if NUL-terminated
707	* @param status a UErrorCode
708	* @deprecated ICU 56 Use Normalizer2 instead.
709	*/
710	void setText(ConstChar16Ptr newText,
711	int32_t length,
712	UErrorCode &status);
713	/**
714	* Copies the input text into the UnicodeString argument.
715	*
716	* @param result Receives a copy of the text under iteration.
717	* @deprecated ICU 56 Use Normalizer2 instead.
718	*/
719	void getText(UnicodeString& result);
720
721	/**
722	* ICU "poor man's RTTI", returns a UClassID for this class.
723	* @returns a UClassID for this class.
724	* @deprecated ICU 56 Use Normalizer2 instead.
725	*/
726	static UClassID U_EXPORT2 getStaticClassID();
727	#endif /* U_HIDE_DEPRECATED_API */
728
729	#ifndef U_FORCE_HIDE_DEPRECATED_API
730	/**
731	* ICU "poor man's RTTI", returns a UClassID for the actual class.
732	* @return a UClassID for the actual class.
733	* @deprecated ICU 56 Use Normalizer2 instead.
734	*/
735	virtual UClassID getDynamicClassID() const;
736	#endif // U_FORCE_HIDE_DEPRECATED_API
737
738	private:
739	//-------------------------------------------------------------------------
740	// Private functions
741	//-------------------------------------------------------------------------
742
743	Normalizer(); // default constructor not implemented
744	Normalizer &operator=(const Normalizer &that); // assignment operator not implemented
745
746	// Private utility methods for iteration
747	// For documentation, see the source code
748	UBool nextNormalize();
749	UBool previousNormalize();
750
751	void init();
752	void clearBuffer(void);
753
754	//-------------------------------------------------------------------------
755	// Private data
756	//-------------------------------------------------------------------------
757
758	FilteredNormalizer2fFilteredNorm2; // owned if not NULL*
759	const Normalizer2 fNorm2; // not owned; may be equal to fFilteredNorm2*
760	UNormalizationMode fUMode; // deprecated
761	int32_t fOptions;
762
763	// The input text and our position in it
764	CharacterIterator *text;
765
766	// The normalization buffer is the result of normalization
767	// of the source in [currentIndex..nextIndex[ .
768	int32_t currentIndex, nextIndex;
769
770	// A buffer for holding intermediate results
771	UnicodeString buffer;
772	int32_t bufferPos;
773	};
774
775	//-------------------------------------------------------------------------
776	// Inline implementations
777	//-------------------------------------------------------------------------
778
779	#ifndef U_HIDE_DEPRECATED_API
780	inline UBool
781	Normalizer::operator!= (const Normalizer& other) const
782	{ return ! operator==(other); }
783
784	inline UNormalizationCheckResult
785	Normalizer::quickCheck(const UnicodeString& source,
786	UNormalizationMode mode,
787	UErrorCode &status) {
788	return quickCheck(source, mode, `0`, status);
789	}
790
791	inline UBool
792	Normalizer::isNormalized(const UnicodeString& source,
793	UNormalizationMode mode,
794	UErrorCode &status) {
795	return isNormalized(source, mode, `0`, status);
796	}
797	#endif /* U_HIDE_DEPRECATED_API */
798
799	inline int32_t
800	Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
801	uint32_t options,
802	UErrorCode &errorCode) {
803	// all argument checking is done in unorm_compare
804	return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
805	toUCharPtr(s2.getBuffer()), s2.length(),
806	options,
807	&errorCode);
808	}
809
810	U_NAMESPACE_END
811
812	#endif /* #if !UCONFIG_NO_NORMALIZATION */
813
814	#endif // NORMLZR_H
815
816	#endif /* U_SHOW_CPLUSPLUS_API */
817

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/unicode/normlzr.h