1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2014, International Business Machines
6* Corporation and others. All Rights Reserved.
7*******************************************************************************
8* norm2allmodes.h
9*
10* created on: 2014sep07
11* created by: Markus W. Scherer
12*/
13
14#ifndef __NORM2ALLMODES_H__
15#define __NORM2ALLMODES_H__
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_NORMALIZATION
20
21#include "unicode/edits.h"
22#include "unicode/normalizer2.h"
23#include "unicode/stringoptions.h"
24#include "unicode/unistr.h"
25#include "cpputils.h"
26#include "normalizer2impl.h"
27
28U_NAMESPACE_BEGIN
29
30// Intermediate class:
31// Has Normalizer2Impl and does boilerplate argument checking and setup.
32class Normalizer2WithImpl : public Normalizer2 {
33public:
34 Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
35 virtual ~Normalizer2WithImpl();
36
37 // normalize
38 virtual UnicodeString &
39 normalize(const UnicodeString &src,
40 UnicodeString &dest,
41 UErrorCode &errorCode) const override {
42 if(U_FAILURE(errorCode)) {
43 dest.setToBogus();
44 return dest;
45 }
46 const char16_t *sArray=src.getBuffer();
47 if(&dest==&src || sArray==nullptr) {
48 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
49 dest.setToBogus();
50 return dest;
51 }
52 dest.remove();
53 ReorderingBuffer buffer(impl, dest);
54 if(buffer.init(src.length(), errorCode)) {
55 normalize(sArray, sArray+src.length(), buffer, errorCode);
56 }
57 return dest;
58 }
59 virtual void
60 normalize(const char16_t *src, const char16_t *limit,
61 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
62
63 // normalize and append
64 virtual UnicodeString &
65 normalizeSecondAndAppend(UnicodeString &first,
66 const UnicodeString &second,
67 UErrorCode &errorCode) const override {
68 return normalizeSecondAndAppend(first, second, true, errorCode);
69 }
70 virtual UnicodeString &
71 append(UnicodeString &first,
72 const UnicodeString &second,
73 UErrorCode &errorCode) const override {
74 return normalizeSecondAndAppend(first, second, false, errorCode);
75 }
76 UnicodeString &
77 normalizeSecondAndAppend(UnicodeString &first,
78 const UnicodeString &second,
79 UBool doNormalize,
80 UErrorCode &errorCode) const {
81 uprv_checkCanGetBuffer(first, errorCode);
82 if(U_FAILURE(errorCode)) {
83 return first;
84 }
85 const char16_t *secondArray=second.getBuffer();
86 if(&first==&second || secondArray==nullptr) {
87 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
88 return first;
89 }
90 int32_t firstLength=first.length();
91 UnicodeString safeMiddle;
92 {
93 ReorderingBuffer buffer(impl, first);
94 if(buffer.init(firstLength+second.length(), errorCode)) {
95 normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize,
96 safeMiddle, buffer, errorCode);
97 }
98 } // The ReorderingBuffer destructor finalizes the first string.
99 if(U_FAILURE(errorCode)) {
100 // Restore the modified suffix of the first string.
101 first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle);
102 }
103 return first;
104 }
105 virtual void
106 normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
107 UnicodeString &safeMiddle,
108 ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
109 virtual UBool
110 getDecomposition(UChar32 c, UnicodeString &decomposition) const override {
111 char16_t buffer[4];
112 int32_t length;
113 const char16_t *d=impl.getDecomposition(c, buffer, length);
114 if(d==nullptr) {
115 return false;
116 }
117 if(d==buffer) {
118 decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c)
119 } else {
120 decomposition.setTo(false, d, length); // read-only alias
121 }
122 return true;
123 }
124 virtual UBool
125 getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override {
126 char16_t buffer[30];
127 int32_t length;
128 const char16_t *d=impl.getRawDecomposition(c, buffer, length);
129 if(d==nullptr) {
130 return false;
131 }
132 if(d==buffer) {
133 decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition)
134 } else {
135 decomposition.setTo(false, d, length); // read-only alias
136 }
137 return true;
138 }
139 virtual UChar32
140 composePair(UChar32 a, UChar32 b) const override {
141 return impl.composePair(a, b);
142 }
143
144 virtual uint8_t
145 getCombiningClass(UChar32 c) const override {
146 return impl.getCC(impl.getNorm16(c));
147 }
148
149 // quick checks
150 virtual UBool
151 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
152 if(U_FAILURE(errorCode)) {
153 return false;
154 }
155 const char16_t *sArray=s.getBuffer();
156 if(sArray==nullptr) {
157 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
158 return false;
159 }
160 const char16_t *sLimit=sArray+s.length();
161 return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode);
162 }
163 virtual UNormalizationCheckResult
164 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
165 return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO;
166 }
167 virtual int32_t
168 spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override {
169 if(U_FAILURE(errorCode)) {
170 return 0;
171 }
172 const char16_t *sArray=s.getBuffer();
173 if(sArray==nullptr) {
174 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
175 return 0;
176 }
177 return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray);
178 }
179 virtual const char16_t *
180 spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0;
181
182 virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
183 return UNORM_YES;
184 }
185
186 const Normalizer2Impl &impl;
187};
188
189class DecomposeNormalizer2 : public Normalizer2WithImpl {
190public:
191 DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
192 virtual ~DecomposeNormalizer2();
193
194private:
195 virtual void
196 normalize(const char16_t *src, const char16_t *limit,
197 ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
198 impl.decompose(src, limit, &buffer, errorCode);
199 }
200 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
201 virtual void
202 normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
203 UnicodeString &safeMiddle,
204 ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
205 impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
206 }
207
208 void
209 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
210 Edits *edits, UErrorCode &errorCode) const override {
211 if (U_FAILURE(errorCode)) {
212 return;
213 }
214 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
215 edits->reset();
216 }
217 const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
218 impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode);
219 sink.Flush();
220 }
221 virtual UBool
222 isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
223 if(U_FAILURE(errorCode)) {
224 return false;
225 }
226 const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
227 const uint8_t *sLimit = s + sp.length();
228 return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode);
229 }
230
231 virtual const char16_t *
232 spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
233 return impl.decompose(src, limit, nullptr, errorCode);
234 }
235 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
236 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
237 return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO;
238 }
239 virtual UBool hasBoundaryBefore(UChar32 c) const override {
240 return impl.hasDecompBoundaryBefore(c);
241 }
242 virtual UBool hasBoundaryAfter(UChar32 c) const override {
243 return impl.hasDecompBoundaryAfter(c);
244 }
245 virtual UBool isInert(UChar32 c) const override {
246 return impl.isDecompInert(c);
247 }
248};
249
250class ComposeNormalizer2 : public Normalizer2WithImpl {
251public:
252 ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
253 Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
254 virtual ~ComposeNormalizer2();
255
256private:
257 virtual void
258 normalize(const char16_t *src, const char16_t *limit,
259 ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
260 impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
261 }
262 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
263
264 void
265 normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
266 Edits *edits, UErrorCode &errorCode) const override {
267 if (U_FAILURE(errorCode)) {
268 return;
269 }
270 if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) {
271 edits->reset();
272 }
273 const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data());
274 impl.composeUTF8(options, onlyContiguous, s, s + src.length(),
275 &sink, edits, errorCode);
276 sink.Flush();
277 }
278
279 virtual void
280 normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
281 UnicodeString &safeMiddle,
282 ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
283 impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
284 }
285
286 virtual UBool
287 isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
288 if(U_FAILURE(errorCode)) {
289 return false;
290 }
291 const char16_t *sArray=s.getBuffer();
292 if(sArray==nullptr) {
293 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
294 return false;
295 }
296 UnicodeString temp;
297 ReorderingBuffer buffer(impl, temp);
298 if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization
299 return false;
300 }
301 return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode);
302 }
303 virtual UBool
304 isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
305 if(U_FAILURE(errorCode)) {
306 return false;
307 }
308 const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data());
309 return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode);
310 }
311 virtual UNormalizationCheckResult
312 quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
313 if(U_FAILURE(errorCode)) {
314 return UNORM_MAYBE;
315 }
316 const char16_t *sArray=s.getBuffer();
317 if(sArray==nullptr) {
318 errorCode=U_ILLEGAL_ARGUMENT_ERROR;
319 return UNORM_MAYBE;
320 }
321 UNormalizationCheckResult qcResult=UNORM_YES;
322 impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult);
323 return qcResult;
324 }
325 virtual const char16_t *
326 spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &) const override {
327 return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr);
328 }
329 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
330 virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
331 return impl.getCompQuickCheck(impl.getNorm16(c));
332 }
333 virtual UBool hasBoundaryBefore(UChar32 c) const override {
334 return impl.hasCompBoundaryBefore(c);
335 }
336 virtual UBool hasBoundaryAfter(UChar32 c) const override {
337 return impl.hasCompBoundaryAfter(c, onlyContiguous);
338 }
339 virtual UBool isInert(UChar32 c) const override {
340 return impl.isCompInert(c, onlyContiguous);
341 }
342
343 const UBool onlyContiguous;
344};
345
346class FCDNormalizer2 : public Normalizer2WithImpl {
347public:
348 FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
349 virtual ~FCDNormalizer2();
350
351private:
352 virtual void
353 normalize(const char16_t *src, const char16_t *limit,
354 ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
355 impl.makeFCD(src, limit, &buffer, errorCode);
356 }
357 using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function.
358 virtual void
359 normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
360 UnicodeString &safeMiddle,
361 ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
362 impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
363 }
364 virtual const char16_t *
365 spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
366 return impl.makeFCD(src, limit, nullptr, errorCode);
367 }
368 using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function.
369 virtual UBool hasBoundaryBefore(UChar32 c) const override {
370 return impl.hasFCDBoundaryBefore(c);
371 }
372 virtual UBool hasBoundaryAfter(UChar32 c) const override {
373 return impl.hasFCDBoundaryAfter(c);
374 }
375 virtual UBool isInert(UChar32 c) const override {
376 return impl.isFCDInert(c);
377 }
378};
379
380struct Norm2AllModes : public UMemory {
381 Norm2AllModes(Normalizer2Impl *i)
382 : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {}
383 ~Norm2AllModes();
384
385 static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
386 static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
387 static Norm2AllModes *createInstance(const char *packageName,
388 const char *name,
389 UErrorCode &errorCode);
390
391 static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
392 static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
393 static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
394
395 Normalizer2Impl *impl;
396 ComposeNormalizer2 comp;
397 DecomposeNormalizer2 decomp;
398 FCDNormalizer2 fcd;
399 ComposeNormalizer2 fcc;
400};
401
402U_NAMESPACE_END
403
404#endif // !UCONFIG_NO_NORMALIZATION
405#endif // __NORM2ALLMODES_H__
406