| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************* |
| 5 | * Copyright (C) 2014, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ******************************************************************************* |
| 8 | * norm2allmodes.h |
| 9 | * |
| 10 | * created on: 2014sep07 |
| 11 | * created by: Markus W. Scherer |
| 12 | */ |
| 13 | |
| 14 | #ifndef __NORM2ALLMODES_H__ |
| 15 | #define __NORM2ALLMODES_H__ |
| 16 | |
| 17 | #include "unicode/utypes.h" |
| 18 | |
| 19 | #if !UCONFIG_NO_NORMALIZATION |
| 20 | |
| 21 | #include "unicode/edits.h" |
| 22 | #include "unicode/normalizer2.h" |
| 23 | #include "unicode/stringoptions.h" |
| 24 | #include "unicode/unistr.h" |
| 25 | #include "cpputils.h" |
| 26 | #include "normalizer2impl.h" |
| 27 | |
| 28 | U_NAMESPACE_BEGIN |
| 29 | |
| 30 | // Intermediate class: |
| 31 | // Has Normalizer2Impl and does boilerplate argument checking and setup. |
| 32 | class Normalizer2WithImpl : public Normalizer2 { |
| 33 | public: |
| 34 | Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} |
| 35 | virtual ~Normalizer2WithImpl(); |
| 36 | |
| 37 | // normalize |
| 38 | virtual UnicodeString & |
| 39 | normalize(const UnicodeString &src, |
| 40 | UnicodeString &dest, |
| 41 | UErrorCode &errorCode) const { |
| 42 | if(U_FAILURE(errorCode)) { |
| 43 | dest.setToBogus(); |
| 44 | return dest; |
| 45 | } |
| 46 | const UChar *sArray=src.getBuffer(); |
| 47 | if(&dest==&src || sArray==NULL) { |
| 48 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 49 | dest.setToBogus(); |
| 50 | return dest; |
| 51 | } |
| 52 | dest.remove(); |
| 53 | ReorderingBuffer buffer(impl, dest); |
| 54 | if(buffer.init(src.length(), errorCode)) { |
| 55 | normalize(sArray, sArray+src.length(), buffer, errorCode); |
| 56 | } |
| 57 | return dest; |
| 58 | } |
| 59 | virtual void |
| 60 | normalize(const UChar *src, const UChar *limit, |
| 61 | ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
| 62 | |
| 63 | // normalize and append |
| 64 | virtual UnicodeString & |
| 65 | normalizeSecondAndAppend(UnicodeString &first, |
| 66 | const UnicodeString &second, |
| 67 | UErrorCode &errorCode) const { |
| 68 | return normalizeSecondAndAppend(first, second, TRUE, errorCode); |
| 69 | } |
| 70 | virtual UnicodeString & |
| 71 | append(UnicodeString &first, |
| 72 | const UnicodeString &second, |
| 73 | UErrorCode &errorCode) const { |
| 74 | return normalizeSecondAndAppend(first, second, FALSE, errorCode); |
| 75 | } |
| 76 | UnicodeString & |
| 77 | normalizeSecondAndAppend(UnicodeString &first, |
| 78 | const UnicodeString &second, |
| 79 | UBool doNormalize, |
| 80 | UErrorCode &errorCode) const { |
| 81 | uprv_checkCanGetBuffer(first, errorCode); |
| 82 | if(U_FAILURE(errorCode)) { |
| 83 | return first; |
| 84 | } |
| 85 | const UChar *secondArray=second.getBuffer(); |
| 86 | if(&first==&second || secondArray==NULL) { |
| 87 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 88 | return first; |
| 89 | } |
| 90 | int32_t firstLength=first.length(); |
| 91 | UnicodeString safeMiddle; |
| 92 | { |
| 93 | ReorderingBuffer buffer(impl, first); |
| 94 | if(buffer.init(firstLength+second.length(), errorCode)) { |
| 95 | normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, |
| 96 | safeMiddle, buffer, errorCode); |
| 97 | } |
| 98 | } // The ReorderingBuffer destructor finalizes the first string. |
| 99 | if(U_FAILURE(errorCode)) { |
| 100 | // Restore the modified suffix of the first string. |
| 101 | first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); |
| 102 | } |
| 103 | return first; |
| 104 | } |
| 105 | virtual void |
| 106 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 107 | UnicodeString &safeMiddle, |
| 108 | ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; |
| 109 | virtual UBool |
| 110 | getDecomposition(UChar32 c, UnicodeString &decomposition) const { |
| 111 | UChar buffer[4]; |
| 112 | int32_t length; |
| 113 | const UChar *d=impl.getDecomposition(c, buffer, length); |
| 114 | if(d==NULL) { |
| 115 | return FALSE; |
| 116 | } |
| 117 | if(d==buffer) { |
| 118 | decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) |
| 119 | } else { |
| 120 | decomposition.setTo(FALSE, d, length); // read-only alias |
| 121 | } |
| 122 | return TRUE; |
| 123 | } |
| 124 | virtual UBool |
| 125 | getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { |
| 126 | UChar buffer[30]; |
| 127 | int32_t length; |
| 128 | const UChar *d=impl.getRawDecomposition(c, buffer, length); |
| 129 | if(d==NULL) { |
| 130 | return FALSE; |
| 131 | } |
| 132 | if(d==buffer) { |
| 133 | decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) |
| 134 | } else { |
| 135 | decomposition.setTo(FALSE, d, length); // read-only alias |
| 136 | } |
| 137 | return TRUE; |
| 138 | } |
| 139 | virtual UChar32 |
| 140 | composePair(UChar32 a, UChar32 b) const { |
| 141 | return impl.composePair(a, b); |
| 142 | } |
| 143 | |
| 144 | virtual uint8_t |
| 145 | getCombiningClass(UChar32 c) const { |
| 146 | return impl.getCC(impl.getNorm16(c)); |
| 147 | } |
| 148 | |
| 149 | // quick checks |
| 150 | virtual UBool |
| 151 | isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { |
| 152 | if(U_FAILURE(errorCode)) { |
| 153 | return FALSE; |
| 154 | } |
| 155 | const UChar *sArray=s.getBuffer(); |
| 156 | if(sArray==NULL) { |
| 157 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 158 | return FALSE; |
| 159 | } |
| 160 | const UChar *sLimit=sArray+s.length(); |
| 161 | return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); |
| 162 | } |
| 163 | virtual UNormalizationCheckResult |
| 164 | quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { |
| 165 | return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; |
| 166 | } |
| 167 | virtual int32_t |
| 168 | spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { |
| 169 | if(U_FAILURE(errorCode)) { |
| 170 | return 0; |
| 171 | } |
| 172 | const UChar *sArray=s.getBuffer(); |
| 173 | if(sArray==NULL) { |
| 174 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 175 | return 0; |
| 176 | } |
| 177 | return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); |
| 178 | } |
| 179 | virtual const UChar * |
| 180 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; |
| 181 | |
| 182 | virtual UNormalizationCheckResult getQuickCheck(UChar32) const { |
| 183 | return UNORM_YES; |
| 184 | } |
| 185 | |
| 186 | const Normalizer2Impl &impl; |
| 187 | }; |
| 188 | |
| 189 | class DecomposeNormalizer2 : public Normalizer2WithImpl { |
| 190 | public: |
| 191 | DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| 192 | virtual ~DecomposeNormalizer2(); |
| 193 | |
| 194 | private: |
| 195 | virtual void |
| 196 | normalize(const UChar *src, const UChar *limit, |
| 197 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 198 | impl.decompose(src, limit, &buffer, errorCode); |
| 199 | } |
| 200 | using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| 201 | virtual void |
| 202 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 203 | UnicodeString &safeMiddle, |
| 204 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 205 | impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
| 206 | } |
| 207 | virtual const UChar * |
| 208 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
| 209 | return impl.decompose(src, limit, NULL, errorCode); |
| 210 | } |
| 211 | using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| 212 | virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { |
| 213 | return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; |
| 214 | } |
| 215 | virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); } |
| 216 | virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); } |
| 217 | virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } |
| 218 | }; |
| 219 | |
| 220 | class ComposeNormalizer2 : public Normalizer2WithImpl { |
| 221 | public: |
| 222 | ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : |
| 223 | Normalizer2WithImpl(ni), onlyContiguous(fcc) {} |
| 224 | virtual ~ComposeNormalizer2(); |
| 225 | |
| 226 | private: |
| 227 | virtual void |
| 228 | normalize(const UChar *src, const UChar *limit, |
| 229 | ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { |
| 230 | impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); |
| 231 | } |
| 232 | using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| 233 | |
| 234 | void |
| 235 | normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, |
| 236 | Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { |
| 237 | if (U_FAILURE(errorCode)) { |
| 238 | return; |
| 239 | } |
| 240 | if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { |
| 241 | edits->reset(); |
| 242 | } |
| 243 | const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); |
| 244 | impl.composeUTF8(options, onlyContiguous, s, s + src.length(), |
| 245 | &sink, edits, errorCode); |
| 246 | sink.Flush(); |
| 247 | } |
| 248 | |
| 249 | virtual void |
| 250 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 251 | UnicodeString &safeMiddle, |
| 252 | ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { |
| 253 | impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); |
| 254 | } |
| 255 | |
| 256 | virtual UBool |
| 257 | isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { |
| 258 | if(U_FAILURE(errorCode)) { |
| 259 | return FALSE; |
| 260 | } |
| 261 | const UChar *sArray=s.getBuffer(); |
| 262 | if(sArray==NULL) { |
| 263 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 264 | return FALSE; |
| 265 | } |
| 266 | UnicodeString temp; |
| 267 | ReorderingBuffer buffer(impl, temp); |
| 268 | if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization |
| 269 | return FALSE; |
| 270 | } |
| 271 | return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); |
| 272 | } |
| 273 | virtual UBool |
| 274 | isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { |
| 275 | if(U_FAILURE(errorCode)) { |
| 276 | return FALSE; |
| 277 | } |
| 278 | const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); |
| 279 | return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); |
| 280 | } |
| 281 | virtual UNormalizationCheckResult |
| 282 | quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { |
| 283 | if(U_FAILURE(errorCode)) { |
| 284 | return UNORM_MAYBE; |
| 285 | } |
| 286 | const UChar *sArray=s.getBuffer(); |
| 287 | if(sArray==NULL) { |
| 288 | errorCode=U_ILLEGAL_ARGUMENT_ERROR; |
| 289 | return UNORM_MAYBE; |
| 290 | } |
| 291 | UNormalizationCheckResult qcResult=UNORM_YES; |
| 292 | impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); |
| 293 | return qcResult; |
| 294 | } |
| 295 | virtual const UChar * |
| 296 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE { |
| 297 | return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); |
| 298 | } |
| 299 | using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| 300 | virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { |
| 301 | return impl.getCompQuickCheck(impl.getNorm16(c)); |
| 302 | } |
| 303 | virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { |
| 304 | return impl.hasCompBoundaryBefore(c); |
| 305 | } |
| 306 | virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { |
| 307 | return impl.hasCompBoundaryAfter(c, onlyContiguous); |
| 308 | } |
| 309 | virtual UBool isInert(UChar32 c) const U_OVERRIDE { |
| 310 | return impl.isCompInert(c, onlyContiguous); |
| 311 | } |
| 312 | |
| 313 | const UBool onlyContiguous; |
| 314 | }; |
| 315 | |
| 316 | class FCDNormalizer2 : public Normalizer2WithImpl { |
| 317 | public: |
| 318 | FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} |
| 319 | virtual ~FCDNormalizer2(); |
| 320 | |
| 321 | private: |
| 322 | virtual void |
| 323 | normalize(const UChar *src, const UChar *limit, |
| 324 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 325 | impl.makeFCD(src, limit, &buffer, errorCode); |
| 326 | } |
| 327 | using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. |
| 328 | virtual void |
| 329 | normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, |
| 330 | UnicodeString &safeMiddle, |
| 331 | ReorderingBuffer &buffer, UErrorCode &errorCode) const { |
| 332 | impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); |
| 333 | } |
| 334 | virtual const UChar * |
| 335 | spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { |
| 336 | return impl.makeFCD(src, limit, NULL, errorCode); |
| 337 | } |
| 338 | using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. |
| 339 | virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } |
| 340 | virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } |
| 341 | virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } |
| 342 | }; |
| 343 | |
| 344 | struct Norm2AllModes : public UMemory { |
| 345 | Norm2AllModes(Normalizer2Impl *i) |
| 346 | : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} |
| 347 | ~Norm2AllModes(); |
| 348 | |
| 349 | static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); |
| 350 | static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); |
| 351 | static Norm2AllModes *createInstance(const char *packageName, |
| 352 | const char *name, |
| 353 | UErrorCode &errorCode); |
| 354 | |
| 355 | static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); |
| 356 | static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); |
| 357 | static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); |
| 358 | |
| 359 | Normalizer2Impl *impl; |
| 360 | ComposeNormalizer2 comp; |
| 361 | DecomposeNormalizer2 decomp; |
| 362 | FCDNormalizer2 fcd; |
| 363 | ComposeNormalizer2 fcc; |
| 364 | }; |
| 365 | |
| 366 | U_NAMESPACE_END |
| 367 | |
| 368 | #endif // !UCONFIG_NO_NORMALIZATION |
| 369 | #endif // __NORM2ALLMODES_H__ |
| 370 | |