| 1 | // Licensed to the .NET Foundation under one or more agreements. |
| 2 | // The .NET Foundation licenses this file to you under the MIT license. |
| 3 | // See the LICENSE file in the project root for more information. |
| 4 | // |
| 5 | |
| 6 | #include <stdint.h> |
| 7 | #include "icushim.h" |
| 8 | |
/*
 * Unicode normalization forms understood by the functions in this file.
 *
 * The numeric value of every enumerator must stay in sync with
 * System.Text.NormalizationForm on the managed side.
 */
enum class NormalizationForm : int32_t
{
    C = 1,
    D = 2,
    KC = 5,
    KD = 6,
};
| 19 | |
| 20 | const UNormalizer2* GetNormalizerForForm(NormalizationForm normalizationForm, UErrorCode* pErrorCode) |
| 21 | { |
| 22 | switch (normalizationForm) |
| 23 | { |
| 24 | case NormalizationForm::C: |
| 25 | return unorm2_getNFCInstance(pErrorCode); |
| 26 | case NormalizationForm::D: |
| 27 | return unorm2_getNFDInstance(pErrorCode); |
| 28 | case NormalizationForm::KC: |
| 29 | return unorm2_getNFKCInstance(pErrorCode); |
| 30 | case NormalizationForm::KD: |
| 31 | return unorm2_getNFKDInstance(pErrorCode); |
| 32 | } |
| 33 | |
| 34 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
| 35 | return nullptr; |
| 36 | } |
| 37 | |
| 38 | /* |
| 39 | Function: |
| 40 | IsNormalized |
| 41 | |
| 42 | Used by System.StringNormalizationExtensions.IsNormalized to detect if a string |
| 43 | is in a certain |
| 44 | Unicode Normalization Form. |
| 45 | |
| 46 | Return values: |
| 47 | 0: lpStr is not normalized. |
| 48 | 1: lpStr is normalized. |
| 49 | -1: internal error during normalization. |
| 50 | */ |
| 51 | extern "C" int32_t GlobalizationNative_IsNormalized( |
| 52 | NormalizationForm normalizationForm, const UChar* lpStr, int32_t cwStrLength) |
| 53 | { |
| 54 | UErrorCode err = U_ZERO_ERROR; |
| 55 | const UNormalizer2* pNormalizer = GetNormalizerForForm(normalizationForm, &err); |
| 56 | UBool isNormalized = unorm2_isNormalized(pNormalizer, lpStr, cwStrLength, &err); |
| 57 | |
| 58 | if (U_SUCCESS(err)) |
| 59 | { |
| 60 | return isNormalized == TRUE ? 1 : 0; |
| 61 | } |
| 62 | else |
| 63 | { |
| 64 | return -1; |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | /* |
| 69 | Function: |
| 70 | NormalizeString |
| 71 | |
| 72 | Used by System.StringNormalizationExtensions.Normalize to normalize a string |
| 73 | into a certain |
| 74 | Unicode Normalization Form. |
| 75 | |
| 76 | Return values: |
| 77 | 0: internal error during normalization. |
| 78 | >0: the length of the normalized string (not counting the null terminator). |
| 79 | */ |
| 80 | extern "C" int32_t GlobalizationNative_NormalizeString( |
| 81 | NormalizationForm normalizationForm, const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) |
| 82 | { |
| 83 | UErrorCode err = U_ZERO_ERROR; |
| 84 | const UNormalizer2* pNormalizer = GetNormalizerForForm(normalizationForm, &err); |
| 85 | int32_t normalizedLen = unorm2_normalize(pNormalizer, lpSrc, cwSrcLength, lpDst, cwDstLength, &err); |
| 86 | |
| 87 | return (U_SUCCESS(err) || (err == U_BUFFER_OVERFLOW_ERROR)) ? normalizedLen : 0; |
| 88 | } |
| 89 | |