1 | // Licensed to the .NET Foundation under one or more agreements. |
2 | // The .NET Foundation licenses this file to you under the MIT license. |
3 | // See the LICENSE file in the project root for more information. |
4 | // |
5 | |
6 | #include <stdint.h> |
7 | #include "icushim.h" |
8 | |
/*
 * Unicode normalization forms accepted by the functions in this file.
 * The numeric values must stay in sync with the managed
 * System.Text.NormalizationForm enumeration.
 */
enum class NormalizationForm : int32_t
{
    C = 1,  // NFC
    D = 2,  // NFD
    KC = 5, // NFKC
    KD = 6, // NFKD
};
19 | |
20 | const UNormalizer2* GetNormalizerForForm(NormalizationForm normalizationForm, UErrorCode* pErrorCode) |
21 | { |
22 | switch (normalizationForm) |
23 | { |
24 | case NormalizationForm::C: |
25 | return unorm2_getNFCInstance(pErrorCode); |
26 | case NormalizationForm::D: |
27 | return unorm2_getNFDInstance(pErrorCode); |
28 | case NormalizationForm::KC: |
29 | return unorm2_getNFKCInstance(pErrorCode); |
30 | case NormalizationForm::KD: |
31 | return unorm2_getNFKDInstance(pErrorCode); |
32 | } |
33 | |
34 | *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR; |
35 | return nullptr; |
36 | } |
37 | |
38 | /* |
39 | Function: |
40 | IsNormalized |
41 | |
42 | Used by System.StringNormalizationExtensions.IsNormalized to detect if a string |
43 | is in a certain |
44 | Unicode Normalization Form. |
45 | |
46 | Return values: |
47 | 0: lpStr is not normalized. |
48 | 1: lpStr is normalized. |
49 | -1: internal error during normalization. |
50 | */ |
51 | extern "C" int32_t GlobalizationNative_IsNormalized( |
52 | NormalizationForm normalizationForm, const UChar* lpStr, int32_t cwStrLength) |
53 | { |
54 | UErrorCode err = U_ZERO_ERROR; |
55 | const UNormalizer2* pNormalizer = GetNormalizerForForm(normalizationForm, &err); |
56 | UBool isNormalized = unorm2_isNormalized(pNormalizer, lpStr, cwStrLength, &err); |
57 | |
58 | if (U_SUCCESS(err)) |
59 | { |
60 | return isNormalized == TRUE ? 1 : 0; |
61 | } |
62 | else |
63 | { |
64 | return -1; |
65 | } |
66 | } |
67 | |
68 | /* |
69 | Function: |
70 | NormalizeString |
71 | |
72 | Used by System.StringNormalizationExtensions.Normalize to normalize a string |
73 | into a certain |
74 | Unicode Normalization Form. |
75 | |
76 | Return values: |
77 | 0: internal error during normalization. |
78 | >0: the length of the normalized string (not counting the null terminator). |
79 | */ |
80 | extern "C" int32_t GlobalizationNative_NormalizeString( |
81 | NormalizationForm normalizationForm, const UChar* lpSrc, int32_t cwSrcLength, UChar* lpDst, int32_t cwDstLength) |
82 | { |
83 | UErrorCode err = U_ZERO_ERROR; |
84 | const UNormalizer2* pNormalizer = GetNormalizerForForm(normalizationForm, &err); |
85 | int32_t normalizedLen = unorm2_normalize(pNormalizer, lpSrc, cwSrcLength, lpDst, cwDstLength, &err); |
86 | |
87 | return (U_SUCCESS(err) || (err == U_BUFFER_OVERFLOW_ERROR)) ? normalizedLen : 0; |
88 | } |
89 | |