1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | *************************************************************************** |
5 | * Copyright (C) 2008-2015, International Business Machines Corporation |
6 | * and others. All Rights Reserved. |
7 | *************************************************************************** |
8 | * file name: uspoof_build.cpp |
9 | * encoding: UTF-8 |
10 | * tab size: 8 (not used) |
11 | * indentation:4 |
12 | * |
13 | * created on: 2008 Dec 8 |
14 | * created by: Andy Heninger |
15 | * |
16 | * Unicode Spoof Detection Data Builder |
17 | * Builder-related functions are kept in separate files so that applications not needing |
18 | * the builder can more easily exclude them, typically by means of static linking. |
19 | * |
20 | * There are three relatively independent sets of Spoof data, |
21 | * Confusables, |
22 | * Whole Script Confusables |
23 | * ID character extensions. |
24 | * |
25 | * The data tables for each are built separately, each from its own definitions |
26 | */ |
27 | |
28 | #include "unicode/utypes.h" |
29 | #include "unicode/uspoof.h" |
30 | #include "unicode/unorm.h" |
31 | #include "unicode/uregex.h" |
32 | #include "unicode/ustring.h" |
33 | #include "cmemory.h" |
34 | #include "uspoof_impl.h" |
35 | #include "uhash.h" |
36 | #include "uvector.h" |
37 | #include "uassert.h" |
38 | #include "uarrsort.h" |
39 | #include "uspoof_conf.h" |
40 | |
41 | #if !UCONFIG_NO_NORMALIZATION |
42 | |
43 | U_NAMESPACE_USE |
44 | |
// Defined in uspoof.cpp, initializes file-static variables.
// Declared here so that uspoof_openFromSource() can call it as its first step;
// that caller inspects *status immediately afterwards and gives up on failure.
U_CFUNC void uspoof_internalInitStatics(UErrorCode *status);
47 | |
48 | // The main data building function |
49 | |
50 | U_CAPI USpoofChecker * U_EXPORT2 |
51 | uspoof_openFromSource(const char *confusables, int32_t confusablesLen, |
52 | const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/, |
53 | int32_t *errorType, UParseError *pe, UErrorCode *status) { |
54 | uspoof_internalInitStatics(status); |
55 | if (U_FAILURE(*status)) { |
56 | return NULL; |
57 | } |
58 | #if UCONFIG_NO_REGULAR_EXPRESSIONS |
59 | *status = U_UNSUPPORTED_ERROR; |
60 | return NULL; |
61 | #else |
62 | if (errorType!=NULL) { |
63 | *errorType = 0; |
64 | } |
65 | if (pe != NULL) { |
66 | pe->line = 0; |
67 | pe->offset = 0; |
68 | pe->preContext[0] = 0; |
69 | pe->postContext[0] = 0; |
70 | } |
71 | |
72 | // Set up a shell of a spoof detector, with empty data. |
73 | SpoofData *newSpoofData = new SpoofData(*status); |
74 | |
75 | if (newSpoofData == NULL) { |
76 | *status = U_MEMORY_ALLOCATION_ERROR; |
77 | return NULL; |
78 | } |
79 | |
80 | if (U_FAILURE(*status)) { |
81 | delete newSpoofData; |
82 | return NULL; |
83 | } |
84 | SpoofImpl *This = new SpoofImpl(newSpoofData, *status); |
85 | |
86 | if (This == NULL) { |
87 | *status = U_MEMORY_ALLOCATION_ERROR; |
88 | delete newSpoofData; // explicit delete as the destructor for SpoofImpl won't be called. |
89 | return NULL; |
90 | } |
91 | |
92 | if (U_FAILURE(*status)) { |
93 | delete This; // no delete for newSpoofData, as the SpoofImpl destructor will delete it. |
94 | return NULL; |
95 | } |
96 | |
97 | // Compile the binary data from the source (text) format. |
98 | ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); |
99 | |
100 | if (U_FAILURE(*status)) { |
101 | delete This; |
102 | This = NULL; |
103 | } |
104 | return (USpoofChecker *)This; |
105 | #endif // UCONFIG_NO_REGULAR_EXPRESSIONS |
106 | } |
107 | |
108 | #endif |
109 | |