| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | *************************************************************************** |
| 5 | * Copyright (C) 2008-2015, International Business Machines Corporation |
| 6 | * and others. All Rights Reserved. |
| 7 | *************************************************************************** |
| 8 | * file name: uspoof_build.cpp |
| 9 | * encoding: UTF-8 |
| 10 | * tab size: 8 (not used) |
| 11 | * indentation:4 |
| 12 | * |
| 13 | * created on: 2008 Dec 8 |
| 14 | * created by: Andy Heninger |
| 15 | * |
| 16 | * Unicode Spoof Detection Data Builder |
| 17 | * Builder-related functions are kept in separate files so that applications not needing |
| 18 | * the builder can more easily exclude them, typically by means of static linking. |
| 19 | * |
| 20 | * There are three relatively independent sets of Spoof data, |
| 21 | * Confusables, |
| 22 | * Whole Script Confusables, |
| 23 | * ID character extensions. |
| 24 | * |
| 25 | * The data tables for each are built separately, each from its own definitions |
| 26 | */ |
| 27 | |
| 28 | #include "unicode/utypes.h" |
| 29 | #include "unicode/uspoof.h" |
| 30 | #include "unicode/unorm.h" |
| 31 | #include "unicode/uregex.h" |
| 32 | #include "unicode/ustring.h" |
| 33 | #include "cmemory.h" |
| 34 | #include "uspoof_impl.h" |
| 35 | #include "uhash.h" |
| 36 | #include "uvector.h" |
| 37 | #include "uassert.h" |
| 38 | #include "uarrsort.h" |
| 39 | #include "uspoof_conf.h" |
| 40 | |
| 41 | #if !UCONFIG_NO_NORMALIZATION |
| 42 | |
| 43 | U_NAMESPACE_USE |
| 44 | |
| 45 | // Defined in uspoof.cpp, initializes file-static variables. |
| 46 | U_CFUNC void uspoof_internalInitStatics(UErrorCode *status); |
| 47 | |
| 48 | // The main data building function |
| 49 | |
| 50 | U_CAPI USpoofChecker * U_EXPORT2 |
| 51 | uspoof_openFromSource(const char *confusables, int32_t confusablesLen, |
| 52 | const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/, |
| 53 | int32_t *errorType, UParseError *pe, UErrorCode *status) { |
| 54 | uspoof_internalInitStatics(status); |
| 55 | if (U_FAILURE(*status)) { |
| 56 | return NULL; |
| 57 | } |
| 58 | #if UCONFIG_NO_REGULAR_EXPRESSIONS |
| 59 | *status = U_UNSUPPORTED_ERROR; |
| 60 | return NULL; |
| 61 | #else |
| 62 | if (errorType!=NULL) { |
| 63 | *errorType = 0; |
| 64 | } |
| 65 | if (pe != NULL) { |
| 66 | pe->line = 0; |
| 67 | pe->offset = 0; |
| 68 | pe->preContext[0] = 0; |
| 69 | pe->postContext[0] = 0; |
| 70 | } |
| 71 | |
| 72 | // Set up a shell of a spoof detector, with empty data. |
| 73 | SpoofData *newSpoofData = new SpoofData(*status); |
| 74 | |
| 75 | if (newSpoofData == NULL) { |
| 76 | *status = U_MEMORY_ALLOCATION_ERROR; |
| 77 | return NULL; |
| 78 | } |
| 79 | |
| 80 | if (U_FAILURE(*status)) { |
| 81 | delete newSpoofData; |
| 82 | return NULL; |
| 83 | } |
| 84 | SpoofImpl *This = new SpoofImpl(newSpoofData, *status); |
| 85 | |
| 86 | if (This == NULL) { |
| 87 | *status = U_MEMORY_ALLOCATION_ERROR; |
| 88 | delete newSpoofData; // explicit delete as the destructor for SpoofImpl won't be called. |
| 89 | return NULL; |
| 90 | } |
| 91 | |
| 92 | if (U_FAILURE(*status)) { |
| 93 | delete This; // no delete for newSpoofData, as the SpoofImpl destructor will delete it. |
| 94 | return NULL; |
| 95 | } |
| 96 | |
| 97 | // Compile the binary data from the source (text) format. |
| 98 | ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); |
| 99 | |
| 100 | if (U_FAILURE(*status)) { |
| 101 | delete This; |
| 102 | This = NULL; |
| 103 | } |
| 104 | return (USpoofChecker *)This; |
| 105 | #endif // UCONFIG_NO_REGULAR_EXPRESSIONS |
| 106 | } |
| 107 | |
| 108 | #endif |
| 109 | |