| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | *************************************************************************** | 
|---|
| 5 | * Copyright (C) 2008-2015, International Business Machines Corporation | 
|---|
| 6 | * and others. All Rights Reserved. | 
|---|
| 7 | *************************************************************************** | 
|---|
| 8 | *   file name:  uspoof_build.cpp | 
|---|
| 9 | *   encoding:   UTF-8 | 
|---|
| 10 | *   tab size:   8 (not used) | 
|---|
| 11 | *   indentation:4 | 
|---|
| 12 | * | 
|---|
| 13 | *   created on: 2008 Dec 8 | 
|---|
| 14 | *   created by: Andy Heninger | 
|---|
| 15 | * | 
|---|
| 16 | *   Unicode Spoof Detection Data Builder | 
|---|
| 17 | *   Builder-related functions are kept in separate files so that applications not needing | 
|---|
| 18 | *   the builder can more easily exclude them, typically by means of static linking. | 
|---|
| 19 | * | 
|---|
| 20 | *   There are three relatively independent sets of Spoof data: | 
|---|
| 21 | *      Confusables, | 
|---|
| 22 | *      Whole Script Confusables, | 
|---|
| 23 | *      ID character extensions. | 
|---|
| 24 | * | 
|---|
| 25 | *   The data tables for each are built separately, each from its own definitions. | 
|---|
| 26 | */ | 
|---|
| 27 |  | 
|---|
| 28 | #include "unicode/utypes.h" | 
|---|
| 29 | #include "unicode/uspoof.h" | 
|---|
| 30 | #include "unicode/unorm.h" | 
|---|
| 31 | #include "unicode/uregex.h" | 
|---|
| 32 | #include "unicode/ustring.h" | 
|---|
| 33 | #include "cmemory.h" | 
|---|
| 34 | #include "uspoof_impl.h" | 
|---|
| 35 | #include "uhash.h" | 
|---|
| 36 | #include "uvector.h" | 
|---|
| 37 | #include "uassert.h" | 
|---|
| 38 | #include "uarrsort.h" | 
|---|
| 39 | #include "uspoof_conf.h" | 
|---|
| 40 |  | 
|---|
| 41 | #if !UCONFIG_NO_NORMALIZATION | 
|---|
| 42 |  | 
|---|
| 43 | U_NAMESPACE_USE | 
|---|
| 44 |  | 
|---|
| 45 | // Defined in uspoof.cpp, initializes file-static variables. | 
|---|
| 46 | U_CFUNC void uspoof_internalInitStatics(UErrorCode *status); | 
|---|
| 47 |  | 
|---|
| 48 | // The main data building function | 
|---|
| 49 |  | 
|---|
| 50 | U_CAPI USpoofChecker * U_EXPORT2 | 
|---|
| 51 | uspoof_openFromSource(const char *confusables,  int32_t confusablesLen, | 
|---|
| 52 | const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/, | 
|---|
| 53 | int32_t *errorType, UParseError *pe, UErrorCode *status) { | 
|---|
| 54 | uspoof_internalInitStatics(status); | 
|---|
| 55 | if (U_FAILURE(*status)) { | 
|---|
| 56 | return NULL; | 
|---|
| 57 | } | 
|---|
| 58 | #if UCONFIG_NO_REGULAR_EXPRESSIONS | 
|---|
| 59 | *status = U_UNSUPPORTED_ERROR; | 
|---|
| 60 | return NULL; | 
|---|
| 61 | #else | 
|---|
| 62 | if (errorType!=NULL) { | 
|---|
| 63 | *errorType = 0; | 
|---|
| 64 | } | 
|---|
| 65 | if (pe != NULL) { | 
|---|
| 66 | pe->line = 0; | 
|---|
| 67 | pe->offset = 0; | 
|---|
| 68 | pe->preContext[0] = 0; | 
|---|
| 69 | pe->postContext[0] = 0; | 
|---|
| 70 | } | 
|---|
| 71 |  | 
|---|
| 72 | // Set up a shell of a spoof detector, with empty data. | 
|---|
| 73 | SpoofData *newSpoofData = new SpoofData(*status); | 
|---|
| 74 |  | 
|---|
| 75 | if (newSpoofData == NULL) { | 
|---|
| 76 | *status = U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 77 | return NULL; | 
|---|
| 78 | } | 
|---|
| 79 |  | 
|---|
| 80 | if (U_FAILURE(*status)) { | 
|---|
| 81 | delete newSpoofData; | 
|---|
| 82 | return NULL; | 
|---|
| 83 | } | 
|---|
| 84 | SpoofImpl *This = new SpoofImpl(newSpoofData, *status); | 
|---|
| 85 |  | 
|---|
| 86 | if (This == NULL) { | 
|---|
| 87 | *status = U_MEMORY_ALLOCATION_ERROR; | 
|---|
| 88 | delete newSpoofData; // explicit delete as the destructor for SpoofImpl won't be called. | 
|---|
| 89 | return NULL; | 
|---|
| 90 | } | 
|---|
| 91 |  | 
|---|
| 92 | if (U_FAILURE(*status)) { | 
|---|
| 93 | delete This; // no delete for newSpoofData, as the SpoofImpl destructor will delete it. | 
|---|
| 94 | return NULL; | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | // Compile the binary data from the source (text) format. | 
|---|
| 98 | ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); | 
|---|
| 99 |  | 
|---|
| 100 | if (U_FAILURE(*status)) { | 
|---|
| 101 | delete This; | 
|---|
| 102 | This = NULL; | 
|---|
| 103 | } | 
|---|
| 104 | return (USpoofChecker *)This; | 
|---|
| 105 | #endif // UCONFIG_NO_REGULAR_EXPRESSIONS | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 | #endif | 
|---|
| 109 |  | 
|---|