| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ****************************************************************************** | 
|---|
| 5 | * | 
|---|
| 6 | *   Copyright (C) 2008-2016, International Business Machines | 
|---|
| 7 | *   Corporation and others.  All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | ****************************************************************************** | 
|---|
| 10 | *   file name:  uspoof_conf.h | 
|---|
| 11 | *   encoding:   UTF-8 | 
|---|
| 12 | *   tab size:   8 (not used) | 
|---|
| 13 | *   indentation:4 | 
|---|
| 14 | * | 
|---|
| 15 | *   created on: 2009Jan05 | 
|---|
| 16 | *   created by: Andy Heninger | 
|---|
| 17 | * | 
|---|
| 18 | *   Internal classes for compiling confusable data into its binary (runtime) form. | 
|---|
| 19 | */ | 
|---|
| 20 |  | 
|---|
| 21 | #ifndef __USPOOF_BUILDCONF_H__ | 
|---|
| 22 | #define __USPOOF_BUILDCONF_H__ | 
|---|
| 23 |  | 
|---|
| 24 | #include "unicode/utypes.h" | 
|---|
| 25 |  | 
|---|
| 26 | #if !UCONFIG_NO_NORMALIZATION | 
|---|
| 27 |  | 
|---|
| 28 | #if !UCONFIG_NO_REGULAR_EXPRESSIONS | 
|---|
| 29 |  | 
|---|
| 30 | #include "unicode/uregex.h" | 
|---|
| 31 | #include "uhash.h" | 
|---|
| 32 | #include "uspoof_impl.h" | 
|---|
| 33 |  | 
|---|
| 34 | U_NAMESPACE_BEGIN | 
|---|
| 35 |  | 
|---|
| 36 | // SPUString | 
|---|
| 37 | //              Holds one string that is the result of one of the mappings defined | 
|---|
| 38 | //              by the confusable mapping data (confusables.txt from Unicode.org). | 
|---|
| 39 | //              Build-time only: instances of SPUString exist during the compilation process only. | 
|---|
| 40 |  | 
|---|
| 41 | struct SPUString : public UMemory { | 
|---|
| 42 | UnicodeString  *fStr;             // The actual mapping-result string. NOTE(review): presumably owned by this object (a destructor is declared) - confirm in the implementation. | 
|---|
| 43 | int32_t         fCharOrStrTableIndex;   // Where this string lives in the final runtime data: | 
|---|
| 44 | // for a string of length 1, the single string char itself (there being no string | 
|---|
| 45 | // table entry for it); otherwise, the index of this string in the runtime data. | 
|---|
| 46 | SPUString(UnicodeString *s); | 
|---|
| 47 | ~SPUString(); | 
|---|
| 48 | }; | 
|---|
| 49 |  | 
|---|
| 50 |  | 
|---|
| 51 | //  String Pool   A utility class for holding the strings that are the result of | 
|---|
| 52 | //                the spoof mappings.  These strings will ultimately end up in the | 
|---|
| 53 | //                run-time String Table. | 
|---|
| 54 | //                It behaves roughly like a sorted set of strings; ICU's built-in | 
|---|
| 55 | //                collections do not provide one, so it is implemented with a | 
|---|
| 56 | //                combination of a uhash (fHash) and a UVector (fVec). | 
|---|
| 57 |  | 
|---|
| 58 |  | 
|---|
| 59 | class SPUStringPool : public UMemory { | 
|---|
| 60 | public: | 
|---|
| 61 | SPUStringPool(UErrorCode &status); | 
|---|
| 62 | ~SPUStringPool(); | 
|---|
| 63 |  | 
|---|
| 64 | // Add a string. Return the pool's SPUString entry for it. | 
|---|
| 65 | // If the input parameter string is already in the table, the input parameter | 
|---|
| 66 | //  is deleted and the existing string is returned; do not use src afterwards in that case. | 
|---|
| 67 | SPUString *addString(UnicodeString *src, UErrorCode &status); | 
|---|
| 68 |  | 
|---|
| 69 |  | 
|---|
| 70 | // Get the n-th string in the collection, in the collection's current order (see sort()). | 
|---|
| 71 | SPUString *getByIndex(int32_t i); | 
|---|
| 72 |  | 
|---|
| 73 | // Sort the contents; affects the ordering of getByIndex(). Errors are reported through status. | 
|---|
| 74 | void sort(UErrorCode &status); | 
|---|
| 75 |  | 
|---|
| 76 | int32_t size(); | 
|---|
| 77 |  | 
|---|
| 78 | private: | 
|---|
| 79 | UVector     *fVec;    // Elements are SPUString *. NOTE(review): presumably the indexed/sortable view used by getByIndex() and sort() - confirm. | 
|---|
| 80 | UHashtable  *fHash;   // Key: UnicodeString  Value: SPUString. NOTE(review): presumably how addString() finds an existing entry - confirm. | 
|---|
| 81 | }; | 
|---|
| 82 |  | 
|---|
| 83 |  | 
|---|
| 84 | // class ConfusabledataBuilder | 
|---|
| 85 | //     An instance of this class exists while the confusable data is being built from source. | 
|---|
| 86 | //     It encapsulates the intermediate data structures that are used for building. | 
|---|
| 87 | //     The constructor, destructor and all helpers are private; the one public entry point is the static function buildConfusableData(), which does a confusable data build. | 
|---|
| 88 |  | 
|---|
| 89 | class ConfusabledataBuilder : public UMemory { | 
|---|
| 90 | private: | 
|---|
| 91 | SpoofImpl  *fSpoofImpl; | 
|---|
| 92 | UChar      *fInput; | 
|---|
| 93 | UHashtable *fTable; | 
|---|
| 94 | UnicodeSet *fKeySet;     // A set of all keys (UChar32s) that go into the four mapping tables. | 
|---|
| 95 |  | 
|---|
| 96 | // The binary data is first assembled into the following collections (fKeyVec, | 
|---|
| 97 | //   fValueVec and fStringTable), then copied to its final raw-memory destination. | 
|---|
| 98 | UVector            *fKeyVec; | 
|---|
| 99 | UVector            *fValueVec; | 
|---|
| 100 | UnicodeString      *fStringTable; | 
|---|
| 101 |  | 
|---|
| 102 | SPUStringPool      *stringPool; | 
|---|
| 103 | URegularExpression *fParseLine; | 
|---|
| 104 | URegularExpression *fParseHexNum; | 
|---|
| 105 | int32_t             fLineNum; | 
|---|
| 106 |  | 
|---|
| 107 | ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status); | 
|---|
| 108 | ~ConfusabledataBuilder(); | 
|---|
| 109 | void build(const char * confusables, int32_t confusablesLen, UErrorCode &status); | 
|---|
| 110 |  | 
|---|
| 111 | // Add an entry to the key and value tables being built. | 
|---|
| 112 | //   input:  data from SLTable, MATable, etc. | 
|---|
| 113 | //   output: entry added to fKeyVec and fValueVec | 
|---|
| 114 | void addKeyEntry(UChar32     keyChar,     // The key character | 
|---|
| 115 | UHashtable *table,       // The table, one of SATable, MATable, etc. | 
|---|
| 116 | int32_t     tableFlag,   // One of USPOOF_SA_TABLE_FLAG, etc. | 
|---|
| 117 | UErrorCode &status); | 
|---|
| 118 |  | 
|---|
| 119 | // From an index into fKeyVec & fValueVec, | 
|---|
| 120 | //   get a UnicodeString (returned by value) with the corresponding mapping. | 
|---|
| 121 | UnicodeString getMapping(int32_t index); | 
|---|
| 122 |  | 
|---|
| 123 | // Populate the final binary output data array with the compiled data. | 
|---|
| 124 | void outputData(UErrorCode &status); | 
|---|
| 125 |  | 
|---|
| 126 | public: | 
|---|
| 127 | static void buildConfusableData(SpoofImpl *spImpl, const char * confusables, | 
|---|
| 128 | int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status); | 
|---|
| 129 | }; | 
|---|
| 130 | U_NAMESPACE_END | 
|---|
| 131 |  | 
|---|
| 132 | #endif  // !UCONFIG_NO_REGULAR_EXPRESSIONS | 
|---|
| 133 | #endif  // !UCONFIG_NO_NORMALIZATION | 
|---|
| 134 | #endif  // __USPOOF_BUILDCONF_H__ | 
|---|
| 135 |  | 
|---|