1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5*
6* Copyright (C) 2008-2016, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9******************************************************************************
10* file name: uspoof_conf.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2009Jan05
16* created by: Andy Heninger
17*
18* Internal classes for compiling confusable data into its binary (runtime) form.
19*/
20
21#ifndef __USPOOF_BUILDCONF_H__
22#define __USPOOF_BUILDCONF_H__
23
24#include "unicode/utypes.h"
25
26#if !UCONFIG_NO_NORMALIZATION
27
28#if !UCONFIG_NO_REGULAR_EXPRESSIONS
29
30#include "unicode/uregex.h"
31#include "uhash.h"
32#include "uspoof_impl.h"
33
34U_NAMESPACE_BEGIN
35
36// SPUString
37// Holds a string that is the result of one of the mappings defined
38// by the confusable mapping data (confusables.txt from Unicode.org)
39// Instances of SPUString exist during the compilation process only.
40
41struct SPUString : public UMemory {
42 UnicodeString *fStr; // The actual string.
43 int32_t fCharOrStrTableIndex; // Index into the final runtime data for this
44 // string (or, for length 1, the single string char
45 // itself, there being no string table entry for it.)
46 SPUString(UnicodeString *s);
47 ~SPUString();
48};
49
50
51// String Pool A utility class for holding the strings that are the result of
// the spoof mappings. These strings will ultimately end up in the
53// run-time String Table.
54// This is sort of like a sorted set of strings, except that ICU's anemic
55// built-in collections don't support those, so it is implemented with a
56// combination of a uhash and a UVector.
57
58
59class SPUStringPool : public UMemory {
60 public:
61 SPUStringPool(UErrorCode &status);
62 ~SPUStringPool();
63
64 // Add a string. Return the string from the table.
65 // If the input parameter string is already in the table, delete the
66 // input parameter and return the existing string.
67 SPUString *addString(UnicodeString *src, UErrorCode &status);
68
69
70 // Get the n-th string in the collection.
71 SPUString *getByIndex(int32_t i);
72
73 // Sort the contents; affects the ordering of getByIndex().
74 void sort(UErrorCode &status);
75
76 int32_t size();
77
78 private:
79 UVector *fVec; // Elements are SPUString *
80 UHashtable *fHash; // Key: UnicodeString Value: SPUString
81};
82
83
84// class ConfusabledataBuilder
85// An instance of this class exists while the confusable data is being built from source.
86// It encapsulates the intermediate data structures that are used for building.
87// It exports one static function, to do a confusable data build.
88
89class ConfusabledataBuilder : public UMemory {
90 private:
91 SpoofImpl *fSpoofImpl;
92 UChar *fInput;
93 UHashtable *fTable;
94 UnicodeSet *fKeySet; // A set of all keys (UChar32s) that go into the four mapping tables.
95
96 // The binary data is first assembled into the following four collections, then
97 // copied to its final raw-memory destination.
98 UVector *fKeyVec;
99 UVector *fValueVec;
100 UnicodeString *fStringTable;
101
102 SPUStringPool *stringPool;
103 URegularExpression *fParseLine;
104 URegularExpression *fParseHexNum;
105 int32_t fLineNum;
106
107 ConfusabledataBuilder(SpoofImpl *spImpl, UErrorCode &status);
108 ~ConfusabledataBuilder();
109 void build(const char * confusables, int32_t confusablesLen, UErrorCode &status);
110
111 // Add an entry to the key and value tables being built
112 // input: data from SLTable, MATable, etc.
113 // outut: entry added to fKeyVec and fValueVec
114 void addKeyEntry(UChar32 keyChar, // The key character
115 UHashtable *table, // The table, one of SATable, MATable, etc.
116 int32_t tableFlag, // One of USPOOF_SA_TABLE_FLAG, etc.
117 UErrorCode &status);
118
119 // From an index into fKeyVec & fValueVec
120 // get a UnicodeString with the corresponding mapping.
121 UnicodeString getMapping(int32_t index);
122
123 // Populate the final binary output data array with the compiled data.
124 void outputData(UErrorCode &status);
125
126 public:
127 static void buildConfusableData(SpoofImpl *spImpl, const char * confusables,
128 int32_t confusablesLen, int32_t *errorType, UParseError *pe, UErrorCode &status);
129};
130U_NAMESPACE_END
131
#endif // !UCONFIG_NO_REGULAR_EXPRESSIONS
#endif // !UCONFIG_NO_NORMALIZATION
134#endif // __USPOOF_BUILDCONF_H__
135