1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 ***************************************************************************
5 * Copyright (C) 2008-2015, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ***************************************************************************
8 * file name: uspoof_build.cpp
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2008 Dec 8
14 * created by: Andy Heninger
15 *
16 * Unicode Spoof Detection Data Builder
17 * Builder-related functions are kept in separate files so that applications not needing
18 * the builder can more easily exclude them, typically by means of static linking.
19 *
 * There are three relatively independent sets of Spoof data:
 *   Confusables,
 *   Whole Script Confusables,
 *   ID character extensions.
 *
 * The data tables for each are built separately, each from its own definitions.
26 */
27
28#include "unicode/utypes.h"
29#include "unicode/uspoof.h"
30#include "unicode/unorm.h"
31#include "unicode/uregex.h"
32#include "unicode/ustring.h"
33#include "cmemory.h"
34#include "uspoof_impl.h"
35#include "uhash.h"
36#include "uvector.h"
37#include "uassert.h"
38#include "uarrsort.h"
39#include "uspoof_conf.h"
40
41#if !UCONFIG_NO_NORMALIZATION
42
43U_NAMESPACE_USE
44
45// Defined in uspoof.cpp, initializes file-static variables.
46U_CFUNC void uspoof_internalInitStatics(UErrorCode *status);
47
48// The main data building function
49
50U_CAPI USpoofChecker * U_EXPORT2
51uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
52 const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/,
53 int32_t *errorType, UParseError *pe, UErrorCode *status) {
54 uspoof_internalInitStatics(status);
55 if (U_FAILURE(*status)) {
56 return NULL;
57 }
58#if UCONFIG_NO_REGULAR_EXPRESSIONS
59 *status = U_UNSUPPORTED_ERROR;
60 return NULL;
61#else
62 if (errorType!=NULL) {
63 *errorType = 0;
64 }
65 if (pe != NULL) {
66 pe->line = 0;
67 pe->offset = 0;
68 pe->preContext[0] = 0;
69 pe->postContext[0] = 0;
70 }
71
72 // Set up a shell of a spoof detector, with empty data.
73 SpoofData *newSpoofData = new SpoofData(*status);
74
75 if (newSpoofData == NULL) {
76 *status = U_MEMORY_ALLOCATION_ERROR;
77 return NULL;
78 }
79
80 if (U_FAILURE(*status)) {
81 delete newSpoofData;
82 return NULL;
83 }
84 SpoofImpl *This = new SpoofImpl(newSpoofData, *status);
85
86 if (This == NULL) {
87 *status = U_MEMORY_ALLOCATION_ERROR;
88 delete newSpoofData; // explicit delete as the destructor for SpoofImpl won't be called.
89 return NULL;
90 }
91
92 if (U_FAILURE(*status)) {
93 delete This; // no delete for newSpoofData, as the SpoofImpl destructor will delete it.
94 return NULL;
95 }
96
97 // Compile the binary data from the source (text) format.
98 ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status);
99
100 if (U_FAILURE(*status)) {
101 delete This;
102 This = NULL;
103 }
104 return (USpoofChecker *)This;
105#endif // UCONFIG_NO_REGULAR_EXPRESSIONS
106}
107
108#endif
109