1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1999-2010, International Business Machines Corporation and others.
6* All Rights Reserved.
7**********************************************************************
8* Date Name Description
9* 11/17/99 aliu Creation.
10**********************************************************************
11*/
12#ifndef UNIFILT_H
13#define UNIFILT_H
14
15#include "unicode/utypes.h"
16
17#if U_SHOW_CPLUSPLUS_API
18
19#include "unicode/unifunct.h"
20#include "unicode/unimatch.h"
21
22/**
23 * \file
24 * \brief C++ API: Unicode Filter
25 */
26
27U_NAMESPACE_BEGIN
28
/**
 * Sentinel code unit standing in for any text position that lies
 * outside a given range.  A transliterator, for instance, reports
 * U_ETHER for positions outside contextStart..contextLimit-1, which
 * lets rules and UnicodeSets explicitly match text beyond the
 * defined range.
 * @stable ICU 3.0
 */
#define U_ETHER (static_cast<char16_t>(0xFFFF))
38
39/**
40 *
41 * <code>UnicodeFilter</code> defines a protocol for selecting a
42 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
43 * Currently, filters are used in conjunction with classes like {@link
44 * Transliterator} to only process selected characters through a
45 * transformation.
46 *
47 * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
48 * of its base class, UnicodeMatcher. These methods are toPattern()
49 * and matchesIndexValue(). This is done so that filter classes that
50 * are not actually used as matchers -- specifically, those in the
51 * UnicodeFilterLogic component, and those in tests -- can continue to
52 * work without defining these methods. As long as a filter is not
53 * used in an RBT during real transliteration, these methods will not
54 * be called. However, this breaks the UnicodeMatcher base class
55 * protocol, and it is not a correct solution.
56 *
57 * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
58 * hierarchy and either redesign it, or simply remove the stubs in
59 * UnicodeFilter and force subclasses to implement the full
60 * UnicodeMatcher protocol.
61 *
62 * @see UnicodeFilterLogic
63 * @stable ICU 2.0
64 */
65class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
66
67public:
68 /**
69 * Destructor
70 * @stable ICU 2.0
71 */
72 virtual ~UnicodeFilter();
73
74 /**
75 * Clones this object polymorphically.
76 * The caller owns the result and should delete it when done.
77 * @return clone, or nullptr if an error occurred
78 * @stable ICU 2.4
79 */
80 virtual UnicodeFilter* clone() const = 0;
81
82 /**
83 * Returns <tt>true</tt> for characters that are in the selected
84 * subset. In other words, if a character is <b>to be
85 * filtered</b>, then <tt>contains()</tt> returns
86 * <b><tt>false</tt></b>.
87 * @stable ICU 2.0
88 */
89 virtual UBool contains(UChar32 c) const = 0;
90
91 /**
92 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
93 * and return the pointer.
94 * @stable ICU 2.4
95 */
96 virtual UnicodeMatcher* toMatcher() const;
97
98 /**
99 * Implement UnicodeMatcher API.
100 * @stable ICU 2.4
101 */
102 virtual UMatchDegree matches(const Replaceable& text,
103 int32_t& offset,
104 int32_t limit,
105 UBool incremental);
106
107 /**
108 * UnicodeFunctor API. Nothing to do.
109 * @stable ICU 2.4
110 */
111 virtual void setData(const TransliterationRuleData*);
112
113 /**
114 * ICU "poor man's RTTI", returns a UClassID for this class.
115 *
116 * @stable ICU 2.2
117 */
118 static UClassID U_EXPORT2 getStaticClassID();
119
120protected:
121
122 /*
123 * Since this class has pure virtual functions,
124 * a constructor can't be used.
125 * @stable ICU 2.0
126 */
127/* UnicodeFilter();*/
128};
129
130/*inline UnicodeFilter::UnicodeFilter() {}*/
131
132U_NAMESPACE_END
133
134#endif /* U_SHOW_CPLUSPLUS_API */
135
136#endif
137