| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
|---|
| 3 | /* | 
|---|
| 4 | ********************************************************************** | 
|---|
| 5 | * Copyright (C) 1999-2010, International Business Machines Corporation and others. | 
|---|
| 6 | * All Rights Reserved. | 
|---|
| 7 | ********************************************************************** | 
|---|
| 8 | *   Date        Name        Description | 
|---|
| 9 | *   11/17/99    aliu        Creation. | 
|---|
| 10 | ********************************************************************** | 
|---|
| 11 | */ | 
|---|
| 12 | #ifndef UNIFILT_H | 
|---|
| 13 | #define UNIFILT_H | 
|---|
| 14 |  | 
|---|
| 15 | #include "unicode/utypes.h" | 
|---|
| 16 |  | 
|---|
| 17 | #if U_SHOW_CPLUSPLUS_API | 
|---|
| 18 |  | 
|---|
| 19 | #include "unicode/unifunct.h" | 
|---|
| 20 | #include "unicode/unimatch.h" | 
|---|
| 21 |  | 
|---|
| 22 | /** | 
|---|
| 23 | * \file | 
|---|
| 24 | * \brief C++ API: Unicode Filter | 
|---|
| 25 | */ | 
|---|
| 26 |  | 
|---|
| 27 | U_NAMESPACE_BEGIN | 
|---|
| 28 |  | 
|---|
/**
 * U_ETHER is the sentinel character value used for text positions that
 * lie outside a range.  For example, transliterator uses it to stand for
 * characters outside the range contextStart..contextLimit-1.  This lets
 * rules and UnicodeSets explicitly match text outside a defined range.
 *
 * The macro expands to the char16_t code unit 0xFFFF.
 * @stable ICU 3.0
 */
#define U_ETHER (static_cast<char16_t>(0xFFFF))
|---|
| 38 |  | 
|---|
| 39 | /** | 
|---|
| 40 | * | 
|---|
| 41 | * <code>UnicodeFilter</code> defines a protocol for selecting a | 
|---|
| 42 | * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. | 
|---|
| 43 | * Currently, filters are used in conjunction with classes like {@link | 
|---|
| 44 | * Transliterator} to only process selected characters through a | 
|---|
| 45 | * transformation. | 
|---|
| 46 | * | 
|---|
| 47 | * <p>Note: UnicodeFilter currently stubs out two pure virtual methods | 
|---|
| 48 | * of its base class, UnicodeMatcher.  These methods are toPattern() | 
|---|
| 49 | * and matchesIndexValue().  This is done so that filter classes that | 
|---|
| 50 | * are not actually used as matchers -- specifically, those in the | 
|---|
| 51 | * UnicodeFilterLogic component, and those in tests -- can continue to | 
|---|
| 52 | * work without defining these methods.  As long as a filter is not | 
|---|
| 53 | * used in an RBT during real transliteration, these methods will not | 
|---|
| 54 | * be called.  However, this breaks the UnicodeMatcher base class | 
|---|
| 55 | * protocol, and it is not a correct solution. | 
|---|
| 56 | * | 
|---|
| 57 | * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter | 
|---|
| 58 | * hierarchy and either redesign it, or simply remove the stubs in | 
|---|
| 59 | * UnicodeFilter and force subclasses to implement the full | 
|---|
| 60 | * UnicodeMatcher protocol. | 
|---|
| 61 | * | 
|---|
| 62 | * @see UnicodeFilterLogic | 
|---|
| 63 | * @stable ICU 2.0 | 
|---|
| 64 | */ | 
|---|
| 65 | class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { | 
|---|
| 66 |  | 
|---|
| 67 | public: | 
|---|
| 68 | /** | 
|---|
| 69 | * Destructor | 
|---|
| 70 | * @stable ICU 2.0 | 
|---|
| 71 | */ | 
|---|
| 72 | virtual ~UnicodeFilter(); | 
|---|
| 73 |  | 
|---|
| 74 | /** | 
|---|
| 75 | * Clones this object polymorphically. | 
|---|
| 76 | * The caller owns the result and should delete it when done. | 
|---|
| 77 | * @return clone, or nullptr if an error occurred | 
|---|
| 78 | * @stable ICU 2.4 | 
|---|
| 79 | */ | 
|---|
| 80 | virtual UnicodeFilter* clone() const = 0; | 
|---|
| 81 |  | 
|---|
| 82 | /** | 
|---|
| 83 | * Returns <tt>true</tt> for characters that are in the selected | 
|---|
| 84 | * subset.  In other words, if a character is <b>to be | 
|---|
| 85 | * filtered</b>, then <tt>contains()</tt> returns | 
|---|
| 86 | * <b><tt>false</tt></b>. | 
|---|
| 87 | * @stable ICU 2.0 | 
|---|
| 88 | */ | 
|---|
| 89 | virtual UBool contains(UChar32 c) const = 0; | 
|---|
| 90 |  | 
|---|
| 91 | /** | 
|---|
| 92 | * UnicodeFunctor API.  Cast 'this' to a UnicodeMatcher* pointer | 
|---|
| 93 | * and return the pointer. | 
|---|
| 94 | * @stable ICU 2.4 | 
|---|
| 95 | */ | 
|---|
| 96 | virtual UnicodeMatcher* toMatcher() const; | 
|---|
| 97 |  | 
|---|
| 98 | /** | 
|---|
| 99 | * Implement UnicodeMatcher API. | 
|---|
| 100 | * @stable ICU 2.4 | 
|---|
| 101 | */ | 
|---|
| 102 | virtual UMatchDegree matches(const Replaceable& text, | 
|---|
| 103 | int32_t& offset, | 
|---|
| 104 | int32_t limit, | 
|---|
| 105 | UBool incremental); | 
|---|
| 106 |  | 
|---|
| 107 | /** | 
|---|
| 108 | * UnicodeFunctor API.  Nothing to do. | 
|---|
| 109 | * @stable ICU 2.4 | 
|---|
| 110 | */ | 
|---|
| 111 | virtual void setData(const TransliterationRuleData*); | 
|---|
| 112 |  | 
|---|
| 113 | /** | 
|---|
| 114 | * ICU "poor man's RTTI", returns a UClassID for this class. | 
|---|
| 115 | * | 
|---|
| 116 | * @stable ICU 2.2 | 
|---|
| 117 | */ | 
|---|
| 118 | static UClassID U_EXPORT2 getStaticClassID(); | 
|---|
| 119 |  | 
|---|
| 120 | protected: | 
|---|
| 121 |  | 
|---|
| 122 | /* | 
|---|
| 123 | * Since this class has pure virtual functions, | 
|---|
| 124 | * a constructor can't be used. | 
|---|
| 125 | * @stable ICU 2.0 | 
|---|
| 126 | */ | 
|---|
| 127 | /*    UnicodeFilter();*/ | 
|---|
| 128 | }; | 
|---|
| 129 |  | 
|---|
| 130 | /*inline UnicodeFilter::UnicodeFilter() {}*/ | 
|---|
| 131 |  | 
|---|
| 132 | U_NAMESPACE_END | 
|---|
| 133 |  | 
|---|
| 134 | #endif /* U_SHOW_CPLUSPLUS_API */ | 
|---|
| 135 |  | 
|---|
| 136 | #endif | 
|---|
| 137 |  | 
|---|