| 1 | // © 2016 and later: Unicode, Inc. and others. | 
|---|---|
| 2 | // License & terms of use: http://www.unicode.org/copyright.html | 
| 3 | /* | 
| 4 | ********************************************************************** | 
| 5 | * Copyright (c) 2001-2012, International Business Machines | 
| 6 | * Corporation and others. All Rights Reserved. | 
| 7 | ********************************************************************** | 
| 8 | * Date Name Description | 
| 9 | * 07/18/01 aliu Creation. | 
| 10 | ********************************************************************** | 
| 11 | */ | 
| 12 | |
| 13 | #include "unicode/unifilt.h" | 
| 14 | #include "unicode/rep.h" | 
| 15 | #include "unicode/utf16.h" | 
| 16 | |
| 17 | U_NAMESPACE_BEGIN | 
| 18 | UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(UnicodeFilter) | 
| 19 | |
| 20 | |
| 21 | /* Define this here due to the lack of another file. | 
| 22 | It can't be defined in the header */ | 
| 23 | UnicodeMatcher::~UnicodeMatcher() {} | 
| 24 | |
| 25 | UnicodeFilter::~UnicodeFilter() {} | 
| 26 | |
| 27 | /** | 
| 28 | * UnicodeFunctor API. | 
| 29 | * Note that UnicodeMatcher is a base class of UnicodeFilter. | 
| 30 | */ | 
| 31 | UnicodeMatcher* UnicodeFilter::toMatcher() const { | 
| 32 | return const_cast<UnicodeFilter *>(this); | 
| 33 | } | 
| 34 | |
| 35 | void UnicodeFilter::setData(const TransliterationRuleData*) {} | 
| 36 | |
| 37 | /** | 
| 38 | * Default implementation of UnicodeMatcher::matches() for Unicode | 
| 39 | * filters. Matches a single code point at offset (either one or | 
| 40 | * two 16-bit code units). | 
| 41 | */ | 
| 42 | UMatchDegree UnicodeFilter::matches(const Replaceable& text, | 
| 43 | int32_t& offset, | 
| 44 | int32_t limit, | 
| 45 | UBool incremental) { | 
| 46 | UChar32 c; | 
| 47 | if (offset < limit && | 
| 48 | contains(c = text.char32At(offset))) { | 
| 49 | offset += U16_LENGTH(c); | 
| 50 | return U_MATCH; | 
| 51 | } | 
| 52 | if (offset > limit && | 
| 53 | contains(c = text.char32At(offset))) { | 
| 54 | // Backup offset by 1, unless the preceding character is a | 
| 55 | // surrogate pair -- then backup by 2 (keep offset pointing at | 
| 56 | // the lead surrogate). | 
| 57 | --offset; | 
| 58 | if (offset >= 0) { | 
| 59 | offset -= U16_LENGTH(text.char32At(offset)) - 1; | 
| 60 | } | 
| 61 | return U_MATCH; | 
| 62 | } | 
| 63 | if (incremental && offset == limit) { | 
| 64 | return U_PARTIAL_MATCH; | 
| 65 | } | 
| 66 | return U_MISMATCH; | 
| 67 | } | 
| 68 | |
| 69 | U_NAMESPACE_END | 
| 70 | |
| 71 | //eof | 
| 72 | 
