| 1 | // © 2016 and later: Unicode, Inc. and others. |
| 2 | // License & terms of use: http://www.unicode.org/copyright.html |
| 3 | /* |
| 4 | ******************************************************************************** |
| 5 | * Copyright (C) 1997-2015, International Business Machines |
| 6 | * Corporation and others. All Rights Reserved. |
| 7 | ******************************************************************************** |
| 8 | */ |
| 9 | |
| 10 | #ifndef FILTEREDBRK_H |
| 11 | #define FILTEREDBRK_H |
| 12 | |
| 13 | #include "unicode/utypes.h" |
| 14 | |
| 15 | #if U_SHOW_CPLUSPLUS_API |
| 16 | |
| 17 | #include "unicode/brkiter.h" |
| 18 | |
| 19 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
| 20 | |
| 21 | U_NAMESPACE_BEGIN |
| 22 | |
| 23 | /** |
| 24 | * \file |
| 25 | * \brief C++ API: FilteredBreakIteratorBuilder |
| 26 | */ |
| 27 | |
| 28 | /** |
| 29 | * The FilteredBreakIteratorBuilder is used to modify the behavior of a BreakIterator |
| 30 | * by constructing a new BreakIterator which suppresses certain segment boundaries. |
| 31 | * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . |
| 32 | * For example, a typical English Sentence Break Iterator would break on the space |
| 33 | * in the string "Mr. Smith" (resulting in two segments), |
| 34 | * but with "Mr." as an exception, a filtered break iterator |
| 35 | * would consider the string "Mr. Smith" to be a single segment. |
| 36 | * |
| 37 | * @stable ICU 56 |
| 38 | */ |
| 39 | class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { |
| 40 | public: |
| 41 | /** |
| 42 | * Virtual destructor, so that a builder can be destroyed through a base-class pointer. |
| 43 | * @stable ICU 56 |
| 44 | */ |
| 45 | virtual ~FilteredBreakIteratorBuilder(); |
| 46 | |
| 47 | /** |
| 48 | * Construct a FilteredBreakIteratorBuilder based on rules in a locale. |
| 49 | * The rules are taken from CLDR exception data for the locale, |
| 50 | * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . |
| 51 | * This is the equivalent of calling createEmptyInstance(UErrorCode&) |
| 52 | * and then repeatedly calling suppressBreakAfter(...) with the contents |
| 53 | * of the CLDR exception data. |
| 54 | * @param where the locale. |
| 55 | * @param status The error code. |
| 56 | * @return the new builder |
| 57 | * @stable ICU 56 |
| 58 | */ |
| 59 | static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); |
| 60 | |
| 61 | #ifndef U_HIDE_DEPRECATED_API |
| 62 | /** |
| 63 | * This function has been deprecated in favor of createEmptyInstance, which has |
| 64 | * identical behavior. |
| 65 | * @param status The error code. |
| 66 | * @return the new builder |
| 67 | * @deprecated ICU 60 use createEmptyInstance instead |
| 68 | * @see createEmptyInstance() |
| 69 | */ |
| 70 | static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); |
| 71 | #endif /* U_HIDE_DEPRECATED_API */ |
| 72 | |
| 73 | /** |
| 74 | * Construct an empty FilteredBreakIteratorBuilder. |
| 75 | * In this state, it will not suppress any segment boundaries. |
| 76 | * @param status The error code. |
| 77 | * @return the new builder |
| 78 | * @stable ICU 60 |
| 79 | */ |
| 80 | static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status); |
| 81 | |
| 82 | /** |
| 83 | * Suppress a certain string from being the end of a segment. |
| 84 | * For example, after suppressing "Mr.", segments ending in "Mr." will not be returned |
| 85 | * by the iterator. |
| 86 | * @param string the string to suppress, such as "Mr." |
| 87 | * @param status error code |
| 88 | * @return returns true if the string was not present and now added, |
| 89 | * false if the call was a no-op because the string was already being suppressed. |
| 90 | * @stable ICU 56 |
| 91 | */ |
| 92 | virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; |
| 93 | |
| 94 | /** |
| 95 | * Stop suppressing a certain string from being the end of a segment. |
| 96 | * This function does not create any new segment boundaries, but only serves to un-do |
| 97 | * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of |
| 98 | * locale data which may be suppressing certain strings. |
| 99 | * @param string the exception to remove |
| 100 | * @param status error code |
| 101 | * @return returns true if the string was present and now removed, |
| 102 | * false if the call was a no-op because the string was not being suppressed. |
| 103 | * @stable ICU 56 |
| 104 | */ |
| 105 | virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; |
| 106 | |
| 107 | #ifndef U_FORCE_HIDE_DEPRECATED_API |
| 108 | /** |
| 109 | * This function has been deprecated in favor of wrapIteratorWithFilter(). |
| 110 | * The behavior is identical. |
| 111 | * @param adoptBreakIterator the break iterator to adopt |
| 112 | * @param status error code |
| 113 | * @return the new BreakIterator, owned by the caller. |
| 114 | * @deprecated ICU 60 use wrapIteratorWithFilter() instead |
| 115 | * @see wrapIteratorWithFilter() |
| 116 | */ |
| 117 | virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; |
| 118 | #endif // U_FORCE_HIDE_DEPRECATED_API |
| 119 | |
| 120 | /** |
| 121 | * Wrap (adopt) an existing break iterator in a new filtered instance. |
| 122 | * The resulting BreakIterator is owned by the caller. |
| 123 | * The FilteredBreakIteratorBuilder may be destroyed before the BreakIterator is destroyed. |
| 124 | * Note that the adoptBreakIterator is adopted by the new BreakIterator |
| 125 | * and should no longer be used by the caller. |
| 126 | * The FilteredBreakIteratorBuilder may be reused. |
| 127 | * This function is an alias for build(): it forwards both arguments unchanged. NOTE(review): build() is only declared when U_FORCE_HIDE_DEPRECATED_API is not defined; confirm this inline body still compiles in that configuration. |
| 128 | * @param adoptBreakIterator the break iterator to adopt |
| 129 | * @param status error code |
| 130 | * @return the new BreakIterator, owned by the caller. |
| 131 | * @stable ICU 60 |
| 132 | */ |
| 133 | inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) { |
| 134 | return build(adoptBreakIterator, status); |
| 135 | } |
| 136 | |
| 137 | protected: |
| 138 | /** |
| 139 | * Default constructor; protected, for subclass use only. |
| 140 | * @stable ICU 56 |
| 141 | */ |
| 142 | FilteredBreakIteratorBuilder(); |
| 143 | }; |
| 144 | |
| 145 | |
| 146 | U_NAMESPACE_END |
| 147 | |
| 148 | #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
| 149 | |
| 150 | #endif /* U_SHOW_CPLUSPLUS_API */ |
| 151 | |
| 152 | #endif // #ifndef FILTEREDBRK_H |
| 153 | |