1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html |
3 | /* |
4 | ******************************************************************************** |
5 | * Copyright (C) 1997-2015, International Business Machines |
6 | * Corporation and others. All Rights Reserved. |
7 | ******************************************************************************** |
8 | */ |
9 | |
10 | #ifndef FILTEREDBRK_H |
11 | #define FILTEREDBRK_H |
12 | |
13 | #include "unicode/utypes.h" |
14 | |
15 | #if U_SHOW_CPLUSPLUS_API |
16 | |
17 | #include "unicode/brkiter.h" |
18 | |
19 | #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
20 | |
21 | U_NAMESPACE_BEGIN |
22 | |
23 | /** |
24 | * \file |
25 | * \brief C++ API: FilteredBreakIteratorBuilder |
26 | */ |
27 | |
28 | /** |
29 | * The BreakIteratorFilter is used to modify the behavior of a BreakIterator |
30 | * by constructing a new BreakIterator which suppresses certain segment boundaries. |
31 | * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions . |
32 | * For example, a typical English Sentence Break Iterator would break on the space |
33 | * in the string "Mr. Smith" (resulting in two segments), |
34 | * but with "Mr." as an exception, a filtered break iterator |
35 | * would consider the string "Mr. Smith" to be a single segment. |
36 | * |
37 | * @stable ICU 56 |
38 | */ |
39 | class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { |
40 | public: |
41 | /** |
42 | * destructor. |
43 | * @stable ICU 56 |
44 | */ |
45 | virtual ~FilteredBreakIteratorBuilder(); |
46 | |
47 | /** |
48 | * Construct a FilteredBreakIteratorBuilder based on rules in a locale. |
49 | * The rules are taken from CLDR exception data for the locale, |
50 | * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions |
51 | * This is the equivalent of calling createInstance(UErrorCode&) |
52 | * and then repeatedly calling addNoBreakAfter(...) with the contents |
53 | * of the CLDR exception data. |
54 | * @param where the locale. |
55 | * @param status The error code. |
56 | * @return the new builder |
57 | * @stable ICU 56 |
58 | */ |
59 | static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); |
60 | |
61 | #ifndef U_HIDE_DEPRECATED_API |
62 | /** |
63 | * This function has been deprecated in favor of createEmptyInstance, which has |
64 | * identical behavior. |
65 | * @param status The error code. |
66 | * @return the new builder |
67 | * @deprecated ICU 60 use createEmptyInstance instead |
68 | * @see createEmptyInstance() |
69 | */ |
70 | static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); |
71 | #endif /* U_HIDE_DEPRECATED_API */ |
72 | |
73 | /** |
74 | * Construct an empty FilteredBreakIteratorBuilder. |
75 | * In this state, it will not suppress any segment boundaries. |
76 | * @param status The error code. |
77 | * @return the new builder |
78 | * @stable ICU 60 |
79 | */ |
80 | static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status); |
81 | |
82 | /** |
83 | * Suppress a certain string from being the end of a segment. |
84 | * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned |
85 | * by the iterator. |
86 | * @param string the string to suppress, such as "Mr." |
87 | * @param status error code |
88 | * @return returns true if the string was not present and now added, |
89 | * false if the call was a no-op because the string was already being suppressed. |
90 | * @stable ICU 56 |
91 | */ |
92 | virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; |
93 | |
94 | /** |
95 | * Stop suppressing a certain string from being the end of the segment. |
96 | * This function does not create any new segment boundaries, but only serves to un-do |
97 | * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of |
98 | * locale data which may be suppressing certain strings. |
99 | * @param string the exception to remove |
100 | * @param status error code |
101 | * @return returns true if the string was present and now removed, |
102 | * false if the call was a no-op because the string was not being suppressed. |
103 | * @stable ICU 56 |
104 | */ |
105 | virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; |
106 | |
107 | #ifndef U_FORCE_HIDE_DEPRECATED_API |
108 | /** |
109 | * This function has been deprecated in favor of wrapIteratorWithFilter() |
110 | * The behavior is identical. |
111 | * @param adoptBreakIterator the break iterator to adopt |
112 | * @param status error code |
113 | * @return the new BreakIterator, owned by the caller. |
114 | * @deprecated ICU 60 use wrapIteratorWithFilter() instead |
115 | * @see wrapBreakIteratorWithFilter() |
116 | */ |
117 | virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; |
118 | #endif // U_FORCE_HIDE_DEPRECATED_API |
119 | |
120 | /** |
121 | * Wrap (adopt) an existing break iterator in a new filtered instance. |
122 | * The resulting BreakIterator is owned by the caller. |
123 | * The BreakIteratorFilter may be destroyed before the BreakIterator is destroyed. |
124 | * Note that the adoptBreakIterator is adopted by the new BreakIterator |
125 | * and should no longer be used by the caller. |
126 | * The FilteredBreakIteratorBuilder may be reused. |
127 | * This function is an alias for build() |
128 | * @param adoptBreakIterator the break iterator to adopt |
129 | * @param status error code |
130 | * @return the new BreakIterator, owned by the caller. |
131 | * @stable ICU 60 |
132 | */ |
133 | inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) { |
134 | return build(adoptBreakIterator, status); |
135 | } |
136 | |
137 | protected: |
138 | /** |
139 | * For subclass use |
140 | * @stable ICU 56 |
141 | */ |
142 | FilteredBreakIteratorBuilder(); |
143 | }; |
144 | |
145 | |
146 | U_NAMESPACE_END |
147 | |
148 | #endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
149 | |
150 | #endif /* U_SHOW_CPLUSPLUS_API */ |
151 | |
152 | #endif // #ifndef FILTEREDBRK_H |
153 | |