1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4***************************************************************************
5* Copyright (C) 1999-2016 International Business Machines Corporation *
6* and others. All rights reserved. *
7***************************************************************************
8
9**********************************************************************
10* Date Name Description
11* 10/22/99 alan Creation.
12* 11/11/99 rgillam Complete port from Java.
13**********************************************************************
14*/
15
16#ifndef RBBI_H
17#define RBBI_H
18
19#include "unicode/utypes.h"
20
21#if U_SHOW_CPLUSPLUS_API
22
23/**
24 * \file
25 * \brief C++ API: Rule Based Break Iterator
26 */
27
28#if !UCONFIG_NO_BREAK_ITERATION
29
30#include "unicode/brkiter.h"
31#include "unicode/udata.h"
32#include "unicode/parseerr.h"
33#include "unicode/schriter.h"
34
// Forward declaration of the C code point trie type (global namespace).
// Only used here as a pointer parameter in the PTrieFunc signature.
struct UCPTrie;

U_NAMESPACE_BEGIN

// Forward declarations of implementation types that this header only refers to
// through pointers or references, so their full definitions are not needed here:
//   LanguageBreakEngine - returned by getLanguageBreakEngine()
//   RBBIDataHeader      - parameter of the private "flattened data" constructor
//   RBBIDataWrapper     - type of the fData member
//   UnhandledEngine     - type of the fUnhandledBreakEngine member
//   UStack              - type of the fLanguageBreakEngines member
/** @internal */
class LanguageBreakEngine;
struct RBBIDataHeader;
class RBBIDataWrapper;
class UnhandledEngine;
class UStack;
45
46/**
47 *
48 * A subclass of BreakIterator whose behavior is specified using a list of rules.
49 * <p>Instances of this class are most commonly created by the factory methods of
50 * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
51 * and then used via the abstract API in class BreakIterator</p>
52 *
53 * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
54 *
55 * <p>This class is not intended to be subclassed.</p>
56 */
57class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
58
59private:
60 /**
61 * The UText through which this BreakIterator accesses the text
62 * @internal (private)
63 */
64 UText fText = UTEXT_INITIALIZER;
65
66#ifndef U_HIDE_INTERNAL_API
67public:
68#endif /* U_HIDE_INTERNAL_API */
69 /**
70 * The rule data for this BreakIterator instance.
71 * Not for general use; Public only for testing purposes.
72 * @internal
73 */
74 RBBIDataWrapper *fData = nullptr;
75
76private:
77 /**
78 * The saved error code associated with this break iterator.
79 * This is the value to be returned by copyErrorTo().
80 */
81 UErrorCode fErrorCode = U_ZERO_ERROR;
82
83 /**
84 * The current position of the iterator. Pinned, 0 < fPosition <= text.length.
85 * Never has the value UBRK_DONE (-1).
86 */
87 int32_t fPosition = 0;
88
89 /**
90 * TODO:
91 */
92 int32_t fRuleStatusIndex = 0;
93
94 /**
95 * Cache of previously determined boundary positions.
96 */
97 class BreakCache;
98 BreakCache *fBreakCache = nullptr;
99
100 /**
101 * Cache of boundary positions within a region of text that has been
102 * sub-divided by dictionary based breaking.
103 */
104 class DictionaryCache;
105 DictionaryCache *fDictionaryCache = nullptr;
106
107 /**
108 *
109 * If present, UStack of LanguageBreakEngine objects that might handle
110 * dictionary characters. Searched from top to bottom to find an object to
111 * handle a given character.
112 * @internal (private)
113 */
114 UStack *fLanguageBreakEngines = nullptr;
115
116 /**
117 *
118 * If present, the special LanguageBreakEngine used for handling
119 * characters that are in the dictionary set, but not handled by any
120 * LanguageBreakEngine.
121 * @internal (private)
122 */
123 UnhandledEngine *fUnhandledBreakEngine = nullptr;
124
125 /**
126 * Counter for the number of characters encountered with the "dictionary"
127 * flag set.
128 * @internal (private)
129 */
130 uint32_t fDictionaryCharCount = 0;
131
132 /**
133 * A character iterator that refers to the same text as the UText, above.
134 * Only included for compatibility with old API, which was based on CharacterIterators.
135 * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
136 */
137 CharacterIterator *fCharIter = &fSCharIter;
138
139 /**
140 * When the input text is provided by a UnicodeString, this will point to
141 * a characterIterator that wraps that data. Needed only for the
142 * implementation of getText(), a backwards compatibility issue.
143 */
144 UCharCharacterIterator fSCharIter {u"", 0};
145
146 /**
147 * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
148 */
149 bool fDone = false;
150
151 /**
152 * Array of look-ahead tentative results.
153 */
154 int32_t *fLookAheadMatches = nullptr;
155
156 /**
157 * A flag to indicate if phrase based breaking is enabled.
158 */
159 UBool fIsPhraseBreaking = false;
160
161 //=======================================================================
162 // constructors
163 //=======================================================================
164
165 /**
166 * Constructor from a flattened set of RBBI data in malloced memory.
167 * RulesBasedBreakIterators built from a custom set of rules
168 * are created via this constructor; the rules are compiled
169 * into memory, then the break iterator is constructed here.
170 *
171 * The break iterator adopts the memory, and will
172 * free it when done.
173 * @internal (private)
174 */
175 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
176
177 /**
178 * This constructor uses the udata interface to create a BreakIterator
179 * whose internal tables live in a memory-mapped file. "image" is an
180 * ICU UDataMemory handle for the pre-compiled break iterator tables.
181 * @param image handle to the memory image for the break iterator data.
182 * Ownership of the UDataMemory handle passes to the Break Iterator,
183 * which will be responsible for closing it when it is no longer needed.
184 * @param status Information on any errors encountered.
185 * @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
186 * @see udata_open
187 * @see #getBinaryRules
188 * @internal (private)
189 */
190 RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
191
192 /** @internal */
193 friend class RBBIRuleBuilder;
194 /** @internal */
195 friend class BreakIterator;
196
197 /**
198 * Default constructor with an error code parameter.
199 * Aside from error handling, otherwise identical to the default constructor.
200 * Internally, handles common initialization for other constructors.
201 * @internal (private)
202 */
203 RuleBasedBreakIterator(UErrorCode *status);
204
205public:
206
207 /** Default constructor. Creates an empty shell of an iterator, with no
208 * rules or text to iterate over. Object can subsequently be assigned to,
209 * but is otherwise unusable.
210 * @stable ICU 2.2
211 */
212 RuleBasedBreakIterator();
213
214 /**
215 * Copy constructor. Will produce a break iterator with the same behavior,
216 * and which iterates over the same text, as the one passed in.
217 * @param that The RuleBasedBreakIterator passed to be copied
218 * @stable ICU 2.0
219 */
220 RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
221
222 /**
223 * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
224 * @param rules The break rules to be used.
225 * @param parseError In the event of a syntax error in the rules, provides the location
226 * within the rules of the problem.
227 * @param status Information on any errors encountered.
228 * @stable ICU 2.2
229 */
230 RuleBasedBreakIterator( const UnicodeString &rules,
231 UParseError &parseError,
232 UErrorCode &status);
233
234 /**
235 * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
236 * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules().
237 * Construction of a break iterator in this way is substantially faster than
238 * construction from source rules.
239 *
240 * Ownership of the storage containing the compiled rules remains with the
241 * caller of this function. The compiled rules must not be modified or
242 * deleted during the life of the break iterator.
243 *
244 * The compiled rules are not compatible across different major versions of ICU.
245 * The compiled rules are compatible only between machines with the same
246 * byte ordering (little or big endian) and the same base character set family
247 * (ASCII or EBCDIC).
248 *
249 * @see #getBinaryRules
250 * @param compiledRules A pointer to the compiled break rules to be used.
251 * @param ruleLength The length of the compiled break rules, in bytes. This
252 * corresponds to the length value produced by getBinaryRules().
253 * @param status Information on any errors encountered, including invalid
254 * binary rules.
255 * @stable ICU 4.8
256 */
257 RuleBasedBreakIterator(const uint8_t *compiledRules,
258 uint32_t ruleLength,
259 UErrorCode &status);
260
261 /**
262 * This constructor uses the udata interface to create a BreakIterator
263 * whose internal tables live in a memory-mapped file. "image" is an
264 * ICU UDataMemory handle for the pre-compiled break iterator tables.
265 * @param image handle to the memory image for the break iterator data.
266 * Ownership of the UDataMemory handle passes to the Break Iterator,
267 * which will be responsible for closing it when it is no longer needed.
268 * @param status Information on any errors encountered.
269 * @see udata_open
270 * @see #getBinaryRules
271 * @stable ICU 2.8
272 */
273 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
274
275 /**
276 * Destructor
277 * @stable ICU 2.0
278 */
279 virtual ~RuleBasedBreakIterator();
280
281 /**
282 * Assignment operator. Sets this iterator to have the same behavior,
283 * and iterate over the same text, as the one passed in.
284 * @param that The RuleBasedBreakItertor passed in
285 * @return the newly created RuleBasedBreakIterator
286 * @stable ICU 2.0
287 */
288 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
289
290 /**
291 * Equality operator. Returns true if both BreakIterators are of the
292 * same class, have the same behavior, and iterate over the same text.
293 * @param that The BreakIterator to be compared for equality
294 * @return true if both BreakIterators are of the
295 * same class, have the same behavior, and iterate over the same text.
296 * @stable ICU 2.0
297 */
298 virtual bool operator==(const BreakIterator& that) const override;
299
300 /**
301 * Not-equal operator. If operator== returns true, this returns false,
302 * and vice versa.
303 * @param that The BreakIterator to be compared for inequality
304 * @return true if both BreakIterators are not same.
305 * @stable ICU 2.0
306 */
307 inline bool operator!=(const BreakIterator& that) const {
308 return !operator==(that);
309 }
310
311 /**
312 * Returns a newly-constructed RuleBasedBreakIterator with the same
313 * behavior, and iterating over the same text, as this one.
314 * Differs from the copy constructor in that it is polymorphic, and
315 * will correctly clone (copy) a derived class.
316 * clone() is thread safe. Multiple threads may simultaneously
317 * clone the same source break iterator.
318 * @return a newly-constructed RuleBasedBreakIterator
319 * @stable ICU 2.0
320 */
321 virtual RuleBasedBreakIterator* clone() const override;
322
323 /**
324 * Compute a hash code for this BreakIterator
325 * @return A hash code
326 * @stable ICU 2.0
327 */
328 virtual int32_t hashCode(void) const;
329
330 /**
331 * Returns the description used to create this iterator
332 * @return the description used to create this iterator
333 * @stable ICU 2.0
334 */
335 virtual const UnicodeString& getRules(void) const;
336
337 //=======================================================================
338 // BreakIterator overrides
339 //=======================================================================
340
341 /**
342 * <p>
343 * Return a CharacterIterator over the text being analyzed.
344 * The returned character iterator is owned by the break iterator, and must
345 * not be deleted by the caller. Repeated calls to this function may
346 * return the same CharacterIterator.
347 * </p>
348 * <p>
349 * The returned character iterator must not be used concurrently with
350 * the break iterator. If concurrent operation is needed, clone the
351 * returned character iterator first and operate on the clone.
352 * </p>
353 * <p>
354 * When the break iterator is operating on text supplied via a UText,
355 * this function will fail, returning a CharacterIterator containing no text.
356 * The function getUText() provides similar functionality,
357 * is reliable, and is more efficient.
358 * </p>
359 *
360 * TODO: deprecate this function?
361 *
362 * @return An iterator over the text being analyzed.
363 * @stable ICU 2.0
364 */
365 virtual CharacterIterator& getText(void) const override;
366
367
368 /**
369 * Get a UText for the text being analyzed.
370 * The returned UText is a shallow clone of the UText used internally
371 * by the break iterator implementation. It can safely be used to
372 * access the text without impacting any break iterator operations,
373 * but the underlying text itself must not be altered.
374 *
375 * @param fillIn A UText to be filled in. If nullptr, a new UText will be
376 * allocated to hold the result.
377 * @param status receives any error codes.
378 * @return The current UText for this break iterator. If an input
379 * UText was provided, it will always be returned.
380 * @stable ICU 3.4
381 */
382 virtual UText *getUText(UText *fillIn, UErrorCode &status) const override;
383
384 /**
385 * Set the iterator to analyze a new piece of text. This function resets
386 * the current iteration position to the beginning of the text.
387 * @param newText An iterator over the text to analyze. The BreakIterator
388 * takes ownership of the character iterator. The caller MUST NOT delete it!
389 * @stable ICU 2.0
390 */
391 virtual void adoptText(CharacterIterator* newText) override;
392
393 /**
394 * Set the iterator to analyze a new piece of text. This function resets
395 * the current iteration position to the beginning of the text.
396 *
397 * The BreakIterator will retain a reference to the supplied string.
398 * The caller must not modify or delete the text while the BreakIterator
399 * retains the reference.
400 *
401 * @param newText The text to analyze.
402 * @stable ICU 2.0
403 */
404 virtual void setText(const UnicodeString& newText) override;
405
406 /**
407 * Reset the break iterator to operate over the text represented by
408 * the UText. The iterator position is reset to the start.
409 *
410 * This function makes a shallow clone of the supplied UText. This means
411 * that the caller is free to immediately close or otherwise reuse the
412 * Utext that was passed as a parameter, but that the underlying text itself
413 * must not be altered while being referenced by the break iterator.
414 *
415 * @param text The UText used to change the text.
416 * @param status Receives any error codes.
417 * @stable ICU 3.4
418 */
419 virtual void setText(UText *text, UErrorCode &status) override;
420
421 /**
422 * Sets the current iteration position to the beginning of the text, position zero.
423 * @return The offset of the beginning of the text, zero.
424 * @stable ICU 2.0
425 */
426 virtual int32_t first(void) override;
427
428 /**
429 * Sets the current iteration position to the end of the text.
430 * @return The text's past-the-end offset.
431 * @stable ICU 2.0
432 */
433 virtual int32_t last(void) override;
434
435 /**
436 * Advances the iterator either forward or backward the specified number of steps.
437 * Negative values move backward, and positive values move forward. This is
438 * equivalent to repeatedly calling next() or previous().
439 * @param n The number of steps to move. The sign indicates the direction
440 * (negative is backwards, and positive is forwards).
441 * @return The character offset of the boundary position n boundaries away from
442 * the current one.
443 * @stable ICU 2.0
444 */
445 virtual int32_t next(int32_t n) override;
446
447 /**
448 * Advances the iterator to the next boundary position.
449 * @return The position of the first boundary after this one.
450 * @stable ICU 2.0
451 */
452 virtual int32_t next(void) override;
453
454 /**
455 * Moves the iterator backwards, to the last boundary preceding this one.
456 * @return The position of the last boundary position preceding this one.
457 * @stable ICU 2.0
458 */
459 virtual int32_t previous(void) override;
460
461 /**
462 * Sets the iterator to refer to the first boundary position following
463 * the specified position.
464 * @param offset The position from which to begin searching for a break position.
465 * @return The position of the first break after the current position.
466 * @stable ICU 2.0
467 */
468 virtual int32_t following(int32_t offset) override;
469
470 /**
471 * Sets the iterator to refer to the last boundary position before the
472 * specified position.
473 * @param offset The position to begin searching for a break from.
474 * @return The position of the last boundary before the starting position.
475 * @stable ICU 2.0
476 */
477 virtual int32_t preceding(int32_t offset) override;
478
479 /**
480 * Returns true if the specified position is a boundary position. As a side
481 * effect, leaves the iterator pointing to the first boundary position at
482 * or after "offset".
483 * @param offset the offset to check.
484 * @return True if "offset" is a boundary position.
485 * @stable ICU 2.0
486 */
487 virtual UBool isBoundary(int32_t offset) override;
488
489 /**
490 * Returns the current iteration position. Note that UBRK_DONE is never
491 * returned from this function; if iteration has run to the end of a
492 * string, current() will return the length of the string while
493 * next() will return UBRK_DONE).
494 * @return The current iteration position.
495 * @stable ICU 2.0
496 */
497 virtual int32_t current(void) const override;
498
499
500 /**
501 * Return the status tag from the break rule that determined the boundary at
502 * the current iteration position. For break rules that do not specify a
503 * status, a default value of 0 is returned. If more than one break rule
504 * would cause a boundary to be located at some position in the text,
505 * the numerically largest of the applicable status values is returned.
506 * <p>
507 * Of the standard types of ICU break iterators, only word break and
508 * line break provide status values. The values are defined in
509 * the header file ubrk.h. For Word breaks, the status allows distinguishing between words
510 * that contain alphabetic letters, "words" that appear to be numbers,
511 * punctuation and spaces, words containing ideographic characters, and
512 * more. For Line Break, the status distinguishes between hard (mandatory) breaks
513 * and soft (potential) break positions.
514 * <p>
515 * <code>getRuleStatus()</code> can be called after obtaining a boundary
516 * position from <code>next()</code>, <code>previous()</code>, or
517 * any other break iterator functions that returns a boundary position.
518 * <p>
519 * Note that <code>getRuleStatus()</code> returns the value corresponding to
520 * <code>current()</code> index even after <code>next()</code> has returned DONE.
521 * <p>
522 * When creating custom break rules, one is free to define whatever
523 * status values may be convenient for the application.
524 * <p>
525 * @return the status from the break rule that determined the boundary
526 * at the current iteration position.
527 *
528 * @see UWordBreak
529 * @stable ICU 2.2
530 */
531 virtual int32_t getRuleStatus() const override;
532
533 /**
534 * Get the status (tag) values from the break rule(s) that determined the boundary
535 * at the current iteration position.
536 * <p>
537 * The returned status value(s) are stored into an array provided by the caller.
538 * The values are stored in sorted (ascending) order.
539 * If the capacity of the output array is insufficient to hold the data,
540 * the output will be truncated to the available length, and a
541 * U_BUFFER_OVERFLOW_ERROR will be signaled.
542 *
543 * @param fillInVec an array to be filled in with the status values.
544 * @param capacity the length of the supplied vector. A length of zero causes
545 * the function to return the number of status values, in the
546 * normal way, without attempting to store any values.
547 * @param status receives error codes.
548 * @return The number of rule status values from the rules that determined
549 * the boundary at the current iteration position.
550 * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
551 * is the total number of status values that were available,
552 * not the reduced number that were actually returned.
553 * @see getRuleStatus
554 * @stable ICU 3.0
555 */
556 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override;
557
558 /**
559 * Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
560 * This method is to implement a simple version of RTTI, since not all
561 * C++ compilers support genuine RTTI. Polymorphic operator==() and
562 * clone() methods call this method.
563 *
564 * @return The class ID for this object. All objects of a
565 * given class have the same class ID. Objects of
566 * other classes have different class IDs.
567 * @stable ICU 2.0
568 */
569 virtual UClassID getDynamicClassID(void) const override;
570
571 /**
572 * Returns the class ID for this class. This is useful only for
573 * comparing to a return value from getDynamicClassID(). For example:
574 *
575 * Base* polymorphic_pointer = createPolymorphicObject();
576 * if (polymorphic_pointer->getDynamicClassID() ==
577 * Derived::getStaticClassID()) ...
578 *
579 * @return The class ID for all objects of this class.
580 * @stable ICU 2.0
581 */
582 static UClassID U_EXPORT2 getStaticClassID(void);
583
584#ifndef U_FORCE_HIDE_DEPRECATED_API
585 /**
586 * Deprecated functionality. Use clone() instead.
587 *
588 * Create a clone (copy) of this break iterator in memory provided
589 * by the caller. The idea is to increase performance by avoiding
590 * a storage allocation. Use of this function is NOT RECOMMENDED.
591 * Performance gains are minimal, and correct buffer management is
592 * tricky. Use clone() instead.
593 *
594 * @param stackBuffer The pointer to the memory into which the cloned object
595 * should be placed. If nullptr, allocate heap memory
596 * for the cloned object.
597 * @param BufferSize The size of the buffer. If zero, return the required
598 * buffer size, but do not clone the object. If the
599 * size was too small (but not zero), allocate heap
600 * storage for the cloned object.
601 *
602 * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
603 * returned if the provided buffer was too small, and
604 * the clone was therefore put on the heap.
605 *
606 * @return Pointer to the clone object. This may differ from the stackBuffer
607 * address if the byte alignment of the stack buffer was not suitable
608 * or if the stackBuffer was too small to hold the clone.
609 * @deprecated ICU 52. Use clone() instead.
610 */
611 virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
612 int32_t &BufferSize,
613 UErrorCode &status) override;
614#endif // U_FORCE_HIDE_DEPRECATED_API
615
616 /**
617 * Return the binary form of compiled break rules,
618 * which can then be used to create a new break iterator at some
619 * time in the future. Creating a break iterator from pre-compiled rules
620 * is much faster than building one from the source form of the
621 * break rules.
622 *
623 * The binary data can only be used with the same version of ICU
624 * and on the same platform type (processor endian-ness)
625 *
626 * @param length Returns the length of the binary data. (Out parameter.)
627 *
628 * @return A pointer to the binary (compiled) rule data. The storage
629 * belongs to the RulesBasedBreakIterator object, not the
630 * caller, and must not be modified or deleted.
631 * @stable ICU 4.8
632 */
633 virtual const uint8_t *getBinaryRules(uint32_t &length);
634
635 /**
636 * Set the subject text string upon which the break iterator is operating
637 * without changing any other aspect of the matching state.
638 * The new and previous text strings must have the same content.
639 *
640 * This function is intended for use in environments where ICU is operating on
641 * strings that may move around in memory. It provides a mechanism for notifying
642 * ICU that the string has been relocated, and providing a new UText to access the
643 * string in its new position.
644 *
645 * Note that the break iterator implementation never copies the underlying text
646 * of a string being processed, but always operates directly on the original text
647 * provided by the user. Refreshing simply drops the references to the old text
648 * and replaces them with references to the new.
649 *
650 * Caution: this function is normally used only by very specialized,
651 * system-level code. One example use case is with garbage collection that moves
652 * the text in memory.
653 *
654 * @param input The new (moved) text string.
655 * @param status Receives errors detected by this function.
656 * @return *this
657 *
658 * @stable ICU 49
659 */
660 virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status) override;
661
662
663private:
664 //=======================================================================
665 // implementation
666 //=======================================================================
667 /**
668 * Iterate backwards from an arbitrary position in the input text using the
669 * synthesized Safe Reverse rules.
670 * This locates a "Safe Position" from which the forward break rules
671 * will operate correctly. A Safe Position is not necessarily a boundary itself.
672 *
673 * @param fromPosition the position in the input text to begin the iteration.
674 * @internal (private)
675 */
676 int32_t handleSafePrevious(int32_t fromPosition);
677
678 /**
679 * Find a rule-based boundary by running the state machine.
680 * Input
681 * fPosition, the position in the text to begin from.
682 * Output
683 * fPosition: the boundary following the starting position.
684 * fDictionaryCharCount the number of dictionary characters encountered.
685 * If > 0, the segment will be further subdivided
686 * fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
687 *
688 * @internal (private)
689 */
690 int32_t handleNext();
691
692 /*
693 * Templatized version of handleNext() and handleSafePrevious().
694 *
695 * There will be exactly four instantiations, two each for 8 and 16 bit tables,
696 * two each for 8 and 16 bit trie.
697 * Having separate instantiations for the table types keeps conditional tests of
698 * the table type out of the inner loops, at the expense of replicated code.
699 *
700 * The template parameter for the Trie access function is a value, not a type.
701 * Doing it this way, the compiler will inline the Trie function in the
702 * expanded functions. (Both the 8 and 16 bit access functions have the same type
703 * signature)
704 */
705
706 typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
707
708 template<typename RowType, PTrieFunc trieFunc>
709 int32_t handleSafePrevious(int32_t fromPosition);
710
711 template<typename RowType, PTrieFunc trieFunc>
712 int32_t handleNext();
713
714
715 /**
716 * This function returns the appropriate LanguageBreakEngine for a
717 * given character c.
718 * @param c A character in the dictionary set
719 * @internal (private)
720 */
721 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
722
723 public:
724#ifndef U_HIDE_INTERNAL_API
725 /**
726 * Debugging function only.
727 * @internal
728 */
729 void dumpCache();
730
731 /**
732 * Debugging function only.
733 * @internal
734 */
735 void dumpTables();
736#endif /* U_HIDE_INTERNAL_API */
737};
738
739U_NAMESPACE_END
740
741#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
742
743#endif /* U_SHOW_CPLUSPLUS_API */
744
745#endif
746