rbbi_cache.cpp source code [ClickHouse/contrib/icu/icu4c/source/common/rbbi_cache.cpp]

1	// Copyright (C) 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3
4	// file: rbbi_cache.cpp
5
6	#include "unicode/utypes.h"
7
8	#if !UCONFIG_NO_BREAK_ITERATION
9
10	#include "unicode/ubrk.h"
11	#include "unicode/rbbi.h"
12
13	#include "rbbi_cache.h"
14
15	#include "brkeng.h"
16	#include "cmemory.h"
17	#include "rbbidata.h"
18	#include "rbbirb.h"
19	#include "uassert.h"
20	#include "uvectr32.h"
21
22	U_NAMESPACE_BEGIN
23
24	/*
25	* DictionaryCache implementation
26	*/
27
28	RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29	fBI(bi), fBreaks (status), fPositionInCache(-`1`),
30	fStart(`0`), fLimit(`0`), fFirstRuleStatusIndex(`0`), fOtherRuleStatusIndex(`0`) {
31	}
32
33	RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34	}
35
36	void RuleBasedBreakIterator::DictionaryCache::reset() {
37	fPositionInCache = -`1`;
38	fStart = `0`;
39	fLimit = `0`;
40	fFirstRuleStatusIndex = `0`;
41	fOtherRuleStatusIndex = `0`;
42	fBreaks.removeAllElements();
43	}
44
45	UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t result, int32_t statusIndex) {
46	if (fromPos >= fLimit \|\| fromPos < fStart) {
47	fPositionInCache = -`1`;
48	return FALSE;
49	}
50
51	// Sequential iteration, move from previous boundary to the following
52
53	int32_t r = `0`;
54	if (fPositionInCache >= `0` && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55	++fPositionInCache;
56	if (fPositionInCache >= fBreaks.size()) {
57	fPositionInCache = -`1`;
58	return FALSE;
59	}
60	r = fBreaks.elementAti(fPositionInCache);
61	U_ASSERT(r > fromPos);
62	*result = r;
63	*statusIndex = fOtherRuleStatusIndex;
64	return TRUE;
65	}
66
67	// Random indexing. Linear search for the boundary following the given position.
68
69	for (fPositionInCache = `0`; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70	r= fBreaks.elementAti(fPositionInCache);
71	if (r > fromPos) {
72	*result = r;
73	*statusIndex = fOtherRuleStatusIndex;
74	return TRUE;
75	}
76	}
77	UPRV_UNREACHABLE;
78	}
79
80
81	UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t result, int32_t statusIndex) {
82	if (fromPos <= fStart \|\| fromPos > fLimit) {
83	fPositionInCache = -`1`;
84	return FALSE;
85	}
86
87	if (fromPos == fLimit) {
88	fPositionInCache = fBreaks.size() - `1`;
89	if (fPositionInCache >= `0`) {
90	U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
91	}
92	}
93
94	int32_t r;
95	if (fPositionInCache > `0` && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
96	--fPositionInCache;
97	r = fBreaks.elementAti(fPositionInCache);
98	U_ASSERT(r < fromPos);
99	*result = r;
100	*statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
101	return TRUE;
102	}
103
104	if (fPositionInCache == `0`) {
105	fPositionInCache = -`1`;
106	return FALSE;
107	}
108
109	for (fPositionInCache = fBreaks.size()-`1`; fPositionInCache >= `0`; --fPositionInCache) {
110	r = fBreaks.elementAti(fPositionInCache);
111	if (r < fromPos) {
112	*result = r;
113	*statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
114	return TRUE;
115	}
116	}
117	UPRV_UNREACHABLE;
118	}
119
120	void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
121	int32_t firstRuleStatus, int32_t otherRuleStatus) {
122	if ((endPos - startPos) <= `1`) {
123	return;
124	}
125
126	reset();
127	fFirstRuleStatusIndex = firstRuleStatus;
128	fOtherRuleStatusIndex = otherRuleStatus;
129
130	int32_t rangeStart = startPos;
131	int32_t rangeEnd = endPos;
132
133	uint16_t category;
134	int32_t current;
135	UErrorCode status = U_ZERO_ERROR;
136	int32_t foundBreakCount = `0`;
137	UText *text = &fBI->fText;
138
139	// Loop through the text, looking for ranges of dictionary characters.
140	// For each span, find the appropriate break engine, and ask it to find
141	// any breaks within the span.
142
143	utext_setNativeIndex(text, rangeStart);
144	UChar32 c = utext_current32(text);
145	category = UTRIE2_GET16(fBI->fData->fTrie, c);
146
147	while(U_SUCCESS(status)) {
148	while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd && (category & `0x4000`) == `0`) {
149	utext_next32(text); // TODO: cleaner loop structure.
150	c = utext_current32(text);
151	category = UTRIE2_GET16(fBI->fData->fTrie, c);
152	}
153	if (current >= rangeEnd) {
154	break;
155	}
156
157	// We now have a dictionary character. Get the appropriate language object
158	// to deal with it.
159	const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
160
161	// Ask the language object if there are any breaks. It will add them to the cache and
162	// leave the text pointer on the other side of its range, ready to search for the next one.
163	if (lbe != NULL) {
164	foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks);
165	}
166
167	// Reload the loop variables for the next go-round
168	c = utext_current32(text);
169	category = UTRIE2_GET16(fBI->fData->fTrie, c);
170	}
171
172	// If we found breaks, ensure that the first and last entries are
173	// the original starting and ending position. And initialize the
174	// cache iteration position to the first entry.
175
176	// printf("foundBreakCount = %d\n", foundBreakCount);
177	if (foundBreakCount > `0`) {
178	U_ASSERT(foundBreakCount == fBreaks.size());
179	if (startPos < fBreaks.elementAti(`0`)) {
180	// The dictionary did not place a boundary at the start of the segment of text.
181	// Add one now. This should not commonly happen, but it would be easy for interactions
182	// of the rules for dictionary segments and the break engine implementations to
183	// inadvertently cause it. Cover it here, just in case.
184	fBreaks.insertElementAt(startPos, `0`, status);
185	}
186	if (endPos > fBreaks.peeki()) {
187	fBreaks.push(endPos, status);
188	}
189	fPositionInCache = `0`;
190	// Note: Dictionary matching may extend beyond the original limit.
191	fStart = fBreaks.elementAti(`0`);
192	fLimit = fBreaks.peeki();
193	} else {
194	// there were no language-based breaks, even though the segment contained
195	// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
196	// for this range will fail, and the calling code will fall back to the rule based boundaries.
197	}
198	}
199
200
201	/*
202	* BreakCache implemetation
203	*/
204
205	RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
206	fBI(bi), fSideBuffer (status) {
207	reset();
208	}
209
210
211	RuleBasedBreakIterator::BreakCache::~BreakCache() {
212	}
213
214
215	void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
216	fStartBufIdx = `0`;
217	fEndBufIdx = `0`;
218	fTextIdx = pos;
219	fBufIdx = `0`;
220	fBoundaries[`0`] = pos;
221	fStatuses[`0`] = (uint16_t)ruleStatus;
222	}
223
224
225	int32_t RuleBasedBreakIterator::BreakCache::current() {
226	fBI->fPosition = fTextIdx;
227	fBI->fRuleStatusIndex = fStatuses[fBufIdx];
228	fBI->fDone = FALSE;
229	return fTextIdx;
230	}
231
232
233	void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
234	if (U_FAILURE(status)) {
235	return;
236	}
237	if (startPos == fTextIdx \|\| seek(startPos) \|\| populateNear(startPos, status)) {
238	// startPos is in the cache. Do a next() from that position.
239	// TODO: an awkward set of interactions with bi->fDone
240	// seek() does not clear it; it can't because of interactions with populateNear().
241	// next() does not clear it in the fast-path case, where everything matters. Maybe it should.
242	// So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
243	fBI->fDone = false;
244	next();
245	}
246	return;
247	}
248
249
250	void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
251	if (U_FAILURE(status)) {
252	return;
253	}
254	if (startPos == fTextIdx \|\| seek(startPos) \|\| populateNear(startPos, status)) {
255	if (startPos == fTextIdx) {
256	previous(status);
257	} else {
258	// seek() leaves the BreakCache positioned at the preceding boundary
259	// if the requested position is between two bounaries.
260	// current() pushes the BreakCache position out to the BreakIterator itself.
261	U_ASSERT(startPos > fTextIdx);
262	current();
263	}
264	}
265	return;
266	}
267
268
269	/*
270	* Out-of-line code for BreakCache::next().
271	* Cache does not already contain the boundary
272	*/
273	void RuleBasedBreakIterator::BreakCache::nextOL() {
274	fBI->fDone = !populateFollowing();
275	fBI->fPosition = fTextIdx;
276	fBI->fRuleStatusIndex = fStatuses[fBufIdx];
277	return;
278	}
279
280
281	void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
282	if (U_FAILURE(status)) {
283	return;
284	}
285	int32_t initialBufIdx = fBufIdx;
286	if (fBufIdx == fStartBufIdx) {
287	// At start of cache. Prepend to it.
288	populatePreceding(status);
289	} else {
290	// Cache already holds the next boundary
291	fBufIdx = modChunkSize(fBufIdx - `1`);
292	fTextIdx = fBoundaries[fBufIdx];
293	}
294	fBI->fDone = (fBufIdx == initialBufIdx);
295	fBI->fPosition = fTextIdx;
296	fBI->fRuleStatusIndex = fStatuses[fBufIdx];
297	return;
298	}
299
300
301	UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
302	if (pos < fBoundaries[fStartBufIdx] \|\| pos > fBoundaries[fEndBufIdx]) {
303	return FALSE;
304	}
305	if (pos == fBoundaries[fStartBufIdx]) {
306	// Common case: seek(0), from BreakIterator::first()
307	fBufIdx = fStartBufIdx;
308	fTextIdx = fBoundaries[fBufIdx];
309	return TRUE;
310	}
311	if (pos == fBoundaries[fEndBufIdx]) {
312	fBufIdx = fEndBufIdx;
313	fTextIdx = fBoundaries[fBufIdx];
314	return TRUE;
315	}
316
317	int32_t min = fStartBufIdx;
318	int32_t max = fEndBufIdx;
319	while (min != max) {
320	int32_t probe = (min + max + (min>max ? CACHE_SIZE : `0`)) / `2`;
321	probe = modChunkSize(probe);
322	if (fBoundaries[probe] > pos) {
323	max = probe;
324	} else {
325	min = modChunkSize(probe + `1`);
326	}
327	}
328	U_ASSERT(fBoundaries[max] > pos);
329	fBufIdx = modChunkSize(max - `1`);
330	fTextIdx = fBoundaries[fBufIdx];
331	U_ASSERT(fTextIdx <= pos);
332	return TRUE;
333	}
334
335
336	UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
337	if (U_FAILURE(status)) {
338	return FALSE;
339	}
340	U_ASSERT(position < fBoundaries[fStartBufIdx] \|\| position > fBoundaries[fEndBufIdx]);
341
342	// Find a boundary somewhere in the vicinity of the requested position.
343	// Depending on the safe rules and the text data, it could be either before, at, or after
344	// the requested position.
345
346
347	// If the requested position is not near already cached positions, clear the existing cache,
348	// find a near-by boundary and begin new cache contents there.
349
350	if ((position < fBoundaries[fStartBufIdx] - `15`) \|\| position > (fBoundaries[fEndBufIdx] + `15`)) {
351	int32_t aBoundary = `0`;
352	int32_t ruleStatusIndex = `0`;
353	if (position > `20`) {
354	int32_t backupPos = fBI->handleSafePrevious(position);
355
356	if (backupPos > `0`) {
357	// Advance to the boundary following the backup position.
358	// There is a complication: the safe reverse rules identify pairs of code points
359	// that are safe. If advancing from the safe point moves forwards by less than
360	// two code points, we need to advance one more time to ensure that the boundary
361	// is good, including a correct rules status value.
362	//
363	fBI->fPosition = backupPos;
364	aBoundary = fBI->handleNext();
365	if (aBoundary <= backupPos + `4`) {
366	// +4 is a quick test for possibly having advanced only one codepoint.
367	// Four being the length of the longest potential code point, a supplementary in UTF-8
368	utext_setNativeIndex(&fBI->fText, aBoundary);
369	if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
370	// The initial handleNext() only advanced by a single code point. Go again.
371	aBoundary = fBI->handleNext(); // Safe rules identify safe pairs.
372	}
373	}
374	ruleStatusIndex = fBI->fRuleStatusIndex;
375	}
376	}
377	reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
378	}
379
380	// Fill in boundaries between existing cache content and the new requested position.
381
382	if (fBoundaries[fEndBufIdx] < position) {
383	// The last position in the cache precedes the requested position.
384	// Add following position(s) to the cache.
385	while (fBoundaries[fEndBufIdx] < position) {
386	if (!populateFollowing()) {
387	UPRV_UNREACHABLE;
388	}
389	}
390	fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer.
391	fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries.
392	while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos.
393	previous(status);
394	}
395	return true;
396	}
397
398	if (fBoundaries[fStartBufIdx] > position) {
399	// The first position in the cache is beyond the requested position.
400	// back up more until we get a boundary <= the requested position.
401	while (fBoundaries[fStartBufIdx] > position) {
402	populatePreceding(status);
403	}
404	fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer.
405	fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries.
406	while (fTextIdx < position) { // Move forwards to a position at or following the requested pos.
407	next();
408	}
409	if (fTextIdx > position) {
410	// If position is not itself a boundary, the next() loop above will overshoot.
411	// Back up one, leaving cache position at the boundary preceding the requested position.
412	previous(status);
413	}
414	return true;
415	}
416
417	U_ASSERT(fTextIdx == position);
418	return true;
419	}
420
421
422
423	UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
424	int32_t fromPosition = fBoundaries[fEndBufIdx];
425	int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
426	int32_t pos = `0`;
427	int32_t ruleStatusIdx = `0`;
428
429	if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
430	addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
431	return TRUE;
432	}
433
434	fBI->fPosition = fromPosition;
435	pos = fBI->handleNext();
436	if (pos == UBRK_DONE) {
437	return FALSE;
438	}
439
440	ruleStatusIdx = fBI->fRuleStatusIndex;
441	if (fBI->fDictionaryCharCount > `0`) {
442	// The text segment obtained from the rules includes dictionary characters.
443	// Subdivide it, with subdivided results going into the dictionary cache.
444	fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
445	if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
446	addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
447	return TRUE;
448	// TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
449	// But be careful with interactions with populateNear().
450	}
451	}
452
453	// Rule based segment did not include dictionary characters.
454	// Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
455	// meaning that we didn't take the return, above.
456	// Add its end point to the cache.
457	addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
458
459	// Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
460	// (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
461	//
462	for (int count=`0`; count<`6`; ++count) {
463	pos = fBI->handleNext();
464	if (pos == UBRK_DONE \|\| fBI->fDictionaryCharCount > `0`) {
465	break;
466	}
467	addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
468	}
469
470	return TRUE;
471	}
472
473
474	UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
475	if (U_FAILURE(status)) {
476	return FALSE;
477	}
478
479	int32_t fromPosition = fBoundaries[fStartBufIdx];
480	if (fromPosition == `0`) {
481	return FALSE;
482	}
483
484	int32_t position = `0`;
485	int32_t positionStatusIdx = `0`;
486
487	if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
488	addPreceding(position, positionStatusIdx, UpdateCachePosition);
489	return TRUE;
490	}
491
492	int32_t backupPosition = fromPosition;
493
494	// Find a boundary somewhere preceding the first already-cached boundary
495	do {
496	backupPosition = backupPosition - `30`;
497	if (backupPosition <= `0`) {
498	backupPosition = `0`;
499	} else {
500	backupPosition = fBI->handleSafePrevious(backupPosition);
501	}
502	if (backupPosition == UBRK_DONE \|\| backupPosition == `0`) {
503	position = `0`;
504	positionStatusIdx = `0`;
505	} else {
506	// Advance to the boundary following the backup position.
507	// There is a complication: the safe reverse rules identify pairs of code points
508	// that are safe. If advancing from the safe point moves forwards by less than
509	// two code points, we need to advance one more time to ensure that the boundary
510	// is good, including a correct rules status value.
511	//
512	fBI->fPosition = backupPosition;
513	position = fBI->handleNext();
514	if (position <= backupPosition + `4`) {
515	// +4 is a quick test for possibly having advanced only one codepoint.
516	// Four being the length of the longest potential code point, a supplementary in UTF-8
517	utext_setNativeIndex(&fBI->fText, position);
518	if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
519	// The initial handleNext() only advanced by a single code point. Go again.
520	position = fBI->handleNext(); // Safe rules identify safe pairs.
521	}
522	}
523	positionStatusIdx = fBI->fRuleStatusIndex;
524	}
525	} while (position >= fromPosition);
526
527	// Find boundaries between the one we just located and the first already-cached boundary
528	// Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
529
530	fSideBuffer.removeAllElements();
531	fSideBuffer.addElement(position, status);
532	fSideBuffer.addElement(positionStatusIdx, status);
533
534	do {
535	int32_t prevPosition = fBI->fPosition = position;
536	int32_t prevStatusIdx = positionStatusIdx;
537	position = fBI->handleNext();
538	positionStatusIdx = fBI->fRuleStatusIndex;
539	if (position == UBRK_DONE) {
540	break;
541	}
542
543	UBool segmentHandledByDictionary = FALSE;
544	if (fBI->fDictionaryCharCount != `0`) {
545	// Segment from the rules includes dictionary characters.
546	// Subdivide it, with subdivided results going into the dictionary cache.
547	int32_t dictSegEndPosition = position;
548	fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
549	while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
550	segmentHandledByDictionary = true;
551	U_ASSERT(position > prevPosition);
552	if (position >= fromPosition) {
553	break;
554	}
555	U_ASSERT(position <= dictSegEndPosition);
556	fSideBuffer.addElement(position, status);
557	fSideBuffer.addElement(positionStatusIdx, status);
558	prevPosition = position;
559	}
560	U_ASSERT(position==dictSegEndPosition \|\| position>=fromPosition);
561	}
562
563	if (!segmentHandledByDictionary && position < fromPosition) {
564	fSideBuffer.addElement(position, status);
565	fSideBuffer.addElement(positionStatusIdx, status);
566	}
567	} while (position < fromPosition);
568
569	// Move boundaries from the side buffer to the main circular buffer.
570	UBool success = FALSE;
571	if (!fSideBuffer.isEmpty()) {
572	positionStatusIdx = fSideBuffer.popi();
573	position = fSideBuffer.popi();
574	addPreceding(position, positionStatusIdx, UpdateCachePosition);
575	success = TRUE;
576	}
577
578	while (!fSideBuffer.isEmpty()) {
579	positionStatusIdx = fSideBuffer.popi();
580	position = fSideBuffer.popi();
581	if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
582	// No space in circular buffer to hold a new preceding result while
583	// also retaining the current cache (iteration) position.
584	// Bailing out is safe; the cache will refill again if needed.
585	break;
586	}
587	}
588
589	return success;
590	}
591
592
593	void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
594	U_ASSERT(position > fBoundaries[fEndBufIdx]);
595	U_ASSERT(ruleStatusIdx <= UINT16_MAX);
596	int32_t nextIdx = modChunkSize(fEndBufIdx + `1`);
597	if (nextIdx == fStartBufIdx) {
598	fStartBufIdx = modChunkSize(fStartBufIdx + `6`); // TODO: experiment. Probably revert to 1.
599	}
600	fBoundaries[nextIdx] = position;
601	fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
602	fEndBufIdx = nextIdx;
603	if (update == UpdateCachePosition) {
604	// Set current position to the newly added boundary.
605	fBufIdx = nextIdx;
606	fTextIdx = position;
607	} else {
608	// Retaining the original cache position.
609	// Check if the added boundary wraps around the buffer, and would over-write the original position.
610	// It's the responsibility of callers of this function to not add too many.
611	U_ASSERT(nextIdx != fBufIdx);
612	}
613	}
614
615	bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
616	U_ASSERT(position < fBoundaries[fStartBufIdx]);
617	U_ASSERT(ruleStatusIdx <= UINT16_MAX);
618	int32_t nextIdx = modChunkSize(fStartBufIdx - `1`);
619	if (nextIdx == fEndBufIdx) {
620	if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
621	// Failure. The insertion of the new boundary would claim the buffer position that is the
622	// current iteration position. And we also want to retain the current iteration position.
623	// (The buffer is already completely full of entries that precede the iteration position.)
624	return false;
625	}
626	fEndBufIdx = modChunkSize(fEndBufIdx - `1`);
627	}
628	fBoundaries[nextIdx] = position;
629	fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
630	fStartBufIdx = nextIdx;
631	if (update == UpdateCachePosition) {
632	fBufIdx = nextIdx;
633	fTextIdx = position;
634	}
635	return true;
636	}
637
638
639	void RuleBasedBreakIterator::BreakCache::dumpCache() {
640	#ifdef RBBI_DEBUG
641	RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
642	for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+`1`)) {
643	RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]);
644	if (i == fEndBufIdx) {
645	break;
646	}
647	}
648	#endif
649	}
650
651	U_NAMESPACE_END
652
653	#endif // #if !UCONFIG_NO_BREAK_ITERATION
654

Browse the source code of ClickHouse/contrib/icu/icu4c/source/common/rbbi_cache.cpp