rbbi_cache.cpp source code [Godot/thirdparty/icu4c/common/rbbi_cache.cpp]

1	// Copyright (C) 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3
4	// file: rbbi_cache.cpp
5
6	#include "unicode/utypes.h"
7
8	#if !UCONFIG_NO_BREAK_ITERATION
9
10	#include "unicode/ubrk.h"
11	#include "unicode/rbbi.h"
12
13	#include "rbbi_cache.h"
14
15	#include "brkeng.h"
16	#include "cmemory.h"
17	#include "rbbidata.h"
18	#include "rbbirb.h"
19	#include "uassert.h"
20	#include "uvectr32.h"
21
22	U_NAMESPACE_BEGIN
23
24	/*
25	* DictionaryCache implementation
26	*/
27
28	RuleBasedBreakIterator::DictionaryCache::DictionaryCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
29	fBI(bi), fBreaks (status), fPositionInCache(-`1`),
30	fStart(`0`), fLimit(`0`), fFirstRuleStatusIndex(`0`), fOtherRuleStatusIndex(`0`) {
31	}
32
33	RuleBasedBreakIterator::DictionaryCache::~DictionaryCache() {
34	}
35
36	void RuleBasedBreakIterator::DictionaryCache::reset() {
37	fPositionInCache = -`1`;
38	fStart = `0`;
39	fLimit = `0`;
40	fFirstRuleStatusIndex = `0`;
41	fOtherRuleStatusIndex = `0`;
42	fBreaks.removeAllElements();
43	}
44
45	UBool RuleBasedBreakIterator::DictionaryCache::following(int32_t fromPos, int32_t result, int32_t statusIndex) {
46	if (fromPos >= fLimit \|\| fromPos < fStart) {
47	fPositionInCache = -`1`;
48	return false;
49	}
50
51	// Sequential iteration, move from previous boundary to the following
52
53	int32_t r = `0`;
54	if (fPositionInCache >= `0` && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
55	++fPositionInCache;
56	if (fPositionInCache >= fBreaks.size()) {
57	fPositionInCache = -`1`;
58	return false;
59	}
60	r = fBreaks.elementAti(fPositionInCache);
61	U_ASSERT(r > fromPos);
62	*result = r;
63	*statusIndex = fOtherRuleStatusIndex;
64	return true;
65	}
66
67	// Random indexing. Linear search for the boundary following the given position.
68
69	for (fPositionInCache = `0`; fPositionInCache < fBreaks.size(); ++fPositionInCache) {
70	r= fBreaks.elementAti(fPositionInCache);
71	if (r > fromPos) {
72	*result = r;
73	*statusIndex = fOtherRuleStatusIndex;
74	return true;
75	}
76	}
77	UPRV_UNREACHABLE_EXIT;
78	}
79
80
81	UBool RuleBasedBreakIterator::DictionaryCache::preceding(int32_t fromPos, int32_t result, int32_t statusIndex) {
82	if (fromPos <= fStart \|\| fromPos > fLimit) {
83	fPositionInCache = -`1`;
84	return false;
85	}
86
87	if (fromPos == fLimit) {
88	fPositionInCache = fBreaks.size() - `1`;
89	if (fPositionInCache >= `0`) {
90	U_ASSERT(fBreaks.elementAti(fPositionInCache) == fromPos);
91	}
92	}
93
94	int32_t r;
95	if (fPositionInCache > `0` && fPositionInCache < fBreaks.size() && fBreaks.elementAti(fPositionInCache) == fromPos) {
96	--fPositionInCache;
97	r = fBreaks.elementAti(fPositionInCache);
98	U_ASSERT(r < fromPos);
99	*result = r;
100	*statusIndex = ( r== fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
101	return true;
102	}
103
104	if (fPositionInCache == `0`) {
105	fPositionInCache = -`1`;
106	return false;
107	}
108
109	for (fPositionInCache = fBreaks.size()-`1`; fPositionInCache >= `0`; --fPositionInCache) {
110	r = fBreaks.elementAti(fPositionInCache);
111	if (r < fromPos) {
112	*result = r;
113	*statusIndex = ( r == fStart) ? fFirstRuleStatusIndex : fOtherRuleStatusIndex;
114	return true;
115	}
116	}
117	UPRV_UNREACHABLE_EXIT;
118	}
119
120	void RuleBasedBreakIterator::DictionaryCache::populateDictionary(int32_t startPos, int32_t endPos,
121	int32_t firstRuleStatus, int32_t otherRuleStatus) {
122	if ((endPos - startPos) <= `1`) {
123	return;
124	}
125
126	reset();
127	fFirstRuleStatusIndex = firstRuleStatus;
128	fOtherRuleStatusIndex = otherRuleStatus;
129
130	int32_t rangeStart = startPos;
131	int32_t rangeEnd = endPos;
132
133	uint16_t category;
134	int32_t current;
135	UErrorCode status = U_ZERO_ERROR;
136	int32_t foundBreakCount = `0`;
137	UText *text = &fBI->fText;
138
139	// Loop through the text, looking for ranges of dictionary characters.
140	// For each span, find the appropriate break engine, and ask it to find
141	// any breaks within the span.
142
143	utext_setNativeIndex(text, rangeStart);
144	UChar32 c = utext_current32(text);
145	category = ucptrie_get(fBI->fData->fTrie, c);
146	uint32_t dictStart = fBI->fData->fForwardTable->fDictCategoriesStart;
147
148	while(U_SUCCESS(status)) {
149	while((current = (int32_t)UTEXT_GETNATIVEINDEX(text)) < rangeEnd
150	&& (category < dictStart)) {
151	utext_next32(text); // TODO: cleaner loop structure.
152	c = utext_current32(text);
153	category = ucptrie_get(fBI->fData->fTrie, c);
154	}
155	if (current >= rangeEnd) {
156	break;
157	}
158
159	// We now have a dictionary character. Get the appropriate language object
160	// to deal with it.
161	const LanguageBreakEngine *lbe = fBI->getLanguageBreakEngine(c);
162
163	// Ask the language object if there are any breaks. It will add them to the cache and
164	// leave the text pointer on the other side of its range, ready to search for the next one.
165	if (lbe != nullptr) {
166	foundBreakCount += lbe->findBreaks(text, rangeStart, rangeEnd, fBreaks, fBI->fIsPhraseBreaking, status);
167	}
168
169	// Reload the loop variables for the next go-round
170	c = utext_current32(text);
171	category = ucptrie_get(fBI->fData->fTrie, c);
172	}
173
174	// If we found breaks, ensure that the first and last entries are
175	// the original starting and ending position. And initialize the
176	// cache iteration position to the first entry.
177
178	// printf("foundBreakCount = %d\n", foundBreakCount);
179	if (foundBreakCount > `0`) {
180	U_ASSERT(foundBreakCount == fBreaks.size());
181	if (startPos < fBreaks.elementAti(`0`)) {
182	// The dictionary did not place a boundary at the start of the segment of text.
183	// Add one now. This should not commonly happen, but it would be easy for interactions
184	// of the rules for dictionary segments and the break engine implementations to
185	// inadvertently cause it. Cover it here, just in case.
186	fBreaks.insertElementAt(startPos, `0`, status);
187	}
188	if (endPos > fBreaks.peeki()) {
189	fBreaks.push(endPos, status);
190	}
191	fPositionInCache = `0`;
192	// Note: Dictionary matching may extend beyond the original limit.
193	fStart = fBreaks.elementAti(`0`);
194	fLimit = fBreaks.peeki();
195	} else {
196	// there were no language-based breaks, even though the segment contained
197	// dictionary characters. Subsequent attempts to fetch boundaries from the dictionary cache
198	// for this range will fail, and the calling code will fall back to the rule based boundaries.
199	}
200	}
201
202
203	/*
204	* BreakCache implementation
205	*/
206
207	RuleBasedBreakIterator::BreakCache::BreakCache(RuleBasedBreakIterator *bi, UErrorCode &status) :
208	fBI(bi), fSideBuffer (status) {
209	reset();
210	}
211
212
213	RuleBasedBreakIterator::BreakCache::~BreakCache() {
214	}
215
216
217	void RuleBasedBreakIterator::BreakCache::reset(int32_t pos, int32_t ruleStatus) {
218	fStartBufIdx = `0`;
219	fEndBufIdx = `0`;
220	fTextIdx = pos;
221	fBufIdx = `0`;
222	fBoundaries[`0`] = pos;
223	fStatuses[`0`] = (uint16_t)ruleStatus;
224	}
225
226
227	int32_t RuleBasedBreakIterator::BreakCache::current() {
228	fBI->fPosition = fTextIdx;
229	fBI->fRuleStatusIndex = fStatuses[fBufIdx];
230	fBI->fDone = false;
231	return fTextIdx;
232	}
233
234
235	void RuleBasedBreakIterator::BreakCache::following(int32_t startPos, UErrorCode &status) {
236	if (U_FAILURE(status)) {
237	return;
238	}
239	if (startPos == fTextIdx \|\| seek(startPos) \|\| populateNear(startPos, status)) {
240	// startPos is in the cache. Do a next() from that position.
241	// TODO: an awkward set of interactions with bi->fDone
242	// seek() does not clear it; it can't because of interactions with populateNear().
243	// next() does not clear it in the fast-path case, where everything matters. Maybe it should.
244	// So clear it here, for the case where seek() succeeded on an iterator that had previously run off the end.
245	fBI->fDone = false;
246	next();
247	}
248	return;
249	}
250
251
252	void RuleBasedBreakIterator::BreakCache::preceding(int32_t startPos, UErrorCode &status) {
253	if (U_FAILURE(status)) {
254	return;
255	}
256	if (startPos == fTextIdx \|\| seek(startPos) \|\| populateNear(startPos, status)) {
257	if (startPos == fTextIdx) {
258	previous(status);
259	} else {
260	// seek() leaves the BreakCache positioned at the preceding boundary
261	// if the requested position is between two boundaries.
262	// current() pushes the BreakCache position out to the BreakIterator itself.
263	U_ASSERT(startPos > fTextIdx);
264	current();
265	}
266	}
267	return;
268	}
269
270
271	/*
272	* Out-of-line code for BreakCache::next().
273	* Cache does not already contain the boundary
274	*/
275	void RuleBasedBreakIterator::BreakCache::nextOL() {
276	fBI->fDone = !populateFollowing();
277	fBI->fPosition = fTextIdx;
278	fBI->fRuleStatusIndex = fStatuses[fBufIdx];
279	return;
280	}
281
282
283	void RuleBasedBreakIterator::BreakCache::previous(UErrorCode &status) {
284	if (U_FAILURE(status)) {
285	return;
286	}
287	int32_t initialBufIdx = fBufIdx;
288	if (fBufIdx == fStartBufIdx) {
289	// At start of cache. Prepend to it.
290	populatePreceding(status);
291	} else {
292	// Cache already holds the next boundary
293	fBufIdx = modChunkSize(fBufIdx - `1`);
294	fTextIdx = fBoundaries[fBufIdx];
295	}
296	fBI->fDone = (fBufIdx == initialBufIdx);
297	fBI->fPosition = fTextIdx;
298	fBI->fRuleStatusIndex = fStatuses[fBufIdx];
299	return;
300	}
301
302
303	UBool RuleBasedBreakIterator::BreakCache::seek(int32_t pos) {
304	if (pos < fBoundaries[fStartBufIdx] \|\| pos > fBoundaries[fEndBufIdx]) {
305	return false;
306	}
307	if (pos == fBoundaries[fStartBufIdx]) {
308	// Common case: seek(0), from BreakIterator::first()
309	fBufIdx = fStartBufIdx;
310	fTextIdx = fBoundaries[fBufIdx];
311	return true;
312	}
313	if (pos == fBoundaries[fEndBufIdx]) {
314	fBufIdx = fEndBufIdx;
315	fTextIdx = fBoundaries[fBufIdx];
316	return true;
317	}
318
319	int32_t min = fStartBufIdx;
320	int32_t max = fEndBufIdx;
321	while (min != max) {
322	int32_t probe = (min + max + (min>max ? CACHE_SIZE : `0`)) / `2`;
323	probe = modChunkSize(probe);
324	if (fBoundaries[probe] > pos) {
325	max = probe;
326	} else {
327	min = modChunkSize(probe + `1`);
328	}
329	}
330	U_ASSERT(fBoundaries[max] > pos);
331	fBufIdx = modChunkSize(max - `1`);
332	fTextIdx = fBoundaries[fBufIdx];
333	U_ASSERT(fTextIdx <= pos);
334	return true;
335	}
336
337
338	UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorCode &status) {
339	if (U_FAILURE(status)) {
340	return false;
341	}
342	U_ASSERT(position < fBoundaries[fStartBufIdx] \|\| position > fBoundaries[fEndBufIdx]);
343
344	// Add boundaries to the cache near the specified position.
345	// The given position need not be a boundary itself.
346	// The input position must be within the range of the text, and
347	// on a code point boundary.
348	// If the requested position is a break boundary, leave the iteration
349	// position on it.
350	// If the requested position is not a boundary, leave the iteration
351	// position on the preceding boundary and include both the
352	// preceding and following boundaries in the cache.
353	// Additional boundaries, either preceding or following, may be added
354	// to the cache as a side effect.
355
356	// If the requested position is not near already cached positions, clear the existing cache,
357	// find a near-by boundary and begin new cache contents there.
358
359	// Threshold for a text position to be considered near to existing cache contents.
360	// TODO: See issue ICU-22024 "perf tuning of Cache needed."
361	// This value is subject to change. See the ticket for more details.
362	static constexpr int32_t CACHE_NEAR = `15`;
363
364	int32_t aBoundary = -`1`;
365	int32_t ruleStatusIndex = `0`;
366	bool retainCache = false;
367	if ((position > fBoundaries[fStartBufIdx] - CACHE_NEAR) && position < (fBoundaries[fEndBufIdx] + CACHE_NEAR)) {
368	// Requested position is near the existing cache. Retain it.
369	retainCache = true;
370	} else if (position <= CACHE_NEAR) {
371	// Requested position is near the start of the text. Fill cache from start, skipping
372	// the need to find a safe point.
373	retainCache = false;
374	aBoundary = `0`;
375	} else {
376	// Requested position is not near the existing cache.
377	// Find a safe point to refill the cache from.
378	int32_t backupPos = fBI->handleSafePrevious(position);
379
380	if (fBoundaries[fEndBufIdx] < position && fBoundaries[fEndBufIdx] >= (backupPos - CACHE_NEAR)) {
381	// The requested position is beyond the end of the existing cache, but the
382	// reverse rules produced a position near or before the cached region.
383	// Retain the existing cache, and fill from the end of it.
384	retainCache = true;
385	} else if (backupPos < CACHE_NEAR) {
386	// The safe reverse rules moved us to near the start of text.
387	// Take that (index 0) as the backup boundary, avoiding the complication
388	// (in the following block) of moving forward from the safe point to a known boundary.
389	//
390	// Retain the cache if it begins not too far from the requested position.
391	aBoundary = `0`;
392	retainCache = (fBoundaries[fStartBufIdx] <= (position + CACHE_NEAR));
393	} else {
394	// The safe reverse rules produced a position that is neither near the existing
395	// cache, nor near the start of text.
396	// Advance to the boundary following.
397	// There is a complication: the safe reverse rules identify pairs of code points
398	// that are safe. If advancing from the safe point moves forwards by less than
399	// two code points, we need to advance one more time to ensure that the boundary
400	// is good, including a correct rules status value.
401	retainCache = false;
402	fBI->fPosition = backupPos;
403	aBoundary = fBI->handleNext();
404	if (aBoundary != UBRK_DONE && aBoundary <= backupPos + `4`) {
405	// +4 is a quick test for possibly having advanced only one codepoint.
406	// Four being the length of the longest potential code point, a supplementary in UTF-8
407	utext_setNativeIndex(&fBI->fText, aBoundary);
408	if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) {
409	// The initial handleNext() only advanced by a single code point. Go again.
410	aBoundary = fBI->handleNext(); // Safe rules identify safe pairs.
411	}
412	}
413	if (aBoundary == UBRK_DONE) {
414	// Note (Andy Heninger): I don't think this condition can occur, but it's hard
415	// to prove that it can't. We ran off the end of the string looking a boundary
416	// following a safe point; choose the end of the string as that boundary.
417	aBoundary = utext_nativeLength(&fBI->fText);
418	}
419	ruleStatusIndex = fBI->fRuleStatusIndex;
420	}
421	}
422
423	if (!retainCache) {
424	U_ASSERT(aBoundary != -`1`);
425	reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point.
426	}
427
428	// Fill in boundaries between existing cache content and the new requested position.
429
430	if (fBoundaries[fEndBufIdx] < position) {
431	// The last position in the cache precedes the requested position.
432	// Add following position(s) to the cache.
433	while (fBoundaries[fEndBufIdx] < position) {
434	if (!populateFollowing()) {
435	UPRV_UNREACHABLE_EXIT;
436	}
437	}
438	fBufIdx = fEndBufIdx; // Set iterator position to the end of the buffer.
439	fTextIdx = fBoundaries[fBufIdx]; // Required because populateFollowing may add extra boundaries.
440	while (fTextIdx > position) { // Move backwards to a position at or preceding the requested pos.
441	previous(status);
442	}
443	return true;
444	}
445
446	if (fBoundaries[fStartBufIdx] > position) {
447	// The first position in the cache is beyond the requested position.
448	// back up more until we get a boundary <= the requested position.
449	while (fBoundaries[fStartBufIdx] > position) {
450	populatePreceding(status);
451	}
452	fBufIdx = fStartBufIdx; // Set iterator position to the start of the buffer.
453	fTextIdx = fBoundaries[fBufIdx]; // Required because populatePreceding may add extra boundaries.
454	while (fTextIdx < position) { // Move forwards to a position at or following the requested pos.
455	next();
456	}
457	if (fTextIdx > position) {
458	// If position is not itself a boundary, the next() loop above will overshoot.
459	// Back up one, leaving cache position at the boundary preceding the requested position.
460	previous(status);
461	}
462	return true;
463	}
464
465	U_ASSERT(fTextIdx == position);
466	return true;
467	}
468
469
470
471	UBool RuleBasedBreakIterator::BreakCache::populateFollowing() {
472	int32_t fromPosition = fBoundaries[fEndBufIdx];
473	int32_t fromRuleStatusIdx = fStatuses[fEndBufIdx];
474	int32_t pos = `0`;
475	int32_t ruleStatusIdx = `0`;
476
477	if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
478	addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
479	return true;
480	}
481
482	fBI->fPosition = fromPosition;
483	pos = fBI->handleNext();
484	if (pos == UBRK_DONE) {
485	return false;
486	}
487
488	ruleStatusIdx = fBI->fRuleStatusIndex;
489	if (fBI->fDictionaryCharCount > `0`) {
490	// The text segment obtained from the rules includes dictionary characters.
491	// Subdivide it, with subdivided results going into the dictionary cache.
492	fBI->fDictionaryCache->populateDictionary(fromPosition, pos, fromRuleStatusIdx, ruleStatusIdx);
493	if (fBI->fDictionaryCache->following(fromPosition, &pos, &ruleStatusIdx)) {
494	addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
495	return true;
496	// TODO: may want to move a sizable chunk of dictionary cache to break cache at this point.
497	// But be careful with interactions with populateNear().
498	}
499	}
500
501	// Rule based segment did not include dictionary characters.
502	// Or, it did contain dictionary chars, but the dictionary segmenter didn't handle them,
503	// meaning that we didn't take the return, above.
504	// Add its end point to the cache.
505	addFollowing(pos, ruleStatusIdx, UpdateCachePosition);
506
507	// Add several non-dictionary boundaries at this point, to optimize straight forward iteration.
508	// (subsequent calls to BreakIterator::next() will take the fast path, getting cached results.
509	//
510	for (int count=`0`; count<`6`; ++count) {
511	pos = fBI->handleNext();
512	if (pos == UBRK_DONE \|\| fBI->fDictionaryCharCount > `0`) {
513	break;
514	}
515	addFollowing(pos, fBI->fRuleStatusIndex, RetainCachePosition);
516	}
517
518	return true;
519	}
520
521
522	UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) {
523	if (U_FAILURE(status)) {
524	return false;
525	}
526
527	int32_t fromPosition = fBoundaries[fStartBufIdx];
528	if (fromPosition == `0`) {
529	return false;
530	}
531
532	int32_t position = `0`;
533	int32_t positionStatusIdx = `0`;
534
535	if (fBI->fDictionaryCache->preceding(fromPosition, &position, &positionStatusIdx)) {
536	addPreceding(position, positionStatusIdx, UpdateCachePosition);
537	return true;
538	}
539
540	int32_t backupPosition = fromPosition;
541
542	// Find a boundary somewhere preceding the first already-cached boundary
543	do {
544	backupPosition = backupPosition - `30`;
545	if (backupPosition <= `0`) {
546	backupPosition = `0`;
547	} else {
548	backupPosition = fBI->handleSafePrevious(backupPosition);
549	}
550	if (backupPosition == UBRK_DONE \|\| backupPosition == `0`) {
551	position = `0`;
552	positionStatusIdx = `0`;
553	} else {
554	// Advance to the boundary following the backup position.
555	// There is a complication: the safe reverse rules identify pairs of code points
556	// that are safe. If advancing from the safe point moves forwards by less than
557	// two code points, we need to advance one more time to ensure that the boundary
558	// is good, including a correct rules status value.
559	//
560	fBI->fPosition = backupPosition;
561	position = fBI->handleNext();
562	if (position <= backupPosition + `4`) {
563	// +4 is a quick test for possibly having advanced only one codepoint.
564	// Four being the length of the longest potential code point, a supplementary in UTF-8
565	utext_setNativeIndex(&fBI->fText, position);
566	if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) {
567	// The initial handleNext() only advanced by a single code point. Go again.
568	position = fBI->handleNext(); // Safe rules identify safe pairs.
569	}
570	}
571	positionStatusIdx = fBI->fRuleStatusIndex;
572	}
573	} while (position >= fromPosition);
574
575	// Find boundaries between the one we just located and the first already-cached boundary
576	// Put them in a side buffer, because we don't yet know where they will fall in the circular cache buffer..
577
578	fSideBuffer.removeAllElements();
579	fSideBuffer.addElement(position, status);
580	fSideBuffer.addElement(positionStatusIdx, status);
581
582	do {
583	int32_t prevPosition = fBI->fPosition = position;
584	int32_t prevStatusIdx = positionStatusIdx;
585	position = fBI->handleNext();
586	positionStatusIdx = fBI->fRuleStatusIndex;
587	if (position == UBRK_DONE) {
588	break;
589	}
590
591	UBool segmentHandledByDictionary = false;
592	if (fBI->fDictionaryCharCount != `0`) {
593	// Segment from the rules includes dictionary characters.
594	// Subdivide it, with subdivided results going into the dictionary cache.
595	int32_t dictSegEndPosition = position;
596	fBI->fDictionaryCache->populateDictionary(prevPosition, dictSegEndPosition, prevStatusIdx, positionStatusIdx);
597	while (fBI->fDictionaryCache->following(prevPosition, &position, &positionStatusIdx)) {
598	segmentHandledByDictionary = true;
599	U_ASSERT(position > prevPosition);
600	if (position >= fromPosition) {
601	break;
602	}
603	U_ASSERT(position <= dictSegEndPosition);
604	fSideBuffer.addElement(position, status);
605	fSideBuffer.addElement(positionStatusIdx, status);
606	prevPosition = position;
607	}
608	U_ASSERT(position==dictSegEndPosition \|\| position>=fromPosition);
609	}
610
611	if (!segmentHandledByDictionary && position < fromPosition) {
612	fSideBuffer.addElement(position, status);
613	fSideBuffer.addElement(positionStatusIdx, status);
614	}
615	} while (position < fromPosition);
616
617	// Move boundaries from the side buffer to the main circular buffer.
618	UBool success = false;
619	if (!fSideBuffer.isEmpty()) {
620	positionStatusIdx = fSideBuffer.popi();
621	position = fSideBuffer.popi();
622	addPreceding(position, positionStatusIdx, UpdateCachePosition);
623	success = true;
624	}
625
626	while (!fSideBuffer.isEmpty()) {
627	positionStatusIdx = fSideBuffer.popi();
628	position = fSideBuffer.popi();
629	if (!addPreceding(position, positionStatusIdx, RetainCachePosition)) {
630	// No space in circular buffer to hold a new preceding result while
631	// also retaining the current cache (iteration) position.
632	// Bailing out is safe; the cache will refill again if needed.
633	break;
634	}
635	}
636
637	return success;
638	}
639
640
641	void RuleBasedBreakIterator::BreakCache::addFollowing(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
642	U_ASSERT(position > fBoundaries[fEndBufIdx]);
643	U_ASSERT(ruleStatusIdx <= UINT16_MAX);
644	int32_t nextIdx = modChunkSize(fEndBufIdx + `1`);
645	if (nextIdx == fStartBufIdx) {
646	fStartBufIdx = modChunkSize(fStartBufIdx + `6`); // TODO: experiment. Probably revert to 1.
647	}
648	fBoundaries[nextIdx] = position;
649	fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
650	fEndBufIdx = nextIdx;
651	if (update == UpdateCachePosition) {
652	// Set current position to the newly added boundary.
653	fBufIdx = nextIdx;
654	fTextIdx = position;
655	} else {
656	// Retaining the original cache position.
657	// Check if the added boundary wraps around the buffer, and would over-write the original position.
658	// It's the responsibility of callers of this function to not add too many.
659	U_ASSERT(nextIdx != fBufIdx);
660	}
661	}
662
663	bool RuleBasedBreakIterator::BreakCache::addPreceding(int32_t position, int32_t ruleStatusIdx, UpdatePositionValues update) {
664	U_ASSERT(position < fBoundaries[fStartBufIdx]);
665	U_ASSERT(ruleStatusIdx <= UINT16_MAX);
666	int32_t nextIdx = modChunkSize(fStartBufIdx - `1`);
667	if (nextIdx == fEndBufIdx) {
668	if (fBufIdx == fEndBufIdx && update == RetainCachePosition) {
669	// Failure. The insertion of the new boundary would claim the buffer position that is the
670	// current iteration position. And we also want to retain the current iteration position.
671	// (The buffer is already completely full of entries that precede the iteration position.)
672	return false;
673	}
674	fEndBufIdx = modChunkSize(fEndBufIdx - `1`);
675	}
676	fBoundaries[nextIdx] = position;
677	fStatuses[nextIdx] = static_cast<uint16_t>(ruleStatusIdx);
678	fStartBufIdx = nextIdx;
679	if (update == UpdateCachePosition) {
680	fBufIdx = nextIdx;
681	fTextIdx = position;
682	}
683	return true;
684	}
685
686
687	void RuleBasedBreakIterator::BreakCache::dumpCache() {
688	#ifdef RBBI_DEBUG
689	RBBIDebugPrintf("fTextIdx:%d fBufIdx:%d\n", fTextIdx, fBufIdx);
690	for (int32_t i=fStartBufIdx; ; i=modChunkSize(i+`1`)) {
691	RBBIDebugPrintf("%d %d\n", i, fBoundaries[i]);
692	if (i == fEndBufIdx) {
693	break;
694	}
695	}
696	#endif
697	}
698
699	U_NAMESPACE_END
700
701	#endif // #if !UCONFIG_NO_BREAK_ITERATION
702

Browse the source code of Godot/thirdparty/icu4c/common/rbbi_cache.cpp