utext.cpp source code [Godot/thirdparty/icu4c/common/utext.cpp]

1	// © 2016 and later: Unicode, Inc. and others.
2	// License & terms of use: http://www.unicode.org/copyright.html
3	/*
4	*******************************************************************************
5	*
6	* Copyright (C) 2005-2016, International Business Machines
7	* Corporation and others. All Rights Reserved.
8	*
9	*******************************************************************************
10	* file name: utext.cpp
11	* encoding: UTF-8
12	* tab size: 8 (not used)
13	* indentation:4
14	*
15	* created on: 2005apr12
16	* created by: Markus W. Scherer
17	*/
18
19	#include <cstddef>
20
21	#include "unicode/utypes.h"
22	#include "unicode/ustring.h"
23	#include "unicode/unistr.h"
24	#include "unicode/chariter.h"
25	#include "unicode/utext.h"
26	#include "unicode/utf.h"
27	#include "unicode/utf8.h"
28	#include "unicode/utf16.h"
29	#include "ustr_imp.h"
30	#include "cmemory.h"
31	#include "cstring.h"
32	#include "uassert.h"
33	#include "putilimp.h"
34
35	U_NAMESPACE_USE
36
// Produce an int32_t mask in which only bit number `bitIndex` is set.
// Used in this file to test and clear individual UTEXT_PROVIDER_* bits
// in UText::providerProperties.
#define I32_FLAG(bitIndex) (((int32_t)1) << (bitIndex))
38
39
40	static UBool
41	utext_access(UText *ut, int64_t index, UBool forward) {
42	return ut->pFuncs->access(ut, index, forward);
43	}
44
45
46
47	U_CAPI UBool U_EXPORT2
48	utext_moveIndex32(UText *ut, int32_t delta) {
49	UChar32 c;
50	if (delta > `0`) {
51	do {
52	if(ut->chunkOffset>=ut->chunkLength && !utext_access(ut, ut->chunkNativeLimit, true)) {
53	return false;
54	}
55	c = ut->chunkContents[ut->chunkOffset];
56	if (U16_IS_SURROGATE(c)) {
57	c = utext_next32(ut);
58	if (c == U_SENTINEL) {
59	return false;
60	}
61	} else {
62	ut->chunkOffset++;
63	}
64	} while(--delta>`0`);
65
66	} else if (delta<`0`) {
67	do {
68	if(ut->chunkOffset<=`0` && !utext_access(ut, ut->chunkNativeStart, false)) {
69	return false;
70	}
71	c = ut->chunkContents[ut->chunkOffset-`1`];
72	if (U16_IS_SURROGATE(c)) {
73	c = utext_previous32(ut);
74	if (c == U_SENTINEL) {
75	return false;
76	}
77	} else {
78	ut->chunkOffset--;
79	}
80	} while(++delta<`0`);
81	}
82
83	return true;
84	}
85
86
87	U_CAPI int64_t U_EXPORT2
88	utext_nativeLength(UText *ut) {
89	return ut->pFuncs->nativeLength(ut);
90	}
91
92
93	U_CAPI UBool U_EXPORT2
94	utext_isLengthExpensive(const UText *ut) {
95	UBool r = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE)) != `0`;
96	return r;
97	}
98
99
100	U_CAPI int64_t U_EXPORT2
101	utext_getNativeIndex(const UText *ut) {
102	if(ut->chunkOffset <= ut->nativeIndexingLimit) {
103	return ut->chunkNativeStart+ut->chunkOffset;
104	} else {
105	return ut->pFuncs->mapOffsetToNative(ut);
106	}
107	}
108
109
110	U_CAPI void U_EXPORT2
111	utext_setNativeIndex(UText *ut, int64_t index) {
112	if(index<ut->chunkNativeStart \|\| index>=ut->chunkNativeLimit) {
113	// The desired position is outside of the current chunk.
114	// Access the new position. Assume a forward iteration from here,
115	// which will also be optimimum for a single random access.
116	// Reverse iterations may suffer slightly.
117	ut->pFuncs->access(ut, index, true);
118	} else if((int32_t)(index - ut->chunkNativeStart) <= ut->nativeIndexingLimit) {
119	// utf-16 indexing.
120	ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);
121	} else {
122	ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
123	}
124	// The convention is that the index must always be on a code point boundary.
125	// Adjust the index position if it is in the middle of a surrogate pair.
126	if (ut->chunkOffset<ut->chunkLength) {
127	char16_t c= ut->chunkContents[ut->chunkOffset];
128	if (U16_IS_TRAIL(c)) {
129	if (ut->chunkOffset==`0`) {
130	ut->pFuncs->access(ut, ut->chunkNativeStart, false);
131	}
132	if (ut->chunkOffset>`0`) {
133	char16_t lead = ut->chunkContents[ut->chunkOffset-`1`];
134	if (U16_IS_LEAD(lead)) {
135	ut->chunkOffset--;
136	}
137	}
138	}
139	}
140	}
141
142
143
144	U_CAPI int64_t U_EXPORT2
145	utext_getPreviousNativeIndex(UText *ut) {
146	//
147	// Fast-path the common case.
148	// Common means current position is not at the beginning of a chunk
149	// and the preceding character is not supplementary.
150	//
151	int32_t i = ut->chunkOffset - `1`;
152	int64_t result;
153	if (i >= `0`) {
154	char16_t c = ut->chunkContents[i];
155	if (U16_IS_TRAIL(c) == false) {
156	if (i <= ut->nativeIndexingLimit) {
157	result = ut->chunkNativeStart + i;
158	} else {
159	ut->chunkOffset = i;
160	result = ut->pFuncs->mapOffsetToNative(ut);
161	ut->chunkOffset++;
162	}
163	return result;
164	}
165	}
166
167	// If at the start of text, simply return 0.
168	if (ut->chunkOffset==`0` && ut->chunkNativeStart==`0`) {
169	return `0`;
170	}
171
172	// Harder, less common cases. We are at a chunk boundary, or on a surrogate.
173	// Keep it simple, use other functions to handle the edges.
174	//
175	utext_previous32(ut);
176	result = UTEXT_GETNATIVEINDEX(ut);
177	utext_next32(ut);
178	return result;
179	}
180
181
182	//
183	// utext_current32. Get the UChar32 at the current position.
184	// UText iteration position is always on a code point boundary,
185	// never on the trail half of a surrogate pair.
186	//
187	U_CAPI UChar32 U_EXPORT2
188	utext_current32(UText *ut) {
189	UChar32 c;
190	if (ut->chunkOffset==ut->chunkLength) {
191	// Current position is just off the end of the chunk.
192	if (ut->pFuncs->access(ut, ut->chunkNativeLimit, true) == false) {
193	// Off the end of the text.
194	return U_SENTINEL;
195	}
196	}
197
198	c = ut->chunkContents[ut->chunkOffset];
199	if (U16_IS_LEAD(c) == false) {
200	// Normal, non-supplementary case.
201	return c;
202	}
203
204	//
205	// Possible supplementary char.
206	//
207	UChar32 trail = `0`;
208	UChar32 supplementaryC = c;
209	if ((ut->chunkOffset+`1`) < ut->chunkLength) {
210	// The trail surrogate is in the same chunk.
211	trail = ut->chunkContents[ut->chunkOffset+`1`];
212	} else {
213	// The trail surrogate is in a different chunk.
214	// Because we must maintain the iteration position, we need to switch forward
215	// into the new chunk, get the trail surrogate, then revert the chunk back to the
216	// original one.
217	// An edge case to be careful of: the entire text may end with an unpaired
218	// leading surrogate. The attempt to access the trail will fail, but
219	// the original position before the unpaired lead still needs to be restored.
220	int64_t nativePosition = ut->chunkNativeLimit;
221	if (ut->pFuncs->access(ut, nativePosition, true)) {
222	trail = ut->chunkContents[ut->chunkOffset];
223	}
224	UBool r = ut->pFuncs->access(ut, nativePosition, false); // reverse iteration flag loads preceding chunk
225	U_ASSERT(r);
226	// Here we need to restore chunkOffset since the access functions were called with
227	// chunkNativeLimit but that is not where we were (we were 1 code unit before the
228	// limit). Restoring was originally added in ICU-4669 but did not support access
229	// functions that changed the chunk size, the following does.
230	ut->chunkOffset = ut->chunkLength - `1`;
231	if(!r) {
232	return U_SENTINEL;
233	}
234	}
235
236	if (U16_IS_TRAIL(trail)) {
237	supplementaryC = U16_GET_SUPPLEMENTARY(c, trail);
238	}
239	return supplementaryC;
240
241	}
242
243
244	U_CAPI UChar32 U_EXPORT2
245	utext_char32At(UText *ut, int64_t nativeIndex) {
246	UChar32 c = U_SENTINEL;
247
248	// Fast path the common case.
249	if (nativeIndex>=ut->chunkNativeStart && nativeIndex < ut->chunkNativeStart + ut->nativeIndexingLimit) {
250	ut->chunkOffset = (int32_t)(nativeIndex - ut->chunkNativeStart);
251	c = ut->chunkContents[ut->chunkOffset];
252	if (U16_IS_SURROGATE(c) == false) {
253	return c;
254	}
255	}
256
257
258	utext_setNativeIndex(ut, nativeIndex);
259	if (nativeIndex>=ut->chunkNativeStart && ut->chunkOffset<ut->chunkLength) {
260	c = ut->chunkContents[ut->chunkOffset];
261	if (U16_IS_SURROGATE(c)) {
262	// For surrogates, let current32() deal with the complications
263	// of supplementaries that may span chunk boundaries.
264	c = utext_current32(ut);
265	}
266	}
267	return c;
268	}
269
270
271	U_CAPI UChar32 U_EXPORT2
272	utext_next32(UText *ut) {
273	UChar32 c;
274
275	if (ut->chunkOffset >= ut->chunkLength) {
276	if (ut->pFuncs->access(ut, ut->chunkNativeLimit, true) == false) {
277	return U_SENTINEL;
278	}
279	}
280
281	c = ut->chunkContents[ut->chunkOffset++];
282	if (U16_IS_LEAD(c) == false) {
283	// Normal case, not supplementary.
284	// (A trail surrogate seen here is just returned as is, as a surrogate value.
285	// It cannot be part of a pair.)
286	return c;
287	}
288
289	if (ut->chunkOffset >= ut->chunkLength) {
290	if (ut->pFuncs->access(ut, ut->chunkNativeLimit, true) == false) {
291	// c is an unpaired lead surrogate at the end of the text.
292	// return it as it is.
293	return c;
294	}
295	}
296	UChar32 trail = ut->chunkContents[ut->chunkOffset];
297	if (U16_IS_TRAIL(trail) == false) {
298	// c was an unpaired lead surrogate, not at the end of the text.
299	// return it as it is (unpaired). Iteration position is on the
300	// following character, possibly in the next chunk, where the
301	// trail surrogate would have been if it had existed.
302	return c;
303	}
304
305	UChar32 supplementary = U16_GET_SUPPLEMENTARY(c, trail);
306	ut->chunkOffset++; // move iteration position over the trail surrogate.
307	return supplementary;
308	}
309
310
311	U_CAPI UChar32 U_EXPORT2
312	utext_previous32(UText *ut) {
313	UChar32 c;
314
315	if (ut->chunkOffset <= `0`) {
316	if (ut->pFuncs->access(ut, ut->chunkNativeStart, false) == false) {
317	return U_SENTINEL;
318	}
319	}
320	ut->chunkOffset--;
321	c = ut->chunkContents[ut->chunkOffset];
322	if (U16_IS_TRAIL(c) == false) {
323	// Normal case, not supplementary.
324	// (A lead surrogate seen here is just returned as is, as a surrogate value.
325	// It cannot be part of a pair.)
326	return c;
327	}
328
329	if (ut->chunkOffset <= `0`) {
330	if (ut->pFuncs->access(ut, ut->chunkNativeStart, false) == false) {
331	// c is an unpaired trail surrogate at the start of the text.
332	// return it as it is.
333	return c;
334	}
335	}
336
337	UChar32 lead = ut->chunkContents[ut->chunkOffset-`1`];
338	if (U16_IS_LEAD(lead) == false) {
339	// c was an unpaired trail surrogate, not at the end of the text.
340	// return it as it is (unpaired). Iteration position is at c
341	return c;
342	}
343
344	UChar32 supplementary = U16_GET_SUPPLEMENTARY(lead, c);
345	ut->chunkOffset--; // move iteration position over the lead surrogate.
346	return supplementary;
347	}
348
349
350
351	U_CAPI UChar32 U_EXPORT2
352	utext_next32From(UText *ut, int64_t index) {
353	UChar32 c = U_SENTINEL;
354
355	if(index<ut->chunkNativeStart \|\| index>=ut->chunkNativeLimit) {
356	// Desired position is outside of the current chunk.
357	if(!ut->pFuncs->access(ut, index, true)) {
358	// no chunk available here
359	return U_SENTINEL;
360	}
361	} else if (index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
362	// Desired position is in chunk, with direct 1:1 native to UTF16 indexing
363	ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
364	} else {
365	// Desired position is in chunk, with non-UTF16 indexing.
366	ut->chunkOffset = ut->pFuncs->mapNativeIndexToUTF16(ut, index);
367	}
368
369	c = ut->chunkContents[ut->chunkOffset++];
370	if (U16_IS_SURROGATE(c)) {
371	// Surrogates. Many edge cases. Use other functions that already
372	// deal with the problems.
373	utext_setNativeIndex(ut, index);
374	c = utext_next32(ut);
375	}
376	return c;
377	}
378
379
380	U_CAPI UChar32 U_EXPORT2
381	utext_previous32From(UText *ut, int64_t index) {
382	//
383	// Return the character preceding the specified index.
384	// Leave the iteration position at the start of the character that was returned.
385	//
386	UChar32 cPrev; // The character preceding cCurr, which is what we will return.
387
388	// Address the chunk containing the position preceding the incoming index
389	// A tricky edge case:
390	// We try to test the requested native index against the chunkNativeStart to determine
391	// whether the character preceding the one at the index is in the current chunk.
392	// BUT, this test can fail with UTF-8 (or any other multibyte encoding), when the
393	// requested index is on something other than the first position of the first char.
394	//
395	if(index<=ut->chunkNativeStart \|\| index>ut->chunkNativeLimit) {
396	// Requested native index is outside of the current chunk.
397	if(!ut->pFuncs->access(ut, index, false)) {
398	// no chunk available here
399	return U_SENTINEL;
400	}
401	} else if(index - ut->chunkNativeStart <= (int64_t)ut->nativeIndexingLimit) {
402	// Direct UTF-16 indexing.
403	ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
404	} else {
405	ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
406	if (ut->chunkOffset==`0` && !ut->pFuncs->access(ut, index, false)) {
407	// no chunk available here
408	return U_SENTINEL;
409	}
410	}
411
412	//
413	// Simple case with no surrogates.
414	//
415	ut->chunkOffset--;
416	cPrev = ut->chunkContents[ut->chunkOffset];
417
418	if (U16_IS_SURROGATE(cPrev)) {
419	// Possible supplementary. Many edge cases.
420	// Let other functions do the heavy lifting.
421	utext_setNativeIndex(ut, index);
422	cPrev = utext_previous32(ut);
423	}
424	return cPrev;
425	}
426
427
428	U_CAPI int32_t U_EXPORT2
429	utext_extract(UText *ut,
430	int64_t start, int64_t limit,
431	char16_t *dest, int32_t destCapacity,
432	UErrorCode *status) {
433	return ut->pFuncs->extract(ut, start, limit, dest, destCapacity, status);
434	}
435
436
437
438	U_CAPI UBool U_EXPORT2
439	utext_equals(const UText a, const* UText *b) {
440	if (a==nullptr \|\| b==nullptr \|\|
441	a->magic != UTEXT_MAGIC \|\|
442	b->magic != UTEXT_MAGIC) {
443	// Null or invalid arguments don't compare equal to anything.
444	return false;
445	}
446
447	if (a->pFuncs != b->pFuncs) {
448	// Different types of text providers.
449	return false;
450	}
451
452	if (a->context != b->context) {
453	// Different sources (different strings)
454	return false;
455	}
456	if (utext_getNativeIndex(a) != utext_getNativeIndex(b)) {
457	// Different current position in the string.
458	return false;
459	}
460
461	return true;
462	}
463
464	U_CAPI UBool U_EXPORT2
465	utext_isWritable(const UText *ut)
466	{
467	UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) != `0`;
468	return b;
469	}
470
471
472	U_CAPI void U_EXPORT2
473	utext_freeze(UText *ut) {
474	// Zero out the WRITABLE flag.
475	ut->providerProperties &= ~(I32_FLAG(UTEXT_PROVIDER_WRITABLE));
476	}
477
478
479	U_CAPI UBool U_EXPORT2
480	utext_hasMetaData(const UText *ut)
481	{
482	UBool b = (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA)) != `0`;
483	return b;
484	}
485
486
487
488	U_CAPI int32_t U_EXPORT2
489	utext_replace(UText *ut,
490	int64_t nativeStart, int64_t nativeLimit,
491	const char16_t *replacementText, int32_t replacementLength,
492	UErrorCode *status)
493	{
494	if (U_FAILURE(*status)) {
495	return `0`;
496	}
497	if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == `0`) {
498	*status = U_NO_WRITE_PERMISSION;
499	return `0`;
500	}
501	int32_t i = ut->pFuncs->replace(ut, nativeStart, nativeLimit, replacementText, replacementLength, status);
502	return i;
503	}
504
505	U_CAPI void U_EXPORT2
506	utext_copy(UText *ut,
507	int64_t nativeStart, int64_t nativeLimit,
508	int64_t destIndex,
509	UBool move,
510	UErrorCode *status)
511	{
512	if (U_FAILURE(*status)) {
513	return;
514	}
515	if ((ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_WRITABLE)) == `0`) {
516	*status = U_NO_WRITE_PERMISSION;
517	return;
518	}
519	ut->pFuncs->copy(ut, nativeStart, nativeLimit, destIndex, move, status);
520	}
521
522
523
524	U_CAPI UText * U_EXPORT2
525	utext_clone(UText dest, const* UText src, UBool deep, UBool readOnly, UErrorCode status) {
526	if (U_FAILURE(*status)) {
527	return dest;
528	}
529	UText *result = src->pFuncs->clone(dest, src, deep, status);
530	if (U_FAILURE(*status)) {
531	return result;
532	}
533	if (result == nullptr) {
534	*status = U_MEMORY_ALLOCATION_ERROR;
535	return result;
536	}
537	if (readOnly) {
538	utext_freeze(result);
539	}
540	return result;
541	}
542
543
544
545	//------------------------------------------------------------------------------
546	//
547	// UText common functions implementation
548	//
549	//------------------------------------------------------------------------------
550
//
//  Bit masks for the UText.flags field.
//
enum {
    // Set when ICU allocated the UText struct itself on the heap;
    // clear when the caller provided the storage for the UText.
    UTEXT_HEAP_ALLOCATED       = 0x1,

    // Set when ICU allocated the extra storage as a separate heap block;
    // clear when there is no separate allocation: either no extra storage
    // was requested, or it is appended to the end of the main UText storage.
    UTEXT_EXTRA_HEAP_ALLOCATED = 0x2,

    // Set while this UText is open; clear when it is not open.
    UTEXT_OPEN                 = 0x4
};
567
568
569	//
570	// Extended form of a UText. The purpose is to aid in computing the total size required
571	// when a provider asks for a UText to be allocated with extra storage.
572
573	struct ExtendedUText {
574	UText ut;
575	std::max_align_t extension;
576	};
577
578	static const UText emptyText = UTEXT_INITIALIZER;
579
580	U_CAPI UText * U_EXPORT2
581	utext_setup(UText ut, int32_t extraSpace, UErrorCode status) {
582	if (U_FAILURE(*status)) {
583	return ut;
584	}
585
586	if (ut == nullptr) {
587	// We need to heap-allocate storage for the new UText
588	int32_t spaceRequired = sizeof(UText);
589	if (extraSpace > `0`) {
590	spaceRequired = sizeof(ExtendedUText) + extraSpace - sizeof(std::max_align_t);
591	}
592	ut = (UText *)uprv_malloc(spaceRequired);
593	if (ut == nullptr) {
594	*status = U_MEMORY_ALLOCATION_ERROR;
595	return nullptr;
596	} else {
597	*ut = emptyText;
598	ut->flags \|= UTEXT_HEAP_ALLOCATED;
599	if (spaceRequired>`0`) {
600	ut->extraSize = extraSpace;
601	ut->pExtra = &((ExtendedUText *)ut)->extension;
602	}
603	}
604	} else {
605	// We have been supplied with an already existing UText.
606	// Verify that it really appears to be a UText.
607	if (ut->magic != UTEXT_MAGIC) {
608	*status = U_ILLEGAL_ARGUMENT_ERROR;
609	return ut;
610	}
611	// If the ut is already open and there's a provider supplied close
612	// function, call it.
613	if ((ut->flags & UTEXT_OPEN) && ut->pFuncs->close != nullptr) {
614	ut->pFuncs->close(ut);
615	}
616	ut->flags &= ~UTEXT_OPEN;
617
618	// If extra space was requested by our caller, check whether
619	// sufficient already exists, and allocate new if needed.
620	if (extraSpace > ut->extraSize) {
621	// Need more space. If there is existing separately allocated space,
622	// delete it first, then allocate new space.
623	if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
624	uprv_free(ut->pExtra);
625	ut->extraSize = `0`;
626	}
627	ut->pExtra = uprv_malloc(extraSpace);
628	if (ut->pExtra == nullptr) {
629	*status = U_MEMORY_ALLOCATION_ERROR;
630	} else {
631	ut->extraSize = extraSpace;
632	ut->flags \|= UTEXT_EXTRA_HEAP_ALLOCATED;
633	}
634	}
635	}
636	if (U_SUCCESS(*status)) {
637	ut->flags \|= UTEXT_OPEN;
638
639	// Initialize all remaining fields of the UText.
640	//
641	ut->context = nullptr;
642	ut->chunkContents = nullptr;
643	ut->p = nullptr;
644	ut->q = nullptr;
645	ut->r = nullptr;
646	ut->a = `0`;
647	ut->b = `0`;
648	ut->c = `0`;
649	ut->chunkOffset = `0`;
650	ut->chunkLength = `0`;
651	ut->chunkNativeStart = `0`;
652	ut->chunkNativeLimit = `0`;
653	ut->nativeIndexingLimit = `0`;
654	ut->providerProperties = `0`;
655	ut->privA = `0`;
656	ut->privB = `0`;
657	ut->privC = `0`;
658	ut->privP = nullptr;
659	if (ut->pExtra!=nullptr && ut->extraSize>`0`)
660	uprv_memset(ut->pExtra, `0`, ut->extraSize);
661
662	}
663	return ut;
664	}
665
666
667	U_CAPI UText * U_EXPORT2
668	utext_close(UText *ut) {
669	if (ut==nullptr \|\|
670	ut->magic != UTEXT_MAGIC \|\|
671	(ut->flags & UTEXT_OPEN) == `0`)
672	{
673	// The supplied ut is not an open UText.
674	// Do nothing.
675	return ut;
676	}
677
678	// If the provider gave us a close function, call it now.
679	// This will clean up anything allocated specifically by the provider.
680	if (ut->pFuncs->close != nullptr) {
681	ut->pFuncs->close(ut);
682	}
683	ut->flags &= ~UTEXT_OPEN;
684
685	// If we (the framework) allocated the UText or subsidiary storage,
686	// delete it.
687	if (ut->flags & UTEXT_EXTRA_HEAP_ALLOCATED) {
688	uprv_free(ut->pExtra);
689	ut->pExtra = nullptr;
690	ut->flags &= ~UTEXT_EXTRA_HEAP_ALLOCATED;
691	ut->extraSize = `0`;
692	}
693
694	// Zero out function table of the closed UText. This is a defensive move,
695	// intended to cause applications that inadvertently use a closed
696	// utext to crash with null pointer errors.
697	ut->pFuncs = nullptr;
698
699	if (ut->flags & UTEXT_HEAP_ALLOCATED) {
700	// This UText was allocated by UText setup. We need to free it.
701	// Clear magic, so we can detect if the user messes up and immediately
702	// tries to reopen another UText using the deleted storage.
703	ut->magic = `0`;
704	uprv_free(ut);
705	ut = nullptr;
706	}
707	return ut;
708	}
709
710
711
712
713	//
714	// invalidateChunk Reset a chunk to have no contents, so that the next call
715	// to access will cause new data to load.
716	// This is needed when copy/move/replace operate directly on the
717	// backing text, potentially putting it out of sync with the
718	// contents in the chunk.
719	//
720	static void
721	invalidateChunk(UText *ut) {
722	ut->chunkLength = `0`;
723	ut->chunkNativeLimit = `0`;
724	ut->chunkNativeStart = `0`;
725	ut->chunkOffset = `0`;
726	ut->nativeIndexingLimit = `0`;
727	}
728
//
// pinIndex    Pin a native index parameter into range.
//             A negative index becomes 0; otherwise an index above `limit`
//             becomes `limit`. The 64 bit value is updated in place, and the
//             same value truncated to 32 bits is returned as a convenience
//             for providers that don't need 64 bits.
static int32_t
pinIndex(int64_t &index, int64_t limit) {
    const int64_t pinned = (index < 0)     ? 0
                         : (index > limit) ? limit
                                           : index;
    index = pinned;
    return (int32_t)pinned;
}
743
744
745	U_CDECL_BEGIN
746
747	//
748	// Pointer relocation function,
749	// a utility used by shallow clone.
750	// Adjust a pointer that refers to something within one UText (the source)
751	// to refer to the same relative offset within a another UText (the target)
752	//
753	static void adjustPointer(UText dest, const* void *destPtr, const* UText *src) {
754	// convert all pointers to (char ) so that byte address arithmetic will work.*
755	char dptr = (char* )destPtr;
756	char dUText = (char* *)dest;
757	char sUText = (char* *)src;
758
759	if (dptr >= (char )src->pExtra && dptr < ((char**)src->pExtra)+src->extraSize) {
760	// target ptr was to something within the src UText's pExtra storage.
761	// relocate it into the target UText's pExtra region.
762	destPtr = ((char* )dest->pExtra) + (dptr - (char* *)src->pExtra);
763	} else if (dptr>=sUText && dptr < sUText+src->sizeOfStruct) {
764	// target ptr was pointing to somewhere within the source UText itself.
765	// Move it to the same offset within the target UText.
766	*destPtr = dUText + (dptr-sUText);
767	}
768	}
769
770
771	//
772	// Clone. This is a generic copy-the-utext-by-value clone function that can be
773	// used as-is with some utext types, and as a helper by other clones.
774	//
775	static UText * U_CALLCONV
776	shallowTextClone(UText * dest, const UText * src, UErrorCode * status) {
777	if (U_FAILURE(*status)) {
778	return nullptr;
779	}
780	int32_t srcExtraSize = src->extraSize;
781
782	//
783	// Use the generic text_setup to allocate storage if required.
784	//
785	dest = utext_setup(dest, srcExtraSize, status);
786	if (U_FAILURE(*status)) {
787	return dest;
788	}
789
790	//
791	// flags (how the UText was allocated) and the pointer to the
792	// extra storage must retain the values in the cloned utext that
793	// were set up by utext_setup. Save them separately before
794	// copying the whole struct.
795	//
796	void *destExtra = dest->pExtra;
797	int32_t flags = dest->flags;
798
799
800	//
801	// Copy the whole UText struct by value.
802	// Any "Extra" storage is copied also.
803	//
804	int sizeToCopy = src->sizeOfStruct;
805	if (sizeToCopy > dest->sizeOfStruct) {
806	sizeToCopy = dest->sizeOfStruct;
807	}
808	uprv_memcpy(dest, src, sizeToCopy);
809	dest->pExtra = destExtra;
810	dest->flags = flags;
811	if (srcExtraSize > `0`) {
812	uprv_memcpy(dest->pExtra, src->pExtra, srcExtraSize);
813	}
814
815	//
816	// Relocate any pointers in the target that refer to the UText itself
817	// to point to the cloned copy rather than the original source.
818	//
819	adjustPointer(dest, &dest->context, src);
820	adjustPointer(dest, &dest->p, src);
821	adjustPointer(dest, &dest->q, src);
822	adjustPointer(dest, &dest->r, src);
823	adjustPointer(dest, (const void **)&dest->chunkContents, src);
824
825	// The newly shallow-cloned UText does _not_ own the underlying storage for the text.
826	// (The source for the clone may or may not have owned the text.)
827
828	dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
829
830	return dest;
831	}
832
833
834	U_CDECL_END
835
836
837
838	//------------------------------------------------------------------------------
839	//
840	// UText implementation for UTF-8 char * strings (read-only)
841	// Limitation: string length must be <= 0x7fffffff in length.
842	// (length must fit in an int32_t variable)
843	//
844	// Use of UText data members:
845	// context pointer to UTF-8 string
846	// utext.b is the input string length (bytes).
847	// utext.c Length scanned so far in string
848	// (for optimizing finding length of zero terminated strings.)
849	// utext.p pointer to the current buffer
850	// utext.q pointer to the other buffer.
851	//
852	//------------------------------------------------------------------------------
853
854	// Chunk size.
855	// Must be less than 85 (256/3), because of byte mapping from char16_t indexes to native indexes.
856	// Worst case is three native bytes to one char16_t. (Supplementaries are 4 native bytes
857	// to two UChars.)
858	// The longest illegal byte sequence treated as a single error (and converted to U+FFFD)
859	// is a three-byte sequence (truncated four-byte sequence).
860	//
enum {
    UTF8_TEXT_CHUNK_SIZE = 32
};
862
863	//
864	// UTF8Buf Two of these structs will be set up in the UText's extra allocated space.
865	// Each contains the char16_t chunk buffer, the to and from native maps, and
866	// header info.
867	//
868	// because backwards iteration fills the buffers starting at the end and
869	// working towards the front, the filled part of the buffers may not begin
870	// at the start of the available storage for the buffers.
871	//
872	// Buffer size is one bigger than the specified UTF8_TEXT_CHUNK_SIZE to allow for
873	// the last character added being a supplementary, and thus requiring a surrogate
874	// pair. Doing this is simpler than checking for the edge case.
875	//
876
877	struct UTF8Buf {
878	int32_t bufNativeStart; // Native index of first char in char16_t buf
879	int32_t bufNativeLimit; // Native index following last char in buf.
880	int32_t bufStartIdx; // First filled position in buf.
881	int32_t bufLimitIdx; // Limit of filled range in buf.
882	int32_t bufNILimit; // Limit of native indexing part of buf
883	int32_t toUCharsMapStart; // Native index corresponding to
884	// mapToUChars[0].
885	// Set to bufNativeStart when filling forwards.
886	// Set to computed value when filling backwards.
887
888	char16_t buf[UTF8_TEXT_CHUNK_SIZE+`4`]; // The char16_t buffer. Requires one extra position beyond the
889	// the chunk size, to allow for surrogate at the end.
890	// Length must be identical to mapToNative array, below,
891	// because of the way indexing works when the array is
892	// filled backwards during a reverse iteration. Thus,
893	// the additional extra size.
894	uint8_t mapToNative[UTF8_TEXT_CHUNK_SIZE+`4`]; // map char16_t index in buf to
895	// native offset from bufNativeStart.
896	// Requires two extra slots,
897	// one for a supplementary starting in the last normal position,
898	// and one for an entry for the buffer limit position.
899	uint8_t mapToUChars[UTF8_TEXT_CHUNK_SIZE`3`+`6`]; // Map native offset from bufNativeStart to*
900	// corresponding offset in filled part of buf.
901	int32_t align;
902	};
903
904	U_CDECL_BEGIN
905
906	//
907	// utf8TextLength
908	//
909	// Get the length of the string. If we don't already know it,
910	// we'll need to scan for the trailing nul.
911	//
912	static int64_t U_CALLCONV
913	utf8TextLength(UText *ut) {
914	if (ut->b < `0`) {
915	// Zero terminated string, and we haven't scanned to the end yet.
916	// Scan it now.
917	const char r = (const* char *)ut->context + ut->c;
918	while (*r != `0`) {
919	r++;
920	}
921	if ((r - (const char *)ut->context) < `0x7fffffff`) {
922	ut->b = (int32_t)(r - (const char *)ut->context);
923	} else {
924	// Actual string was bigger (more than 2 gig) than we
925	// can handle. Clip it to 2 GB.
926	ut->b = `0x7fffffff`;
927	}
928	ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
929	}
930	return ut->b;
931	}
932
933
934
935
936
937
938	static UBool U_CALLCONV
939	utf8TextAccess(UText *ut, int64_t index, UBool forward) {
940	//
941	// Apologies to those who are allergic to goto statements.
942	// Consider each goto to a labelled block to be the equivalent of
943	// call the named block as if it were a function();
944	// return;
945	//
946	const uint8_t s8=(const* uint8_t *)ut->context;
947	UTF8Buf u8b = nullptr*;
948	int32_t length = ut->b; // Length of original utf-8
949	int32_t ix= (int32_t)index; // Requested index, trimmed to 32 bits.
950	int32_t mapIndex = `0`;
951	if (index<`0`) {
952	ix=`0`;
953	} else if (index > `0x7fffffff`) {
954	// Strings with 64 bit lengths not supported by this UTF-8 provider.
955	ix = `0x7fffffff`;
956	}
957
958	// Pin requested index to the string length.
959	if (ix>length) {
960	if (length>=`0`) {
961	ix=length;
962	} else if (ix>=ut->c) {
963	// Zero terminated string, and requested index is beyond
964	// the region that has already been scanned.
965	// Scan up to either the end of the string or to the
966	// requested position, whichever comes first.
967	while (ut->c<ix && s8[ut->c]!=`0`) {
968	ut->c++;
969	}
970	// TODO: support for null terminated string length > 32 bits.
971	if (s8[ut->c] == `0`) {
972	// We just found the actual length of the string.
973	// Trim the requested index back to that.
974	ix = ut->c;
975	ut->b = ut->c;
976	length = ut->c;
977	ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
978	}
979	}
980	}
981
982	//
983	// Dispatch to the appropriate action for a forward iteration request.
984	//
985	if (forward) {
986	if (ix==ut->chunkNativeLimit) {
987	// Check for normal sequential iteration cases first.
988	if (ix==length) {
989	// Just reached end of string
990	// Don't swap buffers, but do set the
991	// current buffer position.
992	ut->chunkOffset = ut->chunkLength;
993	return false;
994	} else {
995	// End of current buffer.
996	// check whether other buffer already has what we need.
997	UTF8Buf altB = (UTF8Buf )ut->q;
998	if (ix>=altB->bufNativeStart && ix<altB->bufNativeLimit) {
999	goto swapBuffers;
1000	}
1001	}
1002	}
1003
1004	// A random access. Desired index could be in either or niether buf.
1005	// For optimizing the order of testing, first check for the index
1006	// being in the other buffer. This will be the case for uses that
1007	// move back and forth over a fairly limited range
1008	{
1009	u8b = (UTF8Buf )ut->q; // the alternate buffer*
1010	if (ix>=u8b->bufNativeStart && ix<u8b->bufNativeLimit) {
1011	// Requested index is in the other buffer.
1012	goto swapBuffers;
1013	}
1014	if (ix == length) {
1015	// Requested index is end-of-string.
1016	// (this is the case of randomly seeking to the end.
1017	// The case of iterating off the end is handled earlier.)
1018	if (ix == ut->chunkNativeLimit) {
1019	// Current buffer extends up to the end of the string.
1020	// Leave it as the current buffer.
1021	ut->chunkOffset = ut->chunkLength;
1022	return false;
1023	}
1024	if (ix == u8b->bufNativeLimit) {
1025	// Alternate buffer extends to the end of string.
1026	// Swap it in as the current buffer.
1027	goto swapBuffersAndFail;
1028	}
1029
1030	// Neither existing buffer extends to the end of the string.
1031	goto makeStubBuffer;
1032	}
1033
1034	if (ix<ut->chunkNativeStart \|\| ix>=ut->chunkNativeLimit) {
1035	// Requested index is in neither buffer.
1036	goto fillForward;
1037	}
1038
1039	// Requested index is in this buffer.
1040	u8b = (UTF8Buf )ut->p; // the current buffer*
1041	mapIndex = ix - u8b->toUCharsMapStart;
1042	U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
1043	ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
1044	return true;
1045
1046	}
1047	}
1048
1049
1050	//
1051	// Dispatch to the appropriate action for a
1052	// Backwards Direction iteration request.
1053	//
1054	if (ix==ut->chunkNativeStart) {
1055	// Check for normal sequential iteration cases first.
1056	if (ix==`0`) {
1057	// Just reached the start of string
1058	// Don't swap buffers, but do set the
1059	// current buffer position.
1060	ut->chunkOffset = `0`;
1061	return false;
1062	} else {
1063	// Start of current buffer.
1064	// check whether other buffer already has what we need.
1065	UTF8Buf altB = (UTF8Buf )ut->q;
1066	if (ix>altB->bufNativeStart && ix<=altB->bufNativeLimit) {
1067	goto swapBuffers;
1068	}
1069	}
1070	}
1071
1072	// A random access. Desired index could be in either or niether buf.
1073	// For optimizing the order of testing,
1074	// Most likely case: in the other buffer.
1075	// Second most likely: in neither buffer.
1076	// Unlikely, but must work: in the current buffer.
1077	u8b = (UTF8Buf )ut->q; // the alternate buffer*
1078	if (ix>u8b->bufNativeStart && ix<=u8b->bufNativeLimit) {
1079	// Requested index is in the other buffer.
1080	goto swapBuffers;
1081	}
1082	// Requested index is start-of-string.
1083	// (this is the case of randomly seeking to the start.
1084	// The case of iterating off the start is handled earlier.)
1085	if (ix==`0`) {
1086	if (u8b->bufNativeStart==`0`) {
1087	// Alternate buffer contains the data for the start string.
1088	// Make it be the current buffer.
1089	goto swapBuffersAndFail;
1090	} else {
1091	// Request for data before the start of string,
1092	// neither buffer is usable.
1093	// set up a zero-length buffer.
1094	goto makeStubBuffer;
1095	}
1096	}
1097
1098	if (ix<=ut->chunkNativeStart \|\| ix>ut->chunkNativeLimit) {
1099	// Requested index is in neither buffer.
1100	goto fillReverse;
1101	}
1102
1103	// Requested index is in this buffer.
1104	// Set the utf16 buffer index.
1105	u8b = (UTF8Buf *)ut->p;
1106	mapIndex = ix - u8b->toUCharsMapStart;
1107	ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
1108	if (ut->chunkOffset==`0`) {
1109	// This occurs when the first character in the text is
1110	// a multi-byte UTF-8 char, and the requested index is to
1111	// one of the trailing bytes. Because there is no preceding ,
1112	// character, this access fails. We can't pick up on the
1113	// situation sooner because the requested index is not zero.
1114	return false;
1115	} else {
1116	return true;
1117	}
1118
1119
1120
1121	swapBuffers:
1122	// The alternate buffer (ut->q) has the string data that was requested.
1123	// Swap the primary and alternate buffers, and set the
1124	// chunk index into the new primary buffer.
1125	{
1126	u8b = (UTF8Buf *)ut->q;
1127	ut->q = ut->p;
1128	ut->p = u8b;
1129	ut->chunkContents = &u8b->buf[u8b->bufStartIdx];
1130	ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;
1131	ut->chunkNativeStart = u8b->bufNativeStart;
1132	ut->chunkNativeLimit = u8b->bufNativeLimit;
1133	ut->nativeIndexingLimit = u8b->bufNILimit;
1134
1135	// Index into the (now current) chunk
1136	// Use the map to set the chunk index. It's more trouble than it's worth
1137	// to check whether native indexing can be used.
1138	U_ASSERT(ix>=u8b->bufNativeStart);
1139	U_ASSERT(ix<=u8b->bufNativeLimit);
1140	mapIndex = ix - u8b->toUCharsMapStart;
1141	U_ASSERT(mapIndex>=`0`);
1142	U_ASSERT(mapIndex<(int32_t)sizeof(u8b->mapToUChars));
1143	ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
1144
1145	return true;
1146	}
1147
1148
1149	swapBuffersAndFail:
1150	// We got a request for either the start or end of the string,
1151	// with iteration continuing in the out-of-bounds direction.
1152	// The alternate buffer already contains the data up to the
1153	// start/end.
1154	// Swap the buffers, then return failure, indicating that we couldn't
1155	// make things correct for continuing the iteration in the requested
1156	// direction. The position & buffer are correct should the
1157	// user decide to iterate in the opposite direction.
1158	u8b = (UTF8Buf *)ut->q;
1159	ut->q = ut->p;
1160	ut->p = u8b;
1161	ut->chunkContents = &u8b->buf[u8b->bufStartIdx];
1162	ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;
1163	ut->chunkNativeStart = u8b->bufNativeStart;
1164	ut->chunkNativeLimit = u8b->bufNativeLimit;
1165	ut->nativeIndexingLimit = u8b->bufNILimit;
1166
1167	// Index into the (now current) chunk
1168	// For this function (swapBuffersAndFail), the requested index
1169	// will always be at either the start or end of the chunk.
1170	if (ix==u8b->bufNativeLimit) {
1171	ut->chunkOffset = ut->chunkLength;
1172	} else {
1173	ut->chunkOffset = `0`;
1174	U_ASSERT(ix == u8b->bufNativeStart);
1175	}
1176	return false;
1177
1178	makeStubBuffer:
1179	// The user has done a seek/access past the start or end
1180	// of the string. Rather than loading data that is likely
1181	// to never be used, just set up a zero-length buffer at
1182	// the position.
1183	u8b = (UTF8Buf *)ut->q;
1184	u8b->bufNativeStart = ix;
1185	u8b->bufNativeLimit = ix;
1186	u8b->bufStartIdx = `0`;
1187	u8b->bufLimitIdx = `0`;
1188	u8b->bufNILimit = `0`;
1189	u8b->toUCharsMapStart = ix;
1190	u8b->mapToNative[`0`] = `0`;
1191	u8b->mapToUChars[`0`] = `0`;
1192	goto swapBuffersAndFail;
1193
1194
1195
1196	fillForward:
1197	{
1198	// Move the incoming index to a code point boundary.
1199	U8_SET_CP_START(s8, `0`, ix);
1200
1201	// Swap the UText buffers.
1202	// We want to fill what was previously the alternate buffer,
1203	// and make what was the current buffer be the new alternate.
1204	UTF8Buf u8b_swap = (UTF8Buf )ut->q;
1205	ut->q = ut->p;
1206	ut->p = u8b_swap;
1207
1208	int32_t strLen = ut->b;
1209	UBool nulTerminated = false;
1210	if (strLen < `0`) {
1211	strLen = `0x7fffffff`;
1212	nulTerminated = true;
1213	}
1214
1215	char16_t *buf = u8b_swap->buf;
1216	uint8_t *mapToNative = u8b_swap->mapToNative;
1217	uint8_t *mapToUChars = u8b_swap->mapToUChars;
1218	int32_t destIx = `0`;
1219	int32_t srcIx = ix;
1220	UBool seenNonAscii = false;
1221	UChar32 c = `0`;
1222
1223	// Fill the chunk buffer and mapping arrays.
1224	while (destIx<UTF8_TEXT_CHUNK_SIZE) {
1225	c = s8[srcIx];
1226	if (c>`0` && c<`0x80`) {
1227	// Special case ASCII range for speed.
1228	// zero is excluded to simplify bounds checking.
1229	buf[destIx] = (char16_t)c;
1230	mapToNative[destIx] = (uint8_t)(srcIx - ix);
1231	mapToUChars[srcIx-ix] = (uint8_t)destIx;
1232	srcIx++;
1233	destIx++;
1234	} else {
1235	// General case, handle everything.
1236	if (seenNonAscii == false) {
1237	seenNonAscii = true;
1238	u8b_swap->bufNILimit = destIx;
1239	}
1240
1241	int32_t cIx = srcIx;
1242	int32_t dIx = destIx;
1243	int32_t dIxSaved = destIx;
1244	U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);
1245	if (c==`0` && nulTerminated) {
1246	srcIx--;
1247	break;
1248	}
1249
1250	U16_APPEND_UNSAFE(buf, destIx, c);
1251	do {
1252	mapToNative[dIx++] = (uint8_t)(cIx - ix);
1253	} while (dIx < destIx);
1254
1255	do {
1256	mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;
1257	} while (cIx < srcIx);
1258	}
1259	if (srcIx>=strLen) {
1260	break;
1261	}
1262
1263	}
1264
1265	// store Native <--> Chunk Map entries for the end of the buffer.
1266	// There is no actual character here, but the index position is valid.
1267	mapToNative[destIx] = (uint8_t)(srcIx - ix);
1268	mapToUChars[srcIx - ix] = (uint8_t)destIx;
1269
1270	// fill in Buffer descriptor
1271	u8b_swap->bufNativeStart = ix;
1272	u8b_swap->bufNativeLimit = srcIx;
1273	u8b_swap->bufStartIdx = `0`;
1274	u8b_swap->bufLimitIdx = destIx;
1275	if (seenNonAscii == false) {
1276	u8b_swap->bufNILimit = destIx;
1277	}
1278	u8b_swap->toUCharsMapStart = u8b_swap->bufNativeStart;
1279
1280	// Set UText chunk to refer to this buffer.
1281	ut->chunkContents = buf;
1282	ut->chunkOffset = `0`;
1283	ut->chunkLength = u8b_swap->bufLimitIdx;
1284	ut->chunkNativeStart = u8b_swap->bufNativeStart;
1285	ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
1286	ut->nativeIndexingLimit = u8b_swap->bufNILimit;
1287
1288	// For zero terminated strings, keep track of the maximum point
1289	// scanned so far.
1290	if (nulTerminated && srcIx>ut->c) {
1291	ut->c = srcIx;
1292	if (c==`0`) {
1293	// We scanned to the end.
1294	// Remember the actual length.
1295	ut->b = srcIx;
1296	ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
1297	}
1298	}
1299	return true;
1300	}
1301
1302
1303	fillReverse:
1304	{
1305	// Move the incoming index to a code point boundary.
1306	// Can only do this if the incoming index is somewhere in the interior of the string.
1307	// If index is at the end, there is no character there to look at.
1308	if (ix != ut->b) {
1309	// Note: this function will only move the index back if it is on a trail byte
1310	// and there is a preceding lead byte and the sequence from the lead
1311	// through this trail could be part of a valid UTF-8 sequence
1312	// Otherwise the index remains unchanged.
1313	U8_SET_CP_START(s8, `0`, ix);
1314	}
1315
1316	// Swap the UText buffers.
1317	// We want to fill what was previously the alternate buffer,
1318	// and make what was the current buffer be the new alternate.
1319	UTF8Buf u8b_swap = (UTF8Buf )ut->q;
1320	ut->q = ut->p;
1321	ut->p = u8b_swap;
1322
1323	char16_t *buf = u8b_swap->buf;
1324	uint8_t *mapToNative = u8b_swap->mapToNative;
1325	uint8_t *mapToUChars = u8b_swap->mapToUChars;
1326	int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + `1`;
1327	// Note that toUCharsMapStart can be negative. Happens when the remaining
1328	// text from current position to the beginning is less than the buffer size.
1329	// + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
1330	int32_t destIx = UTF8_TEXT_CHUNK_SIZE+`2`; // Start in the overflow region
1331	// at end of buffer to leave room
1332	// for a surrogate pair at the
1333	// buffer start.
1334	int32_t srcIx = ix;
1335	int32_t bufNILimit = destIx;
1336	UChar32 c;
1337
1338	// Map to/from Native Indexes, fill in for the position at the end of
1339	// the buffer.
1340	//
1341	mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
1342	mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
1343
1344	// Fill the chunk buffer
1345	// Work backwards, filling from the end of the buffer towards the front.
1346	//
1347	while (destIx>`2` && (srcIx - toUCharsMapStart > `5`) && (srcIx > `0`)) {
1348	srcIx--;
1349	destIx--;
1350
1351	// Get last byte of the UTF-8 character
1352	c = s8[srcIx];
1353	if (c<`0x80`) {
1354	// Special case ASCII range for speed.
1355	buf[destIx] = (char16_t)c;
1356	U_ASSERT(toUCharsMapStart <= srcIx);
1357	mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
1358	mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
1359	} else {
1360	// General case, handle everything non-ASCII.
1361
1362	int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char
1363
1364	// Get the full character from the UTF8 string.
1365	// use code derived from the macros in utf8.h
1366	// Leaves srcIx pointing at the first byte of the UTF-8 char.
1367	//
1368	c=utf8_prevCharSafeBody(s8, `0`, &srcIx, c, -`3`);
1369	// leaves srcIx at first byte of the multi-byte char.
1370
1371	// Store the character in UTF-16 buffer.
1372	if (c<`0x10000`) {
1373	buf[destIx] = (char16_t)c;
1374	mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
1375	} else {
1376	buf[destIx] = U16_TRAIL(c);
1377	mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
1378	buf[--destIx] = U16_LEAD(c);
1379	mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
1380	}
1381
1382	// Fill in the map from native indexes to UChars buf index.
1383	do {
1384	mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
1385	} while (sIx >= srcIx);
1386	U_ASSERT(toUCharsMapStart <= (srcIx+`1`));
1387
1388	// Set native indexing limit to be the current position.
1389	// We are processing a non-ascii, non-native-indexing char now;
1390	// the limit will be here if the rest of the chars to be
1391	// added to this buffer are ascii.
1392	bufNILimit = destIx;
1393	}
1394	}
1395	u8b_swap->bufNativeStart = srcIx;
1396	u8b_swap->bufNativeLimit = ix;
1397	u8b_swap->bufStartIdx = destIx;
1398	u8b_swap->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+`2`;
1399	u8b_swap->bufNILimit = bufNILimit - u8b_swap->bufStartIdx;
1400	u8b_swap->toUCharsMapStart = toUCharsMapStart;
1401
1402	ut->chunkContents = &buf[u8b_swap->bufStartIdx];
1403	ut->chunkLength = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx;
1404	ut->chunkOffset = ut->chunkLength;
1405	ut->chunkNativeStart = u8b_swap->bufNativeStart;
1406	ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
1407	ut->nativeIndexingLimit = u8b_swap->bufNILimit;
1408	return true;
1409	}
1410
1411	}
1412
1413
1414
1415	//
1416	// This is a slightly modified copy of u_strFromUTF8,
1417	// Inserts a Replacement Char rather than failing on invalid UTF-8
1418	// Removes unnecessary features.
1419	//
1420	static char16_t*
1421	utext_strFromUTF8(char16_t *dest,
1422	int32_t destCapacity,
1423	int32_t *pDestLength,
1424	const char* src,
1425	int32_t srcLength, // required. NUL terminated not supported.
1426	UErrorCode *pErrorCode
1427	)
1428	{
1429
1430	char16_t *pDest = dest;
1431	char16_t pDestLimit = (dest!=nullptr)?(dest+destCapacity):nullptr*;
1432	UChar32 ch=`0`;
1433	int32_t index = `0`;
1434	int32_t reqLength = `0`;
1435	uint8_t* pSrc = (uint8_t*) src;
1436
1437
1438	while((index < srcLength)&&(pDest<pDestLimit)){
1439	ch = pSrc[index++];
1440	if(ch <=`0x7f`){
1441	pDest++=(char16_t*)ch;
1442	}else{
1443	ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -`3`);
1444	if(U_IS_BMP(ch)){
1445	(pDest++)=(char16_t*)ch;
1446	}else{
1447	*(pDest++)=U16_LEAD(ch);
1448	if(pDest<pDestLimit){
1449	*(pDest++)=U16_TRAIL(ch);
1450	}else{
1451	reqLength++;
1452	break;
1453	}
1454	}
1455	}
1456	}
1457	/ donot fill the dest buffer just count the UChars needed /
1458	while(index < srcLength){
1459	ch = pSrc[index++];
1460	if(ch <= `0x7f`){
1461	reqLength++;
1462	}else{
1463	ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -`3`);
1464	reqLength+=U16_LENGTH(ch);
1465	}
1466	}
1467
1468	reqLength+=(int32_t)(pDest - dest);
1469
1470	if(pDestLength){
1471	*pDestLength = reqLength;
1472	}
1473
1474	/ Terminate the buffer /
1475	u_terminateUChars(dest,destCapacity,reqLength,pErrorCode);
1476
1477	return dest;
1478	}
1479
1480
1481
1482	static int32_t U_CALLCONV
1483	utf8TextExtract(UText *ut,
1484	int64_t start, int64_t limit,
1485	char16_t *dest, int32_t destCapacity,
1486	UErrorCode *pErrorCode) {
1487	if(U_FAILURE(*pErrorCode)) {
1488	return `0`;
1489	}
1490	if(destCapacity<`0` \|\| (dest==nullptr && destCapacity>`0`)) {
1491	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1492	return `0`;
1493	}
1494	int32_t length = ut->b;
1495	int32_t start32 = pinIndex(start, length);
1496	int32_t limit32 = pinIndex(limit, length);
1497
1498	if(start32>limit32) {
1499	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1500	return `0`;
1501	}
1502
1503
1504	// adjust the incoming indexes to land on code point boundaries if needed.
1505	// adjust by no more than three, because that is the largest number of trail bytes
1506	// in a well formed UTF8 character.
1507	const uint8_t buf = (const* uint8_t *)ut->context;
1508	int i;
1509	if (start32 < ut->chunkNativeLimit) {
1510	for (i=`0`; i<`3`; i++) {
1511	if (U8_IS_SINGLE(buf[start32]) \|\| U8_IS_LEAD(buf[start32]) \|\| start32==`0`) {
1512	break;
1513	}
1514	start32--;
1515	}
1516	}
1517
1518	if (limit32 < ut->chunkNativeLimit) {
1519	for (i=`0`; i<`3`; i++) {
1520	if (U8_IS_SINGLE(buf[limit32]) \|\| U8_IS_LEAD(buf[limit32]) \|\| limit32==`0`) {
1521	break;
1522	}
1523	limit32--;
1524	}
1525	}
1526
1527	// Do the actual extract.
1528	int32_t destLength=`0`;
1529	utext_strFromUTF8(dest, destCapacity, &destLength,
1530	(const char *)ut->context+start32, limit32-start32,
1531	pErrorCode);
1532	utf8TextAccess(ut, limit32, true);
1533	return destLength;
1534	}
1535
1536	//
1537	// utf8TextMapOffsetToNative
1538	//
1539	// Map a chunk (UTF-16) offset to a native index.
1540	static int64_t U_CALLCONV
1541	utf8TextMapOffsetToNative(const UText *ut) {
1542	//
1543	UTF8Buf u8b = (UTF8Buf )ut->p;
1544	U_ASSERT(ut->chunkOffset>ut->nativeIndexingLimit && ut->chunkOffset<=ut->chunkLength);
1545	int32_t nativeOffset = u8b->mapToNative[ut->chunkOffset + u8b->bufStartIdx] + u8b->toUCharsMapStart;
1546	U_ASSERT(nativeOffset >= ut->chunkNativeStart && nativeOffset <= ut->chunkNativeLimit);
1547	return nativeOffset;
1548	}
1549
1550	//
1551	// Map a native index to the corresponding chunk offset
1552	//
1553	static int32_t U_CALLCONV
1554	utf8TextMapIndexToUTF16(const UText *ut, int64_t index64) {
1555	U_ASSERT(index64 <= `0x7fffffff`);
1556	int32_t index = (int32_t)index64;
1557	UTF8Buf u8b = (UTF8Buf )ut->p;
1558	U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
1559	U_ASSERT(index<=ut->chunkNativeLimit);
1560	int32_t mapIndex = index - u8b->toUCharsMapStart;
1561	U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
1562	int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
1563	U_ASSERT(offset>=`0` && offset<=ut->chunkLength);
1564	return offset;
1565	}
1566
1567	static UText * U_CALLCONV
1568	utf8TextClone(UText dest, const* UText src, UBool deep, UErrorCode status)
1569	{
1570	// First do a generic shallow clone. Does everything needed for the UText struct itself.
1571	dest = shallowTextClone(dest, src, status);
1572
1573	// For deep clones, make a copy of the string.
1574	// The copied storage is owned by the newly created clone.
1575	//
1576	// TODO: There is an issue with using utext_nativeLength().
1577	// That function is non-const in cases where the input was NUL terminated
1578	// and the length has not yet been determined.
1579	// This function (clone()) is const.
1580	// There potentially a thread safety issue lurking here.
1581	//
1582	if (deep && U_SUCCESS(*status)) {
1583	int32_t len = (int32_t)utext_nativeLength((UText *)src);
1584	char copyStr = (char* *)uprv_malloc(len+`1`);
1585	if (copyStr == nullptr) {
1586	*status = U_MEMORY_ALLOCATION_ERROR;
1587	} else {
1588	uprv_memcpy(copyStr, src->context, len+`1`);
1589	dest->context = copyStr;
1590	dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
1591	}
1592	}
1593	return dest;
1594	}
1595
1596
1597	static void U_CALLCONV
1598	utf8TextClose(UText *ut) {
1599	// Most of the work of close is done by the generic UText framework close.
1600	// All that needs to be done here is to delete the UTF8 string if the UText
1601	// owns it. This occurs if the UText was created by cloning.
1602	if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
1603	char s = (char* *)ut->context;
1604	uprv_free(s);
1605	ut->context = nullptr;
1606	}
1607	}
1608
1609	U_CDECL_END
1610
1611
1612	static const struct UTextFuncs utf8Funcs =
1613	{
1614	sizeof(UTextFuncs),
1615	`0`, `0`, `0`, // Reserved alignment padding
1616	utf8TextClone,
1617	utf8TextLength,
1618	utf8TextAccess,
1619	utf8TextExtract,
1620	nullptr, / replace/
1621	nullptr, / copy /
1622	utf8TextMapOffsetToNative,
1623	utf8TextMapIndexToUTF16,
1624	utf8TextClose,
1625	nullptr, // spare 1
1626	nullptr, // spare 2
1627	nullptr // spare 3
1628	};
1629
1630
1631	static const char gEmptyString[] = {`0`};
1632
1633	U_CAPI UText * U_EXPORT2
1634	utext_openUTF8(UText ut, const* char s, int64_t length, UErrorCode status) {
1635	if(U_FAILURE(*status)) {
1636	return nullptr;
1637	}
1638	if(s==nullptr && length==`0`) {
1639	s = gEmptyString;
1640	}
1641
1642	if(s==nullptr \|\| length<-`1` \|\| length>INT32_MAX) {
1643	*status=U_ILLEGAL_ARGUMENT_ERROR;
1644	return nullptr;
1645	}
1646
1647	ut = utext_setup(ut, sizeof(UTF8Buf) * `2`, status);
1648	if (U_FAILURE(*status)) {
1649	return ut;
1650	}
1651
1652	ut->pFuncs = &utf8Funcs;
1653	ut->context = s;
1654	ut->b = (int32_t)length;
1655	ut->c = (int32_t)length;
1656	if (ut->c < `0`) {
1657	ut->c = `0`;
1658	ut->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
1659	}
1660	ut->p = ut->pExtra;
1661	ut->q = (char )ut->pExtra + sizeof*(UTF8Buf);
1662	return ut;
1663
1664	}
1665
1666
1667
1668
1669
1670
1671
1672
1673	//------------------------------------------------------------------------------
1674	//
1675	// UText implementation wrapper for Replaceable (read/write)
1676	//
1677	// Use of UText data members:
1678	// context pointer to Replaceable.
1679	// p pointer to Replaceable if it is owned by the UText.
1680	//
1681	//------------------------------------------------------------------------------
1682
1683
1684
// Chunk size, in UTF-16 code units, for the Replaceable provider.
// The minimum usable value for this implementation is 3, to allow for
// possible trimming at code point boundaries.
enum {
    REP_TEXT_CHUNK_SIZE = 10
};

// Extra storage for a Replaceable-backed UText (reached through ut->pExtra).
struct ReplExtra {
    // Chunk UChars.
    // One slot more than REP_TEXT_CHUNK_SIZE, to simplify filling when a
    // surrogate pair falls at the end.
    char16_t s[REP_TEXT_CHUNK_SIZE + 1];
};
1696
1697
1698	U_CDECL_BEGIN
1699
1700	static UText * U_CALLCONV
1701	repTextClone(UText dest, const* UText src, UBool deep, UErrorCode status) {
1702	// First do a generic shallow clone. Does everything needed for the UText struct itself.
1703	dest = shallowTextClone(dest, src, status);
1704
1705	// For deep clones, make a copy of the Replaceable.
1706	// The copied Replaceable storage is owned by the newly created UText clone.
1707	// A non-nullptr pointer in UText.p is the signal to the close() function to delete
1708	// it.
1709	//
1710	if (deep && U_SUCCESS(*status)) {
1711	const Replaceable replSrc = (const* Replaceable *)src->context;
1712	dest->context = replSrc->clone();
1713	dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
1714
1715	// with deep clone, the copy is writable, even when the source is not.
1716	dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
1717	}
1718	return dest;
1719	}
1720
1721
1722	static void U_CALLCONV
1723	repTextClose(UText *ut) {
1724	// Most of the work of close is done by the generic UText framework close.
1725	// All that needs to be done here is delete the Replaceable if the UText
1726	// owns it. This occurs if the UText was created by cloning.
1727	if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
1728	Replaceable rep = (Replaceable )ut->context;
1729	delete rep;
1730	ut->context = nullptr;
1731	}
1732	}
1733
1734
1735	static int64_t U_CALLCONV
1736	repTextLength(UText *ut) {
1737	const Replaceable replSrc = (const* Replaceable *)ut->context;
1738	int32_t len = replSrc->length();
1739	return len;
1740	}
1741
1742
1743	static UBool U_CALLCONV
1744	repTextAccess(UText *ut, int64_t index, UBool forward) {
1745	const Replaceable rep=(const* Replaceable *)ut->context;
1746	int32_t length=rep->length(); // Full length of the input text (bigger than a chunk)
1747
1748	// clip the requested index to the limits of the text.
1749	int32_t index32 = pinIndex(index, length);
1750	U_ASSERT(index<=INT32_MAX);
1751
1752
1753	/*
1754	* Compute start/limit boundaries around index, for a segment of text
1755	* to be extracted.
1756	* To allow for the possibility that our user gave an index to the trailing
1757	* half of a surrogate pair, we must request one extra preceding char16_t when
1758	* going in the forward direction. This will ensure that the buffer has the
1759	* entire code point at the specified index.
1760	*/
1761	if(forward) {
1762
1763	if (index32>=ut->chunkNativeStart && index32<ut->chunkNativeLimit) {
1764	// Buffer already contains the requested position.
1765	ut->chunkOffset = (int32_t)(index - ut->chunkNativeStart);
1766	return true;
1767	}
1768	if (index32>=length && ut->chunkNativeLimit==length) {
1769	// Request for end of string, and buffer already extends up to it.
1770	// Can't get the data, but don't change the buffer.
1771	ut->chunkOffset = length - (int32_t)ut->chunkNativeStart;
1772	return false;
1773	}
1774
1775	ut->chunkNativeLimit = index + REP_TEXT_CHUNK_SIZE - `1`;
1776	// Going forward, so we want to have the buffer with stuff at and beyond
1777	// the requested index. The -1 gets us one code point before the
1778	// requested index also, to handle the case of the index being on
1779	// a trail surrogate of a surrogate pair.
1780	if(ut->chunkNativeLimit > length) {
1781	ut->chunkNativeLimit = length;
1782	}
1783	// unless buffer ran off end, start is index-1.
1784	ut->chunkNativeStart = ut->chunkNativeLimit - REP_TEXT_CHUNK_SIZE;
1785	if(ut->chunkNativeStart < `0`) {
1786	ut->chunkNativeStart = `0`;
1787	}
1788	} else {
1789	// Reverse iteration. Fill buffer with data preceding the requested index.
1790	if (index32>ut->chunkNativeStart && index32<=ut->chunkNativeLimit) {
1791	// Requested position already in buffer.
1792	ut->chunkOffset = index32 - (int32_t)ut->chunkNativeStart;
1793	return true;
1794	}
1795	if (index32==`0` && ut->chunkNativeStart==`0`) {
1796	// Request for start, buffer already begins at start.
1797	// No data, but keep the buffer as is.
1798	ut->chunkOffset = `0`;
1799	return false;
1800	}
1801
1802	// Figure out the bounds of the chunk to extract for reverse iteration.
1803	// Need to worry about chunk not splitting surrogate pairs, and while still
1804	// containing the data we need.
1805	// Fix by requesting a chunk that includes an extra char16_t at the end.
1806	// If this turns out to be a lead surrogate, we can lop it off and still have
1807	// the data we wanted.
1808	ut->chunkNativeStart = index32 + `1` - REP_TEXT_CHUNK_SIZE;
1809	if (ut->chunkNativeStart < `0`) {
1810	ut->chunkNativeStart = `0`;
1811	}
1812
1813	ut->chunkNativeLimit = index32 + `1`;
1814	if (ut->chunkNativeLimit > length) {
1815	ut->chunkNativeLimit = length;
1816	}
1817	}
1818
1819	// Extract the new chunk of text from the Replaceable source.
1820	ReplExtra ex = (ReplExtra )ut->pExtra;
1821	// UnicodeString with its buffer a writable alias to the chunk buffer
1822	UnicodeString buffer(ex->s, `0` /buffer length/, REP_TEXT_CHUNK_SIZE /buffer capacity/);
1823	rep->extractBetween((int32_t)ut->chunkNativeStart, (int32_t)ut->chunkNativeLimit, buffer);
1824
1825	ut->chunkContents = ex->s;
1826	ut->chunkLength = (int32_t)(ut->chunkNativeLimit - ut->chunkNativeStart);
1827	ut->chunkOffset = (int32_t)(index32 - ut->chunkNativeStart);
1828
1829	// Surrogate pairs from the input text must not span chunk boundaries.
1830	// If end of chunk could be the start of a surrogate, trim it off.
1831	if (ut->chunkNativeLimit < length &&
1832	U16_IS_LEAD(ex->s[ut->chunkLength-`1`])) {
1833	ut->chunkLength--;
1834	ut->chunkNativeLimit--;
1835	if (ut->chunkOffset > ut->chunkLength) {
1836	ut->chunkOffset = ut->chunkLength;
1837	}
1838	}
1839
1840	// if the first char16_t in the chunk could be the trailing half of a surrogate pair,
1841	// trim it off.
1842	if(ut->chunkNativeStart>`0` && U16_IS_TRAIL(ex->s[`0`])) {
1843	++(ut->chunkContents);
1844	++(ut->chunkNativeStart);
1845	--(ut->chunkLength);
1846	--(ut->chunkOffset);
1847	}
1848
1849	// adjust the index/chunkOffset to a code point boundary
1850	U16_SET_CP_START(ut->chunkContents, `0`, ut->chunkOffset);
1851
1852	// Use fast indexing for get/setNativeIndex()
1853	ut->nativeIndexingLimit = ut->chunkLength;
1854
1855	return true;
1856	}
1857
1858
1859
1860	static int32_t U_CALLCONV
1861	repTextExtract(UText *ut,
1862	int64_t start, int64_t limit,
1863	char16_t *dest, int32_t destCapacity,
1864	UErrorCode *status) {
1865	const Replaceable rep=(const* Replaceable *)ut->context;
1866	int32_t length=rep->length();
1867
1868	if(U_FAILURE(*status)) {
1869	return `0`;
1870	}
1871	if(destCapacity<`0` \|\| (dest==nullptr && destCapacity>`0`)) {
1872	*status=U_ILLEGAL_ARGUMENT_ERROR;
1873	}
1874	if(start>limit) {
1875	*status=U_INDEX_OUTOFBOUNDS_ERROR;
1876	return `0`;
1877	}
1878
1879	int32_t start32 = pinIndex(start, length);
1880	int32_t limit32 = pinIndex(limit, length);
1881
1882	// adjust start, limit if they point to trail half of surrogates
1883	if (start32<length && U16_IS_TRAIL(rep->charAt(start32)) &&
1884	U_IS_SUPPLEMENTARY(rep->char32At(start32))){
1885	start32--;
1886	}
1887	if (limit32<length && U16_IS_TRAIL(rep->charAt(limit32)) &&
1888	U_IS_SUPPLEMENTARY(rep->char32At(limit32))){
1889	limit32--;
1890	}
1891
1892	length=limit32-start32;
1893	if(length>destCapacity) {
1894	limit32 = start32 + destCapacity;
1895	}
1896	UnicodeString buffer(dest, `0`, destCapacity); // writable alias
1897	rep->extractBetween(start32, limit32, buffer);
1898	repTextAccess(ut, limit32, true);
1899
1900	return u_terminateUChars(dest, destCapacity, length, status);
1901	}
1902
1903	static int32_t U_CALLCONV
1904	repTextReplace(UText *ut,
1905	int64_t start, int64_t limit,
1906	const char16_t *src, int32_t length,
1907	UErrorCode *status) {
1908	Replaceable rep=(Replaceable )ut->context;
1909	int32_t oldLength;
1910
1911	if(U_FAILURE(*status)) {
1912	return `0`;
1913	}
1914	if(src==nullptr && length!=`0`) {
1915	*status=U_ILLEGAL_ARGUMENT_ERROR;
1916	return `0`;
1917	}
1918	oldLength=rep->length(); // will subtract from new length
1919	if(start>limit ) {
1920	*status=U_INDEX_OUTOFBOUNDS_ERROR;
1921	return `0`;
1922	}
1923
1924	int32_t start32 = pinIndex(start, oldLength);
1925	int32_t limit32 = pinIndex(limit, oldLength);
1926
1927	// Snap start & limit to code point boundaries.
1928	if (start32<oldLength && U16_IS_TRAIL(rep->charAt(start32)) &&
1929	start32>`0` && U16_IS_LEAD(rep->charAt(start32-`1`)))
1930	{
1931	start32--;
1932	}
1933	if (limit32<oldLength && U16_IS_LEAD(rep->charAt(limit32-`1`)) &&
1934	U16_IS_TRAIL(rep->charAt(limit32)))
1935	{
1936	limit32++;
1937	}
1938
1939	// Do the actual replace operation using methods of the Replaceable class
1940	UnicodeString replStr((UBool)(length<`0`), src, length); // read-only alias
1941	rep->handleReplaceBetween(start32, limit32, replStr);
1942	int32_t newLength = rep->length();
1943	int32_t lengthDelta = newLength - oldLength;
1944
1945	// Is the UText chunk buffer OK?
1946	if (ut->chunkNativeLimit > start32) {
1947	// this replace operation may have impacted the current chunk.
1948	// invalidate it, which will force a reload on the next access.
1949	invalidateChunk(ut);
1950	}
1951
1952	// set the iteration position to the end of the newly inserted replacement text.
1953	int32_t newIndexPos = limit32 + lengthDelta;
1954	repTextAccess(ut, newIndexPos, true);
1955
1956	return lengthDelta;
1957	}
1958
1959
1960	static void U_CALLCONV
1961	repTextCopy(UText *ut,
1962	int64_t start, int64_t limit,
1963	int64_t destIndex,
1964	UBool move,
1965	UErrorCode *status)
1966	{
1967	Replaceable rep=(Replaceable )ut->context;
1968	int32_t length=rep->length();
1969
1970	if(U_FAILURE(*status)) {
1971	return;
1972	}
1973	if (start>limit \|\| (start<destIndex && destIndex<limit))
1974	{
1975	*status=U_INDEX_OUTOFBOUNDS_ERROR;
1976	return;
1977	}
1978
1979	int32_t start32 = pinIndex(start, length);
1980	int32_t limit32 = pinIndex(limit, length);
1981	int32_t destIndex32 = pinIndex(destIndex, length);
1982
1983	// TODO: snap input parameters to code point boundaries.
1984
1985	if(move) {
1986	// move: copy to destIndex, then replace original with nothing
1987	int32_t segLength=limit32-start32;
1988	rep->copy(start32, limit32, destIndex32);
1989	if(destIndex32<start32) {
1990	start32+=segLength;
1991	limit32+=segLength;
1992	}
1993	rep->handleReplaceBetween(start32, limit32, UnicodeString ());
1994	} else {
1995	// copy
1996	rep->copy(start32, limit32, destIndex32);
1997	}
1998
1999	// If the change to the text touched the region in the chunk buffer,
2000	// invalidate the buffer.
2001	int32_t firstAffectedIndex = destIndex32;
2002	if (move && start32<firstAffectedIndex) {
2003	firstAffectedIndex = start32;
2004	}
2005	if (firstAffectedIndex < ut->chunkNativeLimit) {
2006	// changes may have affected range covered by the chunk
2007	invalidateChunk(ut);
2008	}
2009
2010	// Put iteration position at the newly inserted (moved) block,
2011	int32_t nativeIterIndex = destIndex32 + limit32 - start32;
2012	if (move && destIndex32>start32) {
2013	// moved a block of text towards the end of the string.
2014	nativeIterIndex = destIndex32;
2015	}
2016
2017	// Set position, reload chunk if needed.
2018	repTextAccess(ut, nativeIterIndex, true);
2019	}
2020
2021	static const struct UTextFuncs repFuncs =
2022	{
2023	sizeof(UTextFuncs),
2024	`0`, `0`, `0`, // Reserved alignment padding
2025	repTextClone,
2026	repTextLength,
2027	repTextAccess,
2028	repTextExtract,
2029	repTextReplace,
2030	repTextCopy,
2031	nullptr, // MapOffsetToNative,
2032	nullptr, // MapIndexToUTF16,
2033	repTextClose,
2034	nullptr, // spare 1
2035	nullptr, // spare 2
2036	nullptr // spare 3
2037	};
2038
2039
2040	U_CAPI UText * U_EXPORT2
2041	utext_openReplaceable(UText ut, Replaceable rep, UErrorCode *status)
2042	{
2043	if(U_FAILURE(*status)) {
2044	return nullptr;
2045	}
2046	if(rep==nullptr) {
2047	*status=U_ILLEGAL_ARGUMENT_ERROR;
2048	return nullptr;
2049	}
2050	ut = utext_setup(ut, sizeof(ReplExtra), status);
2051	if(U_FAILURE(*status)) {
2052	return ut;
2053	}
2054
2055	ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
2056	if(rep->hasMetaData()) {
2057	ut->providerProperties \|=I32_FLAG(UTEXT_PROVIDER_HAS_META_DATA);
2058	}
2059
2060	ut->pFuncs = &repFuncs;
2061	ut->context = rep;
2062	return ut;
2063	}
2064
2065	U_CDECL_END
2066
2067
2068
2069
2070
2071
2072
2073
//------------------------------------------------------------------------------
//
//     UText implementation for UnicodeString (read/write)  and
//                    for const UnicodeString (read only)
//             (same implementation, only the flags are different)
//
//         Use of UText data members:
//            context    pointer to UnicodeString
//
//         Ownership: when the UTEXT_PROVIDER_OWNS_TEXT flag is set in
//                    providerProperties, this UText owns the UnicodeString
//                    that context points to, and close() deletes it.
//
//------------------------------------------------------------------------------
2086
2087	U_CDECL_BEGIN
2088
2089
2090	static UText * U_CALLCONV
2091	unistrTextClone(UText dest, const* UText src, UBool deep, UErrorCode status) {
2092	// First do a generic shallow clone. Does everything needed for the UText struct itself.
2093	dest = shallowTextClone(dest, src, status);
2094
2095	// For deep clones, make a copy of the UnicodeSring.
2096	// The copied UnicodeString storage is owned by the newly created UText clone.
2097	// A non-nullptr pointer in UText.p is the signal to the close() function to delete
2098	// the UText.
2099	//
2100	if (deep && U_SUCCESS(*status)) {
2101	const UnicodeString srcString = (const* UnicodeString *)src->context;
2102	dest->context = new UnicodeString (*srcString);
2103	dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
2104
2105	// with deep clone, the copy is writable, even when the source is not.
2106	dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
2107	}
2108	return dest;
2109	}
2110
2111	static void U_CALLCONV
2112	unistrTextClose(UText *ut) {
2113	// Most of the work of close is done by the generic UText framework close.
2114	// All that needs to be done here is delete the UnicodeString if the UText
2115	// owns it. This occurs if the UText was created by cloning.
2116	if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
2117	UnicodeString str = (UnicodeString )ut->context;
2118	delete str;
2119	ut->context = nullptr;
2120	}
2121	}
2122
2123
2124	static int64_t U_CALLCONV
2125	unistrTextLength(UText *t) {
2126	return ((const UnicodeString *)t->context)->length();
2127	}
2128
2129
2130	static UBool U_CALLCONV
2131	unistrTextAccess(UText *ut, int64_t index, UBool forward) {
2132	int32_t length = ut->chunkLength;
2133	ut->chunkOffset = pinIndex(index, length);
2134
2135	// Check whether request is at the start or end
2136	UBool retVal = (forward && index<length) \|\| (!forward && index>`0`);
2137	return retVal;
2138	}
2139
2140
2141
2142	static int32_t U_CALLCONV
2143	unistrTextExtract(UText *t,
2144	int64_t start, int64_t limit,
2145	char16_t *dest, int32_t destCapacity,
2146	UErrorCode *pErrorCode) {
2147	const UnicodeString us=(const* UnicodeString *)t->context;
2148	int32_t length=us->length();
2149
2150	if(U_FAILURE(*pErrorCode)) {
2151	return `0`;
2152	}
2153	if(destCapacity<`0` \|\| (dest==nullptr && destCapacity>`0`)) {
2154	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2155	}
2156	if(start<`0` \|\| start>limit) {
2157	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2158	return `0`;
2159	}
2160
2161	int32_t start32 = start<length ? us->getChar32Start((int32_t)start) : length;
2162	int32_t limit32 = limit<length ? us->getChar32Start((int32_t)limit) : length;
2163
2164	length=limit32-start32;
2165	if (destCapacity>`0` && dest!=nullptr) {
2166	int32_t trimmedLength = length;
2167	if(trimmedLength>destCapacity) {
2168	trimmedLength=destCapacity;
2169	}
2170	us->extract(start32, trimmedLength, dest);
2171	t->chunkOffset = start32+trimmedLength;
2172	} else {
2173	t->chunkOffset = start32;
2174	}
2175	u_terminateUChars(dest, destCapacity, length, pErrorCode);
2176	return length;
2177	}
2178
2179	static int32_t U_CALLCONV
2180	unistrTextReplace(UText *ut,
2181	int64_t start, int64_t limit,
2182	const char16_t *src, int32_t length,
2183	UErrorCode *pErrorCode) {
2184	UnicodeString us=(UnicodeString )ut->context;
2185	int32_t oldLength;
2186
2187	if(U_FAILURE(*pErrorCode)) {
2188	return `0`;
2189	}
2190	if(src==nullptr && length!=`0`) {
2191	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2192	}
2193	if(start>limit) {
2194	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2195	return `0`;
2196	}
2197	oldLength=us->length();
2198	int32_t start32 = pinIndex(start, oldLength);
2199	int32_t limit32 = pinIndex(limit, oldLength);
2200	if (start32 < oldLength) {
2201	start32 = us->getChar32Start(start32);
2202	}
2203	if (limit32 < oldLength) {
2204	limit32 = us->getChar32Start(limit32);
2205	}
2206
2207	// replace
2208	us->replace(start32, limit32-start32, src, length);
2209	int32_t newLength = us->length();
2210
2211	// Update the chunk description.
2212	ut->chunkContents = us->getBuffer();
2213	ut->chunkLength = newLength;
2214	ut->chunkNativeLimit = newLength;
2215	ut->nativeIndexingLimit = newLength;
2216
2217	// Set iteration position to the point just following the newly inserted text.
2218	int32_t lengthDelta = newLength - oldLength;
2219	ut->chunkOffset = limit32 + lengthDelta;
2220
2221	return lengthDelta;
2222	}
2223
2224	static void U_CALLCONV
2225	unistrTextCopy(UText *ut,
2226	int64_t start, int64_t limit,
2227	int64_t destIndex,
2228	UBool move,
2229	UErrorCode *pErrorCode) {
2230	UnicodeString us=(UnicodeString )ut->context;
2231	int32_t length=us->length();
2232
2233	if(U_FAILURE(*pErrorCode)) {
2234	return;
2235	}
2236	int32_t start32 = pinIndex(start, length);
2237	int32_t limit32 = pinIndex(limit, length);
2238	int32_t destIndex32 = pinIndex(destIndex, length);
2239
2240	if( start32>limit32 \|\| (start32<destIndex32 && destIndex32<limit32)) {
2241	*pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2242	return;
2243	}
2244
2245	if(move) {
2246	// move: copy to destIndex, then remove original
2247	int32_t segLength=limit32-start32;
2248	us->copy(start32, limit32, destIndex32);
2249	if(destIndex32<start32) {
2250	start32+=segLength;
2251	}
2252	us->remove(start32, segLength);
2253	} else {
2254	// copy
2255	us->copy(start32, limit32, destIndex32);
2256	}
2257
2258	// update chunk description, set iteration position.
2259	ut->chunkContents = us->getBuffer();
2260	if (move==false) {
2261	// copy operation, string length grows
2262	ut->chunkLength += limit32-start32;
2263	ut->chunkNativeLimit = ut->chunkLength;
2264	ut->nativeIndexingLimit = ut->chunkLength;
2265	}
2266
2267	// Iteration position to end of the newly inserted text.
2268	ut->chunkOffset = destIndex32+limit32-start32;
2269	if (move && destIndex32>start32) {
2270	ut->chunkOffset = destIndex32;
2271	}
2272
2273	}
2274
2275	static const struct UTextFuncs unistrFuncs =
2276	{
2277	sizeof(UTextFuncs),
2278	`0`, `0`, `0`, // Reserved alignment padding
2279	unistrTextClone,
2280	unistrTextLength,
2281	unistrTextAccess,
2282	unistrTextExtract,
2283	unistrTextReplace,
2284	unistrTextCopy,
2285	nullptr, // MapOffsetToNative,
2286	nullptr, // MapIndexToUTF16,
2287	unistrTextClose,
2288	nullptr, // spare 1
2289	nullptr, // spare 2
2290	nullptr // spare 3
2291	};
2292
2293
2294
2295	U_CDECL_END
2296
2297
2298	U_CAPI UText * U_EXPORT2
2299	utext_openUnicodeString(UText ut, UnicodeString s, UErrorCode *status) {
2300	ut = utext_openConstUnicodeString(ut, s, status);
2301	if (U_SUCCESS(*status)) {
2302	ut->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
2303	}
2304	return ut;
2305	}
2306
2307
2308
2309	U_CAPI UText * U_EXPORT2
2310	utext_openConstUnicodeString(UText ut, const* UnicodeString s, UErrorCode status) {
2311	if (U_SUCCESS(*status) && s->isBogus()) {
2312	// The UnicodeString is bogus, but we still need to detach the UText
2313	// from whatever it was hooked to before, if anything.
2314	utext_openUChars(ut, nullptr, `0`, status);
2315	*status = U_ILLEGAL_ARGUMENT_ERROR;
2316	return ut;
2317	}
2318	ut = utext_setup(ut, `0`, status);
2319	// note: use the standard (writable) function table for UnicodeString.
2320	// The flag settings disable writing, so having the functions in
2321	// the table is harmless.
2322	if (U_SUCCESS(*status)) {
2323	ut->pFuncs = &unistrFuncs;
2324	ut->context = s;
2325	ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
2326	ut->chunkContents = s->getBuffer();
2327	ut->chunkLength = s->length();
2328	ut->chunkNativeStart = `0`;
2329	ut->chunkNativeLimit = ut->chunkLength;
2330	ut->nativeIndexingLimit = ut->chunkLength;
2331	}
2332	return ut;
2333	}
2334
//------------------------------------------------------------------------------
//
//     UText implementation for const char16_t * strings
//
//         Use of UText data members:
//            context    pointer to the char16_t string
//            a          length.  -1 if not yet known.
//
//         TODO:  support 64 bit lengths.
//
//------------------------------------------------------------------------------
2346
2347	U_CDECL_BEGIN
2348
2349
2350	static UText * U_CALLCONV
2351	ucstrTextClone(UText dest, const* UText * src, UBool deep, UErrorCode * status) {
2352	// First do a generic shallow clone.
2353	dest = shallowTextClone(dest, src, status);
2354
2355	// For deep clones, make a copy of the string.
2356	// The copied storage is owned by the newly created clone.
2357	// A non-nullptr pointer in UText.p is the signal to the close() function to delete
2358	// it.
2359	//
2360	if (deep && U_SUCCESS(*status)) {
2361	U_ASSERT(utext_nativeLength(dest) < INT32_MAX);
2362	int32_t len = (int32_t)utext_nativeLength(dest);
2363
2364	// The cloned string IS going to be NUL terminated, whether or not the original was.
2365	const char16_t srcStr = (const* char16_t *)src->context;
2366	char16_t copyStr = (char16_t* )uprv_malloc((len+`1`) sizeof(char16_t));
2367	if (copyStr == nullptr) {
2368	*status = U_MEMORY_ALLOCATION_ERROR;
2369	} else {
2370	int64_t i;
2371	for (i=`0`; i<len; i++) {
2372	copyStr[i] = srcStr[i];
2373	}
2374	copyStr[len] = `0`;
2375	dest->context = copyStr;
2376	dest->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
2377	}
2378	}
2379	return dest;
2380	}
2381
2382
2383	static void U_CALLCONV
2384	ucstrTextClose(UText *ut) {
2385	// Most of the work of close is done by the generic UText framework close.
2386	// All that needs to be done here is delete the string if the UText
2387	// owns it. This occurs if the UText was created by cloning.
2388	if (ut->providerProperties & I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT)) {
2389	char16_t s = (char16_t* *)ut->context;
2390	uprv_free(s);
2391	ut->context = nullptr;
2392	}
2393	}
2394
2395
2396
2397	static int64_t U_CALLCONV
2398	ucstrTextLength(UText *ut) {
2399	if (ut->a < `0`) {
2400	// null terminated, we don't yet know the length. Scan for it.
2401	// Access is not convenient for doing this
2402	// because the current iteration position can't be changed.
2403	const char16_t str = (const* char16_t *)ut->context;
2404	for (;;) {
2405	if (str[ut->chunkNativeLimit] == `0`) {
2406	break;
2407	}
2408	ut->chunkNativeLimit++;
2409	}
2410	ut->a = ut->chunkNativeLimit;
2411	ut->chunkLength = (int32_t)ut->chunkNativeLimit;
2412	ut->nativeIndexingLimit = ut->chunkLength;
2413	ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
2414	}
2415	return ut->a;
2416	}
2417
2418
2419	static UBool U_CALLCONV
2420	ucstrTextAccess(UText *ut, int64_t index, UBool forward) {
2421	const char16_t str = (const* char16_t *)ut->context;
2422
2423	// pin the requested index to the bounds of the string,
2424	// and set current iteration position.
2425	if (index<`0`) {
2426	index = `0`;
2427	} else if (index < ut->chunkNativeLimit) {
2428	// The request data is within the chunk as it is known so far.
2429	// Put index on a code point boundary.
2430	U16_SET_CP_START(str, `0`, index);
2431	} else if (ut->a >= `0`) {
2432	// We know the length of this string, and the user is requesting something
2433	// at or beyond the length. Pin the requested index to the length.
2434	index = ut->a;
2435	} else {
2436	// Null terminated string, length not yet known, and the requested index
2437	// is beyond where we have scanned so far.
2438	// Scan to 32 UChars beyond the requested index. The strategy here is
2439	// to avoid fully scanning a long string when the caller only wants to
2440	// see a few characters at its beginning.
2441	int32_t scanLimit = (int32_t)index + `32`;
2442	if ((index + `32`)>INT32_MAX \|\| (index + `32`)<`0` ) { // note: int64 expression
2443	scanLimit = INT32_MAX;
2444	}
2445
2446	int32_t chunkLimit = (int32_t)ut->chunkNativeLimit;
2447	for (; chunkLimit<scanLimit; chunkLimit++) {
2448	if (str[chunkLimit] == `0`) {
2449	// We found the end of the string. Remember it, pin the requested index to it,
2450	// and bail out of here.
2451	ut->a = chunkLimit;
2452	ut->chunkLength = chunkLimit;
2453	ut->nativeIndexingLimit = chunkLimit;
2454	if (index >= chunkLimit) {
2455	index = chunkLimit;
2456	} else {
2457	U16_SET_CP_START(str, `0`, index);
2458	}
2459
2460	ut->chunkNativeLimit = chunkLimit;
2461	ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
2462	goto breakout;
2463	}
2464	}
2465	// We scanned through the next batch of UChars without finding the end.
2466	U16_SET_CP_START(str, `0`, index);
2467	if (chunkLimit == INT32_MAX) {
2468	// Scanned to the limit of a 32 bit length.
2469	// Forceably trim the overlength string back so length fits in int32
2470	// TODO: add support for 64 bit strings.
2471	ut->a = chunkLimit;
2472	ut->chunkLength = chunkLimit;
2473	ut->nativeIndexingLimit = chunkLimit;
2474	if (index > chunkLimit) {
2475	index = chunkLimit;
2476	}
2477	ut->chunkNativeLimit = chunkLimit;
2478	ut->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
2479	} else {
2480	// The endpoint of a chunk must not be left in the middle of a surrogate pair.
2481	// If the current end is on a lead surrogate, back the end up by one.
2482	// It doesn't matter if the end char happens to be an unpaired surrogate,
2483	// and it's simpler not to worry about it.
2484	if (U16_IS_LEAD(str[chunkLimit-`1`])) {
2485	--chunkLimit;
2486	}
2487	// Null-terminated chunk with end still unknown.
2488	// Update the chunk length to reflect what has been scanned thus far.
2489	// That the full length is still unknown is (still) flagged by
2490	// ut->a being < 0.
2491	ut->chunkNativeLimit = chunkLimit;
2492	ut->nativeIndexingLimit = chunkLimit;
2493	ut->chunkLength = chunkLimit;
2494	}
2495
2496	}
2497	breakout:
2498	U_ASSERT(index<=INT32_MAX);
2499	ut->chunkOffset = (int32_t)index;
2500
2501	// Check whether request is at the start or end
2502	UBool retVal = (forward && index<ut->chunkNativeLimit) \|\| (!forward && index>`0`);
2503	return retVal;
2504	}
2505
2506
2507
2508	static int32_t U_CALLCONV
2509	ucstrTextExtract(UText *ut,
2510	int64_t start, int64_t limit,
2511	char16_t *dest, int32_t destCapacity,
2512	UErrorCode *pErrorCode)
2513	{
2514	if(U_FAILURE(*pErrorCode)) {
2515	return `0`;
2516	}
2517	if(destCapacity<`0` \|\| (dest==nullptr && destCapacity>`0`) \|\| start>limit) {
2518	*pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
2519	return `0`;
2520	}
2521
2522	//const char16_t s=(const char16_t )ut->context;
2523	int32_t si, di;
2524
2525	int32_t start32;
2526	int32_t limit32;
2527
2528	// Access the start. Does two things we need:
2529	// Pins 'start' to the length of the string, if it came in out-of-bounds.
2530	// Snaps 'start' to the beginning of a code point.
2531	ucstrTextAccess(ut, start, true);
2532	const char16_t *s=ut->chunkContents;
2533	start32 = ut->chunkOffset;
2534
2535	int32_t strLength=(int32_t)ut->a;
2536	if (strLength >= `0`) {
2537	limit32 = pinIndex(limit, strLength);
2538	} else {
2539	limit32 = pinIndex(limit, INT32_MAX);
2540	}
2541	di = `0`;
2542	for (si=start32; si<limit32; si++) {
2543	if (strLength<`0` && s[si]==`0`) {
2544	// Just hit the end of a null-terminated string.
2545	ut->a = si; // set string length for this UText
2546	ut->chunkNativeLimit = si;
2547	ut->chunkLength = si;
2548	ut->nativeIndexingLimit = si;
2549	strLength = si;
2550	limit32 = si;
2551	break;
2552	}
2553	U_ASSERT(di>=`0`); / to ensure di never exceeds INT32_MAX, which must not happen logically /
2554	if (di<destCapacity) {
2555	// only store if there is space.
2556	dest[di] = s[si];
2557	} else {
2558	if (strLength>=`0`) {
2559	// We have filled the destination buffer, and the string length is known.
2560	// Cut the loop short. There is no need to scan string termination.
2561	di = limit32 - start32;
2562	si = limit32;
2563	break;
2564	}
2565	}
2566	di++;
2567	}
2568
2569	// If the limit index points to a lead surrogate of a pair,
2570	// add the corresponding trail surrogate to the destination.
2571	if (si>`0` && U16_IS_LEAD(s[si-`1`]) &&
2572	((si<strLength \|\| strLength<`0`) && U16_IS_TRAIL(s[si])))
2573	{
2574	if (di<destCapacity) {
2575	// store only if there is space in the output buffer.
2576	dest[di++] = s[si];
2577	}
2578	si++;
2579	}
2580
2581	// Put iteration position at the point just following the extracted text
2582	if (si <= ut->chunkNativeLimit) {
2583	ut->chunkOffset = si;
2584	} else {
2585	ucstrTextAccess(ut, si, true);
2586	}
2587
2588	// Add a terminating NUL if space in the buffer permits,
2589	// and set the error status as required.
2590	u_terminateUChars(dest, destCapacity, di, pErrorCode);
2591	return di;
2592	}
2593
2594	static const struct UTextFuncs ucstrFuncs =
2595	{
2596	sizeof(UTextFuncs),
2597	`0`, `0`, `0`, // Reserved alignment padding
2598	ucstrTextClone,
2599	ucstrTextLength,
2600	ucstrTextAccess,
2601	ucstrTextExtract,
2602	nullptr, // Replace
2603	nullptr, // Copy
2604	nullptr, // MapOffsetToNative,
2605	nullptr, // MapIndexToUTF16,
2606	ucstrTextClose,
2607	nullptr, // spare 1
2608	nullptr, // spare 2
2609	nullptr, // spare 3
2610	};
2611
2612	U_CDECL_END
2613
2614	static const char16_t gEmptyUString[] = {`0`};
2615
2616	U_CAPI UText * U_EXPORT2
2617	utext_openUChars(UText ut, const* char16_t s, int64_t length, UErrorCode status) {
2618	if (U_FAILURE(*status)) {
2619	return nullptr;
2620	}
2621	if(s==nullptr && length==`0`) {
2622	s = gEmptyUString;
2623	}
2624	if (s==nullptr \|\| length < -`1` \|\| length>INT32_MAX) {
2625	*status = U_ILLEGAL_ARGUMENT_ERROR;
2626	return nullptr;
2627	}
2628	ut = utext_setup(ut, `0`, status);
2629	if (U_SUCCESS(*status)) {
2630	ut->pFuncs = &ucstrFuncs;
2631	ut->context = s;
2632	ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS);
2633	if (length==-`1`) {
2634	ut->providerProperties \|= I32_FLAG(UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE);
2635	}
2636	ut->a = length;
2637	ut->chunkContents = s;
2638	ut->chunkNativeStart = `0`;
2639	ut->chunkNativeLimit = length>=`0`? length : `0`;
2640	ut->chunkLength = (int32_t)ut->chunkNativeLimit;
2641	ut->chunkOffset = `0`;
2642	ut->nativeIndexingLimit = ut->chunkLength;
2643	}
2644	return ut;
2645	}
2646
2647
2648	//------------------------------------------------------------------------------
2649	//
2650	// UText implementation for text from ICU CharacterIterators
2651	//
2652	// Use of UText data members:
2653	// context pointer to the CharacterIterator
2654	// a length of the full text.
2655	// p pointer to buffer 1
2656	// b start index of local buffer 1 contents
2657	// q pointer to buffer 2
2658	// c start index of local buffer 2 contents
2659	// r pointer to the character iterator if the UText owns it.
2660	// Null otherwise.
2661	//
2662	//------------------------------------------------------------------------------
// Number of char16_t units held by each of the two chunk buffers used by
// the CharacterIterator-backed UText.
#define CIBufSize 16
2664
2665	U_CDECL_BEGIN
2666	static void U_CALLCONV
2667	charIterTextClose(UText *ut) {
2668	// Most of the work of close is done by the generic UText framework close.
2669	// All that needs to be done here is delete the CharacterIterator if the UText
2670	// owns it. This occurs if the UText was created by cloning.
2671	CharacterIterator ci = (CharacterIterator )ut->r;
2672	delete ci;
2673	ut->r = nullptr;
2674	}
2675
2676	static int64_t U_CALLCONV
2677	charIterTextLength(UText *ut) {
2678	return (int32_t)ut->a;
2679	}
2680
2681	static UBool U_CALLCONV
2682	charIterTextAccess(UText *ut, int64_t index, UBool forward) {
2683	CharacterIterator ci = (CharacterIterator )ut->context;
2684
2685	int32_t clippedIndex = (int32_t)index;
2686	if (clippedIndex<`0`) {
2687	clippedIndex=`0`;
2688	} else if (clippedIndex>=ut->a) {
2689	clippedIndex=(int32_t)ut->a;
2690	}
2691	int32_t neededIndex = clippedIndex;
2692	if (!forward && neededIndex>`0`) {
2693	// reverse iteration, want the position just before what was asked for.
2694	neededIndex--;
2695	} else if (forward && neededIndex==ut->a && neededIndex>`0`) {
2696	// Forward iteration, don't ask for something past the end of the text.
2697	neededIndex--;
2698	}
2699
2700	// Find the native index of the start of the buffer containing what we want.
2701	neededIndex -= neededIndex % CIBufSize;
2702
2703	char16_t buf = nullptr*;
2704	UBool needChunkSetup = true;
2705	int i;
2706	if (ut->chunkNativeStart == neededIndex) {
2707	// The buffer we want is already the current chunk.
2708	needChunkSetup = false;
2709	} else if (ut->b == neededIndex) {
2710	// The first buffer (buffer p) has what we need.
2711	buf = (char16_t *)ut->p;
2712	} else if (ut->c == neededIndex) {
2713	// The second buffer (buffer q) has what we need.
2714	buf = (char16_t *)ut->q;
2715	} else {
2716	// Neither buffer already has what we need.
2717	// Load new data from the character iterator.
2718	// Use the buf that is not the current buffer.
2719	buf = (char16_t *)ut->p;
2720	if (ut->p == ut->chunkContents) {
2721	buf = (char16_t *)ut->q;
2722	}
2723	ci->setIndex(neededIndex);
2724	for (i=`0`; i<CIBufSize; i++) {
2725	buf[i] = ci->nextPostInc();
2726	if (i+neededIndex > ut->a) {
2727	break;
2728	}
2729	}
2730	}
2731
2732	// We have a buffer with the data we need.
2733	// Set it up as the current chunk, if it wasn't already.
2734	if (needChunkSetup) {
2735	ut->chunkContents = buf;
2736	ut->chunkLength = CIBufSize;
2737	ut->chunkNativeStart = neededIndex;
2738	ut->chunkNativeLimit = neededIndex + CIBufSize;
2739	if (ut->chunkNativeLimit > ut->a) {
2740	ut->chunkNativeLimit = ut->a;
2741	ut->chunkLength = (int32_t)(ut->chunkNativeLimit)-(int32_t)(ut->chunkNativeStart);
2742	}
2743	ut->nativeIndexingLimit = ut->chunkLength;
2744	U_ASSERT(ut->chunkOffset>=`0` && ut->chunkOffset<=CIBufSize);
2745	}
2746	ut->chunkOffset = clippedIndex - (int32_t)ut->chunkNativeStart;
2747	UBool success = (forward? ut->chunkOffset<ut->chunkLength : ut->chunkOffset>`0`);
2748	return success;
2749	}
2750
2751	static UText * U_CALLCONV
2752	charIterTextClone(UText dest, const* UText src, UBool deep, UErrorCode status) {
2753	if (U_FAILURE(*status)) {
2754	return nullptr;
2755	}
2756
2757	if (deep) {
2758	// There is no CharacterIterator API for cloning the underlying text storage.
2759	*status = U_UNSUPPORTED_ERROR;
2760	return nullptr;
2761	} else {
2762	CharacterIterator srcCI =(CharacterIterator )src->context;
2763	srcCI = srcCI->clone();
2764	dest = utext_openCharacterIterator(dest, srcCI, status);
2765	if (U_FAILURE(*status)) {
2766	return dest;
2767	}
2768	// cast off const on getNativeIndex.
2769	// For CharacterIterator based UTexts, this is safe, the operation is const.
2770	int64_t ix = utext_getNativeIndex((UText *)src);
2771	utext_setNativeIndex(dest, ix);
2772	dest->r = srcCI; // flags that this UText owns the CharacterIterator
2773	}
2774	return dest;
2775	}
2776
2777	static int32_t U_CALLCONV
2778	charIterTextExtract(UText *ut,
2779	int64_t start, int64_t limit,
2780	char16_t *dest, int32_t destCapacity,
2781	UErrorCode *status)
2782	{
2783	if(U_FAILURE(*status)) {
2784	return `0`;
2785	}
2786	if(destCapacity<`0` \|\| (dest==nullptr && destCapacity>`0`) \|\| start>limit) {
2787	*status=U_ILLEGAL_ARGUMENT_ERROR;
2788	return `0`;
2789	}
2790	int32_t length = (int32_t)ut->a;
2791	int32_t start32 = pinIndex(start, length);
2792	int32_t limit32 = pinIndex(limit, length);
2793	int32_t desti = `0`;
2794	int32_t srci;
2795	int32_t copyLimit;
2796
2797	CharacterIterator ci = (CharacterIterator )ut->context;
2798	ci->setIndex32(start32); // Moves ix to lead of surrogate pair, if needed.
2799	srci = ci->getIndex();
2800	copyLimit = srci;
2801	while (srci<limit32) {
2802	UChar32 c = ci->next32PostInc();
2803	int32_t len = U16_LENGTH(c);
2804	U_ASSERT(desti+len>`0`); / to ensure desti+len never exceeds MAX_INT32, which must not happen logically /
2805	if (desti+len <= destCapacity) {
2806	U16_APPEND_UNSAFE(dest, desti, c);
2807	copyLimit = srci+len;
2808	} else {
2809	desti += len;
2810	*status = U_BUFFER_OVERFLOW_ERROR;
2811	}
2812	srci += len;
2813	}
2814
2815	charIterTextAccess(ut, copyLimit, true);
2816
2817	u_terminateUChars(dest, destCapacity, desti, status);
2818	return desti;
2819	}
2820
2821	static const struct UTextFuncs charIterFuncs =
2822	{
2823	sizeof(UTextFuncs),
2824	`0`, `0`, `0`, // Reserved alignment padding
2825	charIterTextClone,
2826	charIterTextLength,
2827	charIterTextAccess,
2828	charIterTextExtract,
2829	nullptr, // Replace
2830	nullptr, // Copy
2831	nullptr, // MapOffsetToNative,
2832	nullptr, // MapIndexToUTF16,
2833	charIterTextClose,
2834	nullptr, // spare 1
2835	nullptr, // spare 2
2836	nullptr // spare 3
2837	};
2838	U_CDECL_END
2839
2840
2841	U_CAPI UText * U_EXPORT2
2842	utext_openCharacterIterator(UText ut, CharacterIterator ci, UErrorCode *status) {
2843	if (U_FAILURE(*status)) {
2844	return nullptr;
2845	}
2846
2847	if (ci->startIndex() > `0`) {
2848	// No support for CharacterIterators that do not start indexing from zero.
2849	*status = U_UNSUPPORTED_ERROR;
2850	return nullptr;
2851	}
2852
2853	// Extra space in UText for 2 buffers of CIBufSize UChars each.
2854	int32_t extraSpace = `2` * CIBufSize * sizeof(char16_t);
2855	ut = utext_setup(ut, extraSpace, status);
2856	if (U_SUCCESS(*status)) {
2857	ut->pFuncs = &charIterFuncs;
2858	ut->context = ci;
2859	ut->providerProperties = `0`;
2860	ut->a = ci->endIndex(); // Length of text
2861	ut->p = ut->pExtra; // First buffer
2862	ut->b = -`1`; // Native index of first buffer contents
2863	ut->q = (char16_t)ut->pExtra+CIBufSize; // Second buffer*
2864	ut->c = -`1`; // Native index of second buffer contents
2865
2866	// Initialize current chunk contents to be empty.
2867	// First access will fault something in.
2868	// Note: The initial nativeStart and chunkOffset must sum to zero
2869	// so that getNativeIndex() will correctly compute to zero
2870	// if no call to Access() has ever been made. They can't be both
2871	// zero without Access() thinking that the chunk is valid.
2872	ut->chunkContents = (char16_t *)ut->p;
2873	ut->chunkNativeStart = -`1`;
2874	ut->chunkOffset = `1`;
2875	ut->chunkNativeLimit = `0`;
2876	ut->chunkLength = `0`;
2877	ut->nativeIndexingLimit = ut->chunkOffset; // enables native indexing
2878	}
2879	return ut;
2880	}
2881

Browse the source code of Godot/thirdparty/icu4c/common/utext.cpp