1// © 2017 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// bytesinkutil.cpp
5// created: 2017sep14 Markus W. Scherer
6
7#include "unicode/utypes.h"
8#include "unicode/bytestream.h"
9#include "unicode/edits.h"
10#include "unicode/stringoptions.h"
11#include "unicode/utf8.h"
12#include "unicode/utf16.h"
13#include "bytesinkutil.h"
14#include "charstr.h"
15#include "cmemory.h"
16#include "uassert.h"
17
18U_NAMESPACE_BEGIN
19
20UBool
21ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length,
22 ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
23 if (U_FAILURE(errorCode)) { return FALSE; }
24 char scratch[200];
25 int32_t s8Length = 0;
26 for (int32_t i = 0; i < s16Length;) {
27 int32_t capacity;
28 int32_t desiredCapacity = s16Length - i;
29 if (desiredCapacity < (INT32_MAX / 3)) {
30 desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit
31 } else if (desiredCapacity < (INT32_MAX / 2)) {
32 desiredCapacity *= 2;
33 } else {
34 desiredCapacity = INT32_MAX;
35 }
36 char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity,
37 scratch, UPRV_LENGTHOF(scratch), &capacity);
38 capacity -= U8_MAX_LENGTH - 1;
39 int32_t j = 0;
40 for (; i < s16Length && j < capacity;) {
41 UChar32 c;
42 U16_NEXT_UNSAFE(s16, i, c);
43 U8_APPEND_UNSAFE(buffer, j, c);
44 }
45 if (j > (INT32_MAX - s8Length)) {
46 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
47 return FALSE;
48 }
49 sink.Append(buffer, j);
50 s8Length += j;
51 }
52 if (edits != nullptr) {
53 edits->addReplace(length, s8Length);
54 }
55 return TRUE;
56}
57
58UBool
59ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit,
60 const char16_t *s16, int32_t s16Length,
61 ByteSink &sink, Edits *edits, UErrorCode &errorCode) {
62 if (U_FAILURE(errorCode)) { return FALSE; }
63 if ((limit - s) > INT32_MAX) {
64 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
65 return FALSE;
66 }
67 return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode);
68}
69
70void
71ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) {
72 char s8[U8_MAX_LENGTH];
73 int32_t s8Length = 0;
74 U8_APPEND_UNSAFE(s8, s8Length, c);
75 if (edits != nullptr) {
76 edits->addReplace(length, s8Length);
77 }
78 sink.Append(s8, s8Length);
79}
80
81namespace {
82
83// See unicode/utf8.h U8_APPEND_UNSAFE().
84inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); }
85inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); }
86
87} // namespace
88
89void
90ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) {
91 U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8
92 char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) };
93 sink.Append(s8, 2);
94}
95
96void
97ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length,
98 ByteSink &sink, uint32_t options, Edits *edits) {
99 U_ASSERT(length > 0);
100 if (edits != nullptr) {
101 edits->addUnchanged(length);
102 }
103 if ((options & U_OMIT_UNCHANGED_TEXT) == 0) {
104 sink.Append(reinterpret_cast<const char *>(s), length);
105 }
106}
107
108UBool
109ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit,
110 ByteSink &sink, uint32_t options, Edits *edits,
111 UErrorCode &errorCode) {
112 if (U_FAILURE(errorCode)) { return FALSE; }
113 if ((limit - s) > INT32_MAX) {
114 errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
115 return FALSE;
116 }
117 int32_t length = (int32_t)(limit - s);
118 if (length > 0) {
119 appendNonEmptyUnchanged(s, length, sink, options, edits);
120 }
121 return TRUE;
122}
123
124CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) {
125}
126
127CharStringByteSink::~CharStringByteSink() = default;
128
129void
130CharStringByteSink::Append(const char* bytes, int32_t n) {
131 UErrorCode status = U_ZERO_ERROR;
132 dest_.append(bytes, n, status);
133 // Any errors are silently ignored.
134}
135
136char*
137CharStringByteSink::GetAppendBuffer(int32_t min_capacity,
138 int32_t desired_capacity_hint,
139 char* scratch,
140 int32_t scratch_capacity,
141 int32_t* result_capacity) {
142 if (min_capacity < 1 || scratch_capacity < min_capacity) {
143 *result_capacity = 0;
144 return nullptr;
145 }
146
147 UErrorCode status = U_ZERO_ERROR;
148 char* result = dest_.getAppendBuffer(
149 min_capacity,
150 desired_capacity_hint,
151 *result_capacity,
152 status);
153 if (U_SUCCESS(status)) {
154 return result;
155 }
156
157 *result_capacity = scratch_capacity;
158 return scratch;
159}
160
161U_NAMESPACE_END
162