SkUnicode_icu.cpp source code [engine/third_party/skia/modules/skshaper/src/SkUnicode_icu.cpp]

1	/*
2	* Copyright 2020 Google Inc.
3	*
4	* Use of this source code is governed by a BSD-style license that can be
5	* found in the LICENSE file.
6	*/
7	#include "include/private/SkTFitsIn.h"
8	#include "include/private/SkTemplates.h"
9	#include "modules/skshaper/src/SkUnicode.h"
10	#include "src/utils/SkUTF.h"
11	#include <unicode/ubidi.h>
12	#include <unicode/ubrk.h>
13	#include <unicode/utext.h>
14	#include <unicode/utypes.h>
15	#include <vector>
16	#include <functional>
17
18	using ICUBiDi = std::unique_ptr<UBiDi, SkFunctionWrapper<decltype(ubidi_close), ubidi_close>>;
19	using ICUUText = std::unique_ptr<UText, SkFunctionWrapper<decltype(utext_close), utext_close>>;
20	using ICUBreakIterator = std::unique_ptr<UBreakIterator, SkFunctionWrapper<decltype(ubrk_close), ubrk_close>>;
21
22	/* Replaces invalid utf-8 sequences with REPLACEMENT CHARACTER U+FFFD. /
23	static inline SkUnichar utf8_next(const char** ptr, const char* end) {
24	SkUnichar val = SkUTF::NextUTF8(ptr, end);
25	return val < `0` ? `0xFFFD` : val;
26	}
27
28	namespace skia {
29
30	class SkUnicode_icu : public SkUnicode {
31
32	static UBreakIteratorType convertType(UBreakType type) {
33	switch (type) {
34	case UBreakType::kLines: return UBRK_LINE;
35	case UBreakType::kGraphemes: return UBRK_CHARACTER;
36	case UBreakType::kWords: return UBRK_WORD;
37	default:
38	SkDEBUGF("Convert error: wrong break type");
39	return UBRK_CHARACTER;
40	}
41	}
42
43	static int convertUtf8ToUtf16(const char* utf8, size_t utf8Units, std::unique_ptr<uint16_t[]>* utf16) {
44	int utf16Units = SkUTF::UTF8ToUTF16(nullptr, `0`, utf8, utf8Units);
45	if (utf16Units < `0`) {
46	SkDEBUGF("Convert error: Invalid utf8 input");
47	return utf16Units;
48	}
49	utf16 = std::unique_ptr<uint16_t[]>(new* uint16_t[utf16Units]);
50	SkDEBUGCODE(int dstLen =) SkUTF::UTF8ToUTF16(utf16->get(), utf16Units, utf8, utf8Units);
51	SkASSERT(dstLen == utf16Units);
52	return utf16Units;
53	}
54
55	static bool extractBidi(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* bidiRegions) {
56
57	// Convert to UTF16 since for now bidi iterator only operates on utf16
58	std::unique_ptr<uint16_t[]> utf16;
59	auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
60	if (utf16Units < `0`) {
61	return false;
62	}
63
64	// Create bidi iterator
65	UErrorCode status = U_ZERO_ERROR;
66	ICUBiDi bidi(ubidi_openSized(utf16Units, `0`, &status));
67	if (U_FAILURE(status)) {
68	SkDEBUGF("Bidi error: %s", u_errorName(status));
69	return false;
70	}
71	SkASSERT(bidi);
72	uint8_t bidiLevel = (dir == Direction::kLTR) ? UBIDI_LTR : UBIDI_RTL;
73	// The required lifetime of utf16 isn't well documented.
74	// It appears it isn't used after ubidi_setPara except through ubidi_getText.
75	ubidi_setPara(bidi.get(), (const UChar)utf16.get(), utf16Units, bidiLevel, nullptr*, &status);
76	if (U_FAILURE(status)) {
77	SkDEBUGF("Bidi error: %s", u_errorName(status));
78	return false;
79	}
80
81	// Iterate through bidi regions and the result positions into utf8
82	const char* start8 = utf8;
83	const char* end8 = utf8 + utf8Units;
84	BidiLevel currentLevel = `0`;
85
86	Position pos8 = `0`;
87	Position pos16 = `0`;
88	Position end16 = ubidi_getLength(bidi.get());
89	while (pos16 < end16) {
90	auto level = ubidi_getLevelAt(bidi.get(), pos16);
91	if (pos16 == `0`) {
92	currentLevel = level;
93	} else if (level != currentLevel) {
94	Position end = start8 - utf8;
95	bidiRegions->emplace_back(pos8, end, currentLevel);
96	currentLevel = level;
97	pos8 = end;
98	}
99	SkUnichar u = utf8_next(&start8, end8);
100	pos16 += SkUTF::ToUTF16(u);
101	}
102	Position end = start8 - utf8;
103	if (end != pos8) {
104	bidiRegions->emplace_back(pos8, end, currentLevel);
105	}
106	return true;
107	}
108
109	static bool extractWords(uint16_t utf16[], int utf16Units, std::vector<Position>* words) {
110
111	UErrorCode status = U_ZERO_ERROR;
112
113	UBreakIteratorType breakType = convertType(UBreakType::kWords);
114	ICUBreakIterator iterator(ubrk_open(breakType, uloc_getDefault(), nullptr, `0`, &status));
115	if (U_FAILURE(status)) {
116	SkDEBUGF("Break error: %s", u_errorName(status));
117	return false;
118	}
119	SkASSERT(iterator);
120
121	UText sUtf16UText = UTEXT_INITIALIZER;
122	ICUUText utf16UText(utext_openUChars(&sUtf16UText, (UChar*)utf16, utf16Units, &status));
123	if (U_FAILURE(status)) {
124	SkDEBUGF("Break error: %s", u_errorName(status));
125	return false;
126	}
127
128	ubrk_setUText(iterator.get(), utf16UText.get(), &status);
129	if (U_FAILURE(status)) {
130	SkDEBUGF("Break error: %s", u_errorName(status));
131	return false;
132	}
133
134	// Get the words
135	int32_t pos = ubrk_first(iterator.get());
136	while (pos != UBRK_DONE) {
137	words->emplace_back(pos);
138	pos = ubrk_next(iterator.get());
139	}
140
141	return true;
142	}
143
144	static bool extractPositions(const char utf8[], int utf8Units, UBreakType type, std::function<void(int, int)> add) {
145
146	UErrorCode status = U_ZERO_ERROR;
147	UText sUtf8UText = UTEXT_INITIALIZER;
148	ICUUText text(utext_openUTF8(&sUtf8UText, &utf8[`0`], utf8Units, &status));
149
150	if (U_FAILURE(status)) {
151	SkDEBUGF("Break error: %s", u_errorName(status));
152	return false;
153	}
154	SkASSERT(text);
155
156	ICUBreakIterator iterator(ubrk_open(convertType(type), uloc_getDefault(), nullptr, `0`, &status));
157	if (U_FAILURE(status)) {
158	SkDEBUGF("Break error: %s", u_errorName(status));
159	}
160
161	ubrk_setUText(iterator.get(), text.get(), &status);
162	if (U_FAILURE(status)) {
163	SkDEBUGF("Break error: %s", u_errorName(status));
164	return false;
165	}
166
167	auto iter = iterator.get();
168	int32_t pos = ubrk_first(iter);
169	while (pos != UBRK_DONE) {
170	add (pos, ubrk_getRuleStatus(iter));
171	pos = ubrk_next(iter);
172	}
173	return true;
174	}
175
176	static bool extractWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* whitespaces) {
177
178	const char* start = utf8;
179	const char* end = utf8 + utf8Units;
180	const char* ch = start;
181	while (ch < end) {
182	auto index = ch - start;
183	auto unichar = utf8_next(&ch, end);
184	if (u_isWhitespace(unichar)) {
185	auto ending = ch - start;
186	for (auto k = index; k < ending; ++k) {
187	whitespaces->emplace_back(k);
188	}
189	}
190	}
191	return true;
192	}
193
194	public:
195	~SkUnicode_icu() override { }
196
197	bool getBidiRegions(const char utf8[], int utf8Units, Direction dir, std::vector<BidiRegion>* results) override {
198	return extractBidi(utf8, utf8Units, dir, results);
199	}
200
201	bool getLineBreaks(const char utf8[], int utf8Units, std::vector<LineBreakBefore>* results) override {
202
203	return extractPositions(utf8, utf8Units, UBreakType::kLines,
204	[results](int pos, int status) {
205	results->emplace_back(pos,status == UBRK_LINE_HARD
206	? LineBreakType::kHardLineBreak
207	: LineBreakType::kSoftLineBreak);
208	});
209	}
210
211	bool getWords(const char utf8[], int utf8Units, std::vector<Position>* results) override {
212
213	// Convert to UTF16 since we want the results in utf16
214	std::unique_ptr<uint16_t[]> utf16;
215	auto utf16Units = convertUtf8ToUtf16(utf8, utf8Units, &utf16);
216	if (utf16Units < `0`) {
217	return false;
218	}
219
220	return extractWords(utf16.get(), utf16Units, results);
221	}
222
223	bool getGraphemes(const char utf8[], int utf8Units, std::vector<Position>* results) override {
224
225	return extractPositions(utf8, utf8Units, UBreakType::kGraphemes,
226	[results](int pos, int status) { results->emplace_back(pos);
227	});
228	}
229
230	bool getWhitespaces(const char utf8[], int utf8Units, std::vector<Position>* results) override {
231
232	return extractWhitespaces(utf8, utf8Units, results);
233	}
234
235	void reorderVisual(const BidiLevel runLevels[], int levelsCount, int32_t logicalFromVisual[]) override {
236	ubidi_reorderVisual(runLevels, levelsCount, logicalFromVisual);
237	}
238	};
239
240	std::unique_ptr<SkUnicode> SkUnicode::Make() { return std::make_unique<SkUnicode_icu>(); }
241
242	} // namespace skia
243
244

Browse the source code of engine/third_party/skia/modules/skshaper/src/SkUnicode_icu.cpp