1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4********************************************************************************
5* Copyright (C) 1996-2015, International Business Machines
6* Corporation and others. All Rights Reserved.
7********************************************************************************
8*/
9
10#include "unicode/utypes.h"
11
12#if !UCONFIG_NO_BREAK_ITERATION
13
14#include "unicode/ubrk.h"
15
16#include "unicode/brkiter.h"
17#include "unicode/uloc.h"
18#include "unicode/ustring.h"
19#include "unicode/uchriter.h"
20#include "unicode/rbbi.h"
21#include "rbbirb.h"
22#include "uassert.h"
23#include "cmemory.h"
24
25U_NAMESPACE_USE
26
27//------------------------------------------------------------------------------
28//
29// ubrk_open Create a canned type of break iterator based on type (word, line, etc.)
30// and locale.
31//
32//------------------------------------------------------------------------------
33U_CAPI UBreakIterator* U_EXPORT2
34ubrk_open(UBreakIteratorType type,
35 const char *locale,
36 const char16_t *text,
37 int32_t textLength,
38 UErrorCode *status)
39{
40
41 if(U_FAILURE(*status)) return 0;
42
43 BreakIterator *result = 0;
44
45 switch(type) {
46
47 case UBRK_CHARACTER:
48 result = BreakIterator::createCharacterInstance(Locale(locale), *status);
49 break;
50
51 case UBRK_WORD:
52 result = BreakIterator::createWordInstance(Locale(locale), *status);
53 break;
54
55 case UBRK_LINE:
56 result = BreakIterator::createLineInstance(Locale(locale), *status);
57 break;
58
59 case UBRK_SENTENCE:
60 result = BreakIterator::createSentenceInstance(Locale(locale), *status);
61 break;
62
63 case UBRK_TITLE:
64 result = BreakIterator::createTitleInstance(Locale(locale), *status);
65 break;
66
67 default:
68 *status = U_ILLEGAL_ARGUMENT_ERROR;
69 }
70
71 // check for allocation error
72 if (U_FAILURE(*status)) {
73 return 0;
74 }
75 if(result == 0) {
76 *status = U_MEMORY_ALLOCATION_ERROR;
77 return 0;
78 }
79
80
81 UBreakIterator *uBI = (UBreakIterator *)result;
82 if (text != nullptr) {
83 ubrk_setText(uBI, text, textLength, status);
84 }
85 return uBI;
86}
87
88
89
90//------------------------------------------------------------------------------
91//
92// ubrk_openRules open a break iterator from a set of break rules.
93// Invokes the rule builder.
94//
95//------------------------------------------------------------------------------
96U_CAPI UBreakIterator* U_EXPORT2
97ubrk_openRules( const char16_t *rules,
98 int32_t rulesLength,
99 const char16_t *text,
100 int32_t textLength,
101 UParseError *parseErr,
102 UErrorCode *status) {
103
104 if (status == nullptr || U_FAILURE(*status)){
105 return 0;
106 }
107
108 BreakIterator *result = 0;
109 UnicodeString ruleString(rules, rulesLength);
110 result = RBBIRuleBuilder::createRuleBasedBreakIterator(ruleString, parseErr, *status);
111 if(U_FAILURE(*status)) {
112 return 0;
113 }
114
115 UBreakIterator *uBI = (UBreakIterator *)result;
116 if (text != nullptr) {
117 ubrk_setText(uBI, text, textLength, status);
118 }
119 return uBI;
120}
121
122
123U_CAPI UBreakIterator* U_EXPORT2
124ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
125 const char16_t * text, int32_t textLength,
126 UErrorCode * status)
127{
128 if (U_FAILURE(*status)) {
129 return nullptr;
130 }
131 if (rulesLength < 0) {
132 *status = U_ILLEGAL_ARGUMENT_ERROR;
133 return nullptr;
134 }
135 LocalPointer<RuleBasedBreakIterator> lpRBBI(new RuleBasedBreakIterator(binaryRules, rulesLength, *status), *status);
136 if (U_FAILURE(*status)) {
137 return nullptr;
138 }
139 UBreakIterator *uBI = reinterpret_cast<UBreakIterator *>(lpRBBI.orphan());
140 if (text != nullptr) {
141 ubrk_setText(uBI, text, textLength, status);
142 }
143 return uBI;
144}
145
146
147U_CAPI UBreakIterator * U_EXPORT2
148ubrk_safeClone(
149 const UBreakIterator *bi,
150 void * /*stackBuffer*/,
151 int32_t *pBufferSize,
152 UErrorCode *status)
153{
154 if (status == nullptr || U_FAILURE(*status)){
155 return nullptr;
156 }
157 if (bi == nullptr) {
158 *status = U_ILLEGAL_ARGUMENT_ERROR;
159 return nullptr;
160 }
161 if (pBufferSize != nullptr) {
162 int32_t inputSize = *pBufferSize;
163 *pBufferSize = 1;
164 if (inputSize == 0) {
165 return nullptr; // preflighting for deprecated functionality
166 }
167 }
168 BreakIterator *newBI = ((BreakIterator *)bi)->clone();
169 if (newBI == nullptr) {
170 *status = U_MEMORY_ALLOCATION_ERROR;
171 } else if (pBufferSize != nullptr) {
172 *status = U_SAFECLONE_ALLOCATED_WARNING;
173 }
174 return (UBreakIterator *)newBI;
175}
176
177U_CAPI UBreakIterator * U_EXPORT2
178ubrk_clone(const UBreakIterator *bi, UErrorCode *status) {
179 return ubrk_safeClone(bi, nullptr, nullptr, status);
180}
181
182
183U_CAPI void U_EXPORT2
184ubrk_close(UBreakIterator *bi)
185{
186 delete (BreakIterator *)bi;
187}
188
189U_CAPI void U_EXPORT2
190ubrk_setText(UBreakIterator* bi,
191 const char16_t* text,
192 int32_t textLength,
193 UErrorCode* status)
194{
195 UText ut = UTEXT_INITIALIZER;
196 utext_openUChars(&ut, text, textLength, status);
197 ((BreakIterator*)bi)->setText(&ut, *status);
198 // A stack allocated UText wrapping a char16_t * string
199 // can be dumped without explicitly closing it.
200}
201
202
203
204U_CAPI void U_EXPORT2
205ubrk_setUText(UBreakIterator *bi,
206 UText *text,
207 UErrorCode *status)
208{
209 ((BreakIterator*)bi)->setText(text, *status);
210}
211
212
213
214
215
216U_CAPI int32_t U_EXPORT2
217ubrk_current(const UBreakIterator *bi)
218{
219
220 return ((BreakIterator*)bi)->current();
221}
222
223U_CAPI int32_t U_EXPORT2
224ubrk_next(UBreakIterator *bi)
225{
226
227 return ((BreakIterator*)bi)->next();
228}
229
230U_CAPI int32_t U_EXPORT2
231ubrk_previous(UBreakIterator *bi)
232{
233
234 return ((BreakIterator*)bi)->previous();
235}
236
237U_CAPI int32_t U_EXPORT2
238ubrk_first(UBreakIterator *bi)
239{
240
241 return ((BreakIterator*)bi)->first();
242}
243
244U_CAPI int32_t U_EXPORT2
245ubrk_last(UBreakIterator *bi)
246{
247
248 return ((BreakIterator*)bi)->last();
249}
250
251U_CAPI int32_t U_EXPORT2
252ubrk_preceding(UBreakIterator *bi,
253 int32_t offset)
254{
255
256 return ((BreakIterator*)bi)->preceding(offset);
257}
258
259U_CAPI int32_t U_EXPORT2
260ubrk_following(UBreakIterator *bi,
261 int32_t offset)
262{
263
264 return ((BreakIterator*)bi)->following(offset);
265}
266
267U_CAPI const char* U_EXPORT2
268ubrk_getAvailable(int32_t index)
269{
270
271 return uloc_getAvailable(index);
272}
273
274U_CAPI int32_t U_EXPORT2
275ubrk_countAvailable()
276{
277
278 return uloc_countAvailable();
279}
280
281
282U_CAPI UBool U_EXPORT2
283ubrk_isBoundary(UBreakIterator *bi, int32_t offset)
284{
285 return ((BreakIterator*)bi)->isBoundary(offset);
286}
287
288
289U_CAPI int32_t U_EXPORT2
290ubrk_getRuleStatus(UBreakIterator *bi)
291{
292 return ((BreakIterator*)bi)->getRuleStatus();
293}
294
295U_CAPI int32_t U_EXPORT2
296ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status)
297{
298 return ((BreakIterator*)bi)->getRuleStatusVec(fillInVec, capacity, *status);
299}
300
301
302U_CAPI const char* U_EXPORT2
303ubrk_getLocaleByType(const UBreakIterator *bi,
304 ULocDataLocaleType type,
305 UErrorCode* status)
306{
307 if (bi == nullptr) {
308 if (U_SUCCESS(*status)) {
309 *status = U_ILLEGAL_ARGUMENT_ERROR;
310 }
311 return nullptr;
312 }
313 return ((BreakIterator*)bi)->getLocaleID(type, *status);
314}
315
316
317U_CAPI void U_EXPORT2
318ubrk_refreshUText(UBreakIterator *bi,
319 UText *text,
320 UErrorCode *status)
321{
322 BreakIterator *bii = reinterpret_cast<BreakIterator *>(bi);
323 bii->refreshInputText(text, *status);
324}
325
326U_CAPI int32_t U_EXPORT2
327ubrk_getBinaryRules(UBreakIterator *bi,
328 uint8_t * binaryRules, int32_t rulesCapacity,
329 UErrorCode * status)
330{
331 if (U_FAILURE(*status)) {
332 return 0;
333 }
334 if ((binaryRules == nullptr && rulesCapacity > 0) || rulesCapacity < 0) {
335 *status = U_ILLEGAL_ARGUMENT_ERROR;
336 return 0;
337 }
338 RuleBasedBreakIterator* rbbi;
339 if ((rbbi = dynamic_cast<RuleBasedBreakIterator*>(reinterpret_cast<BreakIterator*>(bi))) == nullptr) {
340 *status = U_ILLEGAL_ARGUMENT_ERROR;
341 return 0;
342 }
343 uint32_t rulesLength;
344 const uint8_t * returnedRules = rbbi->getBinaryRules(rulesLength);
345 if (rulesLength > INT32_MAX) {
346 *status = U_INDEX_OUTOFBOUNDS_ERROR;
347 return 0;
348 }
349 if (binaryRules != nullptr) { // if not preflighting
350 // Here we know rulesLength <= INT32_MAX and rulesCapacity >= 0, can cast safely
351 if ((int32_t)rulesLength > rulesCapacity) {
352 *status = U_BUFFER_OVERFLOW_ERROR;
353 } else {
354 uprv_memcpy(binaryRules, returnedRules, rulesLength);
355 }
356 }
357 return (int32_t)rulesLength;
358}
359
360
361#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
362