1/**************************************************************************/
2/* script_iterator.cpp */
3/**************************************************************************/
4/* This file is part of: */
5/* GODOT ENGINE */
6/* https://godotengine.org */
7/**************************************************************************/
8/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
9/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
10/* */
11/* Permission is hereby granted, free of charge, to any person obtaining */
12/* a copy of this software and associated documentation files (the */
13/* "Software"), to deal in the Software without restriction, including */
14/* without limitation the rights to use, copy, modify, merge, publish, */
15/* distribute, sublicense, and/or sell copies of the Software, and to */
16/* permit persons to whom the Software is furnished to do so, subject to */
17/* the following conditions: */
18/* */
19/* The above copyright notice and this permission notice shall be */
20/* included in all copies or substantial portions of the Software. */
21/* */
22/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
23/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
24/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
25/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
26/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
27/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
28/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
29/**************************************************************************/
30
31#include "script_iterator.h"
32
33// This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp
34
35bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) {
36 return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two;
37}
38
39ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) {
40 struct ParenStackEntry {
41 int pair_index;
42 UScriptCode script_code;
43 };
44
45 if (p_start >= p_length) {
46 p_start = p_length - 1;
47 }
48
49 if (p_start < 0) {
50 p_start = 0;
51 }
52
53 int paren_size = PAREN_STACK_DEPTH;
54 ParenStackEntry *paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry)));
55
56 int script_start;
57 int script_end = p_start;
58 UScriptCode script_code;
59 int paren_sp = -1;
60 int start_sp = paren_sp;
61 UErrorCode err = U_ZERO_ERROR;
62 const char32_t *str = p_string.ptr();
63
64 do {
65 script_code = USCRIPT_COMMON;
66 for (script_start = script_end; script_end < p_length; script_end++) {
67 UChar32 ch = str[script_end];
68 UScriptCode sc = uscript_getScript(ch, &err);
69 if (U_FAILURE(err)) {
70 memfree(paren_stack);
71 ERR_FAIL_MSG(u_errorName(err));
72 }
73 if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) {
74 if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) {
75 // If it's an open character, push it onto the stack.
76 paren_sp++;
77 if (unlikely(paren_sp >= paren_size)) {
78 // If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text.
79 paren_size += PAREN_STACK_DEPTH;
80 paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry)));
81 }
82 paren_stack[paren_sp].pair_index = ch;
83 paren_stack[paren_sp].script_code = script_code;
84 } else if (paren_sp >= 0) {
85 // If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped.
86 UChar32 paired_ch = u_getBidiPairedBracket(ch);
87 while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) {
88 paren_sp -= 1;
89 }
90 if (paren_sp < start_sp) {
91 start_sp = paren_sp;
92 }
93 if (paren_sp >= 0) {
94 sc = paren_stack[paren_sp].script_code;
95 }
96 }
97 }
98
99 if (same_script(script_code, sc)) {
100 if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) {
101 script_code = sc;
102 // Now that we have a final script code, fix any open characters we pushed before we knew the script code.
103 while (start_sp < paren_sp) {
104 paren_stack[++start_sp].script_code = script_code;
105 }
106 }
107 if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) {
108 // If this character is a close paired character pop the matching open character from the stack.
109 paren_sp -= 1;
110 if (start_sp >= 0) {
111 start_sp -= 1;
112 }
113 }
114 } else {
115 break;
116 }
117 }
118
119 ScriptRange rng;
120 rng.script = hb_icu_script_to_script(script_code);
121 rng.start = script_start;
122 rng.end = script_end;
123
124 script_ranges.push_back(rng);
125 } while (script_end < p_length);
126
127 memfree(paren_stack);
128}
129