1 | /**************************************************************************/ |
2 | /* script_iterator.cpp */ |
3 | /**************************************************************************/ |
4 | /* This file is part of: */ |
5 | /* GODOT ENGINE */ |
6 | /* https://godotengine.org */ |
7 | /**************************************************************************/ |
8 | /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ |
9 | /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ |
10 | /* */ |
11 | /* Permission is hereby granted, free of charge, to any person obtaining */ |
12 | /* a copy of this software and associated documentation files (the */ |
13 | /* "Software"), to deal in the Software without restriction, including */ |
14 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
15 | /* distribute, sublicense, and/or sell copies of the Software, and to */ |
16 | /* permit persons to whom the Software is furnished to do so, subject to */ |
17 | /* the following conditions: */ |
18 | /* */ |
19 | /* The above copyright notice and this permission notice shall be */ |
20 | /* included in all copies or substantial portions of the Software. */ |
21 | /* */ |
22 | /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ |
23 | /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ |
24 | /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ |
25 | /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ |
26 | /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ |
27 | /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ |
28 | /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
29 | /**************************************************************************/ |
30 | |
31 | #include "script_iterator.h" |
32 | |
33 | // This implementation is derived from ICU: icu4c/source/extra/scrptrun/scrptrun.cpp |
34 | |
35 | bool ScriptIterator::same_script(int32_t p_script_one, int32_t p_script_two) { |
36 | return p_script_one <= USCRIPT_INHERITED || p_script_two <= USCRIPT_INHERITED || p_script_one == p_script_two; |
37 | } |
38 | |
39 | ScriptIterator::ScriptIterator(const String &p_string, int p_start, int p_length) { |
40 | struct ParenStackEntry { |
41 | int pair_index; |
42 | UScriptCode script_code; |
43 | }; |
44 | |
45 | if (p_start >= p_length) { |
46 | p_start = p_length - 1; |
47 | } |
48 | |
49 | if (p_start < 0) { |
50 | p_start = 0; |
51 | } |
52 | |
53 | int paren_size = PAREN_STACK_DEPTH; |
54 | ParenStackEntry *paren_stack = static_cast<ParenStackEntry *>(memalloc(paren_size * sizeof(ParenStackEntry))); |
55 | |
56 | int script_start; |
57 | int script_end = p_start; |
58 | UScriptCode script_code; |
59 | int paren_sp = -1; |
60 | int start_sp = paren_sp; |
61 | UErrorCode err = U_ZERO_ERROR; |
62 | const char32_t *str = p_string.ptr(); |
63 | |
64 | do { |
65 | script_code = USCRIPT_COMMON; |
66 | for (script_start = script_end; script_end < p_length; script_end++) { |
67 | UChar32 ch = str[script_end]; |
68 | UScriptCode sc = uscript_getScript(ch, &err); |
69 | if (U_FAILURE(err)) { |
70 | memfree(paren_stack); |
71 | ERR_FAIL_MSG(u_errorName(err)); |
72 | } |
73 | if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) != U_BPT_NONE) { |
74 | if (u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_OPEN) { |
75 | // If it's an open character, push it onto the stack. |
76 | paren_sp++; |
77 | if (unlikely(paren_sp >= paren_size)) { |
78 | // If the stack is full, allocate more space to handle deeply nested parentheses. This is unlikely to happen with any real text. |
79 | paren_size += PAREN_STACK_DEPTH; |
80 | paren_stack = static_cast<ParenStackEntry *>(memrealloc(paren_stack, paren_size * sizeof(ParenStackEntry))); |
81 | } |
82 | paren_stack[paren_sp].pair_index = ch; |
83 | paren_stack[paren_sp].script_code = script_code; |
84 | } else if (paren_sp >= 0) { |
85 | // If it's a close character, find the matching open on the stack, and use that script code. Any non-matching open characters above it on the stack will be popped. |
86 | UChar32 paired_ch = u_getBidiPairedBracket(ch); |
87 | while (paren_sp >= 0 && paren_stack[paren_sp].pair_index != paired_ch) { |
88 | paren_sp -= 1; |
89 | } |
90 | if (paren_sp < start_sp) { |
91 | start_sp = paren_sp; |
92 | } |
93 | if (paren_sp >= 0) { |
94 | sc = paren_stack[paren_sp].script_code; |
95 | } |
96 | } |
97 | } |
98 | |
99 | if (same_script(script_code, sc)) { |
100 | if (script_code <= USCRIPT_INHERITED && sc > USCRIPT_INHERITED) { |
101 | script_code = sc; |
102 | // Now that we have a final script code, fix any open characters we pushed before we knew the script code. |
103 | while (start_sp < paren_sp) { |
104 | paren_stack[++start_sp].script_code = script_code; |
105 | } |
106 | } |
107 | if ((u_getIntPropertyValue(ch, UCHAR_BIDI_PAIRED_BRACKET_TYPE) == U_BPT_CLOSE) && paren_sp >= 0) { |
108 | // If this character is a close paired character pop the matching open character from the stack. |
109 | paren_sp -= 1; |
110 | if (start_sp >= 0) { |
111 | start_sp -= 1; |
112 | } |
113 | } |
114 | } else { |
115 | break; |
116 | } |
117 | } |
118 | |
119 | ScriptRange rng; |
120 | rng.script = hb_icu_script_to_script(script_code); |
121 | rng.start = script_start; |
122 | rng.end = script_end; |
123 | |
124 | script_ranges.push_back(rng); |
125 | } while (script_end < p_length); |
126 | |
127 | memfree(paren_stack); |
128 | } |
129 | |