| 1 | /**************************************************************************/ |
| 2 | /* optimized_translation.cpp */ |
| 3 | /**************************************************************************/ |
| 4 | /* This file is part of: */ |
| 5 | /* GODOT ENGINE */ |
| 6 | /* https://godotengine.org */ |
| 7 | /**************************************************************************/ |
| 8 | /* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */ |
| 9 | /* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */ |
| 10 | /* */ |
| 11 | /* Permission is hereby granted, free of charge, to any person obtaining */ |
| 12 | /* a copy of this software and associated documentation files (the */ |
| 13 | /* "Software"), to deal in the Software without restriction, including */ |
| 14 | /* without limitation the rights to use, copy, modify, merge, publish, */ |
| 15 | /* distribute, sublicense, and/or sell copies of the Software, and to */ |
| 16 | /* permit persons to whom the Software is furnished to do so, subject to */ |
| 17 | /* the following conditions: */ |
| 18 | /* */ |
| 19 | /* The above copyright notice and this permission notice shall be */ |
| 20 | /* included in all copies or substantial portions of the Software. */ |
| 21 | /* */ |
| 22 | /* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ |
| 23 | /* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ |
| 24 | /* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ |
| 25 | /* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ |
| 26 | /* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ |
| 27 | /* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ |
| 28 | /* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ |
| 29 | /**************************************************************************/ |
| 30 | |
| 31 | #include "optimized_translation.h" |
| 32 | |
| 33 | #include "core/templates/pair.h" |
| 34 | |
| 35 | extern "C" { |
| 36 | #include "thirdparty/misc/smaz.h" |
| 37 | } |
| 38 | |
| 39 | struct CompressedString { |
| 40 | int orig_len = 0; |
| 41 | CharString compressed; |
| 42 | int offset = 0; |
| 43 | }; |
| 44 | |
| 45 | void OptimizedTranslation::generate(const Ref<Translation> &p_from) { |
| 46 | // This method compresses a Translation instance. |
| 47 | // Right now, it doesn't handle context or plurals, so Translation subclasses using plurals or context (i.e TranslationPO) shouldn't be compressed. |
| 48 | #ifdef TOOLS_ENABLED |
| 49 | ERR_FAIL_COND(p_from.is_null()); |
| 50 | List<StringName> keys; |
| 51 | p_from->get_message_list(&keys); |
| 52 | |
| 53 | int size = Math::larger_prime(keys.size()); |
| 54 | |
| 55 | Vector<Vector<Pair<int, CharString>>> buckets; |
| 56 | Vector<HashMap<uint32_t, int>> table; |
| 57 | Vector<uint32_t> hfunc_table; |
| 58 | Vector<CompressedString> compressed; |
| 59 | |
| 60 | table.resize(size); |
| 61 | hfunc_table.resize(size); |
| 62 | buckets.resize(size); |
| 63 | compressed.resize(keys.size()); |
| 64 | |
| 65 | int idx = 0; |
| 66 | int total_compression_size = 0; |
| 67 | |
| 68 | for (const StringName &E : keys) { |
| 69 | //hash string |
| 70 | CharString cs = E.operator String().utf8(); |
| 71 | uint32_t h = hash(0, cs.get_data()); |
| 72 | Pair<int, CharString> p; |
| 73 | p.first = idx; |
| 74 | p.second = cs; |
| 75 | buckets.write[h % size].push_back(p); |
| 76 | |
| 77 | //compress string |
| 78 | CharString src_s = p_from->get_message(E).operator String().utf8(); |
| 79 | CompressedString ps; |
| 80 | ps.orig_len = src_s.size(); |
| 81 | ps.offset = total_compression_size; |
| 82 | |
| 83 | if (ps.orig_len != 0) { |
| 84 | CharString dst_s; |
| 85 | dst_s.resize(src_s.size()); |
| 86 | int ret = smaz_compress(src_s.get_data(), src_s.size(), dst_s.ptrw(), src_s.size()); |
| 87 | if (ret >= src_s.size()) { |
| 88 | //if compressed is larger than original, just use original |
| 89 | ps.orig_len = src_s.size(); |
| 90 | ps.compressed = src_s; |
| 91 | } else { |
| 92 | dst_s.resize(ret); |
| 93 | //ps.orig_len=; |
| 94 | ps.compressed = dst_s; |
| 95 | } |
| 96 | } else { |
| 97 | ps.orig_len = 1; |
| 98 | ps.compressed.resize(1); |
| 99 | ps.compressed[0] = 0; |
| 100 | } |
| 101 | |
| 102 | compressed.write[idx] = ps; |
| 103 | total_compression_size += ps.compressed.size(); |
| 104 | idx++; |
| 105 | } |
| 106 | |
| 107 | int bucket_table_size = 0; |
| 108 | |
| 109 | for (int i = 0; i < size; i++) { |
| 110 | const Vector<Pair<int, CharString>> &b = buckets[i]; |
| 111 | HashMap<uint32_t, int> &t = table.write[i]; |
| 112 | |
| 113 | if (b.size() == 0) { |
| 114 | continue; |
| 115 | } |
| 116 | |
| 117 | int d = 1; |
| 118 | int item = 0; |
| 119 | |
| 120 | while (item < b.size()) { |
| 121 | uint32_t slot = hash(d, b[item].second.get_data()); |
| 122 | if (t.has(slot)) { |
| 123 | item = 0; |
| 124 | d++; |
| 125 | t.clear(); |
| 126 | } else { |
| 127 | t[slot] = b[item].first; |
| 128 | item++; |
| 129 | } |
| 130 | } |
| 131 | |
| 132 | hfunc_table.write[i] = d; |
| 133 | bucket_table_size += 2 + b.size() * 4; |
| 134 | } |
| 135 | |
| 136 | ERR_FAIL_COND(bucket_table_size == 0); |
| 137 | |
| 138 | hash_table.resize(size); |
| 139 | bucket_table.resize(bucket_table_size); |
| 140 | |
| 141 | int *htwb = hash_table.ptrw(); |
| 142 | int *btwb = bucket_table.ptrw(); |
| 143 | |
| 144 | uint32_t *htw = (uint32_t *)&htwb[0]; |
| 145 | uint32_t *btw = (uint32_t *)&btwb[0]; |
| 146 | |
| 147 | int btindex = 0; |
| 148 | |
| 149 | for (int i = 0; i < size; i++) { |
| 150 | const HashMap<uint32_t, int> &t = table[i]; |
| 151 | if (t.size() == 0) { |
| 152 | htw[i] = 0xFFFFFFFF; //nothing |
| 153 | continue; |
| 154 | } |
| 155 | |
| 156 | htw[i] = btindex; |
| 157 | btw[btindex++] = t.size(); |
| 158 | btw[btindex++] = hfunc_table[i]; |
| 159 | |
| 160 | for (const KeyValue<uint32_t, int> &E : t) { |
| 161 | btw[btindex++] = E.key; |
| 162 | btw[btindex++] = compressed[E.value].offset; |
| 163 | btw[btindex++] = compressed[E.value].compressed.size(); |
| 164 | btw[btindex++] = compressed[E.value].orig_len; |
| 165 | } |
| 166 | } |
| 167 | |
| 168 | strings.resize(total_compression_size); |
| 169 | uint8_t *cw = strings.ptrw(); |
| 170 | |
| 171 | for (int i = 0; i < compressed.size(); i++) { |
| 172 | memcpy(&cw[compressed[i].offset], compressed[i].compressed.get_data(), compressed[i].compressed.size()); |
| 173 | } |
| 174 | |
| 175 | ERR_FAIL_COND(btindex != bucket_table_size); |
| 176 | set_locale(p_from->get_locale()); |
| 177 | |
| 178 | #endif |
| 179 | } |
| 180 | |
| 181 | bool OptimizedTranslation::_set(const StringName &p_name, const Variant &p_value) { |
| 182 | String prop_name = p_name.operator String(); |
| 183 | if (prop_name == "hash_table" ) { |
| 184 | hash_table = p_value; |
| 185 | } else if (prop_name == "bucket_table" ) { |
| 186 | bucket_table = p_value; |
| 187 | } else if (prop_name == "strings" ) { |
| 188 | strings = p_value; |
| 189 | } else if (prop_name == "load_from" ) { |
| 190 | generate(p_value); |
| 191 | } else { |
| 192 | return false; |
| 193 | } |
| 194 | |
| 195 | return true; |
| 196 | } |
| 197 | |
| 198 | bool OptimizedTranslation::_get(const StringName &p_name, Variant &r_ret) const { |
| 199 | String prop_name = p_name.operator String(); |
| 200 | if (prop_name == "hash_table" ) { |
| 201 | r_ret = hash_table; |
| 202 | } else if (prop_name == "bucket_table" ) { |
| 203 | r_ret = bucket_table; |
| 204 | } else if (prop_name == "strings" ) { |
| 205 | r_ret = strings; |
| 206 | } else { |
| 207 | return false; |
| 208 | } |
| 209 | |
| 210 | return true; |
| 211 | } |
| 212 | |
| 213 | StringName OptimizedTranslation::get_message(const StringName &p_src_text, const StringName &p_context) const { |
| 214 | // p_context passed in is ignore. The use of context is not yet supported in OptimizedTranslation. |
| 215 | |
| 216 | int htsize = hash_table.size(); |
| 217 | |
| 218 | if (htsize == 0) { |
| 219 | return StringName(); |
| 220 | } |
| 221 | |
| 222 | CharString str = p_src_text.operator String().utf8(); |
| 223 | uint32_t h = hash(0, str.get_data()); |
| 224 | |
| 225 | const int *htr = hash_table.ptr(); |
| 226 | const uint32_t *htptr = (const uint32_t *)&htr[0]; |
| 227 | const int *btr = bucket_table.ptr(); |
| 228 | const uint32_t *btptr = (const uint32_t *)&btr[0]; |
| 229 | const uint8_t *sr = strings.ptr(); |
| 230 | const char *sptr = (const char *)&sr[0]; |
| 231 | |
| 232 | uint32_t p = htptr[h % htsize]; |
| 233 | |
| 234 | if (p == 0xFFFFFFFF) { |
| 235 | return StringName(); //nothing |
| 236 | } |
| 237 | |
| 238 | const Bucket &bucket = *(const Bucket *)&btptr[p]; |
| 239 | |
| 240 | h = hash(bucket.func, str.get_data()); |
| 241 | |
| 242 | int idx = -1; |
| 243 | |
| 244 | for (int i = 0; i < bucket.size; i++) { |
| 245 | if (bucket.elem[i].key == h) { |
| 246 | idx = i; |
| 247 | break; |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | if (idx == -1) { |
| 252 | return StringName(); |
| 253 | } |
| 254 | |
| 255 | if (bucket.elem[idx].comp_size == bucket.elem[idx].uncomp_size) { |
| 256 | String rstr; |
| 257 | rstr.parse_utf8(&sptr[bucket.elem[idx].str_offset], bucket.elem[idx].uncomp_size); |
| 258 | |
| 259 | return rstr; |
| 260 | } else { |
| 261 | CharString uncomp; |
| 262 | uncomp.resize(bucket.elem[idx].uncomp_size + 1); |
| 263 | smaz_decompress(&sptr[bucket.elem[idx].str_offset], bucket.elem[idx].comp_size, uncomp.ptrw(), bucket.elem[idx].uncomp_size); |
| 264 | String rstr; |
| 265 | rstr.parse_utf8(uncomp.get_data()); |
| 266 | return rstr; |
| 267 | } |
| 268 | } |
| 269 | |
| 270 | Vector<String> OptimizedTranslation::get_translated_message_list() const { |
| 271 | Vector<String> msgs; |
| 272 | |
| 273 | const int *htr = hash_table.ptr(); |
| 274 | const uint32_t *htptr = (const uint32_t *)&htr[0]; |
| 275 | const int *btr = bucket_table.ptr(); |
| 276 | const uint32_t *btptr = (const uint32_t *)&btr[0]; |
| 277 | const uint8_t *sr = strings.ptr(); |
| 278 | const char *sptr = (const char *)&sr[0]; |
| 279 | |
| 280 | for (int i = 0; i < hash_table.size(); i++) { |
| 281 | uint32_t p = htptr[i]; |
| 282 | if (p != 0xFFFFFFFF) { |
| 283 | const Bucket &bucket = *(const Bucket *)&btptr[p]; |
| 284 | for (int j = 0; j < bucket.size; j++) { |
| 285 | if (bucket.elem[j].comp_size == bucket.elem[j].uncomp_size) { |
| 286 | String rstr; |
| 287 | rstr.parse_utf8(&sptr[bucket.elem[j].str_offset], bucket.elem[j].uncomp_size); |
| 288 | msgs.push_back(rstr); |
| 289 | } else { |
| 290 | CharString uncomp; |
| 291 | uncomp.resize(bucket.elem[j].uncomp_size + 1); |
| 292 | smaz_decompress(&sptr[bucket.elem[j].str_offset], bucket.elem[j].comp_size, uncomp.ptrw(), bucket.elem[j].uncomp_size); |
| 293 | String rstr; |
| 294 | rstr.parse_utf8(uncomp.get_data()); |
| 295 | msgs.push_back(rstr); |
| 296 | } |
| 297 | } |
| 298 | } |
| 299 | } |
| 300 | return msgs; |
| 301 | } |
| 302 | |
| 303 | StringName OptimizedTranslation::get_plural_message(const StringName &p_src_text, const StringName &p_plural_text, int p_n, const StringName &p_context) const { |
| 304 | // The use of plurals translation is not yet supported in OptimizedTranslation. |
| 305 | return get_message(p_src_text, p_context); |
| 306 | } |
| 307 | |
| 308 | void OptimizedTranslation::_get_property_list(List<PropertyInfo> *p_list) const { |
| 309 | p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "hash_table" )); |
| 310 | p_list->push_back(PropertyInfo(Variant::PACKED_INT32_ARRAY, "bucket_table" )); |
| 311 | p_list->push_back(PropertyInfo(Variant::PACKED_BYTE_ARRAY, "strings" )); |
| 312 | p_list->push_back(PropertyInfo(Variant::OBJECT, "load_from" , PROPERTY_HINT_RESOURCE_TYPE, "Translation" , PROPERTY_USAGE_EDITOR)); |
| 313 | } |
| 314 | |
| 315 | void OptimizedTranslation::_bind_methods() { |
| 316 | ClassDB::bind_method(D_METHOD("generate" , "from" ), &OptimizedTranslation::generate); |
| 317 | } |
| 318 | |