1 | /* |
2 | * Copyright (C) 2020-2022 Roy Qu (royqh1979@gmail.com) |
3 | * |
4 | * This program is free software: you can redistribute it and/or modify |
5 | * it under the terms of the GNU General Public License as published by |
6 | * the Free Software Foundation, either version 3 of the License, or |
7 | * (at your option) any later version. |
8 | * |
9 | * This program is distributed in the hope that it will be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 | * GNU General Public License for more details. |
13 | * |
14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program. If not, see <https://www.gnu.org/licenses/>. |
16 | */ |
17 | #include "charsetinfo.h" |
18 | #include <QObject> |
19 | #include <memory> |
20 | #include <QMap> |
21 | #include <QSet> |
22 | #ifdef Q_OS_WIN |
23 | #include <windows.h> |
24 | #else |
25 | #include <langinfo.h> |
26 | #endif |
27 | |
// Global pointer to a CharsetInfoManager instance.
// It is defined here without an initializer, so as a namespace-scope object it
// starts out null; nothing in this file assigns it.
// NOTE(review): presumably set once during application start-up — confirm at the call site.
CharsetInfoManager* pCharsetInfoManager;
29 | |
30 | QByteArray CharsetInfoManager::getDefaultSystemEncoding() |
31 | { |
32 | #ifdef Q_OS_WIN |
33 | DWORD acp = GetACP(); |
34 | PCharsetInfo info = findCharsetByCodepage(acp); |
35 | if (info) { |
36 | return info->name; |
37 | } |
38 | return "unknown" ; |
39 | #else |
40 | return QByteArray(nl_langinfo(CODESET)); |
41 | #endif |
42 | } |
43 | |
44 | PCharsetInfo CharsetInfoManager::findCharsetByCodepage(int codepage) |
45 | { |
46 | foreach (const PCharsetInfo& info, mCodePages) { |
47 | if (info->codepage == codepage) |
48 | return info; |
49 | } |
50 | return PCharsetInfo(); |
51 | } |
52 | |
53 | QStringList CharsetInfoManager::languageNames() |
54 | { |
55 | QSet<QString> languages; |
56 | foreach (const PCharsetInfo& info, mCodePages) { |
57 | if (info->enabled) |
58 | languages.insert(info->language); |
59 | } |
60 | QStringList lst; |
61 | foreach (const QString& s, languages) |
62 | lst.append(s); |
63 | lst.sort(Qt::CaseInsensitive); |
64 | return lst; |
65 | } |
66 | |
67 | QList<PCharsetInfo> CharsetInfoManager::findCharsetsByLanguageName(const QString &languageName) |
68 | { |
69 | QList<PCharsetInfo> result; |
70 | foreach (const PCharsetInfo& info, mCodePages) { |
71 | if (info->enabled && info->language == languageName) |
72 | result.append(info); |
73 | } |
74 | std::sort(result.begin(),result.end(),[](const PCharsetInfo& info1,const PCharsetInfo& info2){ |
75 | return (info1->name < info2->name); |
76 | }); |
77 | return result; |
78 | } |
79 | |
80 | QList<PCharsetInfo> CharsetInfoManager::findCharsetByLocale(const QString &localeName) |
81 | { |
82 | QList<PCharsetInfo> result; |
83 | foreach (const PCharsetInfo& info, mCodePages) { |
84 | if (info->enabled && info->localeName == localeName) |
85 | result.append(info); |
86 | } |
87 | return result; |
88 | } |
89 | |
90 | QString CharsetInfoManager::findLanguageByCharsetName(const QString &encodingName) |
91 | { |
92 | |
93 | foreach (const PCharsetInfo& info, mCodePages) { |
94 | if (info->enabled && |
95 | QString::compare(info->name, encodingName, Qt::CaseInsensitive)==0) |
96 | return info->language; |
97 | } |
98 | return "Unknown" ; |
99 | } |
100 | |
101 | const QString &CharsetInfoManager::localeName() const |
102 | { |
103 | return mLocaleName; |
104 | } |
105 | |
106 | CharsetInfoManager::CharsetInfoManager(const QString& localeName): |
107 | QObject(), |
108 | mLocaleName(localeName) |
109 | { |
110 | mCodePages.append(std::make_shared<CharsetInfo>(37,"IBM037" ,"" ,"" ,false)); |
111 | mCodePages.append(std::make_shared<CharsetInfo>(437,"IBM437" ,"" ,"" ,false)); |
112 | mCodePages.append(std::make_shared<CharsetInfo>(500,"IBM500" ,"" ,"" ,false)); |
113 | mCodePages.append(std::make_shared<CharsetInfo>(708,"ASMO-708" ,"" ,"" ,false)); |
114 | mCodePages.append(std::make_shared<CharsetInfo>(709,"" ,"" ,"" ,false)); |
115 | mCodePages.append(std::make_shared<CharsetInfo>(710,"" ,"" ,"" ,false)); |
116 | mCodePages.append(std::make_shared<CharsetInfo>(720,"DOS-720" ,tr("Arabic" ),"" ,false)); |
117 | mCodePages.append(std::make_shared<CharsetInfo>(737,"ibm737" ,tr("Greek" ),"" ,false)); |
118 | mCodePages.append(std::make_shared<CharsetInfo>(775,"ibm775" ,tr("Baltic" ),"" ,false)); |
119 | mCodePages.append(std::make_shared<CharsetInfo>(850,"ibm850" ,tr("Western Europe" ),"" ,false)); |
120 | mCodePages.append(std::make_shared<CharsetInfo>(852,"ibm852" ,tr("Central Europe" ),"" ,false)); |
121 | mCodePages.append(std::make_shared<CharsetInfo>(855,"IBM855" ,tr("Cyrillic" ),"" ,false)); |
122 | mCodePages.append(std::make_shared<CharsetInfo>(857,"ibm857" ,tr("Turkish" ),"" ,false)); |
123 | mCodePages.append(std::make_shared<CharsetInfo>(858,"ibm858" ,tr("Western Europe" ),"" ,false)); |
124 | mCodePages.append(std::make_shared<CharsetInfo>(860,"IBM860" ,tr("Western Europe" ),"" ,false)); |
125 | mCodePages.append(std::make_shared<CharsetInfo>(861,"ibm861" ,tr("Northern Europe" ),"" ,false)); |
126 | mCodePages.append(std::make_shared<CharsetInfo>(862,"DOS-862" ,tr("Hebrew" ),"" ,false)); |
127 | mCodePages.append(std::make_shared<CharsetInfo>(863,"IBM863" ,tr("Western Europe" ),"" ,false)); |
128 | mCodePages.append(std::make_shared<CharsetInfo>(864,"IBM864" ,"" ,"" ,false)); |
129 | mCodePages.append(std::make_shared<CharsetInfo>(865,"IBM865" ,tr("Northern Europe" ),"" ,false)); |
130 | mCodePages.append(std::make_shared<CharsetInfo>(866,"cp866" ,tr("Cyrillic" ),"" ,false)); |
131 | mCodePages.append(std::make_shared<CharsetInfo>(869,"ibm869" ,tr("Greek" ),"" ,false)); |
132 | mCodePages.append(std::make_shared<CharsetInfo>(870,"IBM870" ,"" ,"" ,false)); |
133 | mCodePages.append(std::make_shared<CharsetInfo>(874,"tis-620" ,tr("Thai" ),"" ,true)); |
134 | mCodePages.append(std::make_shared<CharsetInfo>(875,"cp875" ,"" ,"" ,false)); |
135 | mCodePages.append(std::make_shared<CharsetInfo>(932,"shift_jis" ,tr("Japanese" ),"" ,true)); |
136 | mCodePages.append(std::make_shared<CharsetInfo>(936,"gbk" ,tr("Chinese" ),"zh_CN" ,true)); |
137 | mCodePages.append(std::make_shared<CharsetInfo>(949,"windows-949" ,tr("Korean" ),"" ,true)); |
138 | mCodePages.append(std::make_shared<CharsetInfo>(950,"big5" ,tr("Chinese" ),"" ,true)); |
139 | mCodePages.append(std::make_shared<CharsetInfo>(1026,"IBM1026" ,"" ,"" ,false)); |
140 | mCodePages.append(std::make_shared<CharsetInfo>(1047,"IBM01047" ,"" ,"" ,false)); |
141 | mCodePages.append(std::make_shared<CharsetInfo>(1140,"IBM01140" ,"" ,"" ,false)); |
142 | mCodePages.append(std::make_shared<CharsetInfo>(1141,"IBM01141" ,"" ,"" ,false)); |
143 | mCodePages.append(std::make_shared<CharsetInfo>(1142,"IBM01142" ,"" ,"" ,false)); |
144 | mCodePages.append(std::make_shared<CharsetInfo>(1143,"IBM01143" ,"" ,"" ,false)); |
145 | mCodePages.append(std::make_shared<CharsetInfo>(1144,"IBM01144" ,"" ,"" ,false)); |
146 | mCodePages.append(std::make_shared<CharsetInfo>(1145,"IBM01145" ,"" ,"" ,false)); |
147 | mCodePages.append(std::make_shared<CharsetInfo>(1146,"IBM01146" ,"" ,"" ,false)); |
148 | mCodePages.append(std::make_shared<CharsetInfo>(1147,"IBM01147" ,"" ,"" ,false)); |
149 | mCodePages.append(std::make_shared<CharsetInfo>(1148,"IBM01148" ,"" ,"" ,false)); |
150 | mCodePages.append(std::make_shared<CharsetInfo>(1149,"IBM01149" ,"" ,"" ,false)); |
151 | mCodePages.append(std::make_shared<CharsetInfo>(1200,"utf-16" ,"" ,"" ,false)); |
152 | mCodePages.append(std::make_shared<CharsetInfo>(1201,"unicodeFFFE" ,"" ,"" ,false)); |
153 | mCodePages.append(std::make_shared<CharsetInfo>(1250,"windows-1250" ,tr("Central Europe" ),"" ,true)); |
154 | mCodePages.append(std::make_shared<CharsetInfo>(1251,"windows-1251" ,tr("Cyrillic" ),"" ,true)); |
155 | mCodePages.append(std::make_shared<CharsetInfo>(1252,"windows-1252" ,tr("Western Europe" ),"" ,true)); |
156 | mCodePages.append(std::make_shared<CharsetInfo>(1253,"windows-1253" ,tr("Greek" ),"" ,true)); |
157 | mCodePages.append(std::make_shared<CharsetInfo>(1254,"windows-1254" ,tr("Turkish" ),"" ,true)); |
158 | mCodePages.append(std::make_shared<CharsetInfo>(1255,"windows-1255" ,tr("Hebrew" ),"" ,true)); |
159 | mCodePages.append(std::make_shared<CharsetInfo>(1256,"windows-1256" ,tr("Arabic" ),"" ,true)); |
160 | mCodePages.append(std::make_shared<CharsetInfo>(1257,"windows-1257" ,tr("Baltic" ),"" ,true)); |
161 | mCodePages.append(std::make_shared<CharsetInfo>(1258,"windows-1258" ,tr("Vietnamese" ),"" ,true)); |
162 | mCodePages.append(std::make_shared<CharsetInfo>(1361,"Johab" ,"" ,"" ,false)); |
163 | mCodePages.append(std::make_shared<CharsetInfo>(10000,"macintosh" ,tr("Cyrillic" ),"" ,true)); |
164 | mCodePages.append(std::make_shared<CharsetInfo>(10001,"x-mac-japanese" ,"" ,"" ,false)); |
165 | mCodePages.append(std::make_shared<CharsetInfo>(10002,"x-mac-chinesetrad" ,"" ,"" ,false)); |
166 | mCodePages.append(std::make_shared<CharsetInfo>(10003,"x-mac-korean" ,"" ,"" ,false)); |
167 | mCodePages.append(std::make_shared<CharsetInfo>(10004,"x-mac-arabic" ,"" ,"" ,false)); |
168 | mCodePages.append(std::make_shared<CharsetInfo>(10005,"x-mac-hebrew" ,"" ,"" ,false)); |
169 | mCodePages.append(std::make_shared<CharsetInfo>(10006,"x-mac-greek" ,"" ,"" ,false)); |
170 | mCodePages.append(std::make_shared<CharsetInfo>(10007,"x-mac-cyrillic" ,"" ,"" ,false)); |
171 | mCodePages.append(std::make_shared<CharsetInfo>(10008,"x-mac-chinesesimp" ,"" ,"" ,false)); |
172 | mCodePages.append(std::make_shared<CharsetInfo>(10010,"x-mac-romanian" ,"" ,"" ,false)); |
173 | mCodePages.append(std::make_shared<CharsetInfo>(10017,"x-mac-ukrainian" ,"" ,"" ,false)); |
174 | mCodePages.append(std::make_shared<CharsetInfo>(10021,"x-mac-thai" ,"" ,"" ,false)); |
175 | mCodePages.append(std::make_shared<CharsetInfo>(10029,"x-mac-ce" ,"" ,"" ,false)); |
176 | mCodePages.append(std::make_shared<CharsetInfo>(10079,"x-mac-icelandic" ,"" ,"" ,false)); |
177 | mCodePages.append(std::make_shared<CharsetInfo>(10081,"x-mac-turkish" ,"" ,"" ,false)); |
178 | mCodePages.append(std::make_shared<CharsetInfo>(10082,"x-mac-croatian" ,"" ,"" ,false)); |
179 | mCodePages.append(std::make_shared<CharsetInfo>(12000,"utf-32" ,"" ,"" ,false)); |
180 | mCodePages.append(std::make_shared<CharsetInfo>(12001,"utf-32BE" ,"" ,"" ,false)); |
181 | mCodePages.append(std::make_shared<CharsetInfo>(20000,"x-Chinese_CNS" ,"" ,"" ,false)); |
182 | mCodePages.append(std::make_shared<CharsetInfo>(20001,"x-cp20001" ,"" ,"" ,false)); |
183 | mCodePages.append(std::make_shared<CharsetInfo>(20002,"x_Chinese-Eten" ,"" ,"" ,false)); |
184 | mCodePages.append(std::make_shared<CharsetInfo>(20003,"x-cp20003" ,"" ,"" ,false)); |
185 | mCodePages.append(std::make_shared<CharsetInfo>(20004,"x-cp20004" ,"" ,"" ,false)); |
186 | mCodePages.append(std::make_shared<CharsetInfo>(20005,"x-cp20005" ,"" ,"" ,false)); |
187 | mCodePages.append(std::make_shared<CharsetInfo>(20105,"x-IA5" ,"" ,"" ,false)); |
188 | mCodePages.append(std::make_shared<CharsetInfo>(20106,"x-IA5-German" ,"" ,"" ,false)); |
189 | mCodePages.append(std::make_shared<CharsetInfo>(20107,"x-IA5-Swedish" ,"" ,"" ,false)); |
190 | mCodePages.append(std::make_shared<CharsetInfo>(20108,"x-IA5-Norwegian" ,"" ,"" ,false)); |
191 | mCodePages.append(std::make_shared<CharsetInfo>(20127,"us-ascii" ,"" ,"" ,false)); |
192 | mCodePages.append(std::make_shared<CharsetInfo>(20261,"x-cp20261" ,"" ,"" ,false)); |
193 | mCodePages.append(std::make_shared<CharsetInfo>(20269,"x-cp20269" ,"" ,"" ,false)); |
194 | mCodePages.append(std::make_shared<CharsetInfo>(20273,"IBM273" ,"" ,"" ,false)); |
195 | mCodePages.append(std::make_shared<CharsetInfo>(20277,"IBM277" ,"" ,"" ,false)); |
196 | mCodePages.append(std::make_shared<CharsetInfo>(20278,"IBM278" ,"" ,"" ,false)); |
197 | mCodePages.append(std::make_shared<CharsetInfo>(20280,"IBM280" ,"" ,"" ,false)); |
198 | mCodePages.append(std::make_shared<CharsetInfo>(20284,"IBM284" ,"" ,"" ,false)); |
199 | mCodePages.append(std::make_shared<CharsetInfo>(20285,"IBM285" ,"" ,"" ,false)); |
200 | mCodePages.append(std::make_shared<CharsetInfo>(20290,"IBM290" ,"" ,"" ,false)); |
201 | mCodePages.append(std::make_shared<CharsetInfo>(20297,"IBM297" ,"" ,"" ,false)); |
202 | mCodePages.append(std::make_shared<CharsetInfo>(20420,"IBM420" ,"" ,"" ,false)); |
203 | mCodePages.append(std::make_shared<CharsetInfo>(20423,"IBM423" ,"" ,"" ,false)); |
204 | mCodePages.append(std::make_shared<CharsetInfo>(20424,"IBM424" ,"" ,"" ,false)); |
205 | mCodePages.append(std::make_shared<CharsetInfo>(20833,"x-EBCDIC-KoreanExtended" ,"" ,"" ,false)); |
206 | mCodePages.append(std::make_shared<CharsetInfo>(20838,"IBM-Thai" ,"" ,"" ,false)); |
207 | mCodePages.append(std::make_shared<CharsetInfo>(20866,"koi8-r" ,tr("Cyrillic" ),"" ,true)); |
208 | mCodePages.append(std::make_shared<CharsetInfo>(20871,"IBM871" ,"" ,"" ,false)); |
209 | mCodePages.append(std::make_shared<CharsetInfo>(20880,"IBM880" ,"" ,"" ,false)); |
210 | mCodePages.append(std::make_shared<CharsetInfo>(20905,"IBM905" ,"" ,"" ,false)); |
211 | mCodePages.append(std::make_shared<CharsetInfo>(20924,"IBM00924" ,"" ,"" ,false)); |
212 | mCodePages.append(std::make_shared<CharsetInfo>(20932,"EUC-JP" ,"" ,"" ,false)); |
213 | mCodePages.append(std::make_shared<CharsetInfo>(20936,"x-cp20936" ,"" ,"" ,false)); |
214 | mCodePages.append(std::make_shared<CharsetInfo>(20949,"x-cp20949" ,"" ,"" ,false)); |
215 | mCodePages.append(std::make_shared<CharsetInfo>(21025,"cp1025" ,"" ,"" ,false)); |
216 | mCodePages.append(std::make_shared<CharsetInfo>(21027,"" ,"" ,"" ,false)); |
217 | mCodePages.append(std::make_shared<CharsetInfo>(21866,"koi8-u" ,tr("Cyrillic" ),"" ,true)); |
218 | mCodePages.append(std::make_shared<CharsetInfo>(28591,"iso-8859-1" ,tr("Western Europe" ),"" ,true)); |
219 | mCodePages.append(std::make_shared<CharsetInfo>(28592,"iso-8859-2" ,tr("Eastern Europe" ),"" ,true)); |
220 | mCodePages.append(std::make_shared<CharsetInfo>(28593,"iso-8859-3" ,tr("Turkish" ),"" ,true)); |
221 | mCodePages.append(std::make_shared<CharsetInfo>(28594,"iso-8859-4" ,tr("Baltic" ),"" ,true)); |
222 | mCodePages.append(std::make_shared<CharsetInfo>(28595,"iso-8859-5" ,tr("Cyrillic" ),"" ,true)); |
223 | mCodePages.append(std::make_shared<CharsetInfo>(28596,"iso-8859-6" ,tr("Arabic" ),"" ,true)); |
224 | mCodePages.append(std::make_shared<CharsetInfo>(28597,"iso-8859-7" ,tr("Greek" ),"" ,true)); |
225 | mCodePages.append(std::make_shared<CharsetInfo>(28598,"iso-8859-8" ,tr("Hebrew" ),"" ,true)); |
226 | mCodePages.append(std::make_shared<CharsetInfo>(28599,"iso-8859-9" ,tr("Turkish" ),"" ,true)); |
227 | mCodePages.append(std::make_shared<CharsetInfo>(28603,"iso-8859-13" ,tr("Baltic" ),"" ,true)); |
228 | mCodePages.append(std::make_shared<CharsetInfo>(-1,"iso-8859-14" ,tr("Celtic" ),"" ,true)); |
229 | mCodePages.append(std::make_shared<CharsetInfo>(28605,"iso-8859-15" ,tr("Western Europe" ),"" ,true)); |
230 | mCodePages.append(std::make_shared<CharsetInfo>(29001,"x-Europa" ,"" ,"" ,false)); |
231 | mCodePages.append(std::make_shared<CharsetInfo>(38598,"iso-8859-8-i" ,"" ,"" ,false)); |
232 | mCodePages.append(std::make_shared<CharsetInfo>(50220,"iso-2022-jp" ,"" ,"" ,false)); |
233 | mCodePages.append(std::make_shared<CharsetInfo>(50221,"csISO2022JP" ,"" ,"" ,false)); |
234 | mCodePages.append(std::make_shared<CharsetInfo>(50222,"iso-2022-jp" ,"" ,"" ,false)); |
235 | mCodePages.append(std::make_shared<CharsetInfo>(50225,"iso-2022-kr" ,"" ,"" ,false)); |
236 | mCodePages.append(std::make_shared<CharsetInfo>(50227,"x-cp50227" ,"" ,"" ,false)); |
237 | mCodePages.append(std::make_shared<CharsetInfo>(50229,"" ,"" ,"" ,false)); |
238 | mCodePages.append(std::make_shared<CharsetInfo>(50930,"" ,"" ,"" ,false)); |
239 | mCodePages.append(std::make_shared<CharsetInfo>(50931,"" ,"" ,"" ,false)); |
240 | mCodePages.append(std::make_shared<CharsetInfo>(50933,"" ,"" ,"" ,false)); |
241 | mCodePages.append(std::make_shared<CharsetInfo>(50935,"" ,"" ,"" ,false)); |
242 | mCodePages.append(std::make_shared<CharsetInfo>(50936,"" ,"" ,"" ,false)); |
243 | mCodePages.append(std::make_shared<CharsetInfo>(50937,"" ,"" ,"" ,false)); |
244 | mCodePages.append(std::make_shared<CharsetInfo>(50939,"" ,"" ,"" ,false)); |
245 | mCodePages.append(std::make_shared<CharsetInfo>(51932,"euc-jp" ,tr("Japanese" ),"" ,true)); |
246 | mCodePages.append(std::make_shared<CharsetInfo>(51936,"euc-cn" ,"" ,"" ,false)); |
247 | mCodePages.append(std::make_shared<CharsetInfo>(51949,"euc-kr" ,tr("Korean" ),"" ,true)); |
248 | mCodePages.append(std::make_shared<CharsetInfo>(51950,"" ,"" ,"" ,false)); |
249 | mCodePages.append(std::make_shared<CharsetInfo>(52936,"hz-gb-2312" ,"" ,"" ,false)); |
250 | mCodePages.append(std::make_shared<CharsetInfo>(54936,"gb18030" ,tr("Chinese" ),"zh_CN" ,true)); |
251 | mCodePages.append(std::make_shared<CharsetInfo>(57002,"x-iscii-de" ,"" ,"" ,false)); |
252 | mCodePages.append(std::make_shared<CharsetInfo>(57003,"x-iscii-be" ,"" ,"" ,false)); |
253 | mCodePages.append(std::make_shared<CharsetInfo>(57004,"x-iscii-ta" ,"" ,"" ,false)); |
254 | mCodePages.append(std::make_shared<CharsetInfo>(57005,"x-iscii-te" ,"" ,"" ,false)); |
255 | mCodePages.append(std::make_shared<CharsetInfo>(57006,"x-iscii-as" ,"" ,"" ,false)); |
256 | mCodePages.append(std::make_shared<CharsetInfo>(57007,"x-iscii-or" ,"" ,"" ,false)); |
257 | mCodePages.append(std::make_shared<CharsetInfo>(57008,"x-iscii-ka" ,"" ,"" ,false)); |
258 | mCodePages.append(std::make_shared<CharsetInfo>(57009,"x-iscii-ma" ,"" ,"" ,false)); |
259 | mCodePages.append(std::make_shared<CharsetInfo>(57010,"x-iscii-gu" ,"" ,"" ,false)); |
260 | mCodePages.append(std::make_shared<CharsetInfo>(57011,"x-iscii-pa" ,"" ,"" ,false)); |
261 | mCodePages.append(std::make_shared<CharsetInfo>(65000,"utf-7" ,"" ,"" ,false)); |
262 | mCodePages.append(std::make_shared<CharsetInfo>(65001,"utf-8" ,"" ,"" ,false)); |
263 | |
264 | } |
265 | |
266 | CharsetInfo::CharsetInfo(int codepage, const QByteArray &name, const QString &language,const QString& localeName, bool enabled) |
267 | { |
268 | this->codepage = codepage; |
269 | this->name = name; |
270 | this->language = language; |
271 | this->localeName = localeName; |
272 | this->enabled = enabled; |
273 | } |
274 | |