1/*
2 * Copyright (C) 2020-2022 Roy Qu (royqh1979@gmail.com)
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17#include "charsetinfo.h"
18#include <QObject>
19#include <memory>
20#include <QMap>
21#include <QSet>
22#ifdef Q_OS_WIN
23#include <windows.h>
24#else
25#include <langinfo.h>
26#endif
27
28CharsetInfoManager* pCharsetInfoManager;
29
30QByteArray CharsetInfoManager::getDefaultSystemEncoding()
31{
32#ifdef Q_OS_WIN
33 DWORD acp = GetACP();
34 PCharsetInfo info = findCharsetByCodepage(acp);
35 if (info) {
36 return info->name;
37 }
38 return "unknown";
39#else
40 return QByteArray(nl_langinfo(CODESET));
41#endif
42}
43
44PCharsetInfo CharsetInfoManager::findCharsetByCodepage(int codepage)
45{
46 foreach (const PCharsetInfo& info, mCodePages) {
47 if (info->codepage == codepage)
48 return info;
49 }
50 return PCharsetInfo();
51}
52
53QStringList CharsetInfoManager::languageNames()
54{
55 QSet<QString> languages;
56 foreach (const PCharsetInfo& info, mCodePages) {
57 if (info->enabled)
58 languages.insert(info->language);
59 }
60 QStringList lst;
61 foreach (const QString& s, languages)
62 lst.append(s);
63 lst.sort(Qt::CaseInsensitive);
64 return lst;
65}
66
67QList<PCharsetInfo> CharsetInfoManager::findCharsetsByLanguageName(const QString &languageName)
68{
69 QList<PCharsetInfo> result;
70 foreach (const PCharsetInfo& info, mCodePages) {
71 if (info->enabled && info->language == languageName)
72 result.append(info);
73 }
74 std::sort(result.begin(),result.end(),[](const PCharsetInfo& info1,const PCharsetInfo& info2){
75 return (info1->name < info2->name);
76 });
77 return result;
78}
79
80QList<PCharsetInfo> CharsetInfoManager::findCharsetByLocale(const QString &localeName)
81{
82 QList<PCharsetInfo> result;
83 foreach (const PCharsetInfo& info, mCodePages) {
84 if (info->enabled && info->localeName == localeName)
85 result.append(info);
86 }
87 return result;
88}
89
90QString CharsetInfoManager::findLanguageByCharsetName(const QString &encodingName)
91{
92
93 foreach (const PCharsetInfo& info, mCodePages) {
94 if (info->enabled &&
95 QString::compare(info->name, encodingName, Qt::CaseInsensitive)==0)
96 return info->language;
97 }
98 return "Unknown";
99}
100
101const QString &CharsetInfoManager::localeName() const
102{
103 return mLocaleName;
104}
105
106CharsetInfoManager::CharsetInfoManager(const QString& localeName):
107 QObject(),
108 mLocaleName(localeName)
109{
110 mCodePages.append(std::make_shared<CharsetInfo>(37,"IBM037","","",false));
111 mCodePages.append(std::make_shared<CharsetInfo>(437,"IBM437","","",false));
112 mCodePages.append(std::make_shared<CharsetInfo>(500,"IBM500","","",false));
113 mCodePages.append(std::make_shared<CharsetInfo>(708,"ASMO-708","","",false));
114 mCodePages.append(std::make_shared<CharsetInfo>(709,"","","",false));
115 mCodePages.append(std::make_shared<CharsetInfo>(710,"","","",false));
116 mCodePages.append(std::make_shared<CharsetInfo>(720,"DOS-720",tr("Arabic"),"",false));
117 mCodePages.append(std::make_shared<CharsetInfo>(737,"ibm737",tr("Greek"),"",false));
118 mCodePages.append(std::make_shared<CharsetInfo>(775,"ibm775",tr("Baltic"),"",false));
119 mCodePages.append(std::make_shared<CharsetInfo>(850,"ibm850",tr("Western Europe"),"",false));
120 mCodePages.append(std::make_shared<CharsetInfo>(852,"ibm852",tr("Central Europe"),"",false));
121 mCodePages.append(std::make_shared<CharsetInfo>(855,"IBM855",tr("Cyrillic"),"",false));
122 mCodePages.append(std::make_shared<CharsetInfo>(857,"ibm857",tr("Turkish"),"",false));
123 mCodePages.append(std::make_shared<CharsetInfo>(858,"ibm858",tr("Western Europe"),"",false));
124 mCodePages.append(std::make_shared<CharsetInfo>(860,"IBM860",tr("Western Europe"),"",false));
125 mCodePages.append(std::make_shared<CharsetInfo>(861,"ibm861",tr("Northern Europe"),"",false));
126 mCodePages.append(std::make_shared<CharsetInfo>(862,"DOS-862",tr("Hebrew"),"",false));
127 mCodePages.append(std::make_shared<CharsetInfo>(863,"IBM863",tr("Western Europe"),"",false));
128 mCodePages.append(std::make_shared<CharsetInfo>(864,"IBM864","","",false));
129 mCodePages.append(std::make_shared<CharsetInfo>(865,"IBM865",tr("Northern Europe"),"",false));
130 mCodePages.append(std::make_shared<CharsetInfo>(866,"cp866",tr("Cyrillic"),"",false));
131 mCodePages.append(std::make_shared<CharsetInfo>(869,"ibm869",tr("Greek"),"",false));
132 mCodePages.append(std::make_shared<CharsetInfo>(870,"IBM870","","",false));
133 mCodePages.append(std::make_shared<CharsetInfo>(874,"tis-620",tr("Thai"),"",true));
134 mCodePages.append(std::make_shared<CharsetInfo>(875,"cp875","","",false));
135 mCodePages.append(std::make_shared<CharsetInfo>(932,"shift_jis",tr("Japanese"),"",true));
136 mCodePages.append(std::make_shared<CharsetInfo>(936,"gbk",tr("Chinese"),"zh_CN",true));
137 mCodePages.append(std::make_shared<CharsetInfo>(949,"windows-949",tr("Korean"),"",true));
138 mCodePages.append(std::make_shared<CharsetInfo>(950,"big5",tr("Chinese"),"",true));
139 mCodePages.append(std::make_shared<CharsetInfo>(1026,"IBM1026","","",false));
140 mCodePages.append(std::make_shared<CharsetInfo>(1047,"IBM01047","","",false));
141 mCodePages.append(std::make_shared<CharsetInfo>(1140,"IBM01140","","",false));
142 mCodePages.append(std::make_shared<CharsetInfo>(1141,"IBM01141","","",false));
143 mCodePages.append(std::make_shared<CharsetInfo>(1142,"IBM01142","","",false));
144 mCodePages.append(std::make_shared<CharsetInfo>(1143,"IBM01143","","",false));
145 mCodePages.append(std::make_shared<CharsetInfo>(1144,"IBM01144","","",false));
146 mCodePages.append(std::make_shared<CharsetInfo>(1145,"IBM01145","","",false));
147 mCodePages.append(std::make_shared<CharsetInfo>(1146,"IBM01146","","",false));
148 mCodePages.append(std::make_shared<CharsetInfo>(1147,"IBM01147","","",false));
149 mCodePages.append(std::make_shared<CharsetInfo>(1148,"IBM01148","","",false));
150 mCodePages.append(std::make_shared<CharsetInfo>(1149,"IBM01149","","",false));
151 mCodePages.append(std::make_shared<CharsetInfo>(1200,"utf-16","","",false));
152 mCodePages.append(std::make_shared<CharsetInfo>(1201,"unicodeFFFE","","",false));
153 mCodePages.append(std::make_shared<CharsetInfo>(1250,"windows-1250",tr("Central Europe"),"",true));
154 mCodePages.append(std::make_shared<CharsetInfo>(1251,"windows-1251",tr("Cyrillic"),"",true));
155 mCodePages.append(std::make_shared<CharsetInfo>(1252,"windows-1252",tr("Western Europe"),"",true));
156 mCodePages.append(std::make_shared<CharsetInfo>(1253,"windows-1253",tr("Greek"),"",true));
157 mCodePages.append(std::make_shared<CharsetInfo>(1254,"windows-1254",tr("Turkish"),"",true));
158 mCodePages.append(std::make_shared<CharsetInfo>(1255,"windows-1255",tr("Hebrew"),"",true));
159 mCodePages.append(std::make_shared<CharsetInfo>(1256,"windows-1256",tr("Arabic"),"",true));
160 mCodePages.append(std::make_shared<CharsetInfo>(1257,"windows-1257",tr("Baltic"),"",true));
161 mCodePages.append(std::make_shared<CharsetInfo>(1258,"windows-1258",tr("Vietnamese"),"",true));
162 mCodePages.append(std::make_shared<CharsetInfo>(1361,"Johab","","",false));
163 mCodePages.append(std::make_shared<CharsetInfo>(10000,"macintosh",tr("Cyrillic"),"",true));
164 mCodePages.append(std::make_shared<CharsetInfo>(10001,"x-mac-japanese","","",false));
165 mCodePages.append(std::make_shared<CharsetInfo>(10002,"x-mac-chinesetrad","","",false));
166 mCodePages.append(std::make_shared<CharsetInfo>(10003,"x-mac-korean","","",false));
167 mCodePages.append(std::make_shared<CharsetInfo>(10004,"x-mac-arabic","","",false));
168 mCodePages.append(std::make_shared<CharsetInfo>(10005,"x-mac-hebrew","","",false));
169 mCodePages.append(std::make_shared<CharsetInfo>(10006,"x-mac-greek","","",false));
170 mCodePages.append(std::make_shared<CharsetInfo>(10007,"x-mac-cyrillic","","",false));
171 mCodePages.append(std::make_shared<CharsetInfo>(10008,"x-mac-chinesesimp","","",false));
172 mCodePages.append(std::make_shared<CharsetInfo>(10010,"x-mac-romanian","","",false));
173 mCodePages.append(std::make_shared<CharsetInfo>(10017,"x-mac-ukrainian","","",false));
174 mCodePages.append(std::make_shared<CharsetInfo>(10021,"x-mac-thai","","",false));
175 mCodePages.append(std::make_shared<CharsetInfo>(10029,"x-mac-ce","","",false));
176 mCodePages.append(std::make_shared<CharsetInfo>(10079,"x-mac-icelandic","","",false));
177 mCodePages.append(std::make_shared<CharsetInfo>(10081,"x-mac-turkish","","",false));
178 mCodePages.append(std::make_shared<CharsetInfo>(10082,"x-mac-croatian","","",false));
179 mCodePages.append(std::make_shared<CharsetInfo>(12000,"utf-32","","",false));
180 mCodePages.append(std::make_shared<CharsetInfo>(12001,"utf-32BE","","",false));
181 mCodePages.append(std::make_shared<CharsetInfo>(20000,"x-Chinese_CNS","","",false));
182 mCodePages.append(std::make_shared<CharsetInfo>(20001,"x-cp20001","","",false));
183 mCodePages.append(std::make_shared<CharsetInfo>(20002,"x_Chinese-Eten","","",false));
184 mCodePages.append(std::make_shared<CharsetInfo>(20003,"x-cp20003","","",false));
185 mCodePages.append(std::make_shared<CharsetInfo>(20004,"x-cp20004","","",false));
186 mCodePages.append(std::make_shared<CharsetInfo>(20005,"x-cp20005","","",false));
187 mCodePages.append(std::make_shared<CharsetInfo>(20105,"x-IA5","","",false));
188 mCodePages.append(std::make_shared<CharsetInfo>(20106,"x-IA5-German","","",false));
189 mCodePages.append(std::make_shared<CharsetInfo>(20107,"x-IA5-Swedish","","",false));
190 mCodePages.append(std::make_shared<CharsetInfo>(20108,"x-IA5-Norwegian","","",false));
191 mCodePages.append(std::make_shared<CharsetInfo>(20127,"us-ascii","","",false));
192 mCodePages.append(std::make_shared<CharsetInfo>(20261,"x-cp20261","","",false));
193 mCodePages.append(std::make_shared<CharsetInfo>(20269,"x-cp20269","","",false));
194 mCodePages.append(std::make_shared<CharsetInfo>(20273,"IBM273","","",false));
195 mCodePages.append(std::make_shared<CharsetInfo>(20277,"IBM277","","",false));
196 mCodePages.append(std::make_shared<CharsetInfo>(20278,"IBM278","","",false));
197 mCodePages.append(std::make_shared<CharsetInfo>(20280,"IBM280","","",false));
198 mCodePages.append(std::make_shared<CharsetInfo>(20284,"IBM284","","",false));
199 mCodePages.append(std::make_shared<CharsetInfo>(20285,"IBM285","","",false));
200 mCodePages.append(std::make_shared<CharsetInfo>(20290,"IBM290","","",false));
201 mCodePages.append(std::make_shared<CharsetInfo>(20297,"IBM297","","",false));
202 mCodePages.append(std::make_shared<CharsetInfo>(20420,"IBM420","","",false));
203 mCodePages.append(std::make_shared<CharsetInfo>(20423,"IBM423","","",false));
204 mCodePages.append(std::make_shared<CharsetInfo>(20424,"IBM424","","",false));
205 mCodePages.append(std::make_shared<CharsetInfo>(20833,"x-EBCDIC-KoreanExtended","","",false));
206 mCodePages.append(std::make_shared<CharsetInfo>(20838,"IBM-Thai","","",false));
207 mCodePages.append(std::make_shared<CharsetInfo>(20866,"koi8-r",tr("Cyrillic"),"",true));
208 mCodePages.append(std::make_shared<CharsetInfo>(20871,"IBM871","","",false));
209 mCodePages.append(std::make_shared<CharsetInfo>(20880,"IBM880","","",false));
210 mCodePages.append(std::make_shared<CharsetInfo>(20905,"IBM905","","",false));
211 mCodePages.append(std::make_shared<CharsetInfo>(20924,"IBM00924","","",false));
212 mCodePages.append(std::make_shared<CharsetInfo>(20932,"EUC-JP","","",false));
213 mCodePages.append(std::make_shared<CharsetInfo>(20936,"x-cp20936","","",false));
214 mCodePages.append(std::make_shared<CharsetInfo>(20949,"x-cp20949","","",false));
215 mCodePages.append(std::make_shared<CharsetInfo>(21025,"cp1025","","",false));
216 mCodePages.append(std::make_shared<CharsetInfo>(21027,"","","",false));
217 mCodePages.append(std::make_shared<CharsetInfo>(21866,"koi8-u",tr("Cyrillic"),"",true));
218 mCodePages.append(std::make_shared<CharsetInfo>(28591,"iso-8859-1",tr("Western Europe"),"",true));
219 mCodePages.append(std::make_shared<CharsetInfo>(28592,"iso-8859-2",tr("Eastern Europe"),"",true));
220 mCodePages.append(std::make_shared<CharsetInfo>(28593,"iso-8859-3",tr("Turkish"),"",true));
221 mCodePages.append(std::make_shared<CharsetInfo>(28594,"iso-8859-4",tr("Baltic"),"",true));
222 mCodePages.append(std::make_shared<CharsetInfo>(28595,"iso-8859-5",tr("Cyrillic"),"",true));
223 mCodePages.append(std::make_shared<CharsetInfo>(28596,"iso-8859-6",tr("Arabic"),"",true));
224 mCodePages.append(std::make_shared<CharsetInfo>(28597,"iso-8859-7",tr("Greek"),"",true));
225 mCodePages.append(std::make_shared<CharsetInfo>(28598,"iso-8859-8",tr("Hebrew"),"",true));
226 mCodePages.append(std::make_shared<CharsetInfo>(28599,"iso-8859-9",tr("Turkish"),"",true));
227 mCodePages.append(std::make_shared<CharsetInfo>(28603,"iso-8859-13",tr("Baltic"),"",true));
228 mCodePages.append(std::make_shared<CharsetInfo>(-1,"iso-8859-14",tr("Celtic"),"",true));
229 mCodePages.append(std::make_shared<CharsetInfo>(28605,"iso-8859-15",tr("Western Europe"),"",true));
230 mCodePages.append(std::make_shared<CharsetInfo>(29001,"x-Europa","","",false));
231 mCodePages.append(std::make_shared<CharsetInfo>(38598,"iso-8859-8-i","","",false));
232 mCodePages.append(std::make_shared<CharsetInfo>(50220,"iso-2022-jp","","",false));
233 mCodePages.append(std::make_shared<CharsetInfo>(50221,"csISO2022JP","","",false));
234 mCodePages.append(std::make_shared<CharsetInfo>(50222,"iso-2022-jp","","",false));
235 mCodePages.append(std::make_shared<CharsetInfo>(50225,"iso-2022-kr","","",false));
236 mCodePages.append(std::make_shared<CharsetInfo>(50227,"x-cp50227","","",false));
237 mCodePages.append(std::make_shared<CharsetInfo>(50229,"","","",false));
238 mCodePages.append(std::make_shared<CharsetInfo>(50930,"","","",false));
239 mCodePages.append(std::make_shared<CharsetInfo>(50931,"","","",false));
240 mCodePages.append(std::make_shared<CharsetInfo>(50933,"","","",false));
241 mCodePages.append(std::make_shared<CharsetInfo>(50935,"","","",false));
242 mCodePages.append(std::make_shared<CharsetInfo>(50936,"","","",false));
243 mCodePages.append(std::make_shared<CharsetInfo>(50937,"","","",false));
244 mCodePages.append(std::make_shared<CharsetInfo>(50939,"","","",false));
245 mCodePages.append(std::make_shared<CharsetInfo>(51932,"euc-jp",tr("Japanese"),"",true));
246 mCodePages.append(std::make_shared<CharsetInfo>(51936,"euc-cn","","",false));
247 mCodePages.append(std::make_shared<CharsetInfo>(51949,"euc-kr",tr("Korean"),"",true));
248 mCodePages.append(std::make_shared<CharsetInfo>(51950,"","","",false));
249 mCodePages.append(std::make_shared<CharsetInfo>(52936,"hz-gb-2312","","",false));
250 mCodePages.append(std::make_shared<CharsetInfo>(54936,"gb18030",tr("Chinese"),"zh_CN",true));
251 mCodePages.append(std::make_shared<CharsetInfo>(57002,"x-iscii-de","","",false));
252 mCodePages.append(std::make_shared<CharsetInfo>(57003,"x-iscii-be","","",false));
253 mCodePages.append(std::make_shared<CharsetInfo>(57004,"x-iscii-ta","","",false));
254 mCodePages.append(std::make_shared<CharsetInfo>(57005,"x-iscii-te","","",false));
255 mCodePages.append(std::make_shared<CharsetInfo>(57006,"x-iscii-as","","",false));
256 mCodePages.append(std::make_shared<CharsetInfo>(57007,"x-iscii-or","","",false));
257 mCodePages.append(std::make_shared<CharsetInfo>(57008,"x-iscii-ka","","",false));
258 mCodePages.append(std::make_shared<CharsetInfo>(57009,"x-iscii-ma","","",false));
259 mCodePages.append(std::make_shared<CharsetInfo>(57010,"x-iscii-gu","","",false));
260 mCodePages.append(std::make_shared<CharsetInfo>(57011,"x-iscii-pa","","",false));
261 mCodePages.append(std::make_shared<CharsetInfo>(65000,"utf-7","","",false));
262 mCodePages.append(std::make_shared<CharsetInfo>(65001,"utf-8","","",false));
263
264}
265
266CharsetInfo::CharsetInfo(int codepage, const QByteArray &name, const QString &language,const QString& localeName, bool enabled)
267{
268 this->codepage = codepage;
269 this->name = name;
270 this->language = language;
271 this->localeName = localeName;
272 this->enabled = enabled;
273}
274