10 #include "lcf/config.h"
11 #include "lcf/scope_guard.h"
14 # include <unicode/ucsdet.h>
15 # include <unicode/ucnv.h>
16 # include <unicode/normalizer2.h>
17 # include <unicode/unistr.h>
20 # error MSVC builds require ICU
39 #include "lcf/inireader.h"
40 #include "lcf/ldb/reader.h"
41 #include "lcf/reader_util.h"
45 namespace ReaderUtil {
// Translates a numeric codepage into the encoding name that is later handed
// to the converter (see Recode, which passes the result to ucnv_open /
// iconv_open).
//
// @param codepage numeric codepage, e.g. 932 or 1252.
// @return the encoding name for that codepage.
48 std::string ReaderUtil::CodepageToEncoding(
int codepage) {
// 932 is the codepage GetLocaleEncoding selects for "ja"; it has a fixed name
// instead of the generic prefix + number form.
52 if (codepage == 932) {
54 return "ibm-943_P15A-2003";
// 949 is the codepage GetLocaleEncoding selects for "ko"; same special-casing.
59 if (codepage == 949) {
61 return "windows-949-2000";
// Generic case: build the name from a textual prefix followed by the number.
66 std::ostringstream out;
// NOTE(review): both a "windows-" and a "CP" prefix are streamed here.
// Presumably only one of the two is active per build (ICU vs. non-ICU naming)
// -- confirm against the surrounding preprocessor conditionals.
68 out <<
"windows-" << codepage;
70 out <<
"CP" << codepage;
// Materialize the formatted name as a std::string.
74 std::string outs = out.str();
// Single-result convenience wrapper around DetectEncodings(db): yields the
// first candidate of the list that function produces.
//
// @param db database whose strings are analysed.
// @return the front element of the candidate list.
78 std::string ReaderUtil::DetectEncoding(lcf::rpg::Database& db) {
79 std::vector<std::string> encodings = DetectEncodings(db);
// The empty case is tested before front() is used, since front() on an empty
// vector is undefined. Presumably this branch returns early -- confirm.
81 if (encodings.empty()) {
85 return encodings.front();
// Produces encoding candidates for a database by handing the contents of a
// text buffer (`text`) to the StringView overload of DetectEncodings.
//
// @param db database whose system data and terms are visited.
// @return the candidate list returned by DetectEncodings(text.str()).
88 std::vector<std::string> ReaderUtil::DetectEncodings(lcf::rpg::Database& db) {
// Buffer whose final contents are passed to the string-based detector below.
90 std::ostringstream text;
// Helper: converts a value to std::string via ToString and appends a single
// space, so that consecutive entries stay separated.
92 auto append = [](
const auto& s) {
93 return ToString(s) +
" ";
// Visit the strings reachable from db.system; the callback receives each value
// together with a context object.
// presumably each visited string is appended to `text` -- confirm.
96 lcf::rpg::ForEachString(db.system, [&](
const auto& val,
const auto& ctx) {
// Iterate over a brace-enclosed list of individual db.terms fields.
102 for (
const auto& s: {
114 db.terms.health_points,
115 db.terms.spirit_points,
116 db.terms.normal_status,
127 db.terms.save_game_message,
128 db.terms.load_game_message,
129 db.terms.exit_game_message,
// Run the string-based detection on everything accumulated in `text`.
137 return ReaderUtil::DetectEncodings(text.str());
// NOTE(review): a second return directly after the first; presumably the two
// are alternatives selected by build configuration (an empty list when no
// detector is available) -- confirm.
139 return std::vector<std::string>();
// Single-result convenience wrapper around DetectEncodings(string): yields the
// first candidate of the list that function produces.
//
// @param string text sample to analyse.
// @return the front element of the candidate list.
143 std::string ReaderUtil::DetectEncoding(StringView
string) {
144 std::vector<std::string> encodings = DetectEncodings(
string);
// The empty case is tested before front() is used, since front() on an empty
// vector is undefined. Presumably this branch returns early -- confirm.
146 if (encodings.empty()) {
150 return encodings.front();
// Runs the ICU charset detector (ucsdet_*) over a text sample and collects the
// names of all matches, translating some detector names into the specific
// converter names used elsewhere in this file.
//
// @param string text sample to analyse; an empty sample skips detection.
// @return the collected encoding names, in the order the detector reported
//         them.
153 std::vector<std::string> ReaderUtil::DetectEncodings(StringView
string) {
154 std::vector<std::string> encodings;
// Detection only runs when there is text to analyse.
156 if (!
string.empty()) {
157 UErrorCode status = U_ZERO_ERROR;
// NOTE(review): `status` is not inspected after ucsdet_open or any of the
// following ICU calls -- confirm a failed open cannot reach ucsdet_setText.
158 UCharsetDetector* detector = ucsdet_open(&status);
// Copy the view into an owning std::string; its buffer and length are what the
// detector is given.
160 auto s = std::string(
string);
161 ucsdet_setText(detector, s.c_str(), s.length(), &status);
// NOTE(review): matches_count has no initializer; it is only meaningful once
// ucsdet_detectAll has written to it.
163 int32_t matches_count;
164 const UCharsetMatch** matches = ucsdet_detectAll(detector, &matches_count, &status);
166 if (matches !=
nullptr) {
168 for (
int i = 0; i < matches_count; ++i) {
// NOTE(review): constructing std::string from the result assumes
// ucsdet_getName never returns null here -- confirm.
169 std::string encoding = ucsdet_getName(matches[i], &status);
// Map the detector's name to a specific converter name. "Shift_JIS" and
// "EUC-KR" map to the same names CodepageToEncoding uses for 932 and 949.
172 if (encoding ==
"Shift_JIS") {
173 encodings.emplace_back(
"ibm-943_P15A-2003");
174 }
else if (encoding ==
"EUC-KR") {
175 encodings.emplace_back(
"windows-949-2000");
176 }
else if (encoding ==
"GB18030") {
177 encodings.emplace_back(
"windows-936-2000");
178 }
// For the remaining entries an ISO-8859-x name and a windows-125x name are
// treated as the same thing and share one converter.
else if (encoding ==
"ISO-8859-1" || encoding ==
"windows-1252") {
179 encodings.emplace_back(
"ibm-5348_P100-1997");
180 }
else if (encoding ==
"ISO-8859-2" || encoding ==
"windows-1250") {
181 encodings.emplace_back(
"ibm-5346_P100-1998");
182 }
else if (encoding ==
"ISO-8859-5" || encoding ==
"windows-1251") {
183 encodings.emplace_back(
"ibm-5347_P100-1998");
184 }
else if (encoding ==
"ISO-8859-6" || encoding ==
"windows-1256") {
185 encodings.emplace_back(
"ibm-9448_X100-2005");
186 }
else if (encoding ==
"ISO-8859-7" || encoding ==
"windows-1253") {
187 encodings.emplace_back(
"ibm-5349_P100-1998");
188 }
else if (encoding ==
"ISO-8859-8" || encoding ==
"windows-1255") {
189 encodings.emplace_back(
"ibm-9447_P100-2002");
// Names without a dedicated mapping are stored as reported by the detector
// (presumably the final else branch of the chain above -- confirm).
191 encodings.push_back(encoding);
// Release the detector obtained from ucsdet_open.
195 ucsdet_close(detector);
// Reads the encoding configured in an INI file: looks up "Encoding" under
// "EasyRPG" and converts the stored number to an encoding name.
//
// @param ini_file value handed to INIReader after conversion with ToString
//        (presumably a file path -- confirm against INIReader).
// @return the encoding name from CodepageToEncoding, or an empty string when
//         nothing usable was found.
202 std::string ReaderUtil::GetEncoding(StringView ini_file) {
203 INIReader ini(ToString(ini_file));
// NOTE(review): only -1 is treated as failure; presumably that is the
// "could not open" result and other non-zero values (parse errors) are
// tolerated on purpose -- confirm against INIReader::ParseError.
204 if (ini.ParseError() != -1) {
// The third argument is an empty string (presumably the fallback returned
// when the key is absent), so "missing" and "empty" are handled alike below.
205 std::string encoding = ini.Get(
"EasyRPG",
"Encoding", std::string());
206 if (!encoding.empty()) {
// The stored value is parsed as a number with atoi (0 for non-numeric text)
// and mapped to an encoding name.
207 return ReaderUtil::CodepageToEncoding(atoi(encoding.c_str()));
// Fallback: no encoding information available.
210 return std::string();
// Stream-based variant of GetEncoding: reads the INI data from an already
// opened stream, looks up "Encoding" under "EasyRPG" and converts the stored
// number to an encoding name.
//
// @param filestream input stream handed to INIReader.
// @return the encoding name from CodepageToEncoding, or an empty string when
//         nothing usable was found.
213 std::string ReaderUtil::GetEncoding(std::istream& filestream) {
214 INIReader ini(filestream);
// NOTE(review): only -1 is treated as failure; other non-zero results are
// presumably tolerated on purpose -- confirm against INIReader::ParseError.
215 if (ini.ParseError() != -1) {
// The third argument is an empty string (presumably the fallback returned
// when the key is absent), so "missing" and "empty" are handled alike below.
216 std::string encoding = ini.Get(
"EasyRPG",
"Encoding", std::string());
217 if (!encoding.empty()) {
// The stored value is parsed as a number with atoi (0 for non-numeric text)
// and mapped to an encoding name.
218 return ReaderUtil::CodepageToEncoding(atoi(encoding.c_str()));
// Fallback: no encoding information available.
221 return std::string();
// Determines a codepage for the environment the program runs in and returns
// the matching encoding name via CodepageToEncoding.
//
// @return encoding name for the detected codepage.
224 std::string ReaderUtil::GetLocaleEncoding() {
// GetACP() is the Windows API call for the active ANSI codepage.
// presumably this line is only compiled on Windows and the locale-name logic
// below is the alternative for other platforms -- confirm.
226 int codepage = GetACP();
// std::locale("") constructs the user-preferred locale of the environment.
234 std::locale loc = std::locale(
"");
// Locale name cut at the first '@' or '.', e.g. "zh_CN.UTF-8" -> "zh_CN".
// When neither character occurs, substr keeps the whole name.
236 std::string loc_full = loc.name().substr(0, loc.name().find_first_of(
"@."));
// Locale name cut at the first '_', i.e. only the language part.
238 std::string loc_lang = loc.name().substr(0, loc.name().find_first_of(
"_"));
// Pick the codepage by language; Chinese needs the full name because the
// region decides between 936 and 950.
240 if (loc_lang ==
"th") codepage = 874;
241 else if (loc_lang ==
"ja") codepage = 932;
242 else if (loc_full ==
"zh_CN" ||
243 loc_full ==
"zh_SG") codepage = 936;
244 else if (loc_lang ==
"ko") codepage = 949;
245 else if (loc_full ==
"zh_TW" ||
246 loc_full ==
"zh_HK") codepage = 950;
247 else if (loc_lang ==
"cs" ||
253 loc_lang ==
"sl") codepage = 1250;
254 else if (loc_lang ==
"ru") codepage = 1251;
255 else if (loc_lang ==
"ca" ||
267 loc_lang ==
"eu") codepage = 1252;
268 else if (loc_lang ==
"el") codepage = 1253;
269 else if (loc_lang ==
"tr") codepage = 1254;
270 else if (loc_lang ==
"he") codepage = 1255;
271 else if (loc_lang ==
"ar") codepage = 1256;
272 else if (loc_lang ==
"et" ||
274 loc_lang ==
"lv") codepage = 1257;
275 else if (loc_lang ==
"vi") codepage = 1258;
// Convert the numeric codepage into the encoding name used by the converter.
278 return CodepageToEncoding(codepage);
// Convenience overload: converts str_to_encode from source_encoding to
// "UTF-8" by forwarding to the three-argument Recode.
//
// @param str_to_encode text to convert.
// @param source_encoding encoding the text is currently in.
// @return whatever the three-argument Recode returns for a "UTF-8" target.
281 std::string ReaderUtil::Recode(StringView str_to_encode, StringView source_encoding) {
282 return ReaderUtil::Recode(str_to_encode, source_encoding,
"UTF-8");
// Converts str_to_encode from the source encoding (src_enc) to dst_enc.
// Either encoding may be given as a name or as a numeric codepage; numeric
// values are translated with CodepageToEncoding first.
//
// Two conversion back ends appear below: one built on ICU (ucnv_*) and one
// built on iconv.
// presumably exactly one of them is compiled in, depending on whether ICU
// support is enabled -- confirm against the surrounding preprocessor
// conditionals.
//
// @param str_to_encode text to convert.
// @param dst_enc target encoding (name or numeric codepage).
// @return the converted text; the input unchanged when any argument is empty;
//         an empty string when the conversion itself fails.
285 std::string ReaderUtil::Recode(StringView str_to_encode,
287 StringView dst_enc) {
// Nothing to do without text or without both encodings: hand back the input.
289 if (src_enc.empty() || dst_enc.empty() || str_to_encode.empty()) {
290 return ToString(str_to_encode);
// A positive SvAtoi result means the encoding was given as a codepage number.
293 auto src_cp = SvAtoi(src_enc);
294 const auto& src_enc_str = src_cp > 0
295 ? ReaderUtil::CodepageToEncoding(src_cp)
298 auto dst_cp = SvAtoi(dst_enc);
299 const auto& dst_enc_str = dst_cp > 0
300 ? ReaderUtil::CodepageToEncoding(dst_cp)
// --- ICU back end ---
304 auto status = U_ZERO_ERROR;
305 auto conv_from = ucnv_open(src_enc_str.c_str(), &status);
// Only plain success and the ambiguous-alias warning are accepted; any other
// status (including other warnings) is reported and aborts the conversion.
307 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
308 fprintf(stderr,
"liblcf: ucnv_open() error for source encoding \"%s\": %s\n", src_enc_str.c_str(), u_errorName(status));
309 return std::string();
// Reset so a tolerated warning does not leak into the next ICU call.
311 status = U_ZERO_ERROR;
// Scope guard: closes the source converter on every exit path from here on.
312 auto conv_from_sg = makeScopeGuard([&]() { ucnv_close(conv_from); });
314 auto conv_to = ucnv_open(dst_enc_str.c_str(), &status);
316 if (status != U_ZERO_ERROR && status != U_AMBIGUOUS_ALIAS_WARNING) {
317 fprintf(stderr,
"liblcf: ucnv_open() error for dest encoding \"%s\": %s\n", dst_enc_str.c_str(), u_errorName(status));
318 return std::string();
// Scope guard: closes the destination converter on every exit path.
320 auto conv_to_sg = makeScopeGuard([&]() { ucnv_close(conv_to); });
321 status = U_ZERO_ERROR;
// Output buffer sized at four bytes per input byte, zero-filled.
323 std::string result(str_to_encode.size() * 4,
'\0');
324 auto* src = str_to_encode.data();
325 auto* dst = &result.front();
// src and dst are passed by address so the call can advance them; dst is used
// afterwards to compute how many bytes were written.
327 ucnv_convertEx(conv_to, conv_from,
328 &dst, dst + result.size(),
329 &src, src + str_to_encode.size(),
330 nullptr,
nullptr,
nullptr,
nullptr,
334 if (U_FAILURE(status)) {
335 fprintf(stderr,
"liblcf: ucnv_convertEx() error when encoding \"%.*s\": %s\n", (
int)str_to_encode.length(), str_to_encode.data(), u_errorName(status));
336 return std::string();
// Trim the buffer to the bytes actually written and release the slack.
339 result.resize(dst - result.c_str());
340 result.shrink_to_fit();
// --- iconv back end ---
344 iconv_t cd = iconv_open(dst_enc_str.c_str(), src_enc_str.c_str());
// NOTE(review): an unavailable converter returns the input unchanged here,
// while the ICU back end returns an empty string for the same situation --
// confirm the difference is intended.
345 if (cd == (iconv_t)-1)
346 return ToString(str_to_encode);
// iconv takes a non-const input pointer, hence the const_cast on the view's
// data.
347 char *src =
const_cast<char *
>(str_to_encode.data());
348 size_t src_left = str_to_encode.size();
// Output buffer sized at five bytes per input byte plus ten spare bytes.
349 size_t dst_size = str_to_encode.size() * 5 + 10;
// NOTE(review): raw new[] buffer -- verify every exit path below releases it.
350 char *dst =
new char[dst_size];
351 size_t dst_left = dst_size;
// ICONV_CONST accounts for iconv implementations that declare the input
// pointer as const char**.
353 char ICONV_CONST *p = src;
358 size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
// Failure is either an iconv error return or input left unconverted.
360 if (status == (
size_t) -1 || src_left > 0) {
362 return std::string();
// NOTE(review): built from a C string, so this relies on dst being
// NUL-terminated and would stop at an embedded NUL in the converted output.
365 std::string result(dst);
// Produces a normalized form of a string for comparison purposes.
//
// Two implementations appear below: one lower-cases via ICU and applies NFKC
// normalization, the other only lower-cases with std::transform/tolower.
// presumably exactly one of them is compiled in, depending on whether ICU
// support is enabled -- confirm against the surrounding preprocessor
// conditionals.
//
// @param str input text; the ICU path interprets it as "utf-8".
// @return the normalized text.
371 std::string ReaderUtil::Normalize(StringView str) {
// Decode the bytes as UTF-8 and lower-case them in one step.
373 icu::UnicodeString uni = icu::UnicodeString(str.data(), str.length(),
"utf-8").toLower();
374 UErrorCode err = U_ZERO_ERROR;
376 const icu::Normalizer2* norm = icu::Normalizer2::getNFKCInstance(err);
// No normalizer available: report it and fall back to the lower-cased text
// without normalization.
377 if (U_FAILURE(err)) {
// Function-local static flag; presumably limits the message below to a single
// report per process -- confirm.
378 static bool err_reported =
false;
380 fprintf(stderr,
"Normalizer2::getNFKCInstance failed (%s). \"nrm\" is probably missing in the ICU data file. Unicode normalization will not work!\n", u_errorName(err));
383 uni.toUTF8String(res);
386 icu::UnicodeString f = norm->normalize(uni, err);
// Normalization failed: emit the lower-cased but un-normalized text.
387 if (U_FAILURE(err)) {
388 uni.toUTF8String(res);
// --- non-ICU variant: byte-wise lower-casing only ---
394 auto result = std::string(str);
// NOTE(review): tolower is applied directly to plain char values; for bytes
// >= 0x80 on platforms where char is signed, the argument is negative, which
// the C library does not define -- consider going through unsigned char.
395 std::transform(result.begin(), result.end(), result.begin(), tolower);