10 #include "lcf/config.h"
11 #include "lcf/encoder.h"
12 #include "lcf/reader_util.h"
13 #include "lcf/scope_guard.h"
19 # include <unicode/ucsdet.h>
20 # include <unicode/ucnv.h>
23 # error MSVC builds require ICU
// Tests whether `enc` names UTF-8. ucnv_compareNames() returns 0 when ICU
// considers the two converter names equivalent.
// NOTE(review): presumably a UTF-8 encoding is filtered out here so that no
// conversion is set up for it — confirm against the branch body.
40 if (ucnv_compareNames(enc.c_str(),
"UTF-8") == 0) {
// Constructs an Encoder from an encoding name.
// @param encoding  Name of the encoding, taken by value.
47 Encoder::Encoder(std::string encoding)
// Returns true when no encoding is set (_encoding is empty), or when both
// the storage and the runtime converter handles are non-null.
57 bool Encoder::IsOk()
const {
58 return _encoding.empty() || (_conv_storage && _conv_runtime);
// Converts `str` in place by calling Convert() with _conv_runtime as the
// destination converter and _conv_storage as the source converter.
// @param str  String to convert; modified in place by Convert().
61 void Encoder::Encode(std::string& str) {
// Nothing to convert when no encoding is configured or the string is empty
// (presumably an early return — TODO confirm).
62 if (_encoding.empty() || str.empty()) {
65 Convert(str, _conv_runtime, _conv_storage);
// Converts `str` in place by calling Convert() with _conv_storage as the
// destination converter and _conv_runtime as the source converter, i.e. the
// opposite direction of Encode().
// @param str  String to convert; modified in place by Convert().
68 void Encoder::Decode(std::string& str) {
// Nothing to convert when no encoding is configured or the string is empty
// (presumably an early return — TODO confirm).
69 if (_encoding.empty() || str.empty()) {
72 Convert(str, _conv_storage, _conv_runtime);
// Sets up _conv_runtime and _conv_storage for the configured encoding.
// Two variants are present: one opens ICU converters with ucnv_open(), the
// other stores plain encoding-name strings.
// NOTE(review): presumably the two variants are selected by a preprocessor
// conditional (ICU vs. iconv build) — confirm.
75 void Encoder::Init() {
// No encoding configured: nothing to set up.
76 if (_encoding.empty()) {
// A positive numeric encoding string is treated as a code page number and
// mapped to an encoding name via ReaderUtil::CodepageToEncoding().
// atoi() yields 0 for non-numeric input, which fails the `> 0` test.
80 auto code_page = atoi(_encoding.c_str());
81 const auto& storage_encoding = code_page > 0
82 ? ReaderUtil::CodepageToEncoding(code_page)
// Open the runtime-side converter, which is always UTF-8.
85 auto status = U_ZERO_ERROR;
86 constexpr
auto runtime_encoding =
"UTF-8";
87 auto conv_runtime = ucnv_open(runtime_encoding, &status);
// ucnv_open() returns null on failure; report the ICU error name on stderr.
89 if (conv_runtime ==
nullptr) {
90 fprintf(stderr,
"liblcf: ucnv_open() error for encoding \"%s\": %s\n", runtime_encoding, u_errorName(status));
// Reset the status before the second ucnv_open() call.
93 status = U_ZERO_ERROR;
// Guard that closes conv_runtime.
// NOTE(review): presumably the guard fires if opening the storage converter
// fails and is dismissed on success — confirm.
94 auto sg = makeScopeGuard([&]() { ucnv_close(conv_runtime); });
// Open the storage-side converter for the resolved encoding name.
96 auto conv_storage = ucnv_open(storage_encoding.c_str(), &status);
98 if (conv_storage ==
nullptr) {
99 fprintf(stderr,
"liblcf: ucnv_open() error for dest encoding \"%s\": %s\n", storage_encoding.c_str(), u_errorName(status));
// Store both opened converter handles in the members.
105 _conv_runtime = conv_runtime;
106 _conv_storage = conv_storage;
// Name-string variant: the members hold encoding names rather than converter
// handles. const_cast is needed to store the const char data in the members.
108 _conv_runtime =
const_cast<char*
>(
"UTF-8");
// This pointer aliases _encoding's internal buffer, so it is only valid
// while _encoding is left unmodified.
109 _conv_storage =
const_cast<char*
>(_encoding.c_str());
// Closes the runtime and storage ICU converters if they are non-null.
// NOTE(review): the members are not visibly set back to nullptr after being
// closed; verify that Reset() cannot run twice on the same handles.
113 void Encoder::Reset() {
// The members are cast back to UConverter* before being closed.
115 auto* conv =
reinterpret_cast<UConverter*
>(_conv_runtime);
116 if (conv) ucnv_close(conv);
117 conv =
reinterpret_cast<UConverter*
>(_conv_storage);
118 if (conv) ucnv_close(conv);
// Converts `str` in place from the source encoding to the destination
// encoding, using _buffer as scratch space for the output.
// @param str            Input text; overwritten with the converted text.
// @param conv_dst_void  Destination converter: a UConverter* in the ICU
//                       variant, an encoding-name C string in the iconv one.
// @param conv_src_void  Source converter, same representation as above.
// NOTE(review): presumably the ICU and iconv variants below are selected by
// a preprocessor conditional — confirm.
123 void Encoder::Convert(std::string& str,
void* conv_dst_void,
void* conv_src_void) {
// ICU variant. `src` is a read-only alias of `str`; the output goes to
// _buffer first, so the input is not overwritten during conversion.
125 const auto& src = str;
126 auto* conv_dst =
reinterpret_cast<UConverter*
>(conv_dst_void);
127 auto* conv_src =
reinterpret_cast<UConverter*
>(conv_src_void);
129 auto status = U_ZERO_ERROR;
// Size the output buffer at four elements per input byte.
130 _buffer.resize(src.size() * 4);
// ucnv_convertEx() advances both cursors as it consumes input and writes
// output.
132 const auto* src_p = src.c_str();
133 auto* dst_p = _buffer.data();
// The four nullptr arguments are the pivot-buffer parameters; with a null
// pivotStart ICU uses an internal pivot buffer.
135 ucnv_convertEx(conv_dst, conv_src,
136 &dst_p, dst_p + _buffer.size(),
137 &src_p, src_p + src.size(),
138 nullptr,
nullptr,
nullptr,
nullptr,
// On failure, report the input string and the ICU error name on stderr.
142 if (U_FAILURE(status)) {
143 fprintf(stderr,
"liblcf: ucnv_convertEx() error when encoding \"%s\": %s\n", src.c_str(), u_errorName(status));
// dst_p now points one past the last written element; copy that range back.
147 str.assign(_buffer.data(), dst_p);
// iconv variant. The void* parameters are encoding-name C strings here.
150 auto* conv_dst =
reinterpret_cast<const char*
>(conv_dst_void);
151 auto* conv_src =
reinterpret_cast<const char*
>(conv_src_void);
// iconv_open() takes (tocode, fromcode) and returns (iconv_t)-1 on failure.
152 iconv_t cd = iconv_open(conv_dst, conv_src);
153 if (cd == (iconv_t)-1)
155 char *src = &str.front();
156 size_t src_left = str.size();
// Size the output buffer at five elements per input byte plus ten.
157 size_t dst_size = str.size() * 5 + 10;
158 _buffer.resize(dst_size);
159 char *dst = _buffer.data();
160 size_t dst_left = dst_size;
// ICONV_CONST covers iconv implementations that declare the input pointer
// as const char**.
162 char ICONV_CONST *p = src;
// iconv() advances the pointers and decrements src_left / dst_left.
// NOTE(review): `q` is presumably the output cursor initialised from `dst`
// — confirm.
167 size_t status = iconv(cd, &p, &src_left, &q, &dst_left);
// (size_t)-1 signals an iconv error; leftover input means the conversion
// did not consume the whole string.
169 if (status == (
size_t) -1 || src_left > 0) {
// Bytes written = initial capacity minus remaining capacity.
174 str.assign(dst, dst_size - dst_left);
static std::string filterUtf8Compatible(std::string enc)