// msg_tool/utils/encoding_win.rs

1use anyhow::Result;
2use windows_sys::Win32::Foundation::{ERROR_NO_UNICODE_TRANSLATION, GetLastError};
3use windows_sys::Win32::Globalization::{
4    CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WC_ERR_INVALID_CHARS,
5    WideCharToMultiByte,
6};
7use windows_sys::Win32::System::Diagnostics::Debug::{
8    FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
9};
10
/// A Win32 error identified by its raw numeric error code.
///
/// Only the code is stored, so the value is cheap to copy and compare.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct WinError {
    /// Raw Win32 error code (for example a value returned by `GetLastError`).
    pub code: u32,
}
15
16impl WinError {
17    pub fn new(code: u32) -> Self {
18        WinError { code }
19    }
20
21    pub fn from_last_error() -> Self {
22        let code = unsafe { GetLastError() };
23        WinError::new(code)
24    }
25}
26
27impl std::error::Error for WinError {}
28
29impl std::fmt::Display for WinError {
30    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31        let mut buffer = [0u16; 256];
32        let len = unsafe {
33            FormatMessageW(
34                FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
35                std::ptr::null(),
36                self.code,
37                0,
38                buffer.as_mut_ptr(),
39                buffer.len() as u32,
40                std::ptr::null_mut(),
41            )
42        };
43        if len == 0 {
44            write!(f, "Unknown error code: 0x{:08X}", self.code)
45        } else {
46            let message = String::from_utf16_lossy(&buffer[..len as usize]);
47            write!(f, "{} (0x{:08X})", message.trim(), self.code)
48        }
49    }
50}
51
/// Returns `true` for the code pages that `decode_to_string` must convert
/// with `dwFlags == 0` (it never passes `MB_ERR_INVALID_CHARS` for these).
fn is_special_code_page(cp: u32) -> bool {
    match cp {
        42 | 65000 => true,
        50220..=50222 | 50225 | 50227 | 50229 => true,
        57002..=57011 => true,
        _ => false,
    }
}
58
59pub fn decode_to_string(cp: u32, data: &[u8], check: bool) -> Result<String> {
60    if data.is_empty() {
61        return Ok(String::new());
62    }
63    let dwflags = if check && !is_special_code_page(cp) {
64        MB_ERR_INVALID_CHARS
65    } else {
66        0
67    };
68    let needed_len = unsafe {
69        MultiByteToWideChar(
70            cp,
71            dwflags,
72            data.as_ptr() as _,
73            data.len() as i32,
74            std::ptr::null_mut(),
75            0,
76        )
77    };
78    if needed_len == 0 {
79        return Err(WinError::from_last_error().into());
80    }
81    let last_error = unsafe { GetLastError() };
82    if last_error == ERROR_NO_UNICODE_TRANSLATION {
83        if check {
84            return Err(WinError::new(last_error).into());
85        } else {
86            eprintln!(
87                "Warning: Some characters could not be decoded in code page {}: {:?}",
88                cp, data
89            );
90            crate::COUNTER.inc_warning();
91        }
92    }
93    let mut wc = Vec::with_capacity(needed_len as usize);
94    wc.resize(needed_len as usize, 0);
95    let result = unsafe {
96        MultiByteToWideChar(
97            cp,
98            dwflags,
99            data.as_ptr() as _,
100            data.len() as i32,
101            wc.as_mut_ptr(),
102            needed_len,
103        )
104    };
105    if result == 0 {
106        return Err(WinError::from_last_error().into());
107    }
108    Ok(String::from_utf16_lossy(&wc))
109}
110
111pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>> {
112    if data.is_empty() {
113        return Ok(Vec::new());
114    }
115    let dwflags = if check && (cp == 65001 || cp == 54936) {
116        WC_ERR_INVALID_CHARS
117    } else {
118        0
119    };
120    let wstr = data.encode_utf16().collect::<Vec<u16>>();
121    let needed_len = unsafe {
122        WideCharToMultiByte(
123            cp,
124            dwflags,
125            wstr.as_ptr(),
126            wstr.len() as i32,
127            std::ptr::null_mut(),
128            0,
129            std::ptr::null_mut(),
130            std::ptr::null_mut(),
131        )
132    };
133    if needed_len == 0 {
134        return Err(WinError::from_last_error().into());
135    }
136    let mut mb = Vec::with_capacity(needed_len as usize);
137    mb.resize(needed_len as usize, 0);
138    let mut used_default_char = 0;
139    let result = unsafe {
140        WideCharToMultiByte(
141            cp,
142            dwflags,
143            wstr.as_ptr(),
144            wstr.len() as i32,
145            mb.as_mut_ptr(),
146            needed_len,
147            std::ptr::null_mut(),
148            if cp == CP_UTF7 || cp == CP_UTF8 {
149                std::ptr::null_mut()
150            } else {
151                &mut used_default_char
152            },
153        )
154    };
155    if used_default_char != 0 {
156        if check {
157            return Err(anyhow::anyhow!(
158                "Some characters could not be encoded in code page {}: {}",
159                cp,
160                data
161            ));
162        } else {
163            eprintln!(
164                "Warning: Some characters could not be encoded in code page {}: {}",
165                cp, data
166            );
167            crate::COUNTER.inc_warning();
168        }
169    }
170    if result == 0 {
171        return Err(WinError::from_last_error().into());
172    }
173    Ok(mb)
174}
175
/// Strict decoding of known byte sequences in code pages 65001, 932 and 936.
#[test]
fn test_decode_to_string() {
    let utf8_bytes: &[u8] = &[228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149];
    let cp932_bytes: &[u8] = &[
        130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198,
    ];
    let cp936_bytes: &[u8] = &[214, 208, 206, 196];

    // (code page, encoded input, expected text)
    let cases = [
        (65001u32, utf8_bytes, "中文测试"),
        (932, cp932_bytes, "きゃべつそふと"),
        (936, cp936_bytes, "中文"),
    ];
    for (cp, bytes, expected) in cases {
        let decoded = decode_to_string(cp, bytes, true).unwrap();
        assert_eq!(decoded, expected.to_string());
    }
}
203
/// Strict encoding into code pages 65001, 932 and 936, plus one input that
/// code page 936 must reject.
#[test]
fn test_encode_string() {
    let utf8_bytes: &[u8] = &[228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149];
    let cp932_bytes: &[u8] = &[
        130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198,
    ];
    let cp936_bytes: &[u8] = &[214, 208, 206, 196];

    // (code page, input text, expected encoded bytes)
    let cases = [
        (65001u32, "中文测试", utf8_bytes),
        (932, "きゃべつそふと", cp932_bytes),
        (936, "中文", cp936_bytes),
    ];
    for (cp, text, expected) in cases {
        let encoded = encode_string(cp, text, true).unwrap();
        assert_eq!(encoded, expected.to_vec());
    }

    // With `check` enabled, this text must fail to encode in code page 936.
    let rejected = encode_string(
        936,
        "「あ、こーら、逃げちゃダメだよー? 起きちゃうのも、まだダメだけ\nどね♪」",
        true,
    );
    assert!(rejected.is_err());
}
228}