use anyhow::Result;
use windows_sys::Win32::Foundation::{ERROR_NO_UNICODE_TRANSLATION, GetLastError, SetLastError};
use windows_sys::Win32::Globalization::{
    CP_UTF7, CP_UTF8, MB_ERR_INVALID_CHARS, MultiByteToWideChar, WC_ERR_INVALID_CHARS,
    WideCharToMultiByte,
};
use windows_sys::Win32::System::Diagnostics::Debug::{
    FORMAT_MESSAGE_FROM_SYSTEM, FORMAT_MESSAGE_IGNORE_INSERTS, FormatMessageW,
};
10
/// A numeric Windows error code wrapped as a Rust error type.
///
/// The code is stored verbatim; turning it into human-readable text is left
/// to the type's `Display` implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct WinError {
    /// The raw error code, e.g. as returned by `GetLastError`.
    pub code: u32,
}
15
16impl WinError {
17 pub fn new(code: u32) -> Self {
18 WinError { code }
19 }
20
21 pub fn from_last_error() -> Self {
22 let code = unsafe { GetLastError() };
23 WinError::new(code)
24 }
25}
26
// Marker impl: lets `WinError` be used wherever a `std::error::Error` is
// expected (this file converts it with `.into()` into the `anyhow` error type).
impl std::error::Error for WinError {}
28
29impl std::fmt::Display for WinError {
30 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
31 let mut buffer = [0u16; 256];
32 let len = unsafe {
33 FormatMessageW(
34 FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
35 std::ptr::null(),
36 self.code,
37 0,
38 buffer.as_mut_ptr(),
39 buffer.len() as u32,
40 std::ptr::null_mut(),
41 )
42 };
43 if len == 0 {
44 write!(f, "Unknown error code: 0x{:08X}", self.code)
45 } else {
46 let message = String::from_utf16_lossy(&buffer[..len as usize]);
47 write!(f, "{} (0x{:08X})", message.trim(), self.code)
48 }
49 }
50}
51
/// Returns `true` when `cp` is one of the code pages that `decode_to_string`
/// must convert with `dwFlags == 0` (i.e. without `MB_ERR_INVALID_CHARS`).
fn is_special_code_page(cp: u32) -> bool {
    // Individually listed identifiers.
    const SINGLES: [u32; 8] = [50220, 50221, 50222, 50225, 50227, 50229, 65000, 42];
    // One contiguous block of identifiers, both ends inclusive.
    const BLOCK: std::ops::RangeInclusive<u32> = 57002..=57011;

    SINGLES.contains(&cp) || BLOCK.contains(&cp)
}
58
59pub fn decode_to_string(cp: u32, data: &[u8], check: bool) -> Result<String> {
60 if data.is_empty() {
61 return Ok(String::new());
62 }
63 let dwflags = if check && !is_special_code_page(cp) {
64 MB_ERR_INVALID_CHARS
65 } else {
66 0
67 };
68 let needed_len = unsafe {
69 MultiByteToWideChar(
70 cp,
71 dwflags,
72 data.as_ptr() as _,
73 data.len() as i32,
74 std::ptr::null_mut(),
75 0,
76 )
77 };
78 if needed_len == 0 {
79 return Err(WinError::from_last_error().into());
80 }
81 let last_error = unsafe { GetLastError() };
82 if last_error == ERROR_NO_UNICODE_TRANSLATION {
83 if check {
84 return Err(WinError::new(last_error).into());
85 } else {
86 eprintln!(
87 "Warning: Some characters could not be decoded in code page {}: {:?}",
88 cp, data
89 );
90 crate::COUNTER.inc_warning();
91 }
92 }
93 let mut wc = Vec::with_capacity(needed_len as usize);
94 wc.resize(needed_len as usize, 0);
95 let result = unsafe {
96 MultiByteToWideChar(
97 cp,
98 dwflags,
99 data.as_ptr() as _,
100 data.len() as i32,
101 wc.as_mut_ptr(),
102 needed_len,
103 )
104 };
105 if result == 0 {
106 return Err(WinError::from_last_error().into());
107 }
108 Ok(String::from_utf16_lossy(&wc))
109}
110
111pub fn encode_string(cp: u32, data: &str, check: bool) -> Result<Vec<u8>> {
112 if data.is_empty() {
113 return Ok(Vec::new());
114 }
115 let dwflags = if check && (cp == 65001 || cp == 54936) {
116 WC_ERR_INVALID_CHARS
117 } else {
118 0
119 };
120 let wstr = data.encode_utf16().collect::<Vec<u16>>();
121 let needed_len = unsafe {
122 WideCharToMultiByte(
123 cp,
124 dwflags,
125 wstr.as_ptr(),
126 wstr.len() as i32,
127 std::ptr::null_mut(),
128 0,
129 std::ptr::null_mut(),
130 std::ptr::null_mut(),
131 )
132 };
133 if needed_len == 0 {
134 return Err(WinError::from_last_error().into());
135 }
136 let mut mb = Vec::with_capacity(needed_len as usize);
137 mb.resize(needed_len as usize, 0);
138 let mut used_default_char = 0;
139 let result = unsafe {
140 WideCharToMultiByte(
141 cp,
142 dwflags,
143 wstr.as_ptr(),
144 wstr.len() as i32,
145 mb.as_mut_ptr(),
146 needed_len,
147 std::ptr::null_mut(),
148 if cp == CP_UTF7 || cp == CP_UTF8 {
149 std::ptr::null_mut()
150 } else {
151 &mut used_default_char
152 },
153 )
154 };
155 if used_default_char != 0 {
156 if check {
157 return Err(anyhow::anyhow!(
158 "Some characters could not be encoded in code page {}: {}",
159 cp,
160 data
161 ));
162 } else {
163 eprintln!(
164 "Warning: Some characters could not be encoded in code page {}: {}",
165 cp, data
166 );
167 crate::COUNTER.inc_warning();
168 }
169 }
170 if result == 0 {
171 return Err(WinError::from_last_error().into());
172 }
173 Ok(mb)
174}
175
/// Decodes known byte sequences in code pages 65001, 932 and 936 with
/// checking enabled and compares each result with the expected text.
#[test]
fn test_decode_to_string() {
    // (code page, encoded bytes, expected text)
    let cases: [(u32, &[u8], &str); 3] = [
        (
            65001,
            &[228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149],
            "中文测试",
        ),
        (
            932,
            &[
                130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198,
            ],
            "きゃべつそふと",
        ),
        (936, &[214, 208, 206, 196], "中文"),
    ];

    for &(code_page, bytes, expected) in cases.iter() {
        let decoded = decode_to_string(code_page, bytes, true).unwrap();
        assert_eq!(decoded, expected.to_string());
    }
}
203
/// Encodes known strings into code pages 65001, 932 and 936 with checking
/// enabled, compares the bytes, and verifies that one text is rejected when
/// encoded into code page 936.
#[test]
fn test_encode_string() {
    // (code page, text, expected bytes)
    let cases: [(u32, &str, &[u8]); 3] = [
        (
            65001,
            "中文测试",
            &[228, 184, 173, 230, 150, 135, 230, 181, 139, 232, 175, 149],
        ),
        (
            932,
            "きゃべつそふと",
            &[
                130, 171, 130, 225, 130, 215, 130, 194, 130, 187, 130, 211, 130, 198,
            ],
        ),
        (936, "中文", &[214, 208, 206, 196]),
    ];

    for &(code_page, text, expected) in cases.iter() {
        let encoded = encode_string(code_page, text, true).unwrap();
        assert_eq!(encoded, expected.to_vec());
    }

    // With checking enabled, this text must be rejected for code page 936.
    let rejected = encode_string(
        936,
        "「あ、こーら、逃げちゃダメだよー? 起きちゃうのも、まだダメだけ\nどね♪」",
        true,
    );
    assert!(rejected.is_err());
}