msg_tool\output_scripts/
m3t.rs1use crate::types::*;
12use anyhow::Result;
13
/// Line-oriented parser for the marker-prefixed translation script handled by this module.
///
/// Lines are recognised by their first character: `○` (original text, or a speaker name
/// when followed by `NAME:`), `△` (kept as the "llm" text) and `●` (translated text).
/// The parser borrows its input and hands out slices of it while parsing.
#[derive(Debug, Clone)]
pub struct M3tParser<'a> {
    /// Remaining, not yet consumed part of the input.
    str: &'a str,
    /// Line counter advanced as lines are consumed; reported in error messages.
    line: usize,
    /// Optional suffix appended to a translated text that comes from (or equals) the
    /// preceding `△` line.
    llm_mark: Option<&'a str>,
    /// When `true`, `parse` falls back to the `○` text for an untranslated `●` line.
    use_original_text: bool,
}
21
22impl<'a> M3tParser<'a> {
23 pub fn new(str: &'a str, llm_mark: Option<&'a str>, use_original_text: bool) -> Self {
25 M3tParser {
26 str,
27 line: 1,
28 llm_mark,
29 use_original_text,
30 }
31 }
32
33 fn next_line(&mut self) -> Option<&'a str> {
34 match self.str.find('\n') {
35 Some(pos) => {
36 let line = &self.str[..pos];
37 self.str = &self.str[pos + 1..];
38 self.line += 1;
39 Some(line.trim())
40 }
41 None => {
42 if !self.str.is_empty() {
43 let line = self.str;
44 self.str = "";
45 Some(line)
46 } else {
47 None
48 }
49 }
50 }
51 }
52
53 pub fn parse_as_vec(&mut self) -> Result<Vec<(String, String)>> {
54 let mut map = Vec::new();
55 let mut ori = None;
56 let mut llm = None;
57 while let Some(line) = self.next_line() {
58 if line.is_empty() {
59 continue;
60 }
61 let line = line.trim().trim_matches('\u{200b}');
63 if line.starts_with("○") {
64 let line = line[3..].trim();
65 if !line.starts_with("NAME:") {
66 ori = Some(line.to_string());
67 }
68 } else if line.starts_with("△") {
69 let line = line[3..].trim();
70 llm = Some(line);
71 } else if line.starts_with("●") {
72 let message = line[3..].trim();
73 let message = if message
74 .trim_start_matches("「")
75 .trim_end_matches("」")
76 .is_empty()
77 {
78 llm.take()
79 .map(|s| {
80 let mut s = s.to_string();
81 if let Some(mark) = self.llm_mark {
82 s.push_str(mark);
83 }
84 s
85 })
86 .unwrap_or_else(|| {
87 String::from(if message.starts_with("「") {
88 "「」"
89 } else {
90 ""
91 })
92 })
93 .replace("\\n", "\n")
94 } else {
95 let mut tmp = message.to_owned();
96 if let Some(llm) = llm.take() {
97 if tmp == llm {
98 if let Some(mark) = self.llm_mark {
99 tmp.push_str(mark);
100 }
101 }
102 }
103 tmp.replace("\\n", "\n")
104 };
105 if let Some(ori) = ori.take() {
106 map.push((ori, message));
107 } else {
108 return Err(anyhow::anyhow!(
109 "Missing original message before translated message at line {}",
110 self.line
111 ));
112 }
113 } else {
114 return Err(anyhow::anyhow!(
115 "Invalid line format at line {}: {}",
116 self.line,
117 line
118 ));
119 }
120 }
121 Ok(map)
122 }
123
124 pub fn parse(&mut self) -> Result<Vec<Message>> {
126 let mut messages = Vec::new();
127 let mut name = None;
128 let mut llm = None;
129 let mut ori = None;
130 while let Some(line) = self.next_line() {
131 if line.is_empty() {
132 continue;
133 }
134 let line = line.trim().trim_matches('\u{200b}');
136 if line.starts_with("○") {
137 let line = line[3..].trim();
138 if line.starts_with("NAME:") {
139 name = Some(line[5..].trim().to_string());
140 } else {
141 ori = Some(line.to_string());
142 }
143 } else if line.starts_with("△") {
144 let line = line[3..].trim();
145 llm = Some(line);
146 } else if line.starts_with("●") {
147 let message = line[3..].trim();
148 let message = if message
149 .trim_start_matches("「")
150 .trim_end_matches("」")
151 .is_empty()
152 {
153 llm.take()
154 .map(|s| {
155 let mut s = s.to_string();
156 if let Some(mark) = self.llm_mark {
157 s.push_str(mark);
158 }
159 s
160 })
161 .unwrap_or_else(|| {
162 let m = if self.use_original_text {
163 ori.clone()
164 } else {
165 None
166 };
167 m.unwrap_or_else(|| {
168 String::from(if message.starts_with("「") {
169 "「」"
170 } else {
171 ""
172 })
173 })
174 })
175 .replace("\\n", "\n")
176 } else {
177 let mut tmp = message.to_owned();
178 if let Some(llm) = llm.take() {
179 if tmp == llm {
180 if let Some(mark) = self.llm_mark {
181 tmp.push_str(mark);
182 }
183 }
184 }
185 tmp.replace("\\n", "\n")
186 };
187 messages.push(Message::new(message, name.take()));
188 } else {
189 return Err(anyhow::anyhow!(
190 "Invalid line format at line {}: {}",
191 self.line,
192 line
193 ));
194 }
195 }
196 Ok(messages)
197 }
198
199 pub fn parse_as_extend(&mut self) -> Result<Vec<ExtendedMessage>> {
200 let mut messages = Vec::new();
201 let mut name = None;
202 let mut llm = None;
203 let mut source = None;
204 while let Some(line) = self.next_line() {
205 if line.is_empty() {
206 continue;
207 }
208 let line = line.trim().trim_matches('\u{200b}');
210 if line.starts_with("○") {
211 let line = line[3..].trim();
212 if line.starts_with("NAME:") {
213 name = Some(line[5..].trim().to_string());
214 } else {
215 source = Some(line.replace("\\n", "\n"));
216 }
217 } else if line.starts_with("△") {
218 let line = line[3..].trim();
219 llm = Some(line.replace("\\n", "\n"));
220 } else if line.starts_with("●") {
221 let message = line[3..].trim();
222 let source = match source.take() {
223 Some(s) => s,
224 None => {
225 return Err(anyhow::anyhow!(
226 "Missing original message before translated message at line {}",
227 self.line
228 ));
229 }
230 };
231 let m = ExtendedMessage {
232 name: name.take(),
233 source,
234 translated: message.replace("\\n", "\n"),
235 llm: llm.take(),
236 };
237 messages.push(m);
238 }
239 }
240 Ok(messages)
241 }
242}
243
/// Stateless namespace for the functions that serialise messages into the marker-prefixed
/// script format read by `M3tParser`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct M3tDumper {}
246
247impl M3tDumper {
248 pub fn dump(messages: &[Message], no_quote: bool) -> String {
250 let mut result = String::new();
251 for message in messages {
252 if let Some(name) = &message.name {
253 result.push_str(&format!("○ NAME: {}\n\n", name));
254 }
255 result.push_str(&format!("○ {}\n", message.message.replace("\n", "\\n")));
256 if !no_quote && message.message.starts_with("「") {
257 result.push_str("● 「」\n\n");
258 } else {
259 result.push_str("●\n\n");
260 }
261 }
262 result
263 }
264
265 pub fn dump_extended(messages: &[ExtendedMessage]) -> String {
267 let mut result = String::new();
268 for message in messages {
269 if let Some(name) = &message.name {
270 result.push_str(&format!("○ NAME: {}\n\n", name));
271 }
272 result.push_str(&format!("○ {}\n", message.source.replace("\n", "\\n")));
273 if let Some(llm) = &message.llm {
274 result.push_str(&format!("△ {}\n", llm.replace("\n", "\\n")));
275 }
276 result.push_str(&format!(
277 "● {}\n\n",
278 message.translated.replace("\n", "\\n")
279 ));
280 }
281 result
282 }
283}
284
/// A `●` line that is preceded by a zero-width space (U+200B) must still be recognised
/// by both `parse` and `parse_as_vec`.
#[test]
fn test_zero_width_space() {
    let input = "○ NAME: Example\n\n○ Original message\n\u{200b}● 「」\n\n";

    let parsed = M3tParser::new(input, None, false).parse().unwrap();
    assert_eq!(parsed.len(), 1);

    let pairs = M3tParser::new(input, None, false).parse_as_vec().unwrap();
    assert_eq!(pairs.len(), 1);
}