msg_tool\output_scripts/
m3t.rs1use crate::types::Message;
12use anyhow::Result;
13
14pub struct M3tParser<'a> {
16 str: &'a str,
17 line: usize,
18 llm_mark: Option<&'a str>,
19}
20
21impl<'a> M3tParser<'a> {
22 pub fn new(str: &'a str, llm_mark: Option<&'a str>) -> Self {
24 M3tParser {
25 str,
26 line: 1,
27 llm_mark,
28 }
29 }
30
31 fn next_line(&mut self) -> Option<&'a str> {
32 match self.str.find('\n') {
33 Some(pos) => {
34 let line = &self.str[..pos];
35 self.str = &self.str[pos + 1..];
36 self.line += 1;
37 Some(line.trim())
38 }
39 None => {
40 if !self.str.is_empty() {
41 let line = self.str;
42 self.str = "";
43 Some(line)
44 } else {
45 None
46 }
47 }
48 }
49 }
50
51 pub fn parse_as_vec(&mut self) -> Result<Vec<(String, String)>> {
52 let mut map = Vec::new();
53 let mut ori = None;
54 let mut llm = None;
55 while let Some(line) = self.next_line() {
56 if line.is_empty() {
57 continue;
58 }
59 let line = line.trim().trim_matches('\u{200b}');
61 if line.starts_with("○") {
62 let line = line[3..].trim();
63 if !line.starts_with("NAME:") {
64 ori = Some(line.to_string());
65 }
66 } else if line.starts_with("△") {
67 let line = line[3..].trim();
68 llm = Some(line);
69 } else if line.starts_with("●") {
70 let message = line[3..].trim();
71 let message = if message
72 .trim_start_matches("「")
73 .trim_end_matches("」")
74 .is_empty()
75 {
76 llm.take()
77 .map(|s| {
78 let mut s = s.to_string();
79 if let Some(mark) = self.llm_mark {
80 s.push_str(mark);
81 }
82 s
83 })
84 .unwrap_or_else(|| {
85 String::from(if message.starts_with("「") {
86 "「」"
87 } else {
88 ""
89 })
90 })
91 .replace("\\n", "\n")
92 } else {
93 let mut tmp = message.to_owned();
94 if let Some(llm) = llm.take() {
95 if tmp == llm {
96 if let Some(mark) = self.llm_mark {
97 tmp.push_str(mark);
98 }
99 }
100 }
101 tmp.replace("\\n", "\n")
102 };
103 if let Some(ori) = ori.take() {
104 map.push((ori, message));
105 } else {
106 return Err(anyhow::anyhow!(
107 "Missing original message before translated message at line {}",
108 self.line
109 ));
110 }
111 } else {
112 return Err(anyhow::anyhow!(
113 "Invalid line format at line {}: {}",
114 self.line,
115 line
116 ));
117 }
118 }
119 Ok(map)
120 }
121
122 pub fn parse(&mut self) -> Result<Vec<Message>> {
124 let mut messages = Vec::new();
125 let mut name = None;
126 let mut llm = None;
127 while let Some(line) = self.next_line() {
128 if line.is_empty() {
129 continue;
130 }
131 let line = line.trim().trim_matches('\u{200b}');
133 if line.starts_with("○") {
134 let line = line[3..].trim();
135 if line.starts_with("NAME:") {
136 name = Some(line[5..].trim().to_string());
137 }
138 } else if line.starts_with("△") {
139 let line = line[3..].trim();
140 llm = Some(line);
141 } else if line.starts_with("●") {
142 let message = line[3..].trim();
143 let message = if message
144 .trim_start_matches("「")
145 .trim_end_matches("」")
146 .is_empty()
147 {
148 llm.take()
149 .map(|s| {
150 let mut s = s.to_string();
151 if let Some(mark) = self.llm_mark {
152 s.push_str(mark);
153 }
154 s
155 })
156 .unwrap_or_else(|| {
157 String::from(if message.starts_with("「") {
158 "「」"
159 } else {
160 ""
161 })
162 })
163 .replace("\\n", "\n")
164 } else {
165 let mut tmp = message.to_owned();
166 if let Some(llm) = llm.take() {
167 if tmp == llm {
168 if let Some(mark) = self.llm_mark {
169 tmp.push_str(mark);
170 }
171 }
172 }
173 tmp.replace("\\n", "\n")
174 };
175 messages.push(Message::new(message, name.take()));
176 } else {
177 return Err(anyhow::anyhow!(
178 "Invalid line format at line {}: {}",
179 self.line,
180 line
181 ));
182 }
183 }
184 Ok(messages)
185 }
186}
187
188pub struct M3tDumper {}
190
191impl M3tDumper {
192 pub fn dump(messages: &[Message], no_quote: bool) -> String {
194 let mut result = String::new();
195 for message in messages {
196 if let Some(name) = &message.name {
197 result.push_str(&format!("○ NAME: {}\n\n", name));
198 }
199 result.push_str(&format!("○ {}\n", message.message.replace("\n", "\\n")));
200 if !no_quote && message.message.starts_with("「") {
201 result.push_str("● 「」\n\n");
202 } else {
203 result.push_str("●\n\n");
204 }
205 }
206 result
207 }
208}
209
/// A zero-width space in front of the `●` marker must not break either parser
/// entry point.
#[test]
fn test_zero_width_space() {
    let input = "○ NAME: Example\n\n○ Original message\n\u{200b}● 「」\n\n";

    let parsed = M3tParser::new(input, None).parse().unwrap();
    assert_eq!(parsed.len(), 1);

    let mut pair_parser = M3tParser::new(input, None);
    let pairs = pair_parser.parse_as_vec().unwrap();
    assert_eq!(pairs.len(), 1);
}