msg_tool\output_scripts/
m3t.rs

//! A simple text format that holds the original, LLM-generated, and translated text of each message.
2//!
3//! A simple m3t file example:
4//! ```text
5//! ○ NAME: Example
6//!
7//! ○ Original message
8//! △ LLM message
9//! ● Translated message
10//! ```
11use crate::types::Message;
12use anyhow::Result;
13
/// A parser for the M3T format.
///
/// The parser borrows its input and consumes it line by line, so one
/// instance is meant for a single parsing pass.
#[derive(Debug, Clone)]
pub struct M3tParser<'a> {
    /// Input that has not been consumed yet.
    str: &'a str,
    /// Line counter reported in parse error messages.
    line: usize,
    /// Optional marker appended to a message whose text is taken from the
    /// LLM (`△`) line.
    llm_mark: Option<&'a str>,
}
20
21impl<'a> M3tParser<'a> {
22    /// Creates a new M3tParser with the given string.
23    pub fn new(str: &'a str, llm_mark: Option<&'a str>) -> Self {
24        M3tParser {
25            str,
26            line: 1,
27            llm_mark,
28        }
29    }
30
31    fn next_line(&mut self) -> Option<&'a str> {
32        match self.str.find('\n') {
33            Some(pos) => {
34                let line = &self.str[..pos];
35                self.str = &self.str[pos + 1..];
36                self.line += 1;
37                Some(line.trim())
38            }
39            None => {
40                if !self.str.is_empty() {
41                    let line = self.str;
42                    self.str = "";
43                    Some(line)
44                } else {
45                    None
46                }
47            }
48        }
49    }
50
51    pub fn parse_as_vec(&mut self) -> Result<Vec<(String, String)>> {
52        let mut map = Vec::new();
53        let mut ori = None;
54        let mut llm = None;
55        while let Some(line) = self.next_line() {
56            if line.is_empty() {
57                continue;
58            }
59            // Remove zero-width space characters
60            let line = line.trim().trim_matches('\u{200b}');
61            if line.starts_with("○") {
62                let line = line[3..].trim();
63                if !line.starts_with("NAME:") {
64                    ori = Some(line.to_string());
65                }
66            } else if line.starts_with("△") {
67                let line = line[3..].trim();
68                llm = Some(line);
69            } else if line.starts_with("●") {
70                let message = line[3..].trim();
71                let message = if message
72                    .trim_start_matches("「")
73                    .trim_end_matches("」")
74                    .is_empty()
75                {
76                    llm.take()
77                        .map(|s| {
78                            let mut s = s.to_string();
79                            if let Some(mark) = self.llm_mark {
80                                s.push_str(mark);
81                            }
82                            s
83                        })
84                        .unwrap_or_else(|| {
85                            String::from(if message.starts_with("「") {
86                                "「」"
87                            } else {
88                                ""
89                            })
90                        })
91                        .replace("\\n", "\n")
92                } else {
93                    let mut tmp = message.to_owned();
94                    if let Some(llm) = llm.take() {
95                        if tmp == llm {
96                            if let Some(mark) = self.llm_mark {
97                                tmp.push_str(mark);
98                            }
99                        }
100                    }
101                    tmp.replace("\\n", "\n")
102                };
103                if let Some(ori) = ori.take() {
104                    map.push((ori, message));
105                } else {
106                    return Err(anyhow::anyhow!(
107                        "Missing original message before translated message at line {}",
108                        self.line
109                    ));
110                }
111            } else {
112                return Err(anyhow::anyhow!(
113                    "Invalid line format at line {}: {}",
114                    self.line,
115                    line
116                ));
117            }
118        }
119        Ok(map)
120    }
121
122    /// Parses the M3T format and returns a vector of messages.
123    pub fn parse(&mut self) -> Result<Vec<Message>> {
124        let mut messages = Vec::new();
125        let mut name = None;
126        let mut llm = None;
127        while let Some(line) = self.next_line() {
128            if line.is_empty() {
129                continue;
130            }
131            // Remove zero-width space characters
132            let line = line.trim().trim_matches('\u{200b}');
133            if line.starts_with("○") {
134                let line = line[3..].trim();
135                if line.starts_with("NAME:") {
136                    name = Some(line[5..].trim().to_string());
137                }
138            } else if line.starts_with("△") {
139                let line = line[3..].trim();
140                llm = Some(line);
141            } else if line.starts_with("●") {
142                let message = line[3..].trim();
143                let message = if message
144                    .trim_start_matches("「")
145                    .trim_end_matches("」")
146                    .is_empty()
147                {
148                    llm.take()
149                        .map(|s| {
150                            let mut s = s.to_string();
151                            if let Some(mark) = self.llm_mark {
152                                s.push_str(mark);
153                            }
154                            s
155                        })
156                        .unwrap_or_else(|| {
157                            String::from(if message.starts_with("「") {
158                                "「」"
159                            } else {
160                                ""
161                            })
162                        })
163                        .replace("\\n", "\n")
164                } else {
165                    let mut tmp = message.to_owned();
166                    if let Some(llm) = llm.take() {
167                        if tmp == llm {
168                            if let Some(mark) = self.llm_mark {
169                                tmp.push_str(mark);
170                            }
171                        }
172                    }
173                    tmp.replace("\\n", "\n")
174                };
175                messages.push(Message::new(message, name.take()));
176            } else {
177                return Err(anyhow::anyhow!(
178                    "Invalid line format at line {}: {}",
179                    self.line,
180                    line
181                ));
182            }
183        }
184        Ok(messages)
185    }
186}
187
188/// A dumper for the M3T format.
189pub struct M3tDumper {}
190
191impl M3tDumper {
192    /// Dumps the messages in M3T format.
193    pub fn dump(messages: &[Message], no_quote: bool) -> String {
194        let mut result = String::new();
195        for message in messages {
196            if let Some(name) = &message.name {
197                result.push_str(&format!("○ NAME: {}\n\n", name));
198            }
199            result.push_str(&format!("○ {}\n", message.message.replace("\n", "\\n")));
200            if !no_quote && message.message.starts_with("「") {
201                result.push_str("● 「」\n\n");
202            } else {
203                result.push_str("●\n\n");
204            }
205        }
206        result
207    }
208}
209
/// A `●` line preceded by a zero-width space (U+200B) must still be
/// recognised as a translated-message line by both parsing entry points.
#[test]
fn test_zero_width_space() {
    let input = "○ NAME: Example\n\n○ Original message\n\u{200b}● 「」\n\n";

    let messages = M3tParser::new(input, None).parse().unwrap();
    assert_eq!(messages.len(), 1);
    // With no `△` line, a quoted empty translation stays `「」`.
    assert_eq!(messages[0].name.as_deref(), Some("Example"));
    assert_eq!(messages[0].message, "「」");

    let map = M3tParser::new(input, None).parse_as_vec().unwrap();
    assert_eq!(
        map,
        vec![("Original message".to_string(), "「」".to_string())]
    );
}