1use crate::ext::io::*;
2use crate::types::*;
3use crate::utils::encoding::*;
4use anyhow::Result;
5use serde::{Deserialize, Serialize};
6use std::collections::{HashMap, HashSet};
7use std::io::{Read, Seek, SeekFrom};
8
/// Kind of a single operand that follows an opcode in the byte stream.
///
/// The `OPS` table lists, per opcode, which kinds to expect and in what order;
/// `Data::read_func` uses that list to decide how each operand is read.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Oper {
    /// Read with `read_u8`; decoded into `Operand::B`.
    B,
    /// Read with `read_u16`; decoded into `Operand::W`.
    W,
    /// Read with `read_u32`; decoded into `Operand::D`.
    D,
    /// A one-byte length followed by a C string; decoded into `Operand::S`.
    S,
    /// Read with `read_f32`; decoded into `Operand::F`.
    F,
}
22
23use Oper::*;
24
/// A decoded operand value attached to an instruction.
///
/// Serialized by serde in the adjacently tagged form: the variant name is
/// stored under the key `"t"` and the payload under the key `"c"`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "t", content = "c")]
pub enum Operand {
    /// 8-bit value.
    B(u8),
    /// 16-bit value.
    W(u16),
    /// 32-bit value.
    D(u32),
    /// Text, already decoded from the script's encoding into a Rust `String`.
    S(String),
    /// 32-bit floating point value.
    F(f32),
}
34
35impl Operand {
36 pub fn len(&self, encoding: Encoding) -> Result<usize> {
37 Ok(match self {
38 Operand::B(_) => 1,
39 Operand::W(_) => 2,
40 Operand::D(_) => 4,
41 Operand::S(s) => {
42 let bytes = encode_string(encoding, s, true)?;
43 bytes.len() + 2
45 }
46 Operand::F(_) => 4,
47 })
48 }
49}
50
51const OPS: [(u8, &[Oper]); 49] = [
52 (0x00, &[]), (0x01, &[B, B]), (0x02, &[D]), (0x03, &[W]), (0x04, &[]), (0x05, &[]), (0x06, &[D]), (0x07, &[D]), (0x08, &[]), (0x09, &[]), (0x0a, &[D]), (0x0b, &[W]), (0x0c, &[B]), (0x0d, &[F]), (0x0e, &[S]), (0x0f, &[W]), (0x10, &[B]), (0x11, &[W]), (0x12, &[B]), (0x13, &[]), (0x14, &[]), (0x15, &[W]), (0x16, &[B]), (0x17, &[W]), (0x18, &[B]), (0x19, &[]), (0x1a, &[]), (0x1b, &[]), (0x1c, &[]), (0x1d, &[]), (0x1e, &[]), (0x1f, &[]), (0x20, &[]), (0x21, &[]), (0x22, &[]), (0x23, &[]), (0x24, &[]), (0x25, &[]), (0x26, &[]), (0x27, &[]), (0x33, &[]),
93 (0x3f, &[]),
94 (0x40, &[]),
95 (0xb3, &[]),
96 (0xb8, &[]),
97 (0xd8, &[]),
98 (0xf0, &[]),
99 (0x52, &[]),
100 (0x9e, &[]),
101];
102
/// One decoded instruction: an opcode together with its operands.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Func {
    /// Stream position at which the opcode byte was read.
    pub pos: u64,
    /// The opcode byte; always one of the opcodes listed in `OPS`.
    pub opcode: u8,
    /// Operands in the order they were read after the opcode.
    pub operands: Vec<Operand>,
}
109
/// A disassembled script, as produced by `Data::disasm`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Data {
    /// Instructions that start before the main-script offset.
    pub functions: Vec<Func>,
    /// Instructions that start at or after the main-script offset.
    pub main_script: Vec<Func>,
    /// Every byte from offset `script_len + 4` to the end of the stream, kept verbatim.
    pub extra_data: Vec<u8>,
    /// Indices into `functions` of the `0x01` instructions recognized as
    /// "speak" functions by `find_speak_functions`. Not serialized.
    #[serde(skip)]
    speak_func_indices: HashSet<u32>,
    /// Maps the stream position of each `0x01` instruction in `functions` to
    /// its index in that vector. Not serialized.
    #[serde(skip)]
    func_pos_map: HashMap<u64, usize>,
    /// For each speak function index, the non-blank string operands of the
    /// `0x0e` instructions found in its range. Not serialized.
    #[serde(skip)]
    speaker_names: HashMap<usize, Vec<String>>,
    /// Strings decoded from the import list located inside the trailing data.
    pub sys_imports: Vec<String>,
}
123
124impl Data {
125 pub fn disasm<R: Read + Seek>(mut reader: R, encoding: Encoding) -> Result<Self> {
126 let mut data = Data {
127 functions: Vec::new(),
128 main_script: Vec::new(),
129 extra_data: Vec::new(),
130 speak_func_indices: HashSet::new(),
131 func_pos_map: HashMap::new(),
132 speaker_names: HashMap::new(),
133 sys_imports: Vec::new(),
134 };
135 let script_len = reader.read_u32()? as u64;
136 let main_script_data = reader.peek_u32_at(script_len)? as u64;
137 {
138 let mut target = &mut data.functions;
139 let mut pos = reader.stream_position()?;
140 while pos < script_len {
141 if pos >= main_script_data {
142 target = &mut data.main_script;
143 }
144 target.push(Self::read_func(&mut reader, encoding)?);
145 pos = reader.stream_position()?;
146 }
147 }
148 reader.seek(SeekFrom::Start(script_len + 4))?;
149 reader.read_to_end(&mut data.extra_data)?;
150 let mut off = script_len + 10;
151 let offset = reader.peek_u8_at(off)?;
152 off += 1 + offset as u64;
153 let sysimport_num = reader.peek_u16_at(off)?;
154 off += 2;
155 for _ in 0..sysimport_num {
156 let s = reader.peek_cstring_at(off + 2)?;
157 let s = decode_to_string(encoding, s.as_bytes(), true)?;
158 data.sys_imports.push(s);
159 off += 2 + reader.peek_u8_at(off + 1)? as u64;
160 }
161 data.index_functions();
162 data.find_speak_functions();
163 data.collect_speaker_names();
164
165 Ok(data)
166 }
167
168 fn index_functions(&mut self) {
169 for (idx, func) in self.functions.iter().enumerate() {
170 if func.opcode == 0x01 {
171 self.func_pos_map.insert(func.pos, idx);
172 }
173 }
174 }
175
176 fn find_speak_functions(&mut self) {
177 for (idx, func) in self.functions.iter().enumerate() {
178 if func.opcode == 0x01 {
179 if let (Some(Operand::B(arg_count)), Some(Operand::B(0))) =
181 (func.operands.first(), func.operands.get(1))
182 {
183 if *arg_count == 3 || *arg_count == 5 {
184 self.speak_func_indices.insert(idx as u32);
185 }
186 }
187 }
188 }
189 }
190
191 fn collect_speaker_names(&mut self) {
192 let func_starts: Vec<usize> = self
193 .functions
194 .iter()
195 .enumerate()
196 .filter(|(_, f)| f.opcode == 0x01)
197 .map(|(i, _)| i)
198 .collect();
199
200 for &speak_idx in &self.speak_func_indices {
201 let speak_idx = speak_idx as usize;
202
203 let start_pos = func_starts.iter().position(|&s| s == speak_idx);
204 if let Some(pos) = start_pos {
205 let end = func_starts
206 .get(pos + 1)
207 .copied()
208 .unwrap_or(self.functions.len());
209 let names: Vec<String> = (speak_idx..end)
210 .filter(|&i| self.functions[i].opcode == 0x0e)
211 .filter_map(|i| match self.functions[i].operands.first() {
212 Some(Operand::S(s)) if !s.trim().is_empty() => Some(s.clone()),
213 _ => None,
214 })
215 .collect();
216
217 if !names.is_empty() {
218 self.speaker_names.insert(speak_idx, names);
219 }
220 }
221 }
222 }
223
224 fn get_speaker(&self, func_idx: usize) -> Option<String> {
225 let names = self.speaker_names.get(&func_idx)?;
226
227 if let Some(name) = names.iter().filter(|n| !n.contains('?')).last() {
229 return Some(name.trim().to_string());
230 }
231
232 names.last().and_then(|name| {
234 let cleaned = name.trim().trim_start_matches('?').trim();
235 if !cleaned.is_empty() {
236 Some(cleaned.to_string())
237 } else {
238 None
239 }
240 })
241 }
242
243 pub fn extract_messages(&self, filter_ascii: bool) -> Vec<(Option<String>, String)> {
244 let mut messages = Vec::new();
245
246 for func in &self.functions {
248 if func.opcode == 0x0e {
249 if let Some(Operand::S(s)) = func.operands.first() {
250 if !(filter_ascii && s.chars().all(|c| c.is_ascii())) {
251 messages.push((None, s.clone()));
252 }
253 }
254 }
255 }
256
257 let mut current_speaker: Option<String> = None;
259
260 for func in &self.main_script {
261 if func.opcode == 0x02 {
262 if let Some(Operand::D(call_target)) = func.operands.first() {
263 if let Some(&func_idx) = self.func_pos_map.get(&(*call_target as u64)) {
264 if self.speak_func_indices.contains(&(func_idx as u32)) {
265 current_speaker = self.get_speaker(func_idx);
266 }
267 }
268 }
269 } else if func.opcode == 0x0e {
270 if let Some(Operand::S(s)) = func.operands.first() {
271 if !(filter_ascii && s.chars().all(|c| c.is_ascii())) {
272 messages.push((current_speaker.clone(), s.clone()));
273 }
274 }
275 }
276 }
277
278 messages
279 }
280
281 fn read_func<R: Read + Seek>(reader: &mut R, encoding: Encoding) -> Result<Func> {
282 let pos = reader.stream_position()?;
283 let opcode = reader.read_u8()?;
284 let operands = if let Some((_, ops)) = OPS.iter().find(|(code, _)| *code == opcode) {
285 let mut operands = Vec::with_capacity(ops.len());
286 for &op in *ops {
287 let operand = match op {
288 B => Operand::B(reader.read_u8()?),
289 W => Operand::W(reader.read_u16()?),
290 D => Operand::D(reader.read_u32()?),
291 S => {
292 let len = reader.read_u8()? as usize;
293 let s = reader.read_cstring()?;
294 if s.as_bytes_with_nul().len() != len {
295 return Err(anyhow::anyhow!(
296 "String length mismatch at {:#x}: expected {}, got {}",
297 pos,
298 len,
299 s.as_bytes_with_nul().len()
300 ));
301 }
302 let s = decode_to_string(encoding, s.as_bytes(), true)?;
303 Operand::S(s)
304 }
305 F => Operand::F(reader.read_f32()?),
306 };
307 operands.push(operand);
308 }
309 operands
310 } else {
311 return Err(anyhow::anyhow!(
312 "Unknown opcode: {:#x} at {:#x}",
313 opcode,
314 pos
315 ));
316 };
317 Ok(Func {
318 pos,
319 opcode,
320 operands,
321 })
322 }
323}