msg_tool/scripts/circus/archive/dat.rs

//! Circus Archive File (.dat)
use crate::ext::io::*;
use crate::scripts::base::*;
use crate::types::*;
use anyhow::Result;
use std::io::{Read, Seek, SeekFrom};
use std::sync::{Arc, Mutex};

/// Circus DAT Archive Builder
#[derive(Debug)]
pub struct DatArchiveBuilder {}

impl DatArchiveBuilder {
    /// Creates a new instance of `DatArchiveBuilder`.
    pub fn new() -> Self {
        Self {}
    }
}

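// Registers the format with the generic script machinery: `.dat` files are
// claimed as `ScriptType::CircusDat` archives, strings default to CP932, and
// the three build_* entry points all funnel into `DatArchive::new`.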
impl ScriptBuilder for DatArchiveBuilder {
    fn default_encoding(&self) -> Encoding {
        Encoding::Cp932
    }

    fn default_archive_encoding(&self) -> Option<Encoding> {
        Some(Encoding::Cp932)
    }

    fn build_script(
        &self,
        data: Vec<u8>,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
        _archive: Option<&Box<dyn Script>>,
    ) -> Result<Box<dyn Script>> {
        Ok(Box::new(DatArchive::new(
            MemReader::new(data),
            archive_encoding,
            config,
        )?))
    }

    fn build_script_from_file(
        &self,
        filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
        _archive: Option<&Box<dyn Script>>,
    ) -> Result<Box<dyn Script>> {
        if filename == "-" {
            // "-" is special-cased: read the whole input up front and parse it
            // from memory.
            let data = crate::utils::files::read_file(filename)?;
            Ok(Box::new(DatArchive::new(
                MemReader::new(data),
                archive_encoding,
                config,
            )?))
        } else {
            let f = std::fs::File::open(filename)?;
            let reader = std::io::BufReader::new(f);
            Ok(Box::new(DatArchive::new(reader, archive_encoding, config)?))
        }
    }

    fn build_script_from_reader(
        &self,
        reader: Box<dyn ReadSeek>,
        _filename: &str,
        _encoding: Encoding,
        archive_encoding: Encoding,
        config: &ExtraConfig,
        _archive: Option<&Box<dyn Script>>,
    ) -> Result<Box<dyn Script>> {
        Ok(Box::new(DatArchive::new(reader, archive_encoding, config)?))
    }

    fn extensions(&self) -> &'static [&'static str] {
        &["dat"]
    }

    fn script_type(&self) -> &'static ScriptType {
        &ScriptType::CircusDat
    }

    fn is_archive(&self) -> bool {
        true
    }

    fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
        is_this_format(&buf[..buf_len]).ok()
    }
}

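// Index layout, as assumed by the parsing code below: the archive begins with
// a u32 record count, followed by `count` fixed-size records of
// `[name: name_len bytes][offset: u32]`. Offsets are absolute positions in the
// archive; an entry's size is the distance to the next record's offset, the
// last materialized entry runs to the end of the stream, and the final index
// record itself is never turned into an entry.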
#[derive(Debug, Clone)]
struct DatFileHeader {
    name: String,
    offset: u32,
    size: u32,
}

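// A single archived file, exposed as a readable/seekable stream. Entries share
// the archive's underlying reader through `Arc<Mutex<_>>` and keep their own
// cursor (`pos`), so every read re-seeks the shared reader first.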
struct Entry<T: Read + Seek> {
    header: DatFileHeader,
    reader: Arc<Mutex<T>>,
    pos: usize,
    script_type: Option<ScriptType>,
}

impl<T: Read + Seek> ArchiveContent for Entry<T> {
    fn name(&self) -> &str {
        &self.header.name
    }

    fn script_type(&self) -> Option<&ScriptType> {
        self.script_type.as_ref()
    }
}

impl<T: Read + Seek> Read for Entry<T> {
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let mut reader = self.reader.lock().map_err(|e| {
            std::io::Error::new(
                std::io::ErrorKind::Other,
                format!("Failed to lock mutex: {}", e),
            )
        })?;
        // The reader is shared between entries, so position it at this entry's
        // current cursor before every read.
        reader.seek(SeekFrom::Start(self.header.offset as u64 + self.pos as u64))?;
        // Clamp to the bytes remaining in this entry; a cursor at or past the
        // end (e.g. after seeking beyond it) simply yields EOF.
        let to_read = buf
            .len()
            .min((self.header.size as usize).saturating_sub(self.pos));
        if to_read == 0 {
            return Ok(0);
        }
        let bytes_read = reader.read(&mut buf[..to_read])?;
        self.pos += bytes_read;
        Ok(bytes_read)
    }
}

impl<T: Read + Seek> Seek for Entry<T> {
    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
        let new_pos = match pos {
            SeekFrom::Start(offset) => offset as usize,
            SeekFrom::End(offset) => {
                if offset < 0 {
                    if (-offset) as usize > self.header.size as usize {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "Seek from end exceeds file length",
                        ));
                    }
                    self.header.size as usize - (-offset) as usize
                } else {
                    // Seeking past the end is allowed, as with std::fs::File;
                    // subsequent reads will simply return 0 bytes.
                    self.header.size as usize + offset as usize
                }
            }
            SeekFrom::Current(offset) => {
                if offset < 0 {
                    if (-offset) as usize > self.pos {
                        return Err(std::io::Error::new(
                            std::io::ErrorKind::InvalidInput,
                            "Seek from current exceeds current position",
                        ));
                    }
                    self.pos.saturating_sub((-offset) as usize)
                } else {
                    self.pos + offset as usize
                }
            }
        };
        self.pos = new_pos;
        Ok(self.pos as u64)
    }

    fn stream_position(&mut self) -> std::io::Result<u64> {
        Ok(self.pos as u64)
    }
}

/// Extra information for the DAT archive.
#[derive(Debug)]
pub struct DatExtraInfo {
    /// Maximum length of file names in the DAT archive.
    pub name_len: usize,
}

/// Circus DAT Archive
#[derive(Debug)]
pub struct DatArchive<T: Read + Seek + std::fmt::Debug> {
    reader: Arc<Mutex<T>>,
    entries: Vec<DatFileHeader>,
    name_len: usize,
}

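// Candidate widths for the fixed-size name field. The index is parsed with
// each candidate in turn until one yields a consistent result (see
// `read_all_index`).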
const NAME_LEN: [usize; 3] = [0x24, 0x30, 0x3C];

impl<T: Read + Seek + std::fmt::Debug> DatArchive<T> {
    /// Creates a new `DatArchive` from a reader.
    ///
    /// * `reader` - The reader to read the DAT archive from.
    /// * `encoding` - The encoding to use for string fields.
    /// * `config` - Extra configuration options.
    pub fn new(mut reader: T, encoding: Encoding, _config: &ExtraConfig) -> Result<Self> {
        let (name_len, entries) = Self::read_all_index(&mut reader, encoding)?;
        let reader = Arc::new(Mutex::new(reader));
        Ok(Self {
            reader,
            entries,
            name_len,
        })
    }

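    // Minimal usage sketch (not a doctest): how a caller might open an archive
    // and list its entries. `ExtraConfig::default()` is only an assumption for
    // illustration; construct the config however the surrounding tool does.
    //
    //     let file = std::fs::File::open("game.dat")?;
    //     let config = ExtraConfig::default(); // assumed constructor
    //     let archive =
    //         DatArchive::new(std::io::BufReader::new(file), Encoding::Cp932, &config)?;
    //     for name in archive.iter_archive_filename()? {
    //         println!("{}", name?);
    //     }
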
    fn read_all_index(reader: &mut T, encoding: Encoding) -> Result<(usize, Vec<DatFileHeader>)> {
        // Try each candidate name-field width; the first one whose index parses
        // and passes the sanity checks wins.
        for &name_len in &NAME_LEN {
            match Self::read_index(reader, encoding, name_len) {
                Ok(entries) => return Ok((name_len, entries)),
                Err(_) => continue,
            }
        }
        Err(anyhow::anyhow!("Failed to read DAT index"))
    }

    fn read_index(
        reader: &mut T,
        encoding: Encoding,
        name_len: usize,
    ) -> Result<Vec<DatFileHeader>> {
        reader.rewind()?;
        let mut count = reader.read_u32()?;
        if count == 0 {
            return Err(anyhow::anyhow!("Empty DAT index"));
        }
        let index_size = (name_len + 4) * count as usize;
        // The final index record is not materialized as an entry.
        count -= 1;
        let mut entries = Vec::with_capacity(count as usize);
        // Sanity checks on the first records; failures here usually mean the
        // guessed name_len is wrong, and read_all_index will try the next one.
        let mut next_offset = reader.peek_u32_at(4 + name_len as u64)?;
        if (next_offset as usize) < index_size + 4 {
            return Err(anyhow::anyhow!("Invalid next_offset"));
        }
        let first_size = reader.peek_u32_at(name_len as u64)?;
        let second_offset = reader.peek_u32_at(8 + name_len as u64 * 2)?;
        // wrapping_sub keeps a wrong guess from overflowing in debug builds.
        if second_offset.wrapping_sub(next_offset) == first_size {
            return Err(anyhow::anyhow!("Invalid second_offset"));
        }
        let file_len = reader.stream_length()?;
        for i in 0..count {
            let name = reader.read_fstring(name_len, encoding, true)?;
            if name.is_empty() {
                return Err(anyhow::anyhow!("Empty file name in DAT archive"));
            }
            let offset = next_offset;
            if i + 1 == count {
                // The last entry runs to the end of the stream.
                next_offset = file_len as u32;
            } else {
                // Peek the next record's offset field without moving the cursor.
                next_offset = reader.peek_u32_at((name_len as u64 + 4) * (i as u64 + 2))?;
            }
            if next_offset < offset {
                return Err(anyhow::anyhow!("Invalid offset in DAT archive"));
            }
            let size = next_offset - offset;
            if offset < index_size as u32 || offset + size > file_len as u32 {
                return Err(anyhow::anyhow!("Invalid offset or size in DAT archive"));
            }
            let header = DatFileHeader { name, offset, size };
            entries.push(header);
            // Skip this record's offset field to reach the next name.
            reader.seek_relative(4)?;
        }
        Ok(entries)
    }
}

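// Exposes the archive to the rest of the tool: entry names and offsets can be
// iterated, and `open_file` hands out lazy `Entry` streams backed by the
// shared reader instead of copying file data.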
impl<T: Read + Seek + std::fmt::Debug + 'static> Script for DatArchive<T> {
    fn default_output_script_type(&self) -> OutputScriptType {
        OutputScriptType::Json
    }

    fn default_format_type(&self) -> FormatOptions {
        FormatOptions::None
    }

    fn is_archive(&self) -> bool {
        true
    }

    fn iter_archive_filename<'a>(
        &'a self,
    ) -> Result<Box<dyn Iterator<Item = Result<String>> + 'a>> {
        Ok(Box::new(self.entries.iter().map(|e| Ok(e.name.clone()))))
    }

    fn iter_archive_offset<'a>(&'a self) -> Result<Box<dyn Iterator<Item = Result<u64>> + 'a>> {
        Ok(Box::new(self.entries.iter().map(|e| Ok(e.offset as u64))))
    }

    fn open_file<'a>(&'a self, index: usize) -> Result<Box<dyn ArchiveContent + 'a>> {
        if index >= self.entries.len() {
            return Err(anyhow::anyhow!(
                "Index out of bounds: {} (max: {})",
                index,
                self.entries.len()
            ));
        }
        let entry = &self.entries[index];
        let mut entry = Entry {
            header: entry.clone(),
            reader: self.reader.clone(),
            pos: 0,
            script_type: None,
        };
        // Probe the first bytes to guess the contained file's type, then rewind
        // the entry so callers see it from the start.
        let mut buf = [0; 32];
        let read_len = match entry.read(&mut buf) {
            Ok(read_len) => read_len,
            Err(e) => {
                return Err(anyhow::anyhow!(
                    "Failed to read entry '{}': {}",
                    entry.header.name,
                    e
                ));
            }
        };
        entry.pos = 0;
        entry.script_type = detect_script_type(&buf, read_len, &entry.header.name);
        Ok(Box::new(entry))
    }

    fn extra_info<'a>(&'a self) -> Option<Box<dyn AnyDebug + 'a>> {
        Some(Box::new(DatExtraInfo {
            name_len: self.name_len,
        }))
    }
}

/// Best-effort detection of an embedded file's type from its leading bytes.
fn detect_script_type(_buf: &[u8], _buf_len: usize, _filename: &str) -> Option<ScriptType> {
    #[cfg(feature = "circus-img")]
    if _buf_len >= 4 && _buf.starts_with(b"CRXG") {
        return Some(ScriptType::CircusCrx);
    }
    #[cfg(feature = "circus-audio")]
    if _buf_len >= 4 && _buf.starts_with(b"XPCM") {
        return Some(ScriptType::CircusPcm);
    }
    None
}

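// Detection is run once per candidate name width: the probe buffer is scored
// (5 points for a plausible entry count, plus up to 10 depending on how many
// index records are visible in the buffer) and rejected outright if the
// offsets it can see are inconsistent.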
fn is_this_format_name_len(buf: &[u8], name_len: usize) -> Result<u8> {
    let mut reader = MemReaderRef::new(buf);
    let count = reader.read_u32()? as usize;
    let index_size = (name_len + 4) * count;
    // A plausible entry count earns a base score.
    let mut score = if count > 0 && count < 1000 { 5 } else { 0 };
    // Number of index records that are both present in the archive and visible
    // in the probe buffer (saturating_sub guards against count == 0).
    let mcount = ((buf.len() - 4) / (name_len + 4)).min(count.saturating_sub(1));
    score += ((mcount / 2).min(10)) as u8;
    if mcount == 0 {
        return Err(anyhow::anyhow!("No entries found in DAT archive"));
    }
    let mut next_offset = reader.cpeek_u32_at(4 + name_len as u64)?;
    if (next_offset as usize) < index_size + 4 {
        return Err(anyhow::anyhow!("Invalid next_offset in DAT archive"));
    }
    let first_size = reader.cpeek_u32_at(name_len as u64)?;
    let second_offset = reader.cpeek_u32_at(8 + name_len as u64 * 2)?;
    // Same sanity check as read_index; wrapping_sub avoids a debug-mode
    // overflow on garbage input.
    if second_offset.wrapping_sub(next_offset) == first_size {
        return Err(anyhow::anyhow!("Invalid second_offset in DAT archive"));
    }
    for i in 0..mcount {
        let offset = next_offset;
        if i + 1 == mcount {
            break;
        } else {
            next_offset = reader.cpeek_u32_at((name_len as u64 + 4) * (i as u64 + 2))?;
        }
        if next_offset < offset {
            return Err(anyhow::anyhow!("Invalid offset in DAT archive"));
        }
        if offset < index_size as u32 {
            return Err(anyhow::anyhow!(
                "Offset is less than index size in DAT archive"
            ));
        }
    }
    Ok(score)
}

/// Checks if the buffer is a valid DAT archive format.
///
/// * `buf` - The buffer to check.
pub fn is_this_format(buf: &[u8]) -> Result<u8> {
    for &name_len in &NAME_LEN {
        match is_this_format_name_len(buf, name_len) {
            Ok(score) => return Ok(score),
            Err(_) => continue,
        }
    }
    Err(anyhow::anyhow!("Not a valid DAT archive format"))
}