msg_tool\scripts\bgi\archive/
dsc.rs1use crate::ext::io::*;
3use crate::ext::vec::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::bit_stream::*;
7use anyhow::Result;
8use rand::Rng;
9use std::collections::BinaryHeap;
10use std::io::{Seek, Write};
11
12#[derive(Debug)]
13struct HuffmanCode {
14 code: u16,
15 depth: u8,
16}
17
18impl std::cmp::PartialEq for HuffmanCode {
19 fn eq(&self, other: &Self) -> bool {
20 self.code == other.code && self.depth == other.depth
21 }
22}
23
24impl std::cmp::Eq for HuffmanCode {}
25
26impl std::cmp::PartialOrd for HuffmanCode {
27 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
28 let cmp = self.depth.cmp(&other.depth);
29 if cmp == std::cmp::Ordering::Equal {
30 Some(self.code.cmp(&other.code))
31 } else {
32 Some(cmp)
33 }
34 }
35}
36
37impl std::cmp::Ord for HuffmanCode {
38 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
39 let cmp = self.depth.cmp(&other.depth);
40 if cmp == std::cmp::Ordering::Equal {
41 self.code.cmp(&other.code)
42 } else {
43 cmp
44 }
45 }
46}
47
48#[derive(Clone, Debug)]
49struct HuffmanNode {
50 is_parent: bool,
51 code: Option<u16>,
52 left_index: usize,
53 right_index: usize,
54}
55
56pub struct DscDecoder<'a> {
58 stream: MsbBitStream<MemReaderRef<'a>>,
59 key: u32,
60 magic: u32,
61 output_size: u32,
62 dec_count: u32,
63}
64
65impl<'a> DscDecoder<'a> {
66 pub fn new(data: &'a [u8]) -> Result<Self> {
68 let mut reader = MemReaderRef::new(data);
69 let magic = (reader.read_u16()? as u32) << 16;
70 reader.pos = 0x10;
71 let key = reader.read_u32()?;
72 let output_size = reader.read_u32()?;
73 let dec_count = reader.read_u32()?;
74 let stream = MsbBitStream::new(reader);
75 Ok(DscDecoder {
76 stream,
77 key,
78 magic,
79 output_size,
80 dec_count,
81 })
82 }
83
84 pub fn unpack(mut self) -> Result<Vec<u8>> {
86 self.stream.m_input.pos = 0x20;
87 let mut codes = Vec::new();
88 for i in 0..512 {
89 let src = self.stream.m_input.read_u8()?;
90 let depth = src.overflowing_sub(self.update_key()).0;
91 if depth > 0 {
92 codes.push(HuffmanCode { code: i, depth })
93 }
94 }
95 codes.sort();
96 let root = Self::create_huffman_tree(codes);
97 self.huffman_decompress(root)
98 }
99
100 fn create_huffman_tree(codes: Vec<HuffmanCode>) -> Vec<HuffmanNode> {
101 let mut trees = Vec::with_capacity(1024);
102 trees.resize(
103 1024,
104 HuffmanNode {
105 is_parent: false,
106 code: None,
107 left_index: 0,
108 right_index: 0,
109 },
110 );
111 let mut left_index = vec![0usize; 512];
112 let mut right_index = vec![0usize; 512];
113 let mut next_node_index = 1usize;
114 let mut depth_nodes = 1usize;
115 let mut depth = 0u8;
116 let mut left_child = true;
117 let mut n = 0;
118 while n < codes.len() {
119 let huffman_node_index = left_child;
120 left_child = !left_child;
121 let mut depth_existed_nodes = 0;
122 while n < codes.len() && codes[n].depth == depth {
123 let index = if huffman_node_index {
124 left_index[depth_existed_nodes]
125 } else {
126 right_index[depth_existed_nodes]
127 };
128 trees[index].code = Some(codes[n].code);
129 n += 1;
130 depth_existed_nodes += 1;
131 }
132 let depth_nodes_to_create = depth_nodes - depth_existed_nodes;
133 for i in 0..depth_nodes_to_create {
134 let index = if huffman_node_index {
135 left_index[depth_existed_nodes + i]
136 } else {
137 right_index[depth_existed_nodes + i]
138 };
139 let node = &mut trees[index];
140 node.is_parent = true;
141 if left_child {
142 left_index[i * 2] = next_node_index;
143 node.left_index = next_node_index;
144 next_node_index += 1;
145 left_index[i * 2 + 1] = next_node_index;
146 node.right_index = next_node_index;
147 next_node_index += 1;
148 } else {
149 right_index[i * 2] = next_node_index;
150 node.left_index = next_node_index;
151 next_node_index += 1;
152 right_index[i * 2 + 1] = next_node_index;
153 node.right_index = next_node_index;
154 next_node_index += 1;
155 }
156 }
157 depth += 1;
158 depth_nodes = depth_nodes_to_create * 2;
159 }
160 trees
161 }
162
163 fn huffman_decompress(&mut self, nodes: Vec<HuffmanNode>) -> Result<Vec<u8>> {
164 let output_size = self.output_size as usize;
165 let mut output = Vec::with_capacity(output_size);
166 let mut dst = 0;
167 output.resize(output_size, 0);
168 for _ in 0..self.dec_count {
169 let mut current_node = &nodes[0];
170 loop {
171 let bit = self.stream.get_next_bit()?;
172 if !bit {
173 current_node = &nodes[current_node.left_index]
174 } else {
175 current_node = &nodes[current_node.right_index]
176 }
177 if !current_node.is_parent {
178 break;
179 }
180 }
181 let code = *current_node.code.as_ref().unwrap();
182 if code >= 256 {
183 let mut offset = self.stream.get_bits(12)?;
184 let count = ((code & 0xFF) + 2) as usize;
185 offset += 2;
186 output.copy_overlapped(dst - offset as usize, dst, count);
187 dst += count;
188 } else {
189 output[dst] = code as u8;
190 dst += 1;
191 }
192 }
193 if dst != output_size {
194 eprintln!(
195 "Warning: Output size mismatch, expected {}, got {}",
196 self.output_size, dst
197 );
198 crate::COUNTER.inc_warning();
199 }
200 Ok(output)
201 }
202
203 fn update_key(&mut self) -> u8 {
204 let v0 = 20021 * (self.key & 0xffff);
205 let mut v1 = self.magic | (self.key >> 16);
206 v1 = v1
207 .overflowing_mul(20021)
208 .0
209 .overflowing_add(self.key.overflowing_mul(346).0)
210 .0;
211 v1 = (v1 + (v0 >> 16)) & 0xffff;
212 self.key = (v1 << 16) + (v0 & 0xffff) + 1;
213 v1 as u8
214 }
215}
216
217#[derive(Debug, Clone, Copy)]
218enum LzssOp {
219 Literal(u8),
220 Match { len: u16, offset: u16 },
221}
222
223#[derive(Debug)]
224struct FreqNode {
225 freq: u32,
226 symbol: Option<u16>,
227 left: Option<Box<FreqNode>>,
228 right: Option<Box<FreqNode>>,
229}
230impl PartialEq for FreqNode {
231 fn eq(&self, other: &Self) -> bool {
232 self.freq == other.freq
233 }
234}
235impl Eq for FreqNode {}
236impl PartialOrd for FreqNode {
237 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
238 Some(self.cmp(other))
239 }
240}
241impl Ord for FreqNode {
242 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
243 other.freq.cmp(&self.freq)
244 }
245}
246
247fn calculate_huffman_depths(freqs: &[u32]) -> Vec<u8> {
248 const MAX_DEPTH: u8 = 9;
249
250 let mut symbols_with_freq: Vec<(u16, u32)> = freqs
252 .iter()
253 .enumerate()
254 .filter_map(|(symbol, &freq)| {
255 if freq > 0 {
256 Some((symbol as u16, freq))
257 } else {
258 None
259 }
260 })
261 .collect();
262
263 let mut depths = vec![0u8; 512];
264
265 if symbols_with_freq.is_empty() {
266 return depths;
267 }
268
269 if symbols_with_freq.len() == 1 {
270 depths[symbols_with_freq[0].0 as usize] = 1;
271 return depths;
272 }
273
274 loop {
276 let current_depths = build_huffman_tree(&symbols_with_freq);
277 let max_depth = current_depths.iter().max().copied().unwrap_or(0);
278
279 if max_depth <= MAX_DEPTH {
280 for &(symbol, _) in &symbols_with_freq {
282 let symbol_index = symbols_with_freq
283 .iter()
284 .position(|(s, _)| *s == symbol)
285 .unwrap();
286 depths[symbol as usize] = current_depths[symbol_index];
287 }
288 break;
289 }
290
291 adjust_frequencies_for_depth_limit(&mut symbols_with_freq);
293 }
294
295 depths
296}
297
298fn build_huffman_tree(symbols_with_freq: &[(u16, u32)]) -> Vec<u8> {
299 let mut heap = BinaryHeap::new();
300
301 for &(symbol, freq) in symbols_with_freq {
303 heap.push(FreqNode {
304 freq,
305 symbol: Some(symbol),
306 left: None,
307 right: None,
308 });
309 }
310
311 while heap.len() > 1 {
313 let node1 = heap.pop().unwrap();
314 let node2 = heap.pop().unwrap();
315 let new_node = FreqNode {
316 freq: node1.freq + node2.freq,
317 symbol: None,
318 left: Some(Box::new(node1)),
319 right: Some(Box::new(node2)),
320 };
321 heap.push(new_node);
322 }
323
324 let mut depths = vec![0u8; symbols_with_freq.len()];
326 if let Some(root) = heap.pop() {
327 calculate_depths(&root, 0, symbols_with_freq, &mut depths);
328 }
329
330 depths
331}
332
333fn calculate_depths(
334 node: &FreqNode,
335 depth: u8,
336 symbols_with_freq: &[(u16, u32)],
337 depths: &mut [u8],
338) {
339 if let Some(symbol) = node.symbol {
340 let symbol_index = symbols_with_freq
341 .iter()
342 .position(|(s, _)| *s == symbol)
343 .unwrap();
344 depths[symbol_index] = if depth == 0 { 1 } else { depth };
345 } else {
346 if let Some(ref left) = node.left {
347 calculate_depths(left, depth + 1, symbols_with_freq, depths);
348 }
349 if let Some(ref right) = node.right {
350 calculate_depths(right, depth + 1, symbols_with_freq, depths);
351 }
352 }
353}
354
355fn adjust_frequencies_for_depth_limit(symbols_with_freq: &mut [(u16, u32)]) {
356 symbols_with_freq.sort_by(|a, b| a.1.cmp(&b.1));
358
359 let min_freq = symbols_with_freq[0].1;
362 let adjustment = (min_freq as f64 * 0.1).max(1.0) as u32;
363
364 let num_to_adjust = (symbols_with_freq.len() / 4).max(1);
366 for i in 0..num_to_adjust.min(symbols_with_freq.len()) {
367 symbols_with_freq[i].1 += adjustment;
368 }
369}
370
371fn generate_canonical_codes(depths: &[u8]) -> Vec<Option<(u16, u8)>> {
372 let mut codes_with_depths = vec![];
373 for (symbol, &depth) in depths.iter().enumerate() {
374 if depth > 0 {
375 codes_with_depths.push((symbol as u16, depth));
376 }
377 }
378 codes_with_depths.sort_by(|a, b| {
379 let depth_cmp = a.1.cmp(&b.1);
380 if depth_cmp == std::cmp::Ordering::Equal {
381 a.0.cmp(&b.0)
382 } else {
383 depth_cmp
384 }
385 });
386
387 let mut huffman_codes = vec![None; 512];
388 let mut current_code = 0u16;
389 let mut last_depth = 0u8;
390
391 for &(symbol, depth) in &codes_with_depths {
392 if last_depth != 0 {
393 current_code <<= depth - last_depth;
394 }
395 huffman_codes[symbol as usize] = Some((current_code, depth));
396 current_code += 1;
397 last_depth = depth;
398 }
399
400 huffman_codes
401}
402
403pub struct DscEncoder<'a, T: Write + Seek> {
405 stream: MsbBitWriter<'a, T>,
406 magic: u32,
407 key: u32,
408 dec_count: u32,
409 min_len: usize,
410}
411
412impl<'a, T: Write + Seek> DscEncoder<'a, T> {
413 pub fn new(writer: &'a mut T, min_len: usize) -> Self {
415 let stream = MsbBitWriter::new(writer);
416 DscEncoder {
417 stream,
418 magic: 0x5344 << 16, key: rand::rng().random(),
420 dec_count: 0,
421 min_len,
422 }
423 }
424
425 pub fn pack(mut self, data: &[u8]) -> Result<()> {
427 let mut ops = vec![];
429 let mut pos = 0;
430
431 const MAX_LEN: usize = 257;
432 const WINDOW_SIZE: usize = 4097;
433
434 let mut head: Vec<i32> = vec![-1; 1 << 16];
435 let mut prev: Vec<i32> = vec![-1; data.len()];
436
437 while pos < data.len() {
438 let max_len = (data.len() - pos).min(MAX_LEN);
439 let mut best_len = 0;
440 let mut best_offset = 0;
441
442 if max_len >= self.min_len {
443 let limit = pos.saturating_sub(WINDOW_SIZE);
444 let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
445 let mut match_pos_i32 = head[key as usize];
446
447 while match_pos_i32 != -1 {
448 let match_pos = match_pos_i32 as usize;
449 if match_pos < limit {
450 break;
451 }
452
453 if data.get(match_pos + best_len) == data.get(pos + best_len) {
454 let mut current_len = 0;
455 for i in 0..max_len {
456 if data.get(pos + i) != data.get(match_pos + i) {
457 break;
458 }
459 current_len += 1;
460 }
461
462 if current_len > best_len {
463 best_len = current_len;
464 best_offset = pos - match_pos;
465 if best_len >= max_len {
466 break;
467 }
468 }
469 }
470 match_pos_i32 = prev[match_pos];
471 }
472 }
473
474 if best_len >= self.min_len && best_offset >= 2 {
475 ops.push(LzssOp::Match {
476 len: best_len as u16,
477 offset: best_offset as u16,
478 });
479 for i in 0..best_len {
480 if pos + i + 1 < data.len() {
481 let key = (data[pos + i] as u16) << 8 | data[pos + i + 1] as u16;
482 let current_pos = pos + i;
483 prev[current_pos] = head[key as usize];
484 head[key as usize] = current_pos as i32;
485 }
486 }
487 pos += best_len;
488 } else {
489 ops.push(LzssOp::Literal(data[pos]));
490 if pos + 1 < data.len() {
491 let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
492 prev[pos] = head[key as usize];
493 head[key as usize] = pos as i32;
494 }
495 pos += 1;
496 }
497 }
498
499 let symbols: Vec<u16> = ops
500 .iter()
501 .map(|op| match op {
502 LzssOp::Literal(byte) => *byte as u16,
503 LzssOp::Match { len, .. } => 256 + (len - 2),
504 })
505 .collect();
506 self.dec_count = symbols.len() as u32;
507
508 let mut freqs = vec![0u32; 512];
509 for &s in &symbols {
510 freqs[s as usize] += 1;
511 }
512
513 let depths = calculate_huffman_depths(&freqs);
514 let huffman_codes = generate_canonical_codes(&depths);
515
516 self.stream.writer.write_all(b"DSC FORMAT 1.00\0")?;
517 self.stream.writer.seek(std::io::SeekFrom::Start(0x10))?;
518 self.stream.writer.write_u32(self.key)?;
519 self.stream.writer.write_u32(data.len() as u32)?;
520 self.stream.writer.write_u32(self.dec_count)?;
521 self.stream.writer.seek(std::io::SeekFrom::Start(0x20))?;
522
523 for depth in depths.iter() {
524 let key = self.update_key();
525 self.stream.writer.write_u8(depth.overflowing_add(key).0)?;
526 }
527
528 for op in &ops {
529 match op {
530 LzssOp::Literal(byte) => {
531 let symbol = *byte as u16;
532 let (code, len) = huffman_codes[symbol as usize].unwrap();
533 self.stream.put_bits(code as u32, len)?;
534 }
535 LzssOp::Match { len, offset } => {
536 let symbol = 256 + (len - 2);
537 let (code, huff_len) = huffman_codes[symbol as usize].unwrap();
538 self.stream.put_bits(code as u32, huff_len)?;
539 self.stream.put_bits((*offset - 2) as u32, 12)?;
540 }
541 }
542 }
543 self.stream.flush()?;
544 Ok(())
545 }
546
547 fn update_key(&mut self) -> u8 {
548 let v0 = 20021 * (self.key & 0xffff);
549 let mut v1 = self.magic | (self.key >> 16);
550 v1 = v1
551 .overflowing_mul(20021)
552 .0
553 .overflowing_add(self.key.overflowing_mul(346).0)
554 .0;
555 v1 = (v1 + (v0 >> 16)) & 0xffff;
556 self.key = (v1 << 16) + (v0 & 0xffff) + 1;
557 v1 as u8
558 }
559}
560
561#[derive(Debug)]
562pub struct DscBuilder {}
564
565impl DscBuilder {
566 pub fn new() -> Self {
568 DscBuilder {}
569 }
570}
571
572impl ScriptBuilder for DscBuilder {
573 fn default_encoding(&self) -> Encoding {
574 Encoding::Cp932
575 }
576
577 fn default_archive_encoding(&self) -> Option<Encoding> {
578 Some(Encoding::Cp932)
579 }
580
581 fn build_script(
582 &self,
583 buf: Vec<u8>,
584 _filename: &str,
585 _encoding: Encoding,
586 _archive_encoding: Encoding,
587 config: &ExtraConfig,
588 _archive: Option<&Box<dyn Script>>,
589 ) -> Result<Box<dyn Script>> {
590 Ok(Box::new(Dsc::new(buf, config)?))
591 }
592
593 fn extensions(&self) -> &'static [&'static str] {
594 &[]
595 }
596
597 fn script_type(&self) -> &'static ScriptType {
598 &ScriptType::BGIDsc
599 }
600
601 fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
602 if buf_len >= 16 && buf.starts_with(b"DSC FORMAT 1.00\0") {
603 return Some(255);
604 }
605 None
606 }
607
608 fn can_create_file(&self) -> bool {
609 true
610 }
611
612 fn create_file<'a>(
613 &'a self,
614 filename: &'a str,
615 mut writer: Box<dyn WriteSeek + 'a>,
616 _encoding: Encoding,
617 _file_encoding: Encoding,
618 config: &ExtraConfig,
619 ) -> Result<()> {
620 let encoder = DscEncoder::new(&mut writer, config.bgi_compress_min_len);
621 let data = crate::utils::files::read_file(filename)?;
622 encoder.pack(&data)?;
623 Ok(())
624 }
625}
626
627#[derive(Debug)]
628pub struct Dsc {
630 data: Vec<u8>,
631 min_len: usize,
632}
633
634impl Dsc {
635 pub fn new(buf: Vec<u8>, config: &ExtraConfig) -> Result<Self> {
640 if buf.len() < 16 || !buf.starts_with(b"DSC FORMAT 1.00\0") {
641 return Err(anyhow::anyhow!("Invalid DSC format"));
642 }
643 let decoder = DscDecoder::new(&buf)?;
644 let data = decoder.unpack()?;
645 Ok(Dsc {
646 data,
647 min_len: config.bgi_compress_min_len,
648 })
649 }
650}
651
652impl Script for Dsc {
653 fn default_output_script_type(&self) -> OutputScriptType {
654 OutputScriptType::Custom
655 }
656
657 fn is_output_supported(&self, output: OutputScriptType) -> bool {
658 matches!(output, OutputScriptType::Custom)
659 }
660
661 fn default_format_type(&self) -> FormatOptions {
662 FormatOptions::None
663 }
664
665 fn custom_output_extension(&self) -> &'static str {
666 ""
667 }
668
669 fn custom_export(&self, filename: &std::path::Path, _encoding: Encoding) -> Result<()> {
670 let mut f = std::fs::File::create(filename)?;
671 f.write_all(&self.data)?;
672 Ok(())
673 }
674
675 fn custom_import<'a>(
676 &'a self,
677 custom_filename: &'a str,
678 mut file: Box<dyn WriteSeek + 'a>,
679 _encoding: Encoding,
680 _output_encoding: Encoding,
681 ) -> Result<()> {
682 let encoder = DscEncoder::new(&mut file, self.min_len);
683 let data = crate::utils::files::read_file(custom_filename)?;
684 encoder.pack(&data)?;
685 Ok(())
686 }
687}
688
689pub fn parse_min_length(len: &str) -> Result<usize, String> {
691 clap_num::number_range(len, 2, 256)
692}