1use super::parser::*;
3use crate::ext::io::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::encoding::{decode_to_string, encode_string};
7use anyhow::Result;
8use fancy_regex::Regex;
9use lazy_static::lazy_static;
10use std::collections::{BTreeMap, HashMap};
11
/// Builder that recognises BGI scripts and constructs [`BGIScript`] values.
///
/// The builder carries no state, so `Default` is derived and [`Self::new`]
/// simply delegates to it.
#[derive(Debug, Default)]
pub struct BGIScriptBuilder {}

impl BGIScriptBuilder {
    /// Creates a new builder; equivalent to `BGIScriptBuilder::default()`.
    pub fn new() -> Self {
        Self::default()
    }
}
22
23impl ScriptBuilder for BGIScriptBuilder {
24 fn default_encoding(&self) -> Encoding {
25 #[cfg(not(windows))]
26 return Encoding::Cp932;
27 #[cfg(windows)]
28 return Encoding::CodePage(932);
30 }
31
32 fn build_script(
33 &self,
34 buf: Vec<u8>,
35 _filename: &str,
36 encoding: Encoding,
37 _archive_encoding: Encoding,
38 config: &ExtraConfig,
39 _archive: Option<&Box<dyn Script>>,
40 ) -> Result<Box<dyn Script>> {
41 Ok(Box::new(BGIScript::new(buf, encoding, config)?))
42 }
43
44 fn extensions(&self) -> &'static [&'static str] {
45 &[]
46 }
47
48 fn script_type(&self) -> &'static ScriptType {
49 &ScriptType::BGI
50 }
51
52 fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
53 if buf_len > 28 && buf.starts_with(b"BurikoCompiledScriptVer1.00\0") {
54 return Some(255);
55 }
56 None
57 }
58}
59
60pub struct BGIScript {
62 data: MemReader,
63 encoding: Encoding,
64 strings: Vec<BGIString>,
65 is_v1: bool,
66 is_v1_instr: bool,
67 offset: usize,
68 import_duplicate: bool,
69 append: bool,
70 custom_yaml: bool,
71 add_space: bool,
72}
73
74impl std::fmt::Debug for BGIScript {
75 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
76 f.debug_struct("BGIScript")
77 .field("encoding", &self.encoding)
78 .finish_non_exhaustive()
79 }
80}
81
82impl BGIScript {
83 pub fn new(data: Vec<u8>, encoding: Encoding, config: &ExtraConfig) -> Result<Self> {
89 let data = MemReader::new(data);
90 if data.data.starts_with(b"BurikoCompiledScriptVer1.00\0") {
91 let mut parser = V1Parser::new(data.to_ref(), encoding)?;
92 parser.disassemble()?;
93 let strings = parser.strings.clone();
94 let offset = parser.offset;
95 Ok(Self {
96 data,
97 encoding,
98 strings,
99 is_v1: true,
100 is_v1_instr: true,
101 offset,
102 import_duplicate: config.bgi_import_duplicate,
103 append: !config.bgi_disable_append,
104 custom_yaml: config.custom_yaml,
105 add_space: config.bgi_add_space,
106 })
107 } else {
108 let mut is_v1_instr = false;
109 let strings = {
110 let mut parser = V0Parser::new(data.to_ref());
111 match parser.disassemble() {
112 Ok(_) => parser.strings,
113 Err(_) => {
114 let mut parser = V1Parser::new(data.to_ref(), encoding)?;
115 parser.disassemble()?;
116 is_v1_instr = true;
117 parser.strings
118 }
119 }
120 };
121 Ok(Self {
122 data,
123 encoding,
124 strings,
125 is_v1: false,
126 is_v1_instr,
127 offset: 0,
128 import_duplicate: config.bgi_import_duplicate,
129 append: !config.bgi_disable_append,
130 custom_yaml: config.custom_yaml,
131 add_space: config.bgi_add_space,
132 })
133 }
134 }
135
136 fn read_string(&self, offset: usize) -> Result<String> {
137 let start = self.offset + offset;
138 let string_data = self.data.cpeek_cstring_at(start as u64)?;
139 let string = decode_to_string(self.encoding, string_data.as_bytes(), false)?;
141 Ok(string)
142 }
143
144 fn output_with_ruby(str: &mut String, ruby: &mut Vec<String>) -> Result<()> {
145 if ruby.is_empty() {
146 return Ok(());
147 }
148 if ruby.len() % 2 != 0 {
149 return Err(anyhow::anyhow!("Ruby strings count is not even."));
150 }
151 for i in (0..ruby.len()).step_by(2) {
152 let ruby_str = &ruby[i];
153 let ruby_text = &ruby[i + 1];
154 if ruby_str.is_empty() || ruby_text.is_empty() {
155 continue;
156 }
157 *str = str.replace(ruby_str, &format!("<r{ruby_text}>{ruby_str}</r>"));
158 }
159 ruby.clear();
160 Ok(())
161 }
162}
163
164impl Script for BGIScript {
165 fn default_output_script_type(&self) -> OutputScriptType {
166 OutputScriptType::Json
167 }
168
169 fn is_output_supported(&self, _: OutputScriptType) -> bool {
170 true
171 }
172
173 fn custom_output_extension<'a>(&'a self) -> &'a str {
174 if self.custom_yaml { "yaml" } else { "json" }
175 }
176
177 fn default_format_type(&self) -> FormatOptions {
178 if self.is_v1_instr {
179 FormatOptions::None
180 } else {
181 FormatOptions::Fixed {
182 length: 32,
183 keep_original: false,
184 break_words: false,
185 insert_fullwidth_space_at_line_start: true,
186 break_with_sentence: true,
187 #[cfg(feature = "jieba")]
188 break_chinese_words: true,
189 #[cfg(feature = "jieba")]
190 jieba_dict: None,
191 no_remove_space_at_line_start: false,
192 }
193 }
194 }
195
196 fn extract_messages(&self) -> Result<Vec<Message>> {
197 let mut messages = Vec::new();
198 let mut name = None;
199 let mut ruby = Vec::new();
200 for bgi_string in &self.strings {
201 match bgi_string.typ {
202 BGIStringType::Name => {
203 name = Some(self.read_string(bgi_string.address)?);
204 }
205 BGIStringType::Message => {
206 let mut message = self.read_string(bgi_string.address)?;
207 if !ruby.is_empty() {
208 Self::output_with_ruby(&mut message, &mut ruby)?;
209 }
210 messages.push(Message {
211 name: name.take(),
212 message: message,
213 });
214 }
215 BGIStringType::Ruby => {
216 let ruby_str = self.read_string(bgi_string.address)?;
217 ruby.push(ruby_str);
218 }
219 _ => {}
220 }
221 }
222 Ok(messages)
223 }
224
225 fn import_messages<'a>(
226 &'a self,
227 mut messages: Vec<Message>,
228 mut file: Box<dyn WriteSeek + 'a>,
229 _filename: &str,
230 encoding: Encoding,
231 replacement: Option<&'a ReplacementTable>,
232 ) -> Result<()> {
233 if self.add_space {
234 for mes in messages.iter_mut() {
235 if !mes.message.ends_with(' ') {
236 mes.message.push(' ');
237 }
238 }
239 }
240 if !self.import_duplicate {
241 let mut used = HashMap::new();
242 let mut extra = HashMap::new();
243 let mut mes = messages.iter_mut();
244 let mut cur_mes = mes.next();
245 let mut old_offset = 0;
246 let mut new_offset = 0;
247 let mut rubys = Vec::new();
248 let mut parsed_ruby = false;
249 if self.append {
250 file.write_all(&self.data.data)?;
251 new_offset = self.data.data.len();
252 }
253 for curs in &self.strings {
254 if !curs.is_internal() {
255 if cur_mes.is_none() {
256 cur_mes = mes.next();
257 }
258 }
259 if used.contains_key(&curs.address) && curs.is_internal() {
260 let (_, new_address) = used.get(&curs.address).unwrap();
261 file.write_u32_at(curs.offset as u64, *new_address as u32)?;
262 continue;
263 }
264 let nmes = match curs.typ {
265 BGIStringType::Internal => self.read_string(curs.address)?,
266 BGIStringType::Ruby => {
267 if !self.is_v1 && self.is_v1_instr {
268 if rubys.is_empty() {
269 if parsed_ruby {
270 String::from("<")
271 } else {
272 rubys = match &mut cur_mes {
273 Some(m) => parse_ruby_from_text(&mut m.message)?,
274 None => return Err(anyhow::anyhow!("No enough messages.")),
275 };
276 parsed_ruby = true;
277 if rubys.is_empty() {
278 String::from("<")
279 } else {
280 let ruby_str = rubys.remove(0);
281 ruby_str
282 }
283 }
284 } else {
285 rubys.remove(0)
286 }
287 } else {
288 self.read_string(curs.address)?
289 }
290 }
291 BGIStringType::Name => match &cur_mes {
292 Some(m) => {
293 if let Some(name) = &m.name {
294 let mut name = name.clone();
295 if let Some(replacement) = replacement {
296 for (key, value) in replacement.map.iter() {
297 name = name.replace(key, value);
298 }
299 }
300 name
301 } else {
302 return Err(anyhow::anyhow!("Name is missing for message."));
303 }
304 }
305 None => return Err(anyhow::anyhow!("No enough messages.")),
306 },
307 BGIStringType::Message => {
308 if !rubys.is_empty() {
309 eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
310 crate::COUNTER.inc_warning();
311 rubys.clear();
312 }
313 parsed_ruby = false;
314 let mes = match &cur_mes {
315 Some(m) => {
316 let mut message = m.message.clone();
317 if let Some(replacement) = replacement {
318 for (key, value) in replacement.map.iter() {
319 message = message.replace(key, value);
320 }
321 }
322 message
323 }
324 None => return Err(anyhow::anyhow!("No enough messages.")),
325 };
326 cur_mes.take();
327 mes
328 }
329 };
330 let in_used = match used.get(&curs.address) {
331 Some((s, address)) => {
332 if s == &nmes {
333 file.write_u32_at(curs.offset as u64, *address as u32)?;
334 continue;
335 }
336 if let Some(address) = extra.get(&nmes) {
337 file.write_u32_at(curs.offset as u64, *address as u32)?;
338 continue;
339 }
340 true
341 }
342 None => false,
343 };
344 let bgi_str_old_offset = curs.address + self.offset;
345 if !self.append && old_offset < bgi_str_old_offset {
346 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
347 new_offset += bgi_str_old_offset - old_offset;
348 old_offset = bgi_str_old_offset;
349 }
350 let old_str_len = self
351 .data
352 .cpeek_cstring_at(bgi_str_old_offset as u64)?
353 .as_bytes_with_nul()
354 .len();
355 let nmess = encode_string(encoding, &nmes, false)?;
356 let write_to_original = self.append && !in_used && nmess.len() + 1 <= old_str_len;
357 if write_to_original {
358 file.write_all_at(bgi_str_old_offset as u64, &nmess)?;
359 file.write_u8_at(bgi_str_old_offset as u64 + nmess.len() as u64, 0)?; } else {
361 file.write_all(&nmess)?;
362 file.write_u8(0)?; }
364 let new_address = if write_to_original {
365 bgi_str_old_offset - self.offset
366 } else {
367 new_offset - self.offset
368 };
369 file.write_u32_at(curs.offset as u64, new_address as u32)?;
370 if in_used {
371 extra.insert(nmes, new_address);
372 } else {
373 used.insert(curs.address, (nmes, new_address));
374 }
375 old_offset += old_str_len;
376 if !write_to_original {
377 new_offset += nmess.len() + 1; }
379 }
380 if cur_mes.is_some() || mes.next().is_some() {
381 return Err(anyhow::anyhow!("Some messages were not processed."));
382 }
383 if !self.append && old_offset < self.data.data.len() {
384 file.write_all(&self.data.data[old_offset..])?;
385 }
386 return Ok(());
387 }
388 let mut mes = messages.iter_mut();
389 let mut cur_mes = None;
390 let mut strs = self.strings.iter();
391 let mut nstrs = Vec::new();
392 let mut cur_str = strs.next();
393 let mut old_offset = 0;
394 let mut new_offset = 0;
395 let mut rubys = Vec::new();
396 let mut parsed_ruby = false;
397 if self.append {
398 file.write_all(&self.data.data)?;
399 new_offset = self.data.data.len();
400 }
401 while let Some(curs) = cur_str {
402 if !curs.is_internal() {
403 if cur_mes.is_none() {
404 cur_mes = mes.next();
405 }
406 }
407 let bgi_str_old_offset = curs.address + self.offset;
408 if !self.append && old_offset < bgi_str_old_offset {
409 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
410 new_offset += bgi_str_old_offset - old_offset;
411 old_offset = bgi_str_old_offset;
412 }
413 let old_str_len = self
414 .data
415 .cpeek_cstring_at((curs.address + self.offset) as u64)?
416 .as_bytes_with_nul()
417 .len();
418 let nmes = match curs.typ {
419 BGIStringType::Internal => self.read_string(curs.address)?,
420 BGIStringType::Ruby => {
421 if !self.is_v1 && self.is_v1_instr {
422 if rubys.is_empty() {
423 if parsed_ruby {
424 String::from("<")
425 } else {
426 rubys = match &mut cur_mes {
427 Some(m) => parse_ruby_from_text(&mut m.message)?,
428 None => return Err(anyhow::anyhow!("No enough messages.")),
429 };
430 parsed_ruby = true;
431 if rubys.is_empty() {
432 String::from("<")
433 } else {
434 let ruby_str = rubys.remove(0);
435 ruby_str
436 }
437 }
438 } else {
439 rubys.remove(0)
440 }
441 } else {
442 self.read_string(curs.address)?
443 }
444 }
445 BGIStringType::Name => match &cur_mes {
446 Some(m) => {
447 if let Some(name) = &m.name {
448 let mut name = name.clone();
449 if let Some(replacement) = replacement {
450 for (key, value) in replacement.map.iter() {
451 name = name.replace(key, value);
452 }
453 }
454 name
455 } else {
456 return Err(anyhow::anyhow!("Name is missing for message."));
457 }
458 }
459 None => return Err(anyhow::anyhow!("No enough messages.")),
460 },
461 BGIStringType::Message => {
462 if !rubys.is_empty() {
463 eprintln!("Warning: Some ruby strings are unused: {:?}", rubys);
464 crate::COUNTER.inc_warning();
465 rubys.clear();
466 }
467 parsed_ruby = false;
468 let mes = match &cur_mes {
469 Some(m) => {
470 let mut message = m.message.clone();
471 if let Some(replacement) = replacement {
472 for (key, value) in replacement.map.iter() {
473 message = message.replace(key, value);
474 }
475 }
476 message
477 }
478 None => return Err(anyhow::anyhow!("No enough messages.")),
479 };
480 cur_mes.take();
481 mes
482 }
483 };
484 let nmes = encode_string(encoding, &nmes, false)?;
485 file.write_all(&nmes)?;
486 file.write_u8(0)?;
487 let new_str_len = nmes.len() + 1; let new_address = new_offset - self.offset;
489 nstrs.push(BGIString {
490 offset: curs.offset,
491 address: new_address,
492 typ: curs.typ.clone(),
493 });
494 old_offset += old_str_len;
495 new_offset += new_str_len;
496 cur_str = strs.next();
497 }
498 if cur_mes.is_some() || mes.next().is_some() {
499 return Err(anyhow::anyhow!("Some messages were not processed."));
500 }
501 for str in nstrs {
502 file.write_u32_at(str.offset as u64, str.address as u32)?;
503 }
504 if !self.append && old_offset < self.data.data.len() {
505 file.write_all(&self.data.data[old_offset..])?;
506 }
507 Ok(())
508 }
509
510 fn custom_export(&self, filename: &std::path::Path, encoding: Encoding) -> Result<()> {
511 let mut strs = Vec::with_capacity(self.strings.len());
512 for s in &self.strings {
513 let string = self.read_string(s.address)?;
514 strs.push(string);
515 }
516 let data = if self.custom_yaml {
517 serde_yaml_ng::to_string(&strs)
518 .map_err(|e| anyhow::anyhow!("Failed to serialize to YAML: {}", e))?
519 } else {
520 serde_json::to_string_pretty(&strs)
521 .map_err(|e| anyhow::anyhow!("Failed to serialize to JSON: {}", e))?
522 };
523 let data = encode_string(encoding, &data, false)?;
524 let mut writer = crate::utils::files::write_file(filename)?;
525 writer.write_all(&data)?;
526 writer.flush()?;
527 Ok(())
528 }
529
530 fn custom_import<'a>(
531 &'a self,
532 custom_filename: &'a str,
533 mut file: Box<dyn WriteSeek + 'a>,
534 encoding: Encoding,
535 output_encoding: Encoding,
536 ) -> Result<()> {
537 let output = crate::utils::files::read_file(custom_filename)?;
538 let s = decode_to_string(output_encoding, &output, true)?;
539 let strs: Vec<String> = if self.custom_yaml {
540 serde_yaml_ng::from_str(&s)
541 .map_err(|e| anyhow::anyhow!("Failed to parse YAML: {}", e))?
542 } else {
543 serde_json::from_str(&s).map_err(|e| anyhow::anyhow!("Failed to parse JSON: {}", e))?
544 };
545 if strs.len() != self.strings.len() {
546 return Err(anyhow::anyhow!(
547 "The number of strings in the imported file ({}) does not match the original ({})",
548 strs.len(),
549 self.strings.len()
550 ));
551 }
552 if !self.import_duplicate {
553 let mut used = HashMap::new();
554 let mut extra = HashMap::new();
555 let mut mes = strs.iter();
556 let mut cur_str = mes.next();
557 let mut old_offset = 0;
558 let mut new_offset = 0;
559 if self.append {
560 file.write_all(&self.data.data)?;
561 new_offset = self.data.data.len();
562 }
563 for curs in &self.strings {
564 let nmes = match cur_str {
565 Some(s) => s,
566 None => return Err(anyhow::anyhow!("No enough strings.")),
567 };
568 cur_str = mes.next();
569 let in_used = match used.get(&curs.address) {
570 Some((s, address)) => {
571 if s == &nmes {
572 file.write_u32_at(curs.offset as u64, *address as u32)?;
573 continue;
574 }
575 if let Some(address) = extra.get(nmes) {
576 file.write_u32_at(curs.offset as u64, *address as u32)?;
577 continue;
578 }
579 true
580 }
581 None => false,
582 };
583 let bgi_str_old_offset = curs.address + self.offset;
584 if !self.append && old_offset < bgi_str_old_offset {
585 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
586 new_offset += bgi_str_old_offset - old_offset;
587 old_offset = bgi_str_old_offset;
588 }
589 let old_str_len = self
590 .data
591 .cpeek_cstring_at(bgi_str_old_offset as u64)?
592 .as_bytes_with_nul()
593 .len();
594 let nmess = encode_string(encoding, nmes, false)?;
595 let write_to_original = self.append && !in_used && nmess.len() + 1 <= old_str_len;
596 if write_to_original {
597 file.write_all_at(bgi_str_old_offset as u64, &nmess)?;
598 file.write_u8_at(bgi_str_old_offset as u64 + nmess.len() as u64, 0)?; } else {
600 file.write_all(&nmess)?;
601 file.write_u8(0)?; }
603 let new_address = if write_to_original {
604 bgi_str_old_offset - self.offset
605 } else {
606 new_offset - self.offset
607 };
608 file.write_u32_at(curs.offset as u64, new_address as u32)?;
609 if in_used {
610 extra.insert(nmes, new_address);
611 } else {
612 used.insert(curs.address, (nmes, new_address));
613 }
614 old_offset += old_str_len;
615 if !write_to_original {
616 new_offset += nmess.len() + 1; }
618 }
619 if cur_str.is_some() || mes.next().is_some() {
620 return Err(anyhow::anyhow!("Some strings were not processed."));
621 }
622 if !self.append && old_offset < self.data.data.len() {
623 file.write_all(&self.data.data[old_offset..])?;
624 }
625 return Ok(());
626 }
627 let mut mes = strs.iter();
628 let mut cur_mes = mes.next();
629 let mut strs = self.strings.iter();
630 let mut nstrs = Vec::new();
631 let mut cur_str = strs.next();
632 let mut old_offset = 0;
633 let mut new_offset = 0;
634 if self.append {
635 file.write_all(&self.data.data)?;
636 new_offset = self.data.data.len();
637 }
638 while let Some(curs) = cur_str {
639 let bgi_str_old_offset = curs.address + self.offset;
640 if !self.append && old_offset < bgi_str_old_offset {
641 file.write_all(&self.data.data[old_offset..bgi_str_old_offset])?;
642 new_offset += bgi_str_old_offset - old_offset;
643 old_offset = bgi_str_old_offset;
644 }
645 let old_str_len = self
646 .data
647 .cpeek_cstring_at((curs.address + self.offset) as u64)?
648 .as_bytes_with_nul()
649 .len();
650 let nmes = match cur_mes {
651 Some(s) => s,
652 None => return Err(anyhow::anyhow!("No enough strings.")),
653 };
654 cur_mes = mes.next();
655 let nmes = encode_string(encoding, nmes, false)?;
656 file.write_all(&nmes)?;
657 file.write_u8(0)?;
658 let new_str_len = nmes.len() + 1; let new_address = new_offset - self.offset;
660 nstrs.push(BGIString {
661 offset: curs.offset,
662 address: new_address,
663 typ: curs.typ.clone(),
664 });
665 old_offset += old_str_len;
666 new_offset += new_str_len;
667 cur_str = strs.next();
668 }
669 if cur_mes.is_some() || mes.next().is_some() {
670 return Err(anyhow::anyhow!("Some strings were not processed."));
671 }
672 for str in nstrs {
673 file.write_u32_at(str.offset as u64, str.address as u32)?;
674 }
675 if !self.append && old_offset < self.data.data.len() {
676 file.write_all(&self.data.data[old_offset..])?;
677 }
678 Ok(())
679 }
680}
681
682lazy_static! {
683 static ref RUBY_REGEX: Regex = Regex::new(r"<r([^>]+)>([^<]+)</r>").unwrap();
684}
685
686fn parse_ruby_from_text(text: &mut String) -> Result<Vec<String>> {
687 let mut map = BTreeMap::new();
688 for i in RUBY_REGEX.captures_iter(&text) {
689 let i = i?;
690 let ruby_text = i.get(1).map_or("", |m| m.as_str());
691 let ruby_str = i.get(2).map_or("", |m| m.as_str());
692 if !ruby_text.is_empty() && !ruby_str.is_empty() {
693 map.insert(ruby_str.to_owned(), ruby_text.to_owned());
694 }
695 }
696 let mut result = Vec::new();
697 for (ruby_str, ruby_text) in map {
698 *text = text.replace(&format!("<r{ruby_text}>{ruby_str}</r>"), &ruby_str);
699 result.push(ruby_str);
700 result.push(ruby_text);
701 }
702 Ok(result)
703}
704
/// Two ruby tags are stripped from the text and reported as
/// `text, annotation` pairs ordered by the tagged text.
#[test]
fn test_parse_ruby_from_text() {
    let input = "This is a test <rRubyText>RubyString</r> and <rAnotherText>AnotherRuby</r>.";
    let mut text = input.to_string();
    let ruby = parse_ruby_from_text(&mut text).unwrap();
    assert_eq!(text, "This is a test RubyString and AnotherRuby.");
    let expected: Vec<String> = ["AnotherRuby", "AnotherText", "RubyString", "RubyText"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    assert_eq!(ruby, expected);
}