1use crate::types::*;
2use anyhow::Result;
3#[cfg(feature = "jieba")]
4use jieba_rs::Jieba;
5use unicode_segmentation::UnicodeSegmentation;
6
/// Graphemes treated as blanks when trimming the start of a wrapped line:
/// the ASCII space and the ideographic (full-width) space U+3000.
///
/// The full-width entry is spelled as an escape so that it cannot be confused
/// with (or silently normalised into) an ordinary space.
const SPACE_STR_LIST: [&str; 2] = [" ", "\u{3000}"];
/// Opening/closing quote pairs recognised by the formatter, as `(open, close)`.
const QUOTE_LIST: [(&str, &str); 4] = [("「", "」"), ("『", "』"), ("(", ")"), ("【", "】")];
/// Punctuation graphemes after which a sentence-aware line break may be placed.
const BREAK_SENTENCE_SYMBOLS: [&str; 6] = ["…", ",", "。", "?", "!", "—"];
10
/// Returns `true` when `c` belongs to one of the "word" alphabets that the
/// formatter tries not to split in the middle of:
///
/// * ASCII letters and digits,
/// * U+0370..=U+03FF (Greek and Coptic) and U+1F00..=U+1FFF (Greek Extended),
/// * U+0400..=U+052F (Cyrillic and Cyrillic Supplement).
fn is_non_gbk_char(c: char) -> bool {
    c.is_ascii_alphanumeric()
        || matches!(
            c,
            '\u{0370}'..='\u{03FF}' | '\u{0400}'..='\u{052F}' | '\u{1F00}'..='\u{1FFF}'
        )
}
25
26fn check_is_non_gbk_word(s: &str) -> bool {
27 for c in s.chars() {
28 if !is_non_gbk_char(c) {
29 return false;
30 }
31 }
32 true
33}
34
35fn check_need_fullwidth_space(s: &str) -> bool {
36 let has_start_quote = QUOTE_LIST.iter().any(|(open, _)| s.starts_with(open));
37 if !has_start_quote {
38 return false;
39 }
40 for (open, close) in QUOTE_LIST.iter() {
41 let open_index = s.rfind(open);
42 if let Some(open_index) = open_index {
43 let index = s.rfind(close);
44 match index {
45 Some(idx) => {
46 return idx < open_index;
47 }
48 None => return true,
49 }
50 }
51 }
52 false
53}
54
55fn check_is_end_quote(segs: &[&str], pos: usize) -> bool {
56 let d = segs[pos];
57 QUOTE_LIST.iter().any(|(_, close)| d == *close)
58}
59
60fn check_is_end_quote_or_symbol(segs: &[&str], pos: usize) -> bool {
61 let d = segs[pos];
62 QUOTE_LIST.iter().any(|(_, close)| d == *close) || BREAK_SENTENCE_SYMBOLS.contains(&d)
63}
64
65fn check_is_start_quote(s: &str) -> bool {
66 QUOTE_LIST.iter().any(|(open, _)| s == *open)
67}
68
69fn take_trailing_start_quotes(buffer: &mut String) -> String {
70 let (collected, trailing) = {
71 let mut collected = buffer.graphemes(true).collect::<Vec<_>>();
72 let mut trailing = Vec::new();
73 while let Some(&last) = collected.last() {
74 if check_is_start_quote(last) {
75 collected.pop();
76 trailing.push(last);
77 } else {
78 break;
79 }
80 }
81 trailing.reverse();
82 (collected.concat(), trailing.concat())
83 };
84 *buffer = collected;
85 trailing
86}
87
/// Returns `true` when a line break placed in front of `segs[pos]` would fall
/// inside a word, according to `jieba`.
///
/// The graphemes in `segs` are concatenated and tokenised with
/// `jieba.cut(.., false)`; the break is considered harmless (result `false`)
/// only if `pos` equals the cumulative grapheme count at the end of some token.
#[cfg(feature = "jieba")]
fn check_chinese_word_is_break(segs: &[&str], pos: usize, jieba: &Jieba) -> bool {
    let text = segs.concat();
    let lands_on_word_end = jieba
        .cut(&text, false)
        .iter()
        // Running total of graphemes consumed up to the end of each token.
        .scan(0usize, |consumed, word| {
            *consumed += word.graphemes(true).count();
            Some(*consumed)
        })
        .any(|word_end| word_end == pos);
    !lands_on_word_end
}
103
/// Stand-in used when the `jieba` feature is disabled.
///
/// Without a tokenizer no position is ever reported as splitting a word, so
/// this always returns `false`; all arguments are ignored.
#[cfg(not(feature = "jieba"))]
fn check_chinese_word_is_break(_segs: &[&str], _pos: usize, _jieba: &()) -> bool {
    false
}
108
/// Re-wraps message text into lines of a fixed maximum length.
///
/// Configuration is supplied through [`FixedFormatter::new`]; the wrapping
/// itself is performed by [`FixedFormatter::format`].
pub struct FixedFormatter {
    /// Line length limit: `format` starts a new line once the counted length
    /// of the current line is greater than or equal to this value.
    length: usize,
    /// When `true`, line feeds already present in the input are kept.
    keep_original: bool,
    /// When `false`, `format` tries to move a whole run of ASCII-alphanumeric,
    /// Greek or Cyrillic characters to the next line instead of cutting it.
    break_words: bool,
    /// When `true`, continuation lines are indented if the first line starts
    /// with an opening quote that is still open (see
    /// `check_need_fullwidth_space`).
    insert_fullwidth_space_at_line_start: bool,
    /// When `true`, `format` prefers to break after sentence punctuation and
    /// carries opening quotes that would end a line over to the next one.
    break_with_sentence: bool,
    /// Tokenizer consulted to avoid breaking inside a Chinese word; `None`
    /// disables that check.
    #[cfg(feature = "jieba")]
    jieba: Option<Jieba>,
    /// Placeholder for the tokenizer when the `jieba` feature is disabled.
    #[cfg(not(feature = "jieba"))]
    jieba: Option<()>,
    /// When `true`, spaces at the start of a line are kept instead of dropped.
    no_remove_space_at_line_start: bool,
    /// Script flavour; read by `is_circus` / `is_scn` when the corresponding
    /// features are enabled.
    #[allow(unused)]
    typ: Option<ScriptType>,
}
128
129impl FixedFormatter {
130 pub fn new(
131 length: usize,
132 keep_original: bool,
133 break_words: bool,
134 insert_fullwidth_space_at_line_start: bool,
135 break_with_sentence: bool,
136 #[cfg(feature = "jieba")] break_chinese_words: bool,
137 #[cfg(feature = "jieba")] jieba_dict: Option<String>,
138 no_remove_space_at_line_start: bool,
139 typ: Option<ScriptType>,
140 ) -> Result<Self> {
141 #[cfg(feature = "jieba")]
142 let jieba = if !break_chinese_words {
143 let mut jieba = Jieba::new();
144 if let Some(dict) = jieba_dict {
145 let file = std::fs::File::open(dict)?;
146 let mut reader = std::io::BufReader::new(file);
147 jieba.load_dict(&mut reader)?;
148 }
149 Some(jieba)
150 } else {
151 None
152 };
153 Ok(FixedFormatter {
154 length,
155 keep_original,
156 break_words,
157 insert_fullwidth_space_at_line_start,
158 break_with_sentence,
159 #[cfg(feature = "jieba")]
160 jieba,
161 #[cfg(not(feature = "jieba"))]
162 jieba: None,
163 no_remove_space_at_line_start,
164 typ,
165 })
166 }
167
/// Test helper: a formatter with the given `length`, word breaking enabled
/// and every other option switched off.
#[cfg(test)]
fn builder(length: usize) -> Self {
    Self {
        length,
        break_words: true,
        keep_original: false,
        insert_fullwidth_space_at_line_start: false,
        break_with_sentence: false,
        no_remove_space_at_line_start: false,
        jieba: None,
        typ: None,
    }
}
181
/// Test helper: returns the formatter with `keep_original` set to `keep`.
#[cfg(test)]
fn keep_original(self, keep: bool) -> Self {
    Self {
        keep_original: keep,
        ..self
    }
}
187
/// Test helper: returns the formatter with `break_words` replaced.
#[cfg(test)]
fn break_words(self, break_words: bool) -> Self {
    Self {
        break_words,
        ..self
    }
}
193
/// Test helper: returns the formatter with
/// `insert_fullwidth_space_at_line_start` set to `insert`.
#[cfg(test)]
fn insert_fullwidth_space_at_line_start(self, insert: bool) -> Self {
    Self {
        insert_fullwidth_space_at_line_start: insert,
        ..self
    }
}
199
/// Test helper: returns the formatter with `break_with_sentence` replaced.
#[cfg(test)]
fn break_with_sentence(self, break_with_sentence: bool) -> Self {
    Self {
        break_with_sentence,
        ..self
    }
}
205
/// Test helper: installs a default tokenizer when `break_chinese_words` is
/// `false`, and removes any tokenizer when it is `true`.
///
/// Always returns `Ok`.
#[cfg(all(feature = "jieba", test))]
fn break_chinese_words(mut self, break_chinese_words: bool) -> Result<Self> {
    self.jieba = (!break_chinese_words).then(Jieba::new);
    Ok(self)
}
216
/// Test helper: registers the word `dict` (with optional frequency and tag)
/// in the tokenizer, if one is installed; otherwise does nothing.
#[cfg(all(feature = "jieba", test))]
fn add_dict(mut self, dict: &str, freq: Option<usize>, tag: Option<&str>) -> Self {
    if let Some(tokenizer) = self.jieba.as_mut() {
        tokenizer.add_word(dict, freq, tag);
    }
    self
}
224
/// Test helper: returns the formatter with `no_remove_space_at_line_start`
/// set to `no_remove`.
#[cfg(test)]
fn no_remove_space_at_line_start(self, no_remove: bool) -> Self {
    Self {
        no_remove_space_at_line_start: no_remove,
        ..self
    }
}
230
/// Test helper: returns the formatter with its script type replaced.
#[cfg(test)]
#[allow(dead_code)]
fn typ(self, typ: Option<ScriptType>) -> Self {
    Self { typ, ..self }
}
237
238 #[cfg(feature = "circus")]
239 fn is_circus(&self) -> bool {
240 matches!(self.typ, Some(ScriptType::Circus))
241 }
242
243 #[cfg(not(feature = "circus"))]
244 fn is_circus(&self) -> bool {
245 false
246 }
247
248 #[cfg(feature = "kirikiri")]
249 fn is_scn(&self) -> bool {
250 matches!(self.typ, Some(ScriptType::KirikiriScn))
251 }
252
253 #[cfg(not(feature = "kirikiri"))]
254 fn is_scn(&self) -> bool {
255 false
256 }
257
258 pub fn format(&self, message: &str) -> String {
259 let mut result = String::new();
260 let vec: Vec<_> = UnicodeSegmentation::graphemes(message, true).collect();
261 let mut current_length = 0;
262 let mut is_command = false;
263 let mut pre_is_lf = false;
264 let mut is_ruby = false;
265 let mut is_ruby_rt = false;
266 let mut last_command = None;
267 let mut i = 0;
268 let mut main_content = String::new();
270 let mut first_line = true;
271 let mut need_insert_fullwidth_space = false;
272
273 while i < vec.len() {
274 let grapheme = vec[i];
275
276 if grapheme == "\n" {
277 if self.keep_original
278 || (self.is_circus() && last_command.as_ref().is_some_and(|cmd| cmd == "@n"))
279 {
280 result.push('\n');
281 current_length = 0;
282 if first_line {
283 if self.insert_fullwidth_space_at_line_start {
284 if check_need_fullwidth_space(&main_content) {
285 need_insert_fullwidth_space = true;
286 }
287 }
288 }
289 if need_insert_fullwidth_space {
290 result.push(' ');
291 current_length += 1;
292 }
293 main_content.clear();
294 first_line = false;
295 }
296 pre_is_lf = true;
297 i += 1;
298 continue;
299 }
300
301 if current_length >= self.length {
303 if self.break_with_sentence
304 && !is_command
305 && !is_ruby_rt
306 && ((BREAK_SENTENCE_SYMBOLS.contains(&grapheme)
307 && i > 1
308 && BREAK_SENTENCE_SYMBOLS.contains(&vec[i - 1]))
309 || check_is_end_quote_or_symbol(&vec, i))
310 {
311 let mut break_pos = None;
312 let segs = result.graphemes(true).collect::<Vec<_>>();
313 let is_end_quote = check_is_end_quote(&vec, i);
314 let mut end = segs.len();
315 for (j, ch) in segs.iter().enumerate().rev() {
316 if BREAK_SENTENCE_SYMBOLS.contains(ch) {
317 end = j;
318 if !is_end_quote {
319 break_pos = Some(j);
320 }
321 }
322 break;
323 }
324 for (j, ch) in segs[..end].iter().enumerate().rev() {
325 if j >= end {
326 continue;
327 }
328 if BREAK_SENTENCE_SYMBOLS.contains(ch) {
329 break_pos = Some(j + 1);
330 break;
331 }
332 }
333 if let Some(pos) = break_pos {
334 let mut head = segs[..pos].concat();
335 let mut remaining = segs[pos..].concat();
336 if self.break_with_sentence {
337 let trailing = take_trailing_start_quotes(&mut head);
338 if !trailing.is_empty() {
339 remaining.insert_str(0, &trailing);
340 }
341 }
342 let remaining = remaining.trim_start().to_string();
343 result = head;
344 result.push('\n');
345 current_length = 0;
346 if first_line {
347 if self.insert_fullwidth_space_at_line_start {
348 if check_need_fullwidth_space(&main_content) {
349 need_insert_fullwidth_space = true;
350 }
351 }
352 first_line = false;
353 }
354 if need_insert_fullwidth_space {
355 result.push(' ');
356 current_length += 1;
357 }
358 result.push_str(&remaining);
359 current_length += remaining.graphemes(true).count();
360 main_content.clear();
361 pre_is_lf = true;
362 } else {
363 let trailing = if self.break_with_sentence {
364 take_trailing_start_quotes(&mut result)
365 } else {
366 String::new()
367 };
368 result.push('\n');
369 current_length = 0;
370 if first_line {
371 if self.insert_fullwidth_space_at_line_start {
372 if check_need_fullwidth_space(&main_content) {
373 need_insert_fullwidth_space = true;
374 }
375 }
376 first_line = false;
377 }
378 if need_insert_fullwidth_space {
379 result.push(' ');
380 current_length += 1;
381 }
382 main_content.clear();
383 if !trailing.is_empty() {
384 result.push_str(&trailing);
385 current_length += trailing.graphemes(true).count();
386 main_content.push_str(&trailing);
387 }
388 pre_is_lf = true;
389 }
390 } else if !self.break_words
391 && !is_command
392 && !is_ruby_rt
393 && check_is_non_gbk_word(grapheme)
394 {
395 let mut break_pos = None;
397 let mut temp_length = current_length;
398 let mut j = result.len();
399
400 for ch in result.chars().rev() {
402 if ch == ' ' || ch == ' ' || (!ch.is_ascii() && !is_non_gbk_char(ch)) {
403 break_pos = Some(j);
404 break;
405 }
406 if is_non_gbk_char(ch) {
407 temp_length -= 1;
408 if temp_length == 0 {
409 break;
410 }
411 }
412 j -= ch.len_utf8();
413 }
414
415 if let Some(pos) = break_pos {
417 let mut remaining = result[pos..].to_string();
418 result.truncate(pos);
419 if self.break_with_sentence {
420 let trailing = take_trailing_start_quotes(&mut result);
421 if !trailing.is_empty() {
422 remaining.insert_str(0, &trailing);
423 }
424 }
425 let remaining = remaining.trim_start().to_string();
426 result.push('\n');
427 current_length = 0;
428 if first_line {
429 if self.insert_fullwidth_space_at_line_start {
430 if check_need_fullwidth_space(&main_content) {
431 need_insert_fullwidth_space = true;
432 }
433 }
434 first_line = false;
435 }
436 if need_insert_fullwidth_space {
437 result.push(' ');
438 current_length += 1;
439 }
440 result.push_str(&remaining);
441 current_length += remaining.chars().count();
442 main_content.clear();
443 pre_is_lf = true;
444 } else {
445 let trailing = if self.break_with_sentence {
446 take_trailing_start_quotes(&mut result)
447 } else {
448 String::new()
449 };
450 result.push('\n');
451 current_length = 0;
452 if first_line {
453 if self.insert_fullwidth_space_at_line_start {
454 if check_need_fullwidth_space(&main_content) {
455 need_insert_fullwidth_space = true;
456 }
457 }
458 first_line = false;
459 }
460 if need_insert_fullwidth_space {
461 result.push(' ');
462 current_length += 1;
463 }
464 main_content.clear();
465 if !trailing.is_empty() {
466 result.push_str(&trailing);
467 current_length += trailing.graphemes(true).count();
468 main_content.push_str(&trailing);
469 }
470 pre_is_lf = true;
471 }
472 } else if self
473 .jieba
474 .as_ref()
475 .is_some_and(|s| check_chinese_word_is_break(&vec, i, s))
476 && !is_command
477 && !is_ruby_rt
478 {
479 #[cfg(feature = "jieba")]
480 {
481 let jieba = self.jieba.as_ref().unwrap();
482 let s = vec.join("");
483 let mut breaked = jieba
484 .cut(&s, false)
485 .iter()
486 .map(|s| s.graphemes(true).count())
487 .collect::<Vec<_>>();
488 let mut sum = 0;
489 for i in breaked.iter_mut() {
490 sum += *i;
491 *i = sum;
492 }
493 let break_pos = match breaked.binary_search(&i) {
494 Ok(pos) => Some(pos),
495 Err(pos) => {
496 if pos == 0 {
497 None
498 } else {
499 Some(pos - 1)
500 }
501 }
502 };
503 if let Some(break_pos) = break_pos {
504 let pos = breaked[break_pos];
505 let segs = result.graphemes(true).collect::<Vec<_>>();
506 let remain_count = i - pos;
507 let pos = segs.len() - remain_count;
508 let mut head = segs[..pos].concat();
509 let mut remaining = segs[pos..].concat();
510 if self.break_with_sentence {
511 let trailing = take_trailing_start_quotes(&mut head);
512 if !trailing.is_empty() {
513 remaining.insert_str(0, &trailing);
514 }
515 }
516 let remaining = remaining.trim_start().to_string();
517 result = head;
518 result.push('\n');
519 current_length = 0;
520 if first_line {
521 if self.insert_fullwidth_space_at_line_start {
522 if check_need_fullwidth_space(&main_content) {
523 need_insert_fullwidth_space = true;
524 }
525 }
526 first_line = false;
527 }
528 if need_insert_fullwidth_space {
529 result.push(' ');
530 current_length += 1;
531 }
532 result.push_str(&remaining);
533 current_length += remaining.graphemes(true).count();
534 main_content.clear();
535 pre_is_lf = true;
536 } else {
537 let trailing = if self.break_with_sentence {
538 take_trailing_start_quotes(&mut result)
539 } else {
540 String::new()
541 };
542 result.push('\n');
543 current_length = 0;
544 if first_line {
545 if self.insert_fullwidth_space_at_line_start {
546 if check_need_fullwidth_space(&main_content) {
547 need_insert_fullwidth_space = true;
548 }
549 }
550 first_line = false;
551 }
552 if need_insert_fullwidth_space {
553 result.push(' ');
554 current_length += 1;
555 }
556 main_content.clear();
557 if !trailing.is_empty() {
558 result.push_str(&trailing);
559 current_length += trailing.graphemes(true).count();
560 main_content.push_str(&trailing);
561 }
562 pre_is_lf = true;
563 }
564 }
565 } else {
566 let trailing = if self.break_with_sentence {
567 take_trailing_start_quotes(&mut result)
568 } else {
569 String::new()
570 };
571 result.push('\n');
572 current_length = 0;
573 if first_line {
574 if self.insert_fullwidth_space_at_line_start {
575 if check_need_fullwidth_space(&main_content) {
576 need_insert_fullwidth_space = true;
577 }
578 }
579 first_line = false;
580 }
581 if need_insert_fullwidth_space {
582 result.push(' ');
583 current_length += 1;
584 }
585 main_content.clear();
586 if !trailing.is_empty() {
587 result.push_str(&trailing);
588 current_length += trailing.graphemes(true).count();
589 main_content.push_str(&trailing);
590 }
591 pre_is_lf = true;
592 }
593 }
594
595 if !self.no_remove_space_at_line_start
596 && (current_length == 0 || pre_is_lf)
597 && SPACE_STR_LIST.contains(&grapheme)
598 {
599 i += 1;
600 continue;
601 }
602
603 result.push_str(grapheme);
604
605 #[cfg(feature = "kirikiri")]
606 if self.is_scn() {
607 if grapheme == "#" {
608 i += 1;
609 while i < vec.len() && vec[i] != ";" {
610 result.push_str(vec[i]);
611 i += 1;
612 }
613 if i < vec.len() {
614 result.push_str(vec[i]);
615 i += 1;
616 }
617 continue;
618 }
619 if grapheme == "%" && i + 1 < vec.len() && vec[i + 1] == "r" {
620 result.push('r');
621 i += 2;
622 continue;
623 }
624 }
625
626 if self.is_circus() {
627 if grapheme == "@" {
628 is_command = true;
629 last_command = Some(String::new());
630 } else if is_command && grapheme.len() != 1
631 || !grapheme
632 .chars()
633 .next()
634 .unwrap_or(' ')
635 .is_ascii_alphanumeric()
636 {
637 is_command = false;
638 }
639 if grapheme == "{" {
640 is_ruby = true;
641 is_ruby_rt = true;
642 } else if is_ruby && grapheme == "/" {
643 is_ruby_rt = false;
644 i += 1;
645 continue;
646 } else if is_ruby && grapheme == "}" {
647 is_ruby = false;
648 i += 1;
649 continue;
650 }
651 }
652
653 if self.is_scn() {
654 if grapheme == "%" {
655 is_command = true;
656 } else if is_command && grapheme == ";" {
657 is_command = false;
658 i += 1;
659 continue;
660 }
661 if grapheme == "[" {
662 is_ruby = true;
663 is_ruby_rt = true;
664 i += 1;
665 continue;
666 } else if is_ruby && grapheme == "]" {
667 is_ruby = false;
668 is_ruby_rt = false;
669 i += 1;
670 continue;
671 }
672 }
673
674 if is_command {
675 if let Some(ref mut cmd) = last_command {
676 cmd.push_str(grapheme);
677 }
678 }
679
680 if !is_command && !is_ruby_rt {
681 current_length += 1;
682 main_content.push_str(grapheme);
683 }
684
685 pre_is_lf = false;
686 i += 1;
687 }
688
689 result
690 }
691}
692
/// End-to-end checks of `FixedFormatter::format` for each option and for the
/// feature-gated script flavours.
#[test]
fn test_format() {
    // Default settings: hard wrap at 10 graphemes, input line feeds dropped.
    let formatter = FixedFormatter::builder(10);
    let message = "This is a test message.\nThis is another line.";
    let formatted_message = formatter.format(message);
    assert_eq!(
        formatted_message,
        "This is a \ntest messa\nge.This is\nanother li\nne."
    );
    assert_eq!(formatter.format("● This is a test."), "● This is \na test.");
    // NOTE(review): as written this assertion is identical to the previous one.
    assert_eq!(
        formatter.format("● This is a test."),
        "● This is \na test."
    );
    // keep_original: the line feed from the input is preserved.
    let fommater2 = FixedFormatter::builder(10).keep_original(true);
    assert_eq!(
        fommater2.format("● Th\n is is a te st."),
        "● Th\nis is a te\nst."
    );

    // break_words(false): whole words move to the next line when they fit.
    let no_break_formatter = FixedFormatter::builder(10).break_words(false);
    assert_eq!(
        no_break_formatter.format("Example text."),
        "Example \ntext."
    );

    // A word longer than the line is still cut.
    let no_break_formatter2 = FixedFormatter::builder(6).break_words(false);
    assert_eq!(
        no_break_formatter2.format("Example text."),
        "Exampl\ne text\n."
    );

    let no_break_formatter3 = FixedFormatter::builder(7).break_words(false);
    assert_eq!(
        no_break_formatter3.format("Example text."),
        "Example\ntext."
    );

    let real_world_no_break_formatter = FixedFormatter::builder(32).break_words(false);
    assert_eq!(
        real_world_no_break_formatter.format("○咕噜咕噜(Temporary Magnetic Pattern Linkage)"),
        "○咕噜咕噜(Temporary Magnetic Pattern\nLinkage)"
    );

    // insert_fullwidth_space_at_line_start: continuation lines are indented
    // with U+3000 while the leading quote is still open.
    let formatter3 = FixedFormatter::builder(10)
        .break_words(false)
        .insert_fullwidth_space_at_line_start(true);
    assert_eq!(
        formatter3.format("「This is a test."),
        "「This is a\n\u{3000}test."
    );

    // The leading quote is closed on the first line: no indentation.
    assert_eq!(
        formatter3.format("(This) is a test."),
        "(This) is \na test."
    );

    assert_eq!(
        formatter3.format("(long text test here, test 1234"),
        "(long text\n\u{3000}test here\n\u{3000}, test \n\u{3000}1234"
    );

    assert_eq!(
        formatter3.format("(This) 「is a test."),
        "(This) 「is\n\u{3000}a test."
    );

    // no_remove_space_at_line_start: the space after the break is kept.
    let formatter3b = FixedFormatter::builder(10)
        .break_words(false)
        .no_remove_space_at_line_start(true);

    assert_eq!(
        formatter3b.format("(This) 「is a test."),
        "(This) 「is\n a test."
    );

    // break_with_sentence: prefer breaking after sentence punctuation.
    let formatter4 = FixedFormatter::builder(10)
        .break_words(false)
        .break_with_sentence(true);
    assert_eq!(
        formatter4.format("『打断测,测试一下……』"),
        "『打断测,\n测试一下……』"
    );

    assert_eq!(
        formatter4.format("『打断测,测试一下。』"),
        "『打断测,\n测试一下。』"
    );

    assert_eq!(
        formatter4.format("『打断是测试一下哦……』"),
        "『打断是测试一下哦\n……』"
    );

    assert_eq!(
        formatter4.format("『打断测是测试一下。』"),
        "『打断测是测试一下。\n』"
    );

    assert_eq!(
        formatter4.format("『打断测试,测试一下。』"),
        "『打断测试,\n测试一下。』"
    );

    assert_eq!(
        formatter4.format("这打断测试,测试一下。"),
        "这打断测试,\n测试一下。"
    );

    assert_eq!(
        formatter4.format("这打断测试哦测试一下。。"),
        "这打断测试哦测试一下\n。。"
    );

    // Sentence breaking combined with indentation: the opening quote that
    // would end the first line is carried over to the second.
    let formatter5 = FixedFormatter::builder(10)
        .break_words(false)
        .insert_fullwidth_space_at_line_start(true)
        .break_with_sentence(true);
    assert_eq!(
        formatter5.format("「一二三四『whatthe』"),
        "「一二三四\n\u{3000}『whatthe』"
    );

    let real_break_formatter = FixedFormatter::builder(27)
        .break_words(false)
        .break_with_sentence(true);
    assert_eq!(
        real_break_formatter.format("「他们就是想和阳见待在一个社团,在里面表现表现、耍耍帅,这样不就和她套上近乎了嘛!算盘珠子都打到我脸上了……」"),
        "「他们就是想和阳见待在一个社团,\n在里面表现表现、耍耍帅,这样不就和她套上近乎了嘛!算盘\n珠子都打到我脸上了……」"
    );

    assert_eq!(
        real_break_formatter
            .format("「在英山的话或许可以看看『moon river』『Lavir』或是『Patisserie Yuzuru』」"),
        "「在英山的话或许可以看看『moon river』\n『Lavir』或是『Patisserie Yuzuru\n』」"
    );

    // Cyrillic words are kept whole when break_words is off.
    assert_eq!(check_is_non_gbk_word("бога"), true);
    let russian_break_formatter = FixedFormatter::builder(20).break_words(false);
    assert_eq!(
        russian_break_formatter.format("Разнообразный и богатый опыт"),
        "Разнообразный и \nбогатый опыт"
    );

    // Circus scripts: `@` commands and ruby text do not count towards the
    // line length; a line feed after `@n` is kept.
    #[cfg(feature = "circus")]
    {
        let circus_formatter = FixedFormatter::builder(10).typ(Some(ScriptType::Circus));
        assert_eq!(
            circus_formatter.format("● @cmd1@cmd2@cmd3中文字数是一\n 二三 四五六七八九十"),
            "● @cmd1@cmd2@cmd3中文字数是一二三\n四五六七八九十"
        );
        assert_eq!(
            circus_formatter
                .format("● @cmd1@cmd2@cmd3{rubyText/中文}字数是一\n 二三 四五六七八九十"),
            "● @cmd1@cmd2@cmd3{rubyText/中文}字数是一二三\n四五六七八九十"
        );
        let circus_formatter2 = FixedFormatter::builder(32).typ(Some(ScriptType::Circus));
        assert_eq!(
            circus_formatter2.format("@re1@re2@b1@t30@w1「当然现在我很幸福哦?\n 因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?\n 因为有敦也君在身边」"),
            "@re1@re2@b1@t30@w1「当然现在我很幸福哦?因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?因为有敦也君在身边」"
        );
    }

    // Kirikiri SCN scripts: `%...;`, `[...]` and `#...;` are passed through
    // without being counted.
    #[cfg(feature = "kirikiri")]
    {
        let scn_formatter = FixedFormatter::builder(3)
            .break_words(false)
            .typ(Some(ScriptType::KirikiriScn));
        assert_eq!(
            scn_formatter.format("%test;[ruby]测[test]试打断。"),
            "%test;[ruby]测[test]试打\n断。"
        );
        assert_eq!(
            scn_formatter.format("%f$ハート$;#00ffadd6;♥%r打断测试"),
            "%f$ハート$;#00ffadd6;♥%r打断\n测试"
        )
    }
    // Tokenizer-aware breaking: the break position follows the word
    // boundaries reported by jieba, including user-added words.
    #[cfg(feature = "jieba")]
    {
        let jieba_formatter = FixedFormatter::builder(8)
            .break_words(false)
            .break_chinese_words(false)
            .unwrap();
        assert_eq!(
            jieba_formatter.format("测试分词,我们中出了一个叛徒。"),
            "测试分词,我们中\n出了一个叛徒。"
        );
        let jieba_formatter2 = FixedFormatter::builder(8)
            .break_words(false)
            .break_chinese_words(false)
            .unwrap()
            .add_dict("中出", Some(114514), None);
        assert_eq!(
            jieba_formatter2
                .jieba
                .as_ref()
                .is_some_and(|s| s.has_word("中出")),
            true
        );
        assert_eq!(
            jieba_formatter2.format("测试分词,我们\n中出了一个叛徒。"),
            "测试分词,我们\n中出了一个叛徒。"
        );
    }
}