msg_tool\format/
fixed.rs

1use crate::types::*;
2use anyhow::Result;
3#[cfg(feature = "jieba")]
4use jieba_rs::Jieba;
5use unicode_segmentation::UnicodeSegmentation;
6
/// Graphemes treated as blank when deciding whether to drop a space at the
/// start of a line: the ASCII space and the ideographic (full-width) space.
/// The full-width space is written as an escape so it cannot be confused with,
/// or silently normalised into, an ordinary space.
const SPACE_STR_LIST: [&str; 2] = [" ", "\u{3000}"];
/// `(opening, closing)` quote pairs recognised by the quote-aware helpers.
const QUOTE_LIST: [(&str, &str); 4] = [("「", "」"), ("『", "』"), ("(", ")"), ("【", "】")];
/// Punctuation treated as a sentence boundary by the `break_with_sentence` logic.
const BREAK_SENTENCE_SYMBOLS: [&str; 6] = ["…", ",", "。", "?", "!", "—"];
10
/// Returns `true` when `c` is an ASCII letter or digit, a Cyrillic character
/// (U+0400..=U+052F) or a Greek character (U+0370..=U+03FF, U+1F00..=U+1FFF).
fn is_non_gbk_char(c: char) -> bool {
    c.is_ascii_alphanumeric()
        || matches!(
            c,
            '\u{0370}'..='\u{03FF}' | '\u{0400}'..='\u{052F}' | '\u{1F00}'..='\u{1FFF}'
        )
}
25
26fn check_is_non_gbk_word(s: &str) -> bool {
27    for c in s.chars() {
28        if !is_non_gbk_char(c) {
29            return false;
30        }
31    }
32    true
33}
34
35fn check_need_fullwidth_space(s: &str) -> bool {
36    let has_start_quote = QUOTE_LIST.iter().any(|(open, _)| s.starts_with(open));
37    if !has_start_quote {
38        return false;
39    }
40    for (open, close) in QUOTE_LIST.iter() {
41        let open_index = s.rfind(open);
42        if let Some(open_index) = open_index {
43            let index = s.rfind(close);
44            match index {
45                Some(idx) => {
46                    return idx < open_index;
47                }
48                None => return true,
49            }
50        }
51    }
52    false
53}
54
55fn check_is_end_quote(segs: &[&str], pos: usize) -> bool {
56    let d = segs[pos];
57    QUOTE_LIST.iter().any(|(_, close)| d == *close)
58}
59
60fn check_is_end_quote_or_symbol(segs: &[&str], pos: usize) -> bool {
61    let d = segs[pos];
62    QUOTE_LIST.iter().any(|(_, close)| d == *close) || BREAK_SENTENCE_SYMBOLS.contains(&d)
63}
64
65fn check_is_start_quote(s: &str) -> bool {
66    QUOTE_LIST.iter().any(|(open, _)| s == *open)
67}
68
69fn take_trailing_start_quotes(buffer: &mut String) -> String {
70    let (collected, trailing) = {
71        let mut collected = buffer.graphemes(true).collect::<Vec<_>>();
72        let mut trailing = Vec::new();
73        while let Some(&last) = collected.last() {
74            if check_is_start_quote(last) {
75                collected.pop();
76                trailing.push(last);
77            } else {
78                break;
79            }
80        }
81        trailing.reverse();
82        (collected.concat(), trailing.concat())
83    };
84    *buffer = collected;
85    trailing
86}
87
/// Returns `true` when grapheme index `pos` does not coincide with the end of
/// any word produced by segmenting the joined `segs` with `jieba`, i.e. when a
/// line break placed before `segs[pos]` would not land on a word boundary.
#[cfg(feature = "jieba")]
fn check_chinese_word_is_break(segs: &[&str], pos: usize, jieba: &Jieba) -> bool {
    let text = segs.concat();
    let words = jieba.cut(&text, false);
    // Running grapheme offset of the end of each segmented word.
    let mut word_end = 0usize;
    let lands_on_boundary = words.iter().any(|word| {
        word_end += word.graphemes(true).count();
        word_end == pos
    });
    !lands_on_boundary
}
103
/// Stand-in used when the `jieba` feature is disabled: no segmentation is
/// performed, so no position is ever reported as splitting a word and the
/// result is always `false`.
#[cfg(not(feature = "jieba"))]
fn check_chinese_word_is_break(_segs: &[&str], _pos: usize, _jieba: &()) -> bool {
    false
}
108
/// Formatter that re-wraps a message into lines of a fixed number of graphemes.
pub struct FixedFormatter {
    /// Number of counted graphemes after which `format` starts a new line.
    length: usize,
    /// Whether line breaks already present in the input are kept in the output.
    keep_original: bool,
    /// Whether words may be split at the end of the line. When `false`, runs of
    /// ASCII alphanumerics, Cyrillic or Greek characters are moved to the next line whole.
    break_words: bool,
    /// Whether to insert a full-width space after a line break when a sentence starts with a full-width quotation mark.
    insert_fullwidth_space_at_line_start: bool,
    /// If a line break occurs in the middle of some symbols, bring the sentence to next line
    break_with_sentence: bool,
    #[cfg(feature = "jieba")]
    /// Jieba instance for Chinese word segmentation.
    /// `None` when Chinese words are allowed to be split across lines.
    jieba: Option<Jieba>,
    // Placeholder with the same field name so the rest of the code compiles
    // unchanged without the `jieba` feature; it is always `None`.
    #[cfg(not(feature = "jieba"))]
    jieba: Option<()>,
    /// Do not remove space at the start of the line
    no_remove_space_at_line_start: bool,
    /// Script type being formatted; selects the Circus / Kirikiri SCN markup
    /// handling. Only read when the matching feature is enabled, hence the `allow`.
    #[allow(unused)]
    typ: Option<ScriptType>,
}
128
129impl FixedFormatter {
130    pub fn new(
131        length: usize,
132        keep_original: bool,
133        break_words: bool,
134        insert_fullwidth_space_at_line_start: bool,
135        break_with_sentence: bool,
136        #[cfg(feature = "jieba")] break_chinese_words: bool,
137        #[cfg(feature = "jieba")] jieba_dict: Option<String>,
138        no_remove_space_at_line_start: bool,
139        typ: Option<ScriptType>,
140    ) -> Result<Self> {
141        #[cfg(feature = "jieba")]
142        let jieba = if !break_chinese_words {
143            let mut jieba = Jieba::new();
144            if let Some(dict) = jieba_dict {
145                let file = std::fs::File::open(dict)?;
146                let mut reader = std::io::BufReader::new(file);
147                jieba.load_dict(&mut reader)?;
148            }
149            Some(jieba)
150        } else {
151            None
152        };
153        Ok(FixedFormatter {
154            length,
155            keep_original,
156            break_words,
157            insert_fullwidth_space_at_line_start,
158            break_with_sentence,
159            #[cfg(feature = "jieba")]
160            jieba,
161            #[cfg(not(feature = "jieba"))]
162            jieba: None,
163            no_remove_space_at_line_start,
164            typ,
165        })
166    }
167
168    #[cfg(test)]
169    fn builder(length: usize) -> Self {
170        FixedFormatter {
171            length,
172            keep_original: false,
173            break_words: true,
174            insert_fullwidth_space_at_line_start: false,
175            break_with_sentence: false,
176            jieba: None,
177            typ: None,
178            no_remove_space_at_line_start: false,
179        }
180    }
181
182    #[cfg(test)]
183    fn keep_original(mut self, keep: bool) -> Self {
184        self.keep_original = keep;
185        self
186    }
187
188    #[cfg(test)]
189    fn break_words(mut self, break_words: bool) -> Self {
190        self.break_words = break_words;
191        self
192    }
193
194    #[cfg(test)]
195    fn insert_fullwidth_space_at_line_start(mut self, insert: bool) -> Self {
196        self.insert_fullwidth_space_at_line_start = insert;
197        self
198    }
199
200    #[cfg(test)]
201    fn break_with_sentence(mut self, break_with_sentence: bool) -> Self {
202        self.break_with_sentence = break_with_sentence;
203        self
204    }
205
206    #[cfg(all(feature = "jieba", test))]
207    fn break_chinese_words(mut self, break_chinese_words: bool) -> Result<Self> {
208        if !break_chinese_words {
209            let jieba = Jieba::new();
210            self.jieba = Some(jieba);
211        } else {
212            self.jieba = None;
213        }
214        Ok(self)
215    }
216
217    #[cfg(all(feature = "jieba", test))]
218    fn add_dict(mut self, dict: &str, freq: Option<usize>, tag: Option<&str>) -> Self {
219        if let Some(ref mut jieba) = self.jieba {
220            jieba.add_word(&dict, freq, tag);
221        }
222        self
223    }
224
225    #[cfg(test)]
226    fn no_remove_space_at_line_start(mut self, no_remove: bool) -> Self {
227        self.no_remove_space_at_line_start = no_remove;
228        self
229    }
230
231    #[cfg(test)]
232    #[allow(dead_code)]
233    fn typ(mut self, typ: Option<ScriptType>) -> Self {
234        self.typ = typ;
235        self
236    }
237
    /// Returns `true` when the configured script type is `ScriptType::Circus`.
    #[cfg(feature = "circus")]
    fn is_circus(&self) -> bool {
        matches!(self.typ, Some(ScriptType::Circus))
    }

    /// Variant compiled without the `circus` feature: always `false`.
    #[cfg(not(feature = "circus"))]
    fn is_circus(&self) -> bool {
        false
    }
247
    /// Returns `true` when the configured script type is `ScriptType::KirikiriScn`.
    #[cfg(feature = "kirikiri")]
    fn is_scn(&self) -> bool {
        matches!(self.typ, Some(ScriptType::KirikiriScn))
    }

    /// Variant compiled without the `kirikiri` feature: always `false`.
    #[cfg(not(feature = "kirikiri"))]
    fn is_scn(&self) -> bool {
        false
    }
257
258    pub fn format(&self, message: &str) -> String {
259        let mut result = String::new();
260        let vec: Vec<_> = UnicodeSegmentation::graphemes(message, true).collect();
261        let mut current_length = 0;
262        let mut is_command = false;
263        let mut pre_is_lf = false;
264        let mut is_ruby = false;
265        let mut is_ruby_rt = false;
266        let mut last_command = None;
267        let mut i = 0;
268        // Store main content of the line (excluding commands and ruby)
269        let mut main_content = String::new();
270        let mut first_line = true;
271        let mut need_insert_fullwidth_space = false;
272
273        while i < vec.len() {
274            let grapheme = vec[i];
275
276            if grapheme == "\n" {
277                if self.keep_original
278                    || (self.is_circus() && last_command.as_ref().is_some_and(|cmd| cmd == "@n"))
279                {
280                    result.push('\n');
281                    current_length = 0;
282                    if first_line {
283                        if self.insert_fullwidth_space_at_line_start {
284                            if check_need_fullwidth_space(&main_content) {
285                                need_insert_fullwidth_space = true;
286                            }
287                        }
288                    }
289                    if need_insert_fullwidth_space {
290                        result.push(' ');
291                        current_length += 1;
292                    }
293                    main_content.clear();
294                    first_line = false;
295                }
296                pre_is_lf = true;
297                i += 1;
298                continue;
299            }
300
301            // Check if we need to break and handle word breaking
302            if current_length >= self.length {
303                if self.break_with_sentence
304                    && !is_command
305                    && !is_ruby_rt
306                    && ((BREAK_SENTENCE_SYMBOLS.contains(&grapheme)
307                        && i > 1
308                        && BREAK_SENTENCE_SYMBOLS.contains(&vec[i - 1]))
309                        || check_is_end_quote_or_symbol(&vec, i))
310                {
311                    let mut break_pos = None;
312                    let segs = result.graphemes(true).collect::<Vec<_>>();
313                    let is_end_quote = check_is_end_quote(&vec, i);
314                    let mut end = segs.len();
315                    for (j, ch) in segs.iter().enumerate().rev() {
316                        if BREAK_SENTENCE_SYMBOLS.contains(ch) {
317                            end = j;
318                            if !is_end_quote {
319                                break_pos = Some(j);
320                            }
321                        }
322                        break;
323                    }
324                    for (j, ch) in segs[..end].iter().enumerate().rev() {
325                        if j >= end {
326                            continue;
327                        }
328                        if BREAK_SENTENCE_SYMBOLS.contains(ch) {
329                            break_pos = Some(j + 1);
330                            break;
331                        }
332                    }
333                    if let Some(pos) = break_pos {
334                        let mut head = segs[..pos].concat();
335                        let mut remaining = segs[pos..].concat();
336                        if self.break_with_sentence {
337                            let trailing = take_trailing_start_quotes(&mut head);
338                            if !trailing.is_empty() {
339                                remaining.insert_str(0, &trailing);
340                            }
341                        }
342                        let remaining = remaining.trim_start().to_string();
343                        result = head;
344                        result.push('\n');
345                        current_length = 0;
346                        if first_line {
347                            if self.insert_fullwidth_space_at_line_start {
348                                if check_need_fullwidth_space(&main_content) {
349                                    need_insert_fullwidth_space = true;
350                                }
351                            }
352                            first_line = false;
353                        }
354                        if need_insert_fullwidth_space {
355                            result.push(' ');
356                            current_length += 1;
357                        }
358                        result.push_str(&remaining);
359                        current_length += remaining.graphemes(true).count();
360                        main_content.clear();
361                        pre_is_lf = true;
362                    } else {
363                        let trailing = if self.break_with_sentence {
364                            take_trailing_start_quotes(&mut result)
365                        } else {
366                            String::new()
367                        };
368                        result.push('\n');
369                        current_length = 0;
370                        if first_line {
371                            if self.insert_fullwidth_space_at_line_start {
372                                if check_need_fullwidth_space(&main_content) {
373                                    need_insert_fullwidth_space = true;
374                                }
375                            }
376                            first_line = false;
377                        }
378                        if need_insert_fullwidth_space {
379                            result.push(' ');
380                            current_length += 1;
381                        }
382                        main_content.clear();
383                        if !trailing.is_empty() {
384                            result.push_str(&trailing);
385                            current_length += trailing.graphemes(true).count();
386                            main_content.push_str(&trailing);
387                        }
388                        pre_is_lf = true;
389                    }
390                } else if !self.break_words
391                    && !is_command
392                    && !is_ruby_rt
393                    && check_is_non_gbk_word(grapheme)
394                {
395                    // Look back to find a good break point (space or non-ASCII)
396                    let mut break_pos = None;
397                    let mut temp_length = current_length;
398                    let mut j = result.len();
399
400                    // Find the last space or non-ASCII character position
401                    for ch in result.chars().rev() {
402                        if ch == ' ' || ch == ' ' || (!ch.is_ascii() && !is_non_gbk_char(ch)) {
403                            break_pos = Some(j);
404                            break;
405                        }
406                        if is_non_gbk_char(ch) {
407                            temp_length -= 1;
408                            if temp_length == 0 {
409                                break;
410                            }
411                        }
412                        j -= ch.len_utf8();
413                    }
414
415                    // If we found a good break point, move content after it to next line
416                    if let Some(pos) = break_pos {
417                        let mut remaining = result[pos..].to_string();
418                        result.truncate(pos);
419                        if self.break_with_sentence {
420                            let trailing = take_trailing_start_quotes(&mut result);
421                            if !trailing.is_empty() {
422                                remaining.insert_str(0, &trailing);
423                            }
424                        }
425                        let remaining = remaining.trim_start().to_string();
426                        result.push('\n');
427                        current_length = 0;
428                        if first_line {
429                            if self.insert_fullwidth_space_at_line_start {
430                                if check_need_fullwidth_space(&main_content) {
431                                    need_insert_fullwidth_space = true;
432                                }
433                            }
434                            first_line = false;
435                        }
436                        if need_insert_fullwidth_space {
437                            result.push(' ');
438                            current_length += 1;
439                        }
440                        result.push_str(&remaining);
441                        current_length += remaining.chars().count();
442                        main_content.clear();
443                        pre_is_lf = true;
444                    } else {
445                        let trailing = if self.break_with_sentence {
446                            take_trailing_start_quotes(&mut result)
447                        } else {
448                            String::new()
449                        };
450                        result.push('\n');
451                        current_length = 0;
452                        if first_line {
453                            if self.insert_fullwidth_space_at_line_start {
454                                if check_need_fullwidth_space(&main_content) {
455                                    need_insert_fullwidth_space = true;
456                                }
457                            }
458                            first_line = false;
459                        }
460                        if need_insert_fullwidth_space {
461                            result.push(' ');
462                            current_length += 1;
463                        }
464                        main_content.clear();
465                        if !trailing.is_empty() {
466                            result.push_str(&trailing);
467                            current_length += trailing.graphemes(true).count();
468                            main_content.push_str(&trailing);
469                        }
470                        pre_is_lf = true;
471                    }
472                } else if self
473                    .jieba
474                    .as_ref()
475                    .is_some_and(|s| check_chinese_word_is_break(&vec, i, s))
476                    && !is_command
477                    && !is_ruby_rt
478                {
479                    #[cfg(feature = "jieba")]
480                    {
481                        let jieba = self.jieba.as_ref().unwrap();
482                        let s = vec.join("");
483                        let mut breaked = jieba
484                            .cut(&s, false)
485                            .iter()
486                            .map(|s| s.graphemes(true).count())
487                            .collect::<Vec<_>>();
488                        let mut sum = 0;
489                        for i in breaked.iter_mut() {
490                            sum += *i;
491                            *i = sum;
492                        }
493                        let break_pos = match breaked.binary_search(&i) {
494                            Ok(pos) => Some(pos),
495                            Err(pos) => {
496                                if pos == 0 {
497                                    None
498                                } else {
499                                    Some(pos - 1)
500                                }
501                            }
502                        };
503                        if let Some(break_pos) = break_pos {
504                            let pos = breaked[break_pos];
505                            let segs = result.graphemes(true).collect::<Vec<_>>();
506                            let remain_count = i - pos;
507                            let pos = segs.len() - remain_count;
508                            let mut head = segs[..pos].concat();
509                            let mut remaining = segs[pos..].concat();
510                            if self.break_with_sentence {
511                                let trailing = take_trailing_start_quotes(&mut head);
512                                if !trailing.is_empty() {
513                                    remaining.insert_str(0, &trailing);
514                                }
515                            }
516                            let remaining = remaining.trim_start().to_string();
517                            result = head;
518                            result.push('\n');
519                            current_length = 0;
520                            if first_line {
521                                if self.insert_fullwidth_space_at_line_start {
522                                    if check_need_fullwidth_space(&main_content) {
523                                        need_insert_fullwidth_space = true;
524                                    }
525                                }
526                                first_line = false;
527                            }
528                            if need_insert_fullwidth_space {
529                                result.push(' ');
530                                current_length += 1;
531                            }
532                            result.push_str(&remaining);
533                            current_length += remaining.graphemes(true).count();
534                            main_content.clear();
535                            pre_is_lf = true;
536                        } else {
537                            let trailing = if self.break_with_sentence {
538                                take_trailing_start_quotes(&mut result)
539                            } else {
540                                String::new()
541                            };
542                            result.push('\n');
543                            current_length = 0;
544                            if first_line {
545                                if self.insert_fullwidth_space_at_line_start {
546                                    if check_need_fullwidth_space(&main_content) {
547                                        need_insert_fullwidth_space = true;
548                                    }
549                                }
550                                first_line = false;
551                            }
552                            if need_insert_fullwidth_space {
553                                result.push(' ');
554                                current_length += 1;
555                            }
556                            main_content.clear();
557                            if !trailing.is_empty() {
558                                result.push_str(&trailing);
559                                current_length += trailing.graphemes(true).count();
560                                main_content.push_str(&trailing);
561                            }
562                            pre_is_lf = true;
563                        }
564                    }
565                } else {
566                    let trailing = if self.break_with_sentence {
567                        take_trailing_start_quotes(&mut result)
568                    } else {
569                        String::new()
570                    };
571                    result.push('\n');
572                    current_length = 0;
573                    if first_line {
574                        if self.insert_fullwidth_space_at_line_start {
575                            if check_need_fullwidth_space(&main_content) {
576                                need_insert_fullwidth_space = true;
577                            }
578                        }
579                        first_line = false;
580                    }
581                    if need_insert_fullwidth_space {
582                        result.push(' ');
583                        current_length += 1;
584                    }
585                    main_content.clear();
586                    if !trailing.is_empty() {
587                        result.push_str(&trailing);
588                        current_length += trailing.graphemes(true).count();
589                        main_content.push_str(&trailing);
590                    }
591                    pre_is_lf = true;
592                }
593            }
594
595            if !self.no_remove_space_at_line_start
596                && (current_length == 0 || pre_is_lf)
597                && SPACE_STR_LIST.contains(&grapheme)
598            {
599                i += 1;
600                continue;
601            }
602
603            result.push_str(grapheme);
604
605            #[cfg(feature = "kirikiri")]
606            if self.is_scn() {
607                if grapheme == "#" {
608                    i += 1;
609                    while i < vec.len() && vec[i] != ";" {
610                        result.push_str(vec[i]);
611                        i += 1;
612                    }
613                    if i < vec.len() {
614                        result.push_str(vec[i]);
615                        i += 1;
616                    }
617                    continue;
618                }
619                if grapheme == "%" && i + 1 < vec.len() && vec[i + 1] == "r" {
620                    result.push('r');
621                    i += 2;
622                    continue;
623                }
624            }
625
626            if self.is_circus() {
627                if grapheme == "@" {
628                    is_command = true;
629                    last_command = Some(String::new());
630                } else if is_command && grapheme.len() != 1
631                    || !grapheme
632                        .chars()
633                        .next()
634                        .unwrap_or(' ')
635                        .is_ascii_alphanumeric()
636                {
637                    is_command = false;
638                }
639                if grapheme == "{" {
640                    is_ruby = true;
641                    is_ruby_rt = true;
642                } else if is_ruby && grapheme == "/" {
643                    is_ruby_rt = false;
644                    i += 1;
645                    continue;
646                } else if is_ruby && grapheme == "}" {
647                    is_ruby = false;
648                    i += 1;
649                    continue;
650                }
651            }
652
653            if self.is_scn() {
654                if grapheme == "%" {
655                    is_command = true;
656                } else if is_command && grapheme == ";" {
657                    is_command = false;
658                    i += 1;
659                    continue;
660                }
661                if grapheme == "[" {
662                    is_ruby = true;
663                    is_ruby_rt = true;
664                    i += 1;
665                    continue;
666                } else if is_ruby && grapheme == "]" {
667                    is_ruby = false;
668                    is_ruby_rt = false;
669                    i += 1;
670                    continue;
671                }
672            }
673
674            if is_command {
675                if let Some(ref mut cmd) = last_command {
676                    cmd.push_str(grapheme);
677                }
678            }
679
680            if !is_command && !is_ruby_rt {
681                current_length += 1;
682                main_content.push_str(grapheme);
683            }
684
685            pre_is_lf = false;
686            i += 1;
687        }
688
689        result
690    }
691}
692
/// End-to-end checks of `FixedFormatter::format` covering each wrapping option
/// and, when the corresponding features are enabled, the Circus, Kirikiri SCN
/// and Jieba specific behaviour.
#[test]
fn test_format() {
    // Default builder: words may be split, input line feeds are dropped.
    let formatter = FixedFormatter::builder(10);
    let message = "This is a test message.\nThis is another line.";
    let formatted_message = formatter.format(message);
    assert_eq!(
        formatted_message,
        "This is a \ntest messa\nge.This is\nanother li\nne."
    );
    assert_eq!(formatter.format("● This is a test."), "● This is \na test.");
    assert_eq!(
        formatter.format("● This is  a test."),
        "● This is \na test."
    );
    // keep_original = true: the line feed from the input is preserved.
    let fommater2 = FixedFormatter::builder(10).keep_original(true);
    assert_eq!(
        fommater2.format("● Th\n is is a te st."),
        "● Th\nis is a te\nst."
    );

    // Test break_words = false
    let no_break_formatter = FixedFormatter::builder(10).break_words(false);
    assert_eq!(
        no_break_formatter.format("Example text."),
        "Example \ntext."
    );

    let no_break_formatter2 = FixedFormatter::builder(6).break_words(false);
    assert_eq!(
        no_break_formatter2.format("Example text."),
        "Exampl\ne text\n."
    );

    let no_break_formatter3 = FixedFormatter::builder(7).break_words(false);
    assert_eq!(
        no_break_formatter3.format("Example text."),
        "Example\ntext."
    );

    let real_world_no_break_formatter = FixedFormatter::builder(32).break_words(false);
    assert_eq!(
        real_world_no_break_formatter.format("○咕噜咕噜(Temporary Magnetic Pattern Linkage)"),
        "○咕噜咕噜(Temporary Magnetic Pattern\nLinkage)"
    );

    // insert_fullwidth_space_at_line_start: continuation lines are indented
    // with U+3000 only when the first line leaves a quote open.
    let formatter3 = FixedFormatter::builder(10)
        .break_words(false)
        .insert_fullwidth_space_at_line_start(true);
    assert_eq!(
        formatter3.format("「This is a test."),
        "「This is a\n\u{3000}test."
    );

    assert_eq!(
        formatter3.format("(This) is a test."),
        "(This) is \na test."
    );

    assert_eq!(
        formatter3.format("(long text test here, test 1234"),
        "(long text\n\u{3000}test here\n\u{3000}, test \n\u{3000}1234"
    );

    assert_eq!(
        formatter3.format("(This) 「is a test."),
        "(This) 「is\n\u{3000}a test."
    );

    // no_remove_space_at_line_start: the space after the break is kept.
    let formatter3b = FixedFormatter::builder(10)
        .break_words(false)
        .no_remove_space_at_line_start(true);

    assert_eq!(
        formatter3b.format("(This) 「is a test."),
        "(This) 「is\n a test."
    );

    // break_with_sentence: breaks are moved to sentence symbols where possible.
    let formatter4 = FixedFormatter::builder(10)
        .break_words(false)
        .break_with_sentence(true);
    assert_eq!(
        formatter4.format("『打断测,测试一下……』"),
        "『打断测,\n测试一下……』"
    );

    assert_eq!(
        formatter4.format("『打断测,测试一下。』"),
        "『打断测,\n测试一下。』"
    );

    assert_eq!(
        formatter4.format("『打断是测试一下哦……』"),
        "『打断是测试一下哦\n……』"
    );

    assert_eq!(
        formatter4.format("『打断测是测试一下。』"),
        "『打断测是测试一下。\n』"
    );

    assert_eq!(
        formatter4.format("『打断测试,测试一下。』"),
        "『打断测试,\n测试一下。』"
    );

    assert_eq!(
        formatter4.format("这打断测试,测试一下。"),
        "这打断测试,\n测试一下。"
    );

    assert_eq!(
        formatter4.format("这打断测试哦测试一下。。"),
        "这打断测试哦测试一下\n。。"
    );

    // Sentence breaking combined with the full-width indent: the dangling
    // opening quote moves to the next line together with the word.
    let formatter5 = FixedFormatter::builder(10)
        .break_words(false)
        .insert_fullwidth_space_at_line_start(true)
        .break_with_sentence(true);
    assert_eq!(
        formatter5.format("「一二三四『whatthe』"),
        "「一二三四\n\u{3000}『whatthe』"
    );

    let real_break_formatter = FixedFormatter::builder(27)
        .break_words(false)
        .break_with_sentence(true);
    assert_eq!(
        real_break_formatter.format("「他们就是想和阳见待在一个社团,在里面表现表现、耍耍帅,这样不就和她套上近乎了嘛!算盘珠子都打到我脸上了……」"),
        "「他们就是想和阳见待在一个社团,\n在里面表现表现、耍耍帅,这样不就和她套上近乎了嘛!算盘\n珠子都打到我脸上了……」"
    );

    assert_eq!(
        real_break_formatter
            .format("「在英山的话或许可以看看『moon river』『Lavir』或是『Patisserie Yuzuru』」"),
        "「在英山的话或许可以看看『moon river』\n『Lavir』或是『Patisserie Yuzuru\n』」"
    );

    // Cyrillic words are kept whole when break_words = false.
    assert_eq!(check_is_non_gbk_word("бога"), true);
    let russian_break_formatter = FixedFormatter::builder(20).break_words(false);
    assert_eq!(
        russian_break_formatter.format("Разнообразный и богатый опыт"),
        "Разнообразный и \nбогатый опыт"
    );

    // Circus scripts: `@` commands and ruby readings do not count towards the
    // line length, and a line feed after `@n` is preserved.
    #[cfg(feature = "circus")]
    {
        let circus_formatter = FixedFormatter::builder(10).typ(Some(ScriptType::Circus));
        assert_eq!(
            circus_formatter.format("● @cmd1@cmd2@cmd3中文字数是一\n 二三 四五六七八九十"),
            "● @cmd1@cmd2@cmd3中文字数是一二三\n四五六七八九十"
        );
        assert_eq!(
            circus_formatter
                .format("● @cmd1@cmd2@cmd3{rubyText/中文}字数是一\n 二三 四五六七八九十"),
            "● @cmd1@cmd2@cmd3{rubyText/中文}字数是一二三\n四五六七八九十"
        );
        let circus_formatter2 = FixedFormatter::builder(32).typ(Some(ScriptType::Circus));
        assert_eq!(
            circus_formatter2.format("@re1@re2@b1@t30@w1「当然现在我很幸福哦?\n 因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?\n 因为有敦也君在身边」"),
            "@re1@re2@b1@t30@w1「当然现在我很幸福哦?因为有你在身边」@n\n「@b1@t38@w1当然现在我很幸福哦?因为有敦也君在身边」"
        );
    }

    // Kirikiri SCN scripts: `%…;`, `#…;`, `%r` and `[…]` are copied through
    // without counting towards the line length.
    #[cfg(feature = "kirikiri")]
    {
        let scn_formatter = FixedFormatter::builder(3)
            .break_words(false)
            .typ(Some(ScriptType::KirikiriScn));
        assert_eq!(
            scn_formatter.format("%test;[ruby]测[test]试打断。"),
            "%test;[ruby]测[test]试打\n断。"
        );
        assert_eq!(
            scn_formatter.format("%f$ハート$;#00ffadd6;♥%r打断测试"),
            "%f$ハート$;#00ffadd6;♥%r打断\n测试"
        )
    }
    // Jieba: segmented Chinese words are not split; a user dictionary entry
    // changes where the break lands.
    #[cfg(feature = "jieba")]
    {
        let jieba_formatter = FixedFormatter::builder(8)
            .break_words(false)
            .break_chinese_words(false)
            .unwrap();
        assert_eq!(
            jieba_formatter.format("测试分词,我们中出了一个叛徒。"),
            "测试分词,我们中\n出了一个叛徒。"
        );
        let jieba_formatter2 = FixedFormatter::builder(8)
            .break_words(false)
            .break_chinese_words(false)
            .unwrap()
            .add_dict("中出", Some(114514), None);
        assert_eq!(
            jieba_formatter2
                .jieba
                .as_ref()
                .is_some_and(|s| s.has_word("中出")),
            true
        );
        assert_eq!(
            jieba_formatter2.format("测试分词,我们中出了一个叛徒。"),
            "测试分词,我们\n中出了一个叛徒。"
        );
    }
}