msg_tool\utils/
escape.rs

1//! Escape and Unescape Utilities
2use fancy_regex::Regex;
3
/// Escapes special characters in XML attribute values.
///
/// Replaces `&`, `<`, `"` and `'` with `&amp;`, `&lt;`, `&quot;` and `&apos;`
/// respectively, so the result can be placed inside either a single- or
/// double-quoted attribute. Every other character (including `>`) is copied
/// through unchanged.
///
/// Returns a newly allocated `String`; the input is not modified.
pub fn escape_xml_attr_value(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            // The ampersand must become an entity itself, otherwise text such
            // as `a &lt; b` would be read back as `a < b` by an XML parser.
            '&' => escaped.push_str("&amp;"),
            '<' => escaped.push_str("&lt;"),
            '"' => escaped.push_str("&quot;"),
            '\'' => escaped.push_str("&apos;"),
            _ => escaped.push(c),
        }
    }
    escaped
}
18
/// Escapes special characters in XML text values.
///
/// The five characters `&`, `<`, `>`, `"` and `'` are rewritten to the
/// predefined entities `&amp;`, `&lt;`, `&gt;`, `&quot;` and `&apos;`.
/// All other characters are copied through unchanged.
pub fn escape_xml_text_value(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut out, ch| {
            // Look up the entity for this character, if it has one.
            let entity = match ch {
                '&' => Some("&amp;"),
                '<' => Some("&lt;"),
                '>' => Some("&gt;"),
                '"' => Some("&quot;"),
                '\'' => Some("&apos;"),
                _ => None,
            };
            match entity {
                Some(text) => out.push_str(text),
                None => out.push(ch),
            }
            out
        })
}
34
35lazy_static::lazy_static! {
36    static ref XML_NCR_BASE10_REGEX: Regex = Regex::new(r"&#(\d+);").unwrap();
37    static ref XML_NCR_BASE16_REGEX: Regex = Regex::new(r"&#x([0-9a-fA-F]+);").unwrap();
38    static ref LUA_NCR_BASE10_REGEX: Regex = Regex::new(r"\\(\d{3})").unwrap();
39    static ref LUA_NCR_BASE16_REGEX: Regex = Regex::new(r"\\x([0-9a-fA-F]{2})").unwrap();
40    static ref LUA_NCR_BASE16_U_REGEX: Regex = Regex::new(r"\\u([0-9a-fA-F]{4})").unwrap();
41}
42
/// Unescapes XML character references and entities.
///
/// Recognised forms:
/// - the predefined entities `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`;
/// - decimal references `&#NNN;` (ASCII digits);
/// - hexadecimal references `&#xHHH;` (lowercase `x`).
///
/// The input is decoded in a single left-to-right pass, so text produced by a
/// reference is never decoded again: `&amp;lt;` yields the literal text `&lt;`
/// rather than `<`. A numeric reference that does not name a valid Unicode
/// scalar value (including values too large for `u32`) yields U+FFFD.
/// Anything else starting with `&` is copied through unchanged.
pub fn unescape_xml(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(amp) = rest.find('&') {
        out.push_str(&rest[..amp]);
        let tail = &rest[amp..];
        match decode_xml_reference(tail) {
            Some((ch, consumed)) => {
                out.push(ch);
                rest = &tail[consumed..];
            }
            None => {
                // Not a reference we understand: keep the ampersand literally.
                out.push('&');
                rest = &tail[1..];
            }
        }
    }
    out.push_str(rest);
    out
}

/// Decodes one reference at the start of `tail` (which must begin with `&`).
///
/// Returns the decoded character and the number of bytes consumed, or `None`
/// when `tail` does not start with a recognised reference.
fn decode_xml_reference(tail: &str) -> Option<(char, usize)> {
    // A reference body is made of ASCII letters, digits and `#`; stopping at
    // the first other byte keeps the scan bounded for inputs with many `&`.
    let body_len = tail
        .bytes()
        .skip(1)
        .take_while(|b| b.is_ascii_alphanumeric() || *b == b'#')
        .count();
    if tail.as_bytes().get(1 + body_len) != Some(&b';') {
        return None;
    }
    let name = &tail[1..1 + body_len];
    let decoded = match name {
        "amp" => '&',
        "lt" => '<',
        "gt" => '>',
        "quot" => '"',
        "apos" => '\'',
        _ => {
            let number = name.strip_prefix('#')?;
            let (digits, radix) = match number.strip_prefix('x') {
                Some(hex) => (hex, 16),
                None => (number, 10),
            };
            if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
                return None;
            }
            u32::from_str_radix(digits, radix)
                .ok()
                .and_then(char::from_u32)
                .unwrap_or('\u{FFFD}')
        }
    };
    // `&` + body + `;`
    Some((decoded, body_len + 2))
}
64
/// Unescapes Lua string escape sequences.
///
/// Recognised escapes:
/// - `\n`, `\r`, `\t`, `\v` (vertical tab, U+000B), `\b`, `\f`;
/// - `\'`, `\"` and `\\`;
/// - `\ddd` with exactly three ASCII decimal digits, and `\0` on its own;
/// - `\xHH` with exactly two hex digits and `\uHHHH` with exactly four.
///
/// Numeric escapes are interpreted as Unicode code points; a value that is
/// not a valid scalar value (for example a surrogate) becomes U+FFFD.
/// The input is decoded in a single left-to-right pass, so an escaped
/// backslash never combines with the character after it: `\\n` yields a
/// backslash followed by `n`, not a newline. Unrecognised escapes and a
/// trailing lone backslash are copied through unchanged.
pub fn unescape_lua_str(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(pos) = rest.find('\\') {
        out.push_str(&rest[..pos]);
        // Everything after the backslash.
        let tail = &rest[pos + 1..];
        match decode_lua_escape(tail) {
            Some((ch, used)) => {
                out.push(ch);
                rest = &tail[used..];
            }
            None => {
                // Unknown escape: keep the backslash and carry on after it.
                out.push('\\');
                rest = tail;
            }
        }
    }
    out.push_str(rest);
    out
}

/// Decodes the escape whose body (the text after the backslash) starts `tail`.
///
/// Returns the decoded character and how many bytes of `tail` it used, or
/// `None` if `tail` does not start with a recognised escape body.
fn decode_lua_escape(tail: &str) -> Option<(char, usize)> {
    let kind = tail.chars().next()?;
    match kind {
        'n' => Some(('\n', 1)),
        'r' => Some(('\r', 1)),
        't' => Some(('\t', 1)),
        'v' => Some(('\x0B', 1)),
        'b' => Some(('\x08', 1)),
        'f' => Some(('\x0C', 1)),
        '\'' => Some(('\'', 1)),
        '"' => Some(('"', 1)),
        '\\' => Some(('\\', 1)),
        'x' => parse_fixed_digits(&tail[1..], 2, 16).map(|c| (c, 3)),
        'u' => parse_fixed_digits(&tail[1..], 4, 16).map(|c| (c, 5)),
        '0'..='9' => match parse_fixed_digits(tail, 3, 10) {
            Some(c) => Some((c, 3)),
            // `\0` not followed by two more digits is a plain NUL.
            None if kind == '0' => Some(('\0', 1)),
            None => None,
        },
        _ => None,
    }
}

/// Parses exactly `len` ASCII digits of the given `radix` from the start of
/// `text` as a code point; invalid code points map to U+FFFD.
fn parse_fixed_digits(text: &str, len: usize, radix: u32) -> Option<char> {
    // `get` returns `None` when `text` is too short or `len` splits a char.
    let digits = text.get(..len)?;
    if !digits.chars().all(|c| c.is_digit(radix)) {
        return None;
    }
    let code = u32::from_str_radix(digits, radix).ok()?;
    Some(char::from_u32(code).unwrap_or('\u{FFFD}'))
}
98
/// Checks if a string contains characters that need to be escaped in Lua strings.
///
/// Returns `true` when `s` contains a backslash, a single or double quote, or
/// one of the control characters newline, carriage return, tab, vertical tab
/// (U+000B), backspace (U+0008) or form feed (U+000C).
pub fn lua_str_contains_need_escape(s: &str) -> bool {
    s.chars().any(|c| {
        matches!(
            c,
            // `\x0B` is the vertical tab; `\n` already covers `\x0A`.
            '\\' | '\n' | '\r' | '\t' | '\x0B' | '\x08' | '\x0C' | '\'' | '"'
        )
    })
}
111
/// Checks if a string contains characters that need to be escaped in Lua keys.
///
/// The only condition tested is whether the first character of `s` is an
/// ASCII digit; an empty string yields `false`.
pub fn lua_key_contains_need_escape(s: &str) -> bool {
    s.starts_with(|first: char| first.is_ascii_digit())
}
116
/// Exercises `unescape_xml` on named entities and numeric character references.
#[test]
fn test_unescape_xml() {
    // (input, expected output)
    let cases = [
        (
            "Hello &amp;amp; World &lt;script&gt;alert(&#x27;XSS&#x27;)&lt;/script&gt;",
            "Hello &amp; World <script>alert('XSS')</script>",
        ),
        ("&#20320;TEST&#x20;", "你TEST "),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(unescape_xml(input), expected);
    }
}
125
/// Exercises `unescape_lua_str` on each supported escape form and a mixed input.
#[test]
fn test_unescape_lua_str() {
    // (input, expected output)
    let cases = [
        (r"Hello\nWorld", "Hello\nWorld"),
        (r"Tab:\tEnd", "Tab:\tEnd"),
        ("Quote: \\' and \\\"", "Quote: ' and \""),
        (r"Backslash:\\Test", "Backslash:\\Test"),
        (r"\065\066\067", "ABC"),
        (r"\x41\x42\x43", "ABC"),
        (r"\u4F60\u597D", "你好"),
        (r"Null:\0End", "Null:\0End"),
        (r"Mix:\n\x41\065\u4F60", "Mix:\nAA你"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(unescape_lua_str(input), expected);
    }
}