html5ever\serialize/
mod.rs

1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use log::warn;
11pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
12use markup5ever::{local_name, ns};
13use std::io::{self, Write};
14
15use crate::{LocalName, QualName};
16
17pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
18where
19    Wr: Write,
20    T: Serialize,
21{
22    let mut ser = HtmlSerializer::new(writer, opts.clone());
23    node.serialize(&mut ser, opts.traversal_scope)
24}
25
26#[derive(Clone)]
27pub struct SerializeOpts {
28    /// Is scripting enabled? Default: true
29    pub scripting_enabled: bool,
30
31    /// Serialize the root node? Default: ChildrenOnly
32    pub traversal_scope: TraversalScope,
33
34    /// If the serializer is asked to serialize an invalid tree, the default
35    /// behavior is to panic in the event that an `end_elem` is created without a
36    /// matching `start_elem`. Setting this to true will prevent those panics by
37    /// creating a default parent on the element stack. No extra start elem will
38    /// actually be written. Default: false
39    pub create_missing_parent: bool,
40}
41
42impl Default for SerializeOpts {
43    fn default() -> SerializeOpts {
44        SerializeOpts {
45            scripting_enabled: true,
46            traversal_scope: TraversalScope::ChildrenOnly(None),
47            create_missing_parent: false,
48        }
49    }
50}
51
52#[derive(Default)]
53struct ElemInfo {
54    html_name: Option<LocalName>,
55    ignore_children: bool,
56}
57
58pub struct HtmlSerializer<Wr: Write> {
59    pub writer: Wr,
60    opts: SerializeOpts,
61    stack: Vec<ElemInfo>,
62}
63
64fn tagname(name: &QualName) -> LocalName {
65    match name.ns {
66        ns!(html) | ns!(mathml) | ns!(svg) => (),
67        ref ns => {
68            // FIXME(#122)
69            warn!("node with weird namespace {ns:?}");
70        },
71    }
72
73    name.local.clone()
74}
75
76impl<Wr: Write> HtmlSerializer<Wr> {
77    pub fn new(writer: Wr, opts: SerializeOpts) -> Self {
78        let html_name = match opts.traversal_scope {
79            TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None,
80            TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)),
81        };
82        HtmlSerializer {
83            writer,
84            opts,
85            stack: vec![ElemInfo {
86                html_name,
87                ignore_children: false,
88            }],
89        }
90    }
91
92    fn parent(&mut self) -> &mut ElemInfo {
93        if self.stack.is_empty() {
94            if self.opts.create_missing_parent {
95                warn!("ElemInfo stack empty, creating new parent");
96                self.stack.push(Default::default());
97            } else {
98                panic!("no parent ElemInfo")
99            }
100        }
101        self.stack.last_mut().unwrap()
102    }
103
104    fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> {
105        for c in text.chars() {
106            match c {
107                '&' => self.writer.write_all(b"&amp;"),
108                '\u{00A0}' => self.writer.write_all(b"&nbsp;"),
109                '"' if attr_mode => self.writer.write_all(b"&quot;"),
110                '<' if !attr_mode => self.writer.write_all(b"&lt;"),
111                '>' if !attr_mode => self.writer.write_all(b"&gt;"),
112                c => self.writer.write_fmt(format_args!("{c}")),
113            }?;
114        }
115        Ok(())
116    }
117}
118
119impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
120    fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
121    where
122        AttrIter: Iterator<Item = AttrRef<'a>>,
123    {
124        let html_name = match name.ns {
125            ns!(html) => Some(name.local.clone()),
126            _ => None,
127        };
128
129        if self.parent().ignore_children {
130            self.stack.push(ElemInfo {
131                html_name,
132                ignore_children: true,
133            });
134            return Ok(());
135        }
136
137        self.writer.write_all(b"<")?;
138        self.writer.write_all(tagname(&name).as_bytes())?;
139        for (name, value) in attrs {
140            self.writer.write_all(b" ")?;
141
142            match name.ns {
143                ns!() => (),
144                ns!(xml) => self.writer.write_all(b"xml:")?,
145                ns!(xmlns) => {
146                    if name.local != local_name!("xmlns") {
147                        self.writer.write_all(b"xmlns:")?;
148                    }
149                },
150                ns!(xlink) => self.writer.write_all(b"xlink:")?,
151                ref ns => {
152                    // FIXME(#122)
153                    warn!("attr with weird namespace {ns:?}");
154                    self.writer.write_all(b"unknown_namespace:")?;
155                },
156            }
157
158            self.writer.write_all(name.local.as_bytes())?;
159            self.writer.write_all(b"=\"")?;
160            self.write_escaped(value, true)?;
161            self.writer.write_all(b"\"")?;
162        }
163        self.writer.write_all(b">")?;
164
165        let ignore_children = name.ns == ns!(html)
166            && matches!(
167                name.local,
168                local_name!("area")
169                    | local_name!("base")
170                    | local_name!("basefont")
171                    | local_name!("bgsound")
172                    | local_name!("br")
173                    | local_name!("col")
174                    | local_name!("embed")
175                    | local_name!("frame")
176                    | local_name!("hr")
177                    | local_name!("img")
178                    | local_name!("input")
179                    | local_name!("keygen")
180                    | local_name!("link")
181                    | local_name!("meta")
182                    | local_name!("param")
183                    | local_name!("source")
184                    | local_name!("track")
185                    | local_name!("wbr")
186            );
187
188        self.stack.push(ElemInfo {
189            html_name,
190            ignore_children,
191        });
192
193        Ok(())
194    }
195
196    fn end_elem(&mut self, name: QualName) -> io::Result<()> {
197        let info = match self.stack.pop() {
198            Some(info) => info,
199            None if self.opts.create_missing_parent => {
200                warn!("missing ElemInfo, creating default.");
201                Default::default()
202            },
203            _ => panic!("no ElemInfo"),
204        };
205        if info.ignore_children {
206            return Ok(());
207        }
208
209        self.writer.write_all(b"</")?;
210        self.writer.write_all(tagname(&name).as_bytes())?;
211        self.writer.write_all(b">")
212    }
213
214    fn write_text(&mut self, text: &str) -> io::Result<()> {
215        let escape = match self.parent().html_name {
216            Some(local_name!("style"))
217            | Some(local_name!("script"))
218            | Some(local_name!("xmp"))
219            | Some(local_name!("iframe"))
220            | Some(local_name!("noembed"))
221            | Some(local_name!("noframes"))
222            | Some(local_name!("plaintext")) => false,
223
224            Some(local_name!("noscript")) => !self.opts.scripting_enabled,
225
226            _ => true,
227        };
228
229        if escape {
230            self.write_escaped(text, false)
231        } else {
232            self.writer.write_all(text.as_bytes())
233        }
234    }
235
236    fn write_comment(&mut self, text: &str) -> io::Result<()> {
237        self.writer.write_all(b"<!--")?;
238        self.writer.write_all(text.as_bytes())?;
239        self.writer.write_all(b"-->")
240    }
241
242    fn write_doctype(&mut self, name: &str) -> io::Result<()> {
243        self.writer.write_all(b"<!DOCTYPE ")?;
244        self.writer.write_all(name.as_bytes())?;
245        self.writer.write_all(b">")
246    }
247
248    fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
249        self.writer.write_all(b"<?")?;
250        self.writer.write_all(target.as_bytes())?;
251        self.writer.write_all(b" ")?;
252        self.writer.write_all(data.as_bytes())?;
253        self.writer.write_all(b">")
254    }
255}