html5ever\serialize/
mod.rs1use log::warn;
11pub use markup5ever::serialize::{AttrRef, Serialize, Serializer, TraversalScope};
12use markup5ever::{local_name, ns};
13use std::io::{self, Write};
14
15use crate::{LocalName, QualName};
16
17pub fn serialize<Wr, T>(writer: Wr, node: &T, opts: SerializeOpts) -> io::Result<()>
18where
19 Wr: Write,
20 T: Serialize,
21{
22 let mut ser = HtmlSerializer::new(writer, opts.clone());
23 node.serialize(&mut ser, opts.traversal_scope)
24}
25
26#[derive(Clone)]
27pub struct SerializeOpts {
28 pub scripting_enabled: bool,
30
31 pub traversal_scope: TraversalScope,
33
34 pub create_missing_parent: bool,
40}
41
42impl Default for SerializeOpts {
43 fn default() -> SerializeOpts {
44 SerializeOpts {
45 scripting_enabled: true,
46 traversal_scope: TraversalScope::ChildrenOnly(None),
47 create_missing_parent: false,
48 }
49 }
50}
51
52#[derive(Default)]
53struct ElemInfo {
54 html_name: Option<LocalName>,
55 ignore_children: bool,
56}
57
58pub struct HtmlSerializer<Wr: Write> {
59 pub writer: Wr,
60 opts: SerializeOpts,
61 stack: Vec<ElemInfo>,
62}
63
64fn tagname(name: &QualName) -> LocalName {
65 match name.ns {
66 ns!(html) | ns!(mathml) | ns!(svg) => (),
67 ref ns => {
68 warn!("node with weird namespace {ns:?}");
70 },
71 }
72
73 name.local.clone()
74}
75
76impl<Wr: Write> HtmlSerializer<Wr> {
77 pub fn new(writer: Wr, opts: SerializeOpts) -> Self {
78 let html_name = match opts.traversal_scope {
79 TraversalScope::IncludeNode | TraversalScope::ChildrenOnly(None) => None,
80 TraversalScope::ChildrenOnly(Some(ref n)) => Some(tagname(n)),
81 };
82 HtmlSerializer {
83 writer,
84 opts,
85 stack: vec![ElemInfo {
86 html_name,
87 ignore_children: false,
88 }],
89 }
90 }
91
92 fn parent(&mut self) -> &mut ElemInfo {
93 if self.stack.is_empty() {
94 if self.opts.create_missing_parent {
95 warn!("ElemInfo stack empty, creating new parent");
96 self.stack.push(Default::default());
97 } else {
98 panic!("no parent ElemInfo")
99 }
100 }
101 self.stack.last_mut().unwrap()
102 }
103
104 fn write_escaped(&mut self, text: &str, attr_mode: bool) -> io::Result<()> {
105 for c in text.chars() {
106 match c {
107 '&' => self.writer.write_all(b"&"),
108 '\u{00A0}' => self.writer.write_all(b" "),
109 '"' if attr_mode => self.writer.write_all(b"""),
110 '<' if !attr_mode => self.writer.write_all(b"<"),
111 '>' if !attr_mode => self.writer.write_all(b">"),
112 c => self.writer.write_fmt(format_args!("{c}")),
113 }?;
114 }
115 Ok(())
116 }
117}
118
119impl<Wr: Write> Serializer for HtmlSerializer<Wr> {
120 fn start_elem<'a, AttrIter>(&mut self, name: QualName, attrs: AttrIter) -> io::Result<()>
121 where
122 AttrIter: Iterator<Item = AttrRef<'a>>,
123 {
124 let html_name = match name.ns {
125 ns!(html) => Some(name.local.clone()),
126 _ => None,
127 };
128
129 if self.parent().ignore_children {
130 self.stack.push(ElemInfo {
131 html_name,
132 ignore_children: true,
133 });
134 return Ok(());
135 }
136
137 self.writer.write_all(b"<")?;
138 self.writer.write_all(tagname(&name).as_bytes())?;
139 for (name, value) in attrs {
140 self.writer.write_all(b" ")?;
141
142 match name.ns {
143 ns!() => (),
144 ns!(xml) => self.writer.write_all(b"xml:")?,
145 ns!(xmlns) => {
146 if name.local != local_name!("xmlns") {
147 self.writer.write_all(b"xmlns:")?;
148 }
149 },
150 ns!(xlink) => self.writer.write_all(b"xlink:")?,
151 ref ns => {
152 warn!("attr with weird namespace {ns:?}");
154 self.writer.write_all(b"unknown_namespace:")?;
155 },
156 }
157
158 self.writer.write_all(name.local.as_bytes())?;
159 self.writer.write_all(b"=\"")?;
160 self.write_escaped(value, true)?;
161 self.writer.write_all(b"\"")?;
162 }
163 self.writer.write_all(b">")?;
164
165 let ignore_children = name.ns == ns!(html)
166 && matches!(
167 name.local,
168 local_name!("area")
169 | local_name!("base")
170 | local_name!("basefont")
171 | local_name!("bgsound")
172 | local_name!("br")
173 | local_name!("col")
174 | local_name!("embed")
175 | local_name!("frame")
176 | local_name!("hr")
177 | local_name!("img")
178 | local_name!("input")
179 | local_name!("keygen")
180 | local_name!("link")
181 | local_name!("meta")
182 | local_name!("param")
183 | local_name!("source")
184 | local_name!("track")
185 | local_name!("wbr")
186 );
187
188 self.stack.push(ElemInfo {
189 html_name,
190 ignore_children,
191 });
192
193 Ok(())
194 }
195
196 fn end_elem(&mut self, name: QualName) -> io::Result<()> {
197 let info = match self.stack.pop() {
198 Some(info) => info,
199 None if self.opts.create_missing_parent => {
200 warn!("missing ElemInfo, creating default.");
201 Default::default()
202 },
203 _ => panic!("no ElemInfo"),
204 };
205 if info.ignore_children {
206 return Ok(());
207 }
208
209 self.writer.write_all(b"</")?;
210 self.writer.write_all(tagname(&name).as_bytes())?;
211 self.writer.write_all(b">")
212 }
213
214 fn write_text(&mut self, text: &str) -> io::Result<()> {
215 let escape = match self.parent().html_name {
216 Some(local_name!("style"))
217 | Some(local_name!("script"))
218 | Some(local_name!("xmp"))
219 | Some(local_name!("iframe"))
220 | Some(local_name!("noembed"))
221 | Some(local_name!("noframes"))
222 | Some(local_name!("plaintext")) => false,
223
224 Some(local_name!("noscript")) => !self.opts.scripting_enabled,
225
226 _ => true,
227 };
228
229 if escape {
230 self.write_escaped(text, false)
231 } else {
232 self.writer.write_all(text.as_bytes())
233 }
234 }
235
236 fn write_comment(&mut self, text: &str) -> io::Result<()> {
237 self.writer.write_all(b"<!--")?;
238 self.writer.write_all(text.as_bytes())?;
239 self.writer.write_all(b"-->")
240 }
241
242 fn write_doctype(&mut self, name: &str) -> io::Result<()> {
243 self.writer.write_all(b"<!DOCTYPE ")?;
244 self.writer.write_all(name.as_bytes())?;
245 self.writer.write_all(b">")
246 }
247
248 fn write_processing_instruction(&mut self, target: &str, data: &str) -> io::Result<()> {
249 self.writer.write_all(b"<?")?;
250 self.writer.write_all(target.as_bytes())?;
251 self.writer.write_all(b" ")?;
252 self.writer.write_all(data.as_bytes())?;
253 self.writer.write_all(b">")
254 }
255}