1use std::borrow::Cow;
2use std::collections::{HashSet, VecDeque};
3use std::fmt;
4use std::io;
5use std::mem;
6use std::sync::{Arc, Mutex, Weak};
7
8use crossbeam::atomic::AtomicCell;
9use tendril::StrTendril;
10
11use markup5ever::Attribute;
12use markup5ever::ExpandedName;
13use markup5ever::QualName;
14use markup5ever::interface::tree_builder;
15use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
16use markup5ever::serialize::TraversalScope;
17use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
18use markup5ever::serialize::{Serialize, Serializer};
19use xml5ever::interface::ElemName;
20use xml5ever::local_name;
21
22#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Debug)]
23pub struct AtomicAttribute {
24 pub name: QualName,
25 pub value: String,
26}
27
28impl From<Attribute> for AtomicAttribute {
29 fn from(attr: Attribute) -> Self {
30 AtomicAttribute {
31 name: attr.name,
32 value: attr.value.to_string(),
33 }
34 }
35}
36
37#[derive(Debug, Clone)]
38pub enum NodeData {
39 Document,
41
42 Doctype {
47 name: String,
48 public_id: String,
49 system_id: String,
50 },
51
52 Text { contents: Arc<Mutex<String>> },
54
55 Comment { contents: String },
57
58 Element {
60 name: QualName,
61 attrs: Arc<Mutex<Vec<AtomicAttribute>>>,
62
63 template_contents: Arc<Mutex<Option<Handle>>>,
67
68 mathml_annotation_xml_integration_point: bool,
72 },
73
74 ProcessingInstruction { target: String, contents: String },
76}
77
78pub struct Node {
80 pub parent: AtomicCell<Option<WeakHandle>>,
82 pub children: Mutex<Vec<Handle>>,
84 pub data: NodeData,
86}
87
88impl Node {
89 pub fn new(data: NodeData) -> Arc<Self> {
91 Arc::new(Node {
92 data,
93 parent: AtomicCell::new(None),
94 children: Mutex::new(Vec::new()),
95 })
96 }
97
98 fn get_option_element_nearest_ancestor_select(&self) -> Option<Arc<Self>> {
100 let mut did_see_ancestor_optgroup = false;
103
104 let mut current = self.parent().and_then(|parent| parent.upgrade())?;
106 loop {
107 if let NodeData::Element { name, .. } = ¤t.data {
108 if matches!(
110 name.local_name(),
111 &local_name!("datalist") | &local_name!("hr") | &local_name!("option")
112 ) {
113 return None;
114 }
115
116 if name.local_name() == &local_name!("optgroup") {
118 if did_see_ancestor_optgroup {
120 return None;
121 }
122
123 did_see_ancestor_optgroup = true;
125 }
126
127 if name.local_name() == &local_name!("select") {
129 return Some(current);
130 }
131 };
132
133 let Some(next_ancestor) = current.parent().and_then(|parent| parent.upgrade()) else {
135 break;
136 };
137 current = next_ancestor;
138 }
139
140 None
142 }
143
144 fn parent(&self) -> Option<Weak<Self>> {
145 let parent = self.parent.take();
146 self.parent.store(parent.clone());
147 parent
148 }
149
150 fn get_a_selects_enabled_selectedcontent(&self) -> Option<Arc<Self>> {
152 let NodeData::Element { name, attrs, .. } = &self.data else {
154 panic!("Trying to get selectedcontent of non-element");
155 };
156 debug_assert_eq!(name.local_name(), &local_name!("select"));
157 if attrs
158 .lock()
159 .unwrap()
160 .iter()
161 .any(|attribute| attribute.name.local == local_name!("multiple"))
162 {
163 return None;
164 }
165
166 let mut remaining = VecDeque::default();
170 remaining.extend(self.children.lock().unwrap().iter().cloned());
171 let mut selectedcontent = None;
172 while let Some(node) = remaining.pop_front() {
173 remaining.extend(node.children.lock().unwrap().iter().cloned());
174
175 let NodeData::Element { name, .. } = &self.data else {
176 continue;
177 };
178 if name.local_name() == &local_name!("selectedcontent") {
179 selectedcontent = Some(node);
180 break;
181 }
182 }
183 let selectedcontent = selectedcontent?;
184
185 Some(selectedcontent)
190 }
191
192 fn clone_an_option_into_selectedcontent(&self, selectedcontent: Arc<Self>) {
194 let mut document_fragment = Vec::new();
197
198 for child in self.children.lock().unwrap().iter() {
200 let child_clone = child.clone_with_subtree();
202
203 document_fragment.push(child_clone);
205 }
206
207 *selectedcontent.children.lock().unwrap() = document_fragment;
209 }
210
211 fn clone_with_subtree(&self) -> Arc<Self> {
216 let children = self
217 .children
218 .lock()
219 .unwrap()
220 .iter()
221 .map(|child| child.clone_with_subtree())
222 .collect();
223 Arc::new(Self {
224 parent: AtomicCell::new(self.parent()),
225 data: self.data.clone(),
226 children: Mutex::new(children),
227 })
228 }
229}
230
231impl fmt::Debug for Node {
232 fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
233 fmt.debug_struct("Node")
234 .field("data", &self.data)
235 .field("children", &self.children)
236 .finish()
237 }
238}
239
240pub type Handle = Arc<Node>;
242
243pub type WeakHandle = Weak<Node>;
245
246fn append(new_parent: &Handle, child: Handle) {
248 let previous_parent = child.parent.swap(Some(Arc::downgrade(new_parent)));
249 assert!(previous_parent.is_none());
251 new_parent.children.lock().unwrap().push(child);
252}
253
254fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> {
256 if let Some(weak) = target.parent.take() {
257 let parent = weak.upgrade().expect("dangling weak pointer");
258 target.parent.store(Some(weak));
259 let i = match parent
260 .children
261 .lock()
262 .unwrap()
263 .iter()
264 .enumerate()
265 .find(|&(_, child)| Arc::ptr_eq(child, target))
266 {
267 Some((i, _)) => i,
268 None => panic!("have parent but couldn't find in parent's children!"),
269 };
270 Some((parent, i))
271 } else {
272 None
273 }
274}
275
276fn append_to_existing_text(prev: &Handle, text: &str) -> bool {
277 match prev.data {
278 NodeData::Text { ref contents } => {
279 contents.lock().unwrap().push_str(text);
280 true
281 }
282 _ => false,
283 }
284}
285
286fn remove_from_parent(target: &Handle) {
287 if let Some((parent, i)) = get_parent_and_index(target) {
288 parent.children.lock().unwrap().remove(i);
289 target.parent.store(None);
290 }
291}
292
293pub struct ArcDom {
295 pub document: Handle,
297
298 pub errors: Mutex<Vec<Cow<'static, str>>>,
300
301 pub quirks_mode: AtomicCell<QuirksMode>,
303}
304
305impl TreeSink for ArcDom {
306 type Output = Self;
307 fn finish(self) -> Self {
308 self
309 }
310
311 type Handle = Handle;
312
313 type ElemName<'a>
314 = ExpandedName<'a>
315 where
316 Self: 'a;
317
318 fn parse_error(&self, msg: Cow<'static, str>) {
319 self.errors.lock().unwrap().push(msg);
320 }
321
322 fn get_document(&self) -> Handle {
323 self.document.clone()
324 }
325
326 fn get_template_contents(&self, target: &Handle) -> Handle {
327 if let NodeData::Element {
328 ref template_contents,
329 ..
330 } = target.data
331 {
332 template_contents
333 .lock()
334 .unwrap()
335 .as_ref()
336 .expect("not a template element!")
337 .clone()
338 } else {
339 panic!("not a template element!")
340 }
341 }
342
343 fn set_quirks_mode(&self, mode: QuirksMode) {
344 self.quirks_mode.store(mode);
345 }
346
347 fn same_node(&self, x: &Handle, y: &Handle) -> bool {
348 Arc::ptr_eq(x, y)
349 }
350
351 fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> {
352 match target.data {
353 NodeData::Element { ref name, .. } => name.expanded(),
354 _ => panic!("not an element!"),
355 }
356 }
357
358 fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
359 Node::new(NodeData::Element {
360 name,
361 attrs: Arc::new(Mutex::new(
362 attrs.into_iter().map(AtomicAttribute::from).collect(),
363 )),
364 template_contents: Arc::new(Mutex::new(if flags.template {
365 Some(Node::new(NodeData::Document))
366 } else {
367 None
368 })),
369 mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
370 })
371 }
372
373 fn create_comment(&self, text: StrTendril) -> Handle {
374 Node::new(NodeData::Comment {
375 contents: text.to_string(),
376 })
377 }
378
379 fn create_pi(&self, target: StrTendril, data: StrTendril) -> Handle {
380 Node::new(NodeData::ProcessingInstruction {
381 target: target.to_string(),
382 contents: data.to_string(),
383 })
384 }
385
386 fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
387 if let NodeOrText::AppendText(text) = &child {
389 if let Some(h) = parent.children.lock().unwrap().last() {
390 if append_to_existing_text(h, text) {
391 return;
392 }
393 }
394 }
395
396 append(
397 parent,
398 match child {
399 NodeOrText::AppendText(text) => Node::new(NodeData::Text {
400 contents: Arc::new(Mutex::new(text.to_string())),
401 }),
402 NodeOrText::AppendNode(node) => node,
403 },
404 );
405 }
406
407 fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
408 let (parent, i) = get_parent_and_index(sibling)
409 .expect("append_before_sibling called on node without parent");
410
411 let child = match (child, i) {
412 (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text {
414 contents: Arc::new(Mutex::new(text.to_string())),
415 }),
416
417 (NodeOrText::AppendText(text), i) => {
419 let children = parent.children.lock().unwrap();
420 let prev = &children[i - 1];
421 if append_to_existing_text(prev, &text) {
422 return;
423 }
424 Node::new(NodeData::Text {
425 contents: Arc::new(Mutex::new(text.to_string())),
426 })
427 }
428
429 (NodeOrText::AppendNode(node), _) => node,
434 };
435
436 remove_from_parent(&child);
437
438 child.parent.store(Some(Arc::downgrade(&parent)));
439 parent.children.lock().unwrap().insert(i, child);
440 }
441
442 fn append_based_on_parent_node(
443 &self,
444 element: &Self::Handle,
445 prev_element: &Self::Handle,
446 child: NodeOrText<Self::Handle>,
447 ) {
448 let parent = element.parent.take();
449 let has_parent = parent.is_some();
450 element.parent.store(parent);
451
452 if has_parent {
453 self.append_before_sibling(element, child);
454 } else {
455 self.append(prev_element, child);
456 }
457 }
458
459 fn append_doctype_to_document(
460 &self,
461 name: StrTendril,
462 public_id: StrTendril,
463 system_id: StrTendril,
464 ) {
465 append(
466 &self.document,
467 Node::new(NodeData::Doctype {
468 name: name.to_string(),
469 public_id: public_id.to_string(),
470 system_id: system_id.to_string(),
471 }),
472 );
473 }
474
475 fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec<Attribute>) {
476 let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
477 attrs.lock().unwrap()
478 } else {
479 panic!("not an element")
480 };
481
482 let existing_names = existing
483 .iter()
484 .map(|e| e.name.clone())
485 .collect::<HashSet<_>>();
486 existing.extend(
487 attrs
488 .into_iter()
489 .filter(|attr| !existing_names.contains(&attr.name))
490 .map(AtomicAttribute::from),
491 );
492 }
493
494 fn remove_from_parent(&self, target: &Handle) {
495 remove_from_parent(target);
496 }
497
498 fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
499 let mut children = node.children.lock().unwrap();
500 let mut new_children = new_parent.children.lock().unwrap();
501 for child in children.iter() {
502 let previous_parent = child.parent.swap(Some(Arc::downgrade(new_parent)));
503 assert!(Arc::ptr_eq(
504 node,
505 &previous_parent.unwrap().upgrade().expect("dangling weak")
506 ))
507 }
508 new_children.extend(mem::take(&mut *children));
509 }
510
511 fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
512 if let NodeData::Element {
513 mathml_annotation_xml_integration_point,
514 ..
515 } = target.data
516 {
517 mathml_annotation_xml_integration_point
518 } else {
519 panic!("not an element!")
520 }
521 }
522
523 fn maybe_clone_an_option_into_selectedcontent(&self, option: &Self::Handle) {
524 let NodeData::Element { name, attrs, .. } = &option.data else {
525 panic!("\"maybe clone an option into selectedcontent\" called with non-element node");
526 };
527 debug_assert_eq!(name.local_name(), &local_name!("option"));
528
529 let select = option.get_option_element_nearest_ancestor_select();
531
532 if let Some(selectedcontent) =
538 select.and_then(|select| select.get_a_selects_enabled_selectedcontent())
539 {
540 if attrs
541 .lock()
542 .unwrap()
543 .iter()
544 .any(|attribute| attribute.name.local == local_name!("selected"))
545 {
546 option.clone_an_option_into_selectedcontent(selectedcontent);
547 }
548 }
549 }
550}
551
552impl Default for ArcDom {
553 fn default() -> ArcDom {
554 ArcDom {
555 document: Node::new(NodeData::Document),
556 errors: Default::default(),
557 quirks_mode: AtomicCell::new(tree_builder::NoQuirks),
558 }
559 }
560}
561
562enum SerializeOp {
563 Open(Handle),
564 Close(QualName),
565}
566
567pub struct SerializableHandle(Handle);
568
569impl From<Handle> for SerializableHandle {
570 fn from(h: Handle) -> SerializableHandle {
571 SerializableHandle(h)
572 }
573}
574
575impl Serialize for SerializableHandle {
576 fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
577 where
578 S: Serializer,
579 {
580 let mut ops = VecDeque::new();
581 match traversal_scope {
582 IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())),
583 ChildrenOnly(_) => ops.extend(
584 self.0
585 .children
586 .lock()
587 .unwrap()
588 .iter()
589 .map(|h| SerializeOp::Open(h.clone())),
590 ),
591 }
592
593 while let Some(op) = ops.pop_front() {
594 match op {
595 SerializeOp::Open(handle) => match handle.data {
596 NodeData::Element {
597 ref name,
598 ref attrs,
599 ..
600 } => {
601 serializer.start_elem(
602 name.clone(),
603 attrs
604 .lock()
605 .unwrap()
606 .iter()
607 .map(|at| (&at.name, &at.value[..])),
608 )?;
609
610 ops.reserve(1 + handle.children.lock().unwrap().len());
611 ops.push_front(SerializeOp::Close(name.clone()));
612
613 for child in handle.children.lock().unwrap().iter().rev() {
614 ops.push_front(SerializeOp::Open(child.clone()));
615 }
616 }
617
618 NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?,
619
620 NodeData::Text { ref contents } => {
621 serializer.write_text(&contents.lock().unwrap())?
622 }
623
624 NodeData::Comment { ref contents } => serializer.write_comment(contents)?,
625
626 NodeData::ProcessingInstruction {
627 ref target,
628 ref contents,
629 } => serializer.write_processing_instruction(target, contents)?,
630
631 NodeData::Document => panic!("Can't serialize Document node itself"),
632 },
633
634 SerializeOp::Close(name) => {
635 serializer.end_elem(name)?;
636 }
637 }
638 }
639
640 Ok(())
641 }
642}