markup5ever_rcdom/
lib.rs

1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! A simple reference-counted DOM.
11//!
12//! This is sufficient as a static parse tree, but don't build a
13//! web browser using it. :)
14//!
15//! A DOM is a [tree structure] with ordered children that can be represented in an XML-like
16//! format. For example, the following graph
17//!
18//! ```text
19//! div
20//!  +- "text node"
21//!  +- span
22//! ```
23//! in HTML would be serialized as
24//!
25//! ```html
26//! <div>text node<span></span></div>
27//! ```
28//!
29//! See the [document object model article on wikipedia][dom wiki] for more information.
30//!
31//! This implementation stores the information associated with each node once, and then hands out
32//! refs to children. The nodes themselves are reference-counted to avoid copying - you can create
33//! a new ref and then a node will outlive the document. Nodes own their children, but only have
34//! weak references to their parents.
35//!
36//! [tree structure]: https://en.wikipedia.org/wiki/Tree_(data_structure)
37//! [dom wiki]: https://en.wikipedia.org/wiki/Document_Object_Model
38
39extern crate markup5ever;
40extern crate tendril;
41
42use std::borrow::Cow;
43use std::cell::{Cell, RefCell};
44use std::collections::{HashSet, VecDeque};
45use std::default::Default;
46use std::fmt;
47use std::io;
48use std::mem;
49use std::rc::{Rc, Weak};
50
51use tendril::StrTendril;
52
53use markup5ever::interface::tree_builder;
54use markup5ever::interface::tree_builder::{ElementFlags, NodeOrText, QuirksMode, TreeSink};
55use markup5ever::serialize::TraversalScope;
56use markup5ever::serialize::TraversalScope::{ChildrenOnly, IncludeNode};
57use markup5ever::serialize::{Serialize, Serializer};
58use markup5ever::Attribute;
59use markup5ever::ExpandedName;
60use markup5ever::QualName;
61
62/// The different kinds of nodes in the DOM.
63#[derive(Debug)]
64pub enum NodeData {
65    /// The `Document` itself - the root node of a HTML document.
66    Document,
67
68    /// A `DOCTYPE` with name, public id, and system id. See
69    /// [document type declaration on wikipedia][dtd wiki].
70    ///
71    /// [dtd wiki]: https://en.wikipedia.org/wiki/Document_type_declaration
72    Doctype {
73        name: StrTendril,
74        public_id: StrTendril,
75        system_id: StrTendril,
76    },
77
78    /// A text node.
79    Text { contents: RefCell<StrTendril> },
80
81    /// A comment.
82    Comment { contents: StrTendril },
83
84    /// An element with attributes.
85    Element {
86        name: QualName,
87        attrs: RefCell<Vec<Attribute>>,
88
89        /// For HTML \<template\> elements, the [template contents].
90        ///
91        /// [template contents]: https://html.spec.whatwg.org/multipage/#template-contents
92        template_contents: RefCell<Option<Handle>>,
93
94        /// Whether the node is a [HTML integration point].
95        ///
96        /// [HTML integration point]: https://html.spec.whatwg.org/multipage/#html-integration-point
97        mathml_annotation_xml_integration_point: bool,
98    },
99
100    /// A Processing instruction.
101    ProcessingInstruction {
102        target: StrTendril,
103        contents: StrTendril,
104    },
105}
106
107/// A DOM node.
108pub struct Node {
109    /// Parent node.
110    pub parent: Cell<Option<WeakHandle>>,
111    /// Child nodes of this node.
112    pub children: RefCell<Vec<Handle>>,
113    /// Represents this node's data.
114    pub data: NodeData,
115}
116
117impl Node {
118    /// Create a new node from its contents
119    pub fn new(data: NodeData) -> Rc<Self> {
120        Rc::new(Node {
121            data,
122            parent: Cell::new(None),
123            children: RefCell::new(Vec::new()),
124        })
125    }
126}
127
128impl Drop for Node {
129    fn drop(&mut self) {
130        let mut nodes = mem::take(&mut *self.children.borrow_mut());
131        while let Some(node) = nodes.pop() {
132            let children = mem::take(&mut *node.children.borrow_mut());
133            nodes.extend(children.into_iter());
134            if let NodeData::Element {
135                ref template_contents,
136                ..
137            } = node.data
138            {
139                if let Some(template_contents) = template_contents.borrow_mut().take() {
140                    nodes.push(template_contents);
141                }
142            }
143        }
144    }
145}
146
147impl fmt::Debug for Node {
148    fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
149        fmt.debug_struct("Node")
150            .field("data", &self.data)
151            .field("children", &self.children)
152            .finish()
153    }
154}
155
156/// Reference to a DOM node.
157pub type Handle = Rc<Node>;
158
159/// Weak reference to a DOM node, used for parent pointers.
160pub type WeakHandle = Weak<Node>;
161
162/// Append a parentless node to another nodes' children
163fn append(new_parent: &Handle, child: Handle) {
164    let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
165    // Invariant: child cannot have existing parent
166    assert!(previous_parent.is_none());
167    new_parent.children.borrow_mut().push(child);
168}
169
170/// If the node has a parent, get it and this node's position in its children
171fn get_parent_and_index(target: &Handle) -> Option<(Handle, usize)> {
172    if let Some(weak) = target.parent.take() {
173        let parent = weak.upgrade().expect("dangling weak pointer");
174        target.parent.set(Some(weak));
175        let i = match parent
176            .children
177            .borrow()
178            .iter()
179            .enumerate()
180            .find(|&(_, child)| Rc::ptr_eq(child, target))
181        {
182            Some((i, _)) => i,
183            None => panic!("have parent but couldn't find in parent's children!"),
184        };
185        Some((parent, i))
186    } else {
187        None
188    }
189}
190
191fn append_to_existing_text(prev: &Handle, text: &str) -> bool {
192    match prev.data {
193        NodeData::Text { ref contents } => {
194            contents.borrow_mut().push_slice(text);
195            true
196        },
197        _ => false,
198    }
199}
200
201fn remove_from_parent(target: &Handle) {
202    if let Some((parent, i)) = get_parent_and_index(target) {
203        parent.children.borrow_mut().remove(i);
204        target.parent.set(None);
205    }
206}
207
208/// The DOM itself; the result of parsing.
209pub struct RcDom {
210    /// The `Document` itself.
211    pub document: Handle,
212
213    /// Errors that occurred during parsing.
214    pub errors: RefCell<Vec<Cow<'static, str>>>,
215
216    /// The document's quirks mode.
217    pub quirks_mode: Cell<QuirksMode>,
218}
219
220impl TreeSink for RcDom {
221    type Output = Self;
222    fn finish(self) -> Self {
223        self
224    }
225
226    type Handle = Handle;
227
228    type ElemName<'a>
229        = ExpandedName<'a>
230    where
231        Self: 'a;
232
233    fn parse_error(&self, msg: Cow<'static, str>) {
234        self.errors.borrow_mut().push(msg);
235    }
236
237    fn get_document(&self) -> Handle {
238        self.document.clone()
239    }
240
241    fn get_template_contents(&self, target: &Handle) -> Handle {
242        if let NodeData::Element {
243            ref template_contents,
244            ..
245        } = target.data
246        {
247            template_contents
248                .borrow()
249                .as_ref()
250                .expect("not a template element!")
251                .clone()
252        } else {
253            panic!("not a template element!")
254        }
255    }
256
257    fn set_quirks_mode(&self, mode: QuirksMode) {
258        self.quirks_mode.set(mode);
259    }
260
261    fn same_node(&self, x: &Handle, y: &Handle) -> bool {
262        Rc::ptr_eq(x, y)
263    }
264
265    fn elem_name<'a>(&self, target: &'a Handle) -> ExpandedName<'a> {
266        match target.data {
267            NodeData::Element { ref name, .. } => name.expanded(),
268            _ => panic!("not an element!"),
269        }
270    }
271
272    fn create_element(&self, name: QualName, attrs: Vec<Attribute>, flags: ElementFlags) -> Handle {
273        Node::new(NodeData::Element {
274            name,
275            attrs: RefCell::new(attrs),
276            template_contents: RefCell::new(if flags.template {
277                Some(Node::new(NodeData::Document))
278            } else {
279                None
280            }),
281            mathml_annotation_xml_integration_point: flags.mathml_annotation_xml_integration_point,
282        })
283    }
284
285    fn create_comment(&self, text: StrTendril) -> Handle {
286        Node::new(NodeData::Comment { contents: text })
287    }
288
289    fn create_pi(&self, target: StrTendril, data: StrTendril) -> Handle {
290        Node::new(NodeData::ProcessingInstruction {
291            target,
292            contents: data,
293        })
294    }
295
296    fn append(&self, parent: &Handle, child: NodeOrText<Handle>) {
297        // Append to an existing Text node if we have one.
298        if let NodeOrText::AppendText(text) = &child {
299            if let Some(h) = parent.children.borrow().last() {
300                if append_to_existing_text(h, text) {
301                    return;
302                }
303            }
304        }
305
306        append(
307            parent,
308            match child {
309                NodeOrText::AppendText(text) => Node::new(NodeData::Text {
310                    contents: RefCell::new(text),
311                }),
312                NodeOrText::AppendNode(node) => node,
313            },
314        );
315    }
316
317    fn append_before_sibling(&self, sibling: &Handle, child: NodeOrText<Handle>) {
318        let (parent, i) = get_parent_and_index(sibling)
319            .expect("append_before_sibling called on node without parent");
320
321        let child = match (child, i) {
322            // No previous node.
323            (NodeOrText::AppendText(text), 0) => Node::new(NodeData::Text {
324                contents: RefCell::new(text),
325            }),
326
327            // Look for a text node before the insertion point.
328            (NodeOrText::AppendText(text), i) => {
329                let children = parent.children.borrow();
330                let prev = &children[i - 1];
331                if append_to_existing_text(prev, &text) {
332                    return;
333                }
334                Node::new(NodeData::Text {
335                    contents: RefCell::new(text),
336                })
337            },
338
339            // The tree builder promises we won't have a text node after
340            // the insertion point.
341
342            // Any other kind of node.
343            (NodeOrText::AppendNode(node), _) => node,
344        };
345
346        remove_from_parent(&child);
347
348        child.parent.set(Some(Rc::downgrade(&parent)));
349        parent.children.borrow_mut().insert(i, child);
350    }
351
352    fn append_based_on_parent_node(
353        &self,
354        element: &Self::Handle,
355        prev_element: &Self::Handle,
356        child: NodeOrText<Self::Handle>,
357    ) {
358        let parent = element.parent.take();
359        let has_parent = parent.is_some();
360        element.parent.set(parent);
361
362        if has_parent {
363            self.append_before_sibling(element, child);
364        } else {
365            self.append(prev_element, child);
366        }
367    }
368
369    fn append_doctype_to_document(
370        &self,
371        name: StrTendril,
372        public_id: StrTendril,
373        system_id: StrTendril,
374    ) {
375        append(
376            &self.document,
377            Node::new(NodeData::Doctype {
378                name,
379                public_id,
380                system_id,
381            }),
382        );
383    }
384
385    fn add_attrs_if_missing(&self, target: &Handle, attrs: Vec<Attribute>) {
386        let mut existing = if let NodeData::Element { ref attrs, .. } = target.data {
387            attrs.borrow_mut()
388        } else {
389            panic!("not an element")
390        };
391
392        let existing_names = existing
393            .iter()
394            .map(|e| e.name.clone())
395            .collect::<HashSet<_>>();
396        existing.extend(
397            attrs
398                .into_iter()
399                .filter(|attr| !existing_names.contains(&attr.name)),
400        );
401    }
402
403    fn remove_from_parent(&self, target: &Handle) {
404        remove_from_parent(target);
405    }
406
407    fn reparent_children(&self, node: &Handle, new_parent: &Handle) {
408        let mut children = node.children.borrow_mut();
409        let mut new_children = new_parent.children.borrow_mut();
410        for child in children.iter() {
411            let previous_parent = child.parent.replace(Some(Rc::downgrade(new_parent)));
412            assert!(Rc::ptr_eq(
413                node,
414                &previous_parent.unwrap().upgrade().expect("dangling weak")
415            ))
416        }
417        new_children.extend(mem::take(&mut *children));
418    }
419
420    fn is_mathml_annotation_xml_integration_point(&self, target: &Handle) -> bool {
421        if let NodeData::Element {
422            mathml_annotation_xml_integration_point,
423            ..
424        } = target.data
425        {
426            mathml_annotation_xml_integration_point
427        } else {
428            panic!("not an element!")
429        }
430    }
431}
432
433impl Default for RcDom {
434    fn default() -> RcDom {
435        RcDom {
436            document: Node::new(NodeData::Document),
437            errors: Default::default(),
438            quirks_mode: Cell::new(tree_builder::NoQuirks),
439        }
440    }
441}
442
443enum SerializeOp {
444    Open(Handle),
445    Close(QualName),
446}
447
448pub struct SerializableHandle(Handle);
449
450impl From<Handle> for SerializableHandle {
451    fn from(h: Handle) -> SerializableHandle {
452        SerializableHandle(h)
453    }
454}
455
456impl Serialize for SerializableHandle {
457    fn serialize<S>(&self, serializer: &mut S, traversal_scope: TraversalScope) -> io::Result<()>
458    where
459        S: Serializer,
460    {
461        let mut ops = VecDeque::new();
462        match traversal_scope {
463            IncludeNode => ops.push_back(SerializeOp::Open(self.0.clone())),
464            ChildrenOnly(_) => ops.extend(
465                self.0
466                    .children
467                    .borrow()
468                    .iter()
469                    .map(|h| SerializeOp::Open(h.clone())),
470            ),
471        }
472
473        while let Some(op) = ops.pop_front() {
474            match op {
475                SerializeOp::Open(handle) => match handle.data {
476                    NodeData::Element {
477                        ref name,
478                        ref attrs,
479                        ..
480                    } => {
481                        serializer.start_elem(
482                            name.clone(),
483                            attrs.borrow().iter().map(|at| (&at.name, &at.value[..])),
484                        )?;
485
486                        ops.reserve(1 + handle.children.borrow().len());
487                        ops.push_front(SerializeOp::Close(name.clone()));
488
489                        for child in handle.children.borrow().iter().rev() {
490                            ops.push_front(SerializeOp::Open(child.clone()));
491                        }
492                    },
493
494                    NodeData::Doctype { ref name, .. } => serializer.write_doctype(name)?,
495
496                    NodeData::Text { ref contents } => serializer.write_text(&contents.borrow())?,
497
498                    NodeData::Comment { ref contents } => serializer.write_comment(contents)?,
499
500                    NodeData::ProcessingInstruction {
501                        ref target,
502                        ref contents,
503                    } => serializer.write_processing_instruction(target, contents)?,
504
505                    NodeData::Document => panic!("Can't serialize Document node itself"),
506                },
507
508                SerializeOp::Close(name) => {
509                    serializer.end_elem(name)?;
510                },
511            }
512        }
513
514        Ok(())
515    }
516}