html5ever\tokenizer/
interface.rs

1// Copyright 2014-2017 The html5ever Project Developers. See the
2// COPYRIGHT file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10use crate::interface::Attribute;
11use crate::tendril::StrTendril;
12use crate::tokenizer::states;
13use crate::LocalName;
14use std::borrow::Cow;
15
16pub use self::TagKind::{EndTag, StartTag};
17pub use self::Token::{CharacterTokens, CommentToken, DoctypeToken, TagToken};
18pub use self::Token::{EOFToken, NullCharacterToken, ParseError};
19
20/// A `DOCTYPE` token.
21// FIXME: already exists in Servo DOM
22#[derive(PartialEq, Eq, Clone, Debug, Default)]
23pub struct Doctype {
24    pub name: Option<StrTendril>,
25    pub public_id: Option<StrTendril>,
26    pub system_id: Option<StrTendril>,
27    pub force_quirks: bool,
28}
29
/// Distinguishes a start tag from an end tag.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum TagKind {
    /// A start tag.
    StartTag,
    /// An end tag.
    EndTag,
}
35
36/// A tag token.
37#[derive(PartialEq, Eq, Clone, Debug)]
38pub struct Tag {
39    pub kind: TagKind,
40    pub name: LocalName,
41    pub self_closing: bool,
42    pub attrs: Vec<Attribute>,
43}
44
45impl Tag {
46    /// Are the tags equivalent when we don't care about attribute order?
47    /// Also ignores the self-closing flag.
48    pub fn equiv_modulo_attr_order(&self, other: &Tag) -> bool {
49        if (self.kind != other.kind) || (self.name != other.name) {
50            return false;
51        }
52
53        let mut self_attrs = self.attrs.clone();
54        let mut other_attrs = other.attrs.clone();
55        self_attrs.sort();
56        other_attrs.sort();
57
58        self_attrs == other_attrs
59    }
60}
61
62#[derive(PartialEq, Eq, Debug)]
63pub enum Token {
64    DoctypeToken(Doctype),
65    TagToken(Tag),
66    CommentToken(StrTendril),
67    CharacterTokens(StrTendril),
68    NullCharacterToken,
69    EOFToken,
70    ParseError(Cow<'static, str>),
71}
72
73#[derive(Debug, PartialEq)]
74#[must_use]
75pub enum TokenSinkResult<Handle> {
76    Continue,
77    Script(Handle),
78    Plaintext,
79    RawData(states::RawKind),
80}
81
82/// Types which can receive tokens from the tokenizer.
83pub trait TokenSink {
84    type Handle;
85
86    /// Process a token.
87    fn process_token(&self, token: Token, line_number: u64) -> TokenSinkResult<Self::Handle>;
88
89    // Signal sink that tokenization reached the end.
90    fn end(&self) {}
91
92    /// Used in the markup declaration open state. By default, this always
93    /// returns false and thus all CDATA sections are tokenized as bogus
94    /// comments.
95    /// <https://html.spec.whatwg.org/multipage/#markup-declaration-open-state>
96    fn adjusted_current_node_present_but_not_in_html_namespace(&self) -> bool {
97        false
98    }
99}