//! HTML (flow) is a construct that occurs in the flow content type.
//!
//! It forms with the following BNF:
//!
//! ```bnf
//! html_flow ::= raw | comment | instruction | declaration | cdata | basic | complete
//!
//! ; Note: closing tag name need to match opening tag name.
//! raw ::= '<' raw_tag_name [ [ ( whitespace | '>' ) *line ] *( eol *line ) ] [ '' raw_tag_name *line ]
//! comment ::= '' *line ] ]
//! instruction ::= '' [ '>' *line | *line *( eol *line ) [ '?>' *line ] ]
//! declaration ::= '' *line ]
//! cdata ::= '' *line ]
//! basic ::= '< [ '/' ] basic_tag_name [ [ '/' ] '>' *line *( eol 1*line ) ]
//! complete ::= ( opening_tag | closing_tag ) ( whitespace_optional *( eol 1*line ) | whitespace_optional )
//!
//! raw_tag_name ::= 'pre' | 'script' | 'style' | 'textarea' ; Note: case-insensitive.
//! basic_tag_name ::= 'address' | 'article' | 'aside' | ... ; See `constants.rs`, and note: case-insensitive.
//! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
//! closing_tag ::= '' tag_name whitespace_optional '>'
//! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
//! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" ) "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`')
//!
//! whitespace ::= 1*space_or_tab
//! whitespace_optional ::= [ space_or_tab ]
//! line ::= code - eol
//! eol ::= '\r' | '\r\n' | '\n'
//! space_or_tab ::= ' ' | '\t'
//! ```
//!
//! The grammar for HTML in markdown does not resemble the rules of parsing
//! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
//! spec][html-parsing].
//! As such, HTML in markdown *resembles* HTML, but is instead a (naïve?)
//! attempt to parse an XML-like language.
//! By extension, another notable property of the grammar is that it can
//! result in invalid HTML, in that it allows things that wouldn’t work or
//! wouldn’t work well in HTML, such as mismatched tags.
//!
//! Because the **basic** and **complete** productions in the grammar form with
//! a tag, followed by more stuff, and stop at a blank line, it is possible to
//! interleave (a word for switching between languages) markdown and HTML
//! together, by placing the opening and closing tags on their own lines,
//! with blank lines between them and markdown.
//! For example:
//!
//! ```markdown
//!
This is a div
but *this* is not emphasis.
//!
//!
//!
//! This is a paragraph in a `div` and *this* is emphasis.
//!
//!
//! ```
//!
//! The **complete** production of HTML (flow) is not allowed to interrupt
//! content.
//! That means that a blank line is needed between a paragraph and it.
//! However, HTML (text) has a similar production, which will typically kick-in
//! instead.
//!
//! The list of tag names allowed in the **raw** production are defined in
//! [`HTML_RAW_NAMES`][html_raw_names].
//! This production exists because there are a few cases where markdown
//! *inside* some elements, and hence interleaving, does not make sense.
//!
//! The list of tag names allowed in the **basic** production are defined in
//! [`HTML_BLOCK_NAMES`][html_block_names].
//! This production exists because there are a few cases where we can decide
//! early that something is going to be a flow (block) element instead of a
//! phrasing (inline) element.
//! We *can* interrupt and don’t have to care too much about it being
//! well-formed.
//!
//! ## References
//!
//! * [`html-flow.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-flow.js)
//! * [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
//!
//! [html_raw_names]: crate::constant::HTML_RAW_NAMES
//! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
//!
//!
use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
use crate::construct::{blank_line::start as blank_line, partial_whitespace::start as whitespace};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Kind of HTML (flow).
#[derive(Debug, Clone, PartialEq)]
enum Kind {
/// Not yet known.
Unknown,
/// Symbol for `
/// ```
fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
match code {
Code::Char('>') => {
let tag_name_buffer = info.buffer.iter().collect::().to_lowercase();
// To do: life times.
let mut clone = info;
clone.buffer.clear();
if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) {
tokenizer.consume(code);
(
State::Fn(Box::new(|tokenizer, code| {
continuation_close(tokenizer, clone, code)
})),
None,
)
} else {
continuation(tokenizer, clone, code)
}
}
Code::Char(char) if char.is_ascii_alphabetic() && info.buffer.len() < HTML_RAW_SIZE_MAX => {
tokenizer.consume(code);
// To do: life times.
let mut clone = info;
clone.buffer.push(char);
(
State::Fn(Box::new(|tokenizer, code| {
continuation_raw_end_tag(tokenizer, clone, code)
})),
None,
)
}
_ => continuation(tokenizer, info, code),
}
}
/// In cdata continuation, after `]`, expecting `]>`.
///
/// ```markdown
/// &<]|]>
/// ```
fn continuation_character_data_inside(
tokenizer: &mut Tokenizer,
info: Info,
code: Code,
) -> StateFnResult {
match code {
Code::Char(']') => {
tokenizer.consume(code);
(
State::Fn(Box::new(|tokenizer, code| {
continuation_declaration_inside(tokenizer, info, code)
})),
None,
)
}
_ => continuation(tokenizer, info, code),
}
}
/// In declaration or instruction continuation, waiting for `>` to close it.
///
/// ```markdown
///