//! HTML (flow) is a construct that occurs in the [flow][] content type. //! //! It forms with the following BNF: //! //! ```bnf //! html_flow ::= raw | comment | instruction | declaration | cdata | basic | complete //! //! ; Note: closing tag name needs to match opening tag name. //! raw ::= '<' raw_tag_name [ [ ( whitespace | '>' ) *line ] *( eol *line ) ] [ '</' raw_tag_name *line ] //! comment ::= '<!--' [ *'-' '>' *line | *line *( eol *line ) [ '-->' *line ] ] //! instruction ::= '<?' [ '>' *line | *line *( eol *line ) [ '?>' *line ] ] //! declaration ::= '<!' ascii_alphabetic *line *( eol *line ) [ '>' *line ] //! cdata ::= '<![CDATA[' *line *( eol *line ) [ ']]>' *line ] //! basic ::= '<' [ '/' ] basic_tag_name [ [ '/' ] '>' *line *( eol 1*line ) ] //! complete ::= ( opening_tag | closing_tag ) ( whitespace_optional *( eol 1*line ) | whitespace_optional ) //! //! raw_tag_name ::= 'pre' | 'script' | 'style' | 'textarea' ; Note: case-insensitive. //! basic_tag_name ::= 'address' | 'article' | 'aside' | ... ; See `constant.rs`, and note: case-insensitive. //! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>' //! closing_tag ::= '</' tag_name whitespace_optional '>' //! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric ) //! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ] //! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric ) //! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" ) "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`') //! //! whitespace ::= 1*space_or_tab //! whitespace_optional ::= [ whitespace ] //! line ::= code - eol //! eol ::= '\r' | '\r\n' | '\n' //! space_or_tab ::= ' ' | '\t' //! ``` //! //! The grammar for HTML in markdown does not resemble the rules of parsing //! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML //! spec][html-parsing]. //! As such, HTML in markdown *resembles* HTML, but is instead a (naïve?) //! attempt to parse an XML-like language. //! 
By extension, another notable property of the grammar is that it can //! result in invalid HTML, in that it allows things that wouldn’t work or //! wouldn’t work well in HTML, such as mismatched tags. //! //! Interestingly, most of the productions above have a clear opening and //! closing condition (raw, comment, instruction, declaration, cdata), but the //! closing condition does not need to be satisfied. //! In this case, the parser never has to backtrack. //! //! Because the **basic** and **complete** productions in the grammar form with //! a tag, followed by more stuff, and stop at a blank line, it is possible to //! interleave (a word for switching between languages) markdown and HTML //! together, by placing the opening and closing tags on their own lines, //! with blank lines between them and markdown. //! For example: //! //! ```markdown //!
<div>This is a <code>div</code> but *this* is not emphasis.</div>
//! //! <div>
//! //! This is a paragraph in a `div` and *this* is emphasis. //! //! </div>
//! ``` //! //! The **complete** production of HTML (flow) is not allowed to interrupt //! content. //! That means that a blank line is needed between a [paragraph][] and it. //! However, [HTML (text)][html_text] has a similar production, which will //! typically kick in instead. //! //! The list of tag names allowed in the **raw** production is defined in //! [`HTML_RAW_NAMES`][html_raw_names]. //! This production exists because there are a few cases where markdown //! *inside* some elements, and hence interleaving, does not make sense. //! //! The list of tag names allowed in the **basic** production is defined in //! [`HTML_BLOCK_NAMES`][html_block_names]. //! This production exists because there are a few cases where we can decide //! early that something is going to be a flow (block) element instead of a //! phrasing (inline) element. //! We *can* interrupt and don’t have to care too much about it being //! well-formed. //! //! ## Tokens //! //! * [`HtmlFlow`][TokenType::HtmlFlow] //! * [`HtmlFlowData`][TokenType::HtmlFlowData] //! * [`LineEnding`][TokenType::LineEnding] //! //! ## References //! //! * [`html-flow.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-flow.js) //! * [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks) //! //! [flow]: crate::content::flow //! [html_text]: crate::construct::html_text //! [paragraph]: crate::construct::paragraph //! [html_raw_names]: crate::constant::HTML_RAW_NAMES //! [html_block_names]: crate::constant::HTML_BLOCK_NAMES //! 
[html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE}; use crate::construct::{ blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, }; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; use crate::util::codes::{parse, serialize}; /// Kind of HTML (flow). #[derive(Debug, PartialEq)] enum Kind { /// Symbol for `