author    | Titus Wormer <tituswormer@gmail.com> | 2022-06-14 12:26:23 +0200
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-14 12:26:23 +0200
commit    | 82aca5003acba36a62b0032860af09f65c91ddae (patch)
tree      | 97b3153efd27f838731ac57e580e23739455a9dd
parent    | ef644f4def7d5cad3fb5307ec5e00fc7b0b025ff (diff)
Add docs for html (text)
-rw-r--r-- | readme.md                  | 14
-rw-r--r-- | src/construct/html_flow.rs | 29
-rw-r--r-- | src/construct/html_text.rs | 327
-rw-r--r-- | src/content/text.rs        | 2
-rw-r--r-- | src/subtokenize.rs         | 1

5 files changed, 303 insertions, 70 deletions
diff --git a/readme.md b/readme.md
--- a/readme.md
+++ b/readme.md
@@ -68,6 +68,15 @@ cargo doc --document-private-items
 ### Small things
 
+- [ ] (3) Fix deep subtokenization
+- [ ] (1) Add docs on bnf
+- [ ] (1) Add docs to subtokenize
+- [ ] (1) Add module docs to content
+- [ ] (1) Add module docs to parser
+- [ ] (1) Reorganize to split util
+- [ ] (1) Add examples to `CompileOptions` docs
+- [ ] (1) Add overview docs on how everything works
+- [ ] (1) Move safe protocols to constants
 - [ ] (1) Parse initial and final whitespace of paragraphs (in text)
 - [ ] (3) Clean compiler
 - [ ] (1) Use preferred line ending style in markdown
@@ -109,7 +118,7 @@ cargo doc --document-private-items
 - [x] heading (atx)
 - [ ] (1) heading (setext)
 - [x] html (flow)
-- [ ] html (text)
+- [x] html (text)
 - [ ] (3) label end
 - [ ] (3) label start (image)
 - [ ] (3) label start (link)
@@ -141,7 +150,7 @@ cargo doc --document-private-items
   - [x] character reference
   - [ ] code (text)
   - [ ] hard break escape
-  - [ ] html (text)
+  - [x] html (text)
   - [ ] label end
   - [ ] label start (image)
   - [ ] label start (link)
@@ -155,6 +164,7 @@ cargo doc --document-private-items
   one content type that also are another content type
 - [x] (3) Encode urls
 - [x] (1) Optionally remove dangerous protocols when compiling
+- [x] (1) Add docs to html (text)
 
 ### Extensions
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index a1b686b..da4517d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -24,7 +24,7 @@
 //! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" ) "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`')
 //!
 //! whitespace ::= 1*space_or_tab
-//! whitespace_optional ::= [ space_or_tab ]
+//! whitespace_optional ::= [ whitespace ]
 //! line ::= code - eol
 //! eol ::= '\r' | '\r\n' | '\n'
 //! space_or_tab ::= ' ' | '\t'
@@ -39,6 +39,11 @@
 //! result in invalid HTML, in that it allows things that wouldn’t work or
 //! wouldn’t work well in HTML, such as mismatched tags.
 //!
+//! Interestingly, most of the productions above have a clear opening and
+//! closing condition (raw, comment, instruction, declaration, cdata), but the
+//! closing condition does not need to be satisfied.
+//! In this case, the parser never has to backtrack.
+//!
 //! Because the **basic** and **complete** productions in the grammar form with
 //! a tag, followed by more stuff, and stop at a blank line, it is possible to
 //! interleave (a word for switching between languages) markdown and HTML
@@ -59,8 +64,8 @@
 //! The **complete** production of HTML (flow) is not allowed to interrupt
 //! content.
 //! That means that a blank line is needed between a paragraph and it.
-//! However, HTML (text) has a similar production, which will typically kick-in
-//! instead.
+//! However, [HTML (text)][html_text] has a similar production, which will
+//! typically kick-in instead.
 //!
 //! The list of tag names allowed in the **raw** production are defined in
 //! [`HTML_RAW_NAMES`][html_raw_names].
@@ -81,11 +86,10 @@
 //! * [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
 //!
 //! [flow]: crate::content::flow
+//! [html_text]: crate::construct::html_text
 //! [html_raw_names]: crate::constant::HTML_RAW_NAMES
 //! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
 //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
-//!
-//! <!-- To do: link html (text) -->
 
 use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
 use crate::construct::{blank_line::start as blank_line, partial_whitespace::start as whitespace};
@@ -146,6 +150,7 @@ struct Info {
 /// ```markdown
 /// |<x />
 /// ```
+///
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::HtmlFlow);
     tokenizer.enter(TokenType::HtmlFlowData);
@@ -188,8 +193,8 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 ///
 /// ```markdown
 /// <|x />
-/// <|!doctype />
-/// <|!--xxx--/>
+/// <|!doctype>
+/// <|!--xxx-->
 /// ```
 fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
     match code {
@@ -197,7 +202,7 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
             tokenizer.consume(code);
             (
                 State::Fn(Box::new(|tokenizer, code| {
-                    declaration_start(tokenizer, info, code)
+                    declaration_open(tokenizer, info, code)
                 })),
                 None,
             )
@@ -238,11 +243,11 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
 /// After `<!`, so inside a declaration, comment, or CDATA.
 ///
 /// ```markdown
-/// <!|doctype />
-/// <!|--xxx--/>
+/// <!|doctype>
+/// <!|--xxx-->
 /// <!|[CDATA[>&<]]>
 /// ```
-fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
             tokenizer.consume(code);
@@ -287,7 +292,7 @@ fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> State
 /// After `<!-`, inside a comment, before another `-`.
 ///
 /// ```markdown
-/// <!-|-xxx--/>
+/// <!-|-xxx-->
 /// ```
 fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
     match code {
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index da5a018..95fb8c3 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -1,4 +1,53 @@
-//! To do.
+//! HTML (text) is a construct that occurs in the [text][] content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! html_text ::= comment | instruction | declaration | cdata | tag_close | tag_open
+//!
+//! ; Restriction: the text is not allowed to start with `>`, `->`, or to contain `--`.
+//! comment ::= '<!--' *code '-->'
+//! instruction ::= '<?' *code '?>'
+//! declaration ::= '<!' ascii_alphabetic *code '>'
+//! ; Restriction: the text is not allowed to contain `]]`.
+//! cdata ::= '<![CDATA[' *code ']]>'
+//! tag_close ::= '</' tag_name whitespace_optional '>'
+//! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
+//!
+//! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
+//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
+//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
+//! attribute_value ::= '"' *( code - '"' ) '"' | "'" *( code - "'" ) "'" | 1*( code - space_or_tab - eol - '"' - "'" - '/' - '<' - '=' - '>' - '`')
+//!
+//! ; Note: blank lines can never occur in `text`.
+//! whitespace ::= 1*space_or_tab | [ *space_or_tab eol *space_or_tab ]
+//! whitespace_optional ::= [ whitespace ]
+//! eol ::= '\r' | '\r\n' | '\n'
+//! space_or_tab ::= ' ' | '\t'
+//! ```
+//!
+//! The grammar for HTML in markdown does not resemble the rules of parsing
+//! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
+//! spec][html-parsing].
+//! See the related flow construct [HTML (flow)][html_flow] for more info.
+//!
+//! Because the **tag open** and **tag close** productions in the grammar form
+//! with just tags instead of complete elements, it is possible to interleave
+//! (a word for switching between languages) markdown and HTML together.
+//! For example:
+//!
+//! ```markdown
+//! This is equivalent to <code>*emphasised* code</code>.
+//! ```
+//!
+//! ## References
+//!
+//! * [`html-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-text.js)
+//! * [*§ 6.6 Raw HTML* in `CommonMark`](https://spec.commonmark.org/0.30/#raw-html)
+//!
+//! [text]: crate::content::text
+//! [html_flow]: crate::construct::html_flow
+//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
 use crate::construct::partial_whitespace::start as whitespace;
 use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
@@ -15,7 +64,13 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     (State::Fn(Box::new(open)), None)
 }
 
-/// To do.
+/// After `<`, before a tag name or other stuff.
+///
+/// ```markdown
+/// a <|x /> b
+/// a <|!doctype> b
+/// a <|!--xxx--/> b
+/// ```
 pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('!') => {
@@ -38,19 +93,25 @@ pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `<!`, so inside a declaration, comment, or CDATA.
+///
+/// ```markdown
+/// a <!|doctype> b
+/// a <!|--xxx--> b
+/// a <!|[CDATA[>&<]]> b
+/// ```
 pub fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(comment_open)), None)
+            (State::Fn(Box::new(comment_open_inside)), None)
         }
         Code::Char('[') => {
             tokenizer.consume(code);
             let buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
             (
                 State::Fn(Box::new(|tokenizer, code| {
-                    cdata_open(tokenizer, code, buffer, 0)
+                    cdata_open_inside(tokenizer, code, buffer, 0)
                 })),
                 None,
             )
@@ -63,8 +124,12 @@ pub fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
-pub fn comment_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+/// After `<!-`, inside a comment, before another `-`.
+///
+/// ```markdown
+/// a <!-|-xxx--> b
+/// ```
+pub fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
             tokenizer.consume(code);
@@ -74,7 +139,18 @@ pub fn comment_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `<!--`, inside a comment
+///
+/// > **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as
+/// > empty comments.
+/// > This is prohibited in html (text).
+/// > See: <https://github.com/commonmark/commonmark-spec/issues/712>.
+///
+/// ```markdown
+/// a <!--|xxx--> b
+/// ```
+///
+/// [html_flow]: crate::construct::html_flow
 pub fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('>') => (State::Nok, None),
@@ -86,7 +162,18 @@ pub fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `<!---`, inside a comment
+///
+/// > **Note**: [html (flow)][html_flow] does allow `<!--->` as an empty
+/// > comment.
+/// > This is prohibited in html (text).
+/// > See: <https://github.com/commonmark/commonmark-spec/issues/712>.
+///
+/// ```markdown
+/// a <!---|xxx--> b
+/// ```
+///
+/// [html_flow]: crate::construct::html_flow
 pub fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('>') => (State::Nok, None),
@@ -94,7 +181,12 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResul
     }
 }
 
-/// To do.
+/// In a comment.
+///
+/// ```markdown
+/// a <!--|xxx--> b
+/// a <!--x|xx--> b
+/// ```
 pub fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Nok, None),
@@ -112,7 +204,12 @@ pub fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In a comment, after `-`.
+///
+/// ```markdown
+/// a <!--xxx-|-> b
+/// a <!--xxx-|yyy--> b
+/// ```
 pub fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
@@ -123,8 +220,16 @@ pub fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
-pub fn cdata_open(
+/// After `<![`, inside CDATA, expecting `CDATA[`.
+///
+/// ```markdown
+/// a <![|CDATA[>&<]]> b
+/// a <![CD|ATA[>&<]]> b
+/// a <![CDA|TA[>&<]]> b
+/// a <![CDAT|A[>&<]]> b
+/// a <![CDATA|[>&<]]> b
+/// ```
+pub fn cdata_open_inside(
     tokenizer: &mut Tokenizer,
     code: Code,
     buffer: Vec<char>,
@@ -139,7 +244,7 @@ pub fn cdata_open(
         } else {
             (
                 State::Fn(Box::new(move |tokenizer, code| {
-                    cdata_open(tokenizer, code, buffer, index + 1)
+                    cdata_open_inside(tokenizer, code, buffer, index + 1)
                 })),
                 None,
             )
@@ -149,7 +254,11 @@ pub fn cdata_open(
     }
 }
 
-/// To do.
+/// In CDATA.
+///
+/// ```markdown
+/// a <![CDATA[|>&<]]> b
+/// ```
 pub fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Nok, None),
@@ -167,7 +276,11 @@ pub fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In CDATA, after `]`.
+///
+/// ```markdown
+/// a <![CDATA[>&<]|]> b
+/// ```
 pub fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(']') => {
@@ -178,7 +291,11 @@ pub fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In CDATA, after `]]`.
+///
+/// ```markdown
+/// a <![CDATA[>&<]]|> b
+/// ```
 pub fn cdata_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('>') => end(tokenizer, code),
@@ -187,7 +304,11 @@ pub fn cdata_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In a declaration.
+///
+/// ```markdown
+/// a <!a|b> b
+/// ```
 pub fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('>') => end(tokenizer, code),
@@ -201,7 +322,12 @@ pub fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In an instruction.
+///
+/// ```markdown
+/// a <?|ab?> b
+/// a <?a|b?> b
+/// ```
 pub fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Nok, None),
@@ -219,7 +345,12 @@ pub fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In an instruction, after `?`.
+///
+/// ```markdown
+/// a <?aa?|> b
+/// a <?aa?|bb?> b
+/// ```
 pub fn instruction_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('>') => end(tokenizer, code),
@@ -227,7 +358,11 @@ pub fn instruction_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
+/// After `</`, in a closing tag, before a tag name.
+///
+/// ```markdown
+/// a </|x> b
+/// ```
 pub fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char) if char.is_ascii_alphabetic() => {
@@ -238,7 +373,12 @@ pub fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `</x`, in a tag name.
+///
+/// ```markdown
+/// a </x|> b
+/// a </x|y> b
+/// ```
 pub fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
@@ -249,7 +389,12 @@ pub fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In a closing tag, after the tag name.
+///
+/// ```markdown
+/// a </x| > b
+/// a </xy |> b
+/// ```
 pub fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -263,14 +408,17 @@ pub fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
+/// After `<x`, in an opening tag name.
+///
+/// ```markdown
+/// a <x|> b
+/// ```
 pub fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
             tokenizer.consume(code);
             (State::Fn(Box::new(tag_open)), None)
         }
-
         Code::CarriageReturnLineFeed
         | Code::VirtualSpace
         | Code::Char('\r' | '\n' | '\t' | ' ' | '/' | '>') => tag_open_between(tokenizer, code),
@@ -278,7 +426,13 @@ pub fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In an opening tag, after the tag name.
+///
+/// ```markdown
+/// a <x| y> b
+/// a <x |y="z"> b
+/// a <x |/> b
+/// ```
 pub fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -300,7 +454,13 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
+/// In an attribute name.
+///
+/// ```markdown
+/// a <x :|> b
+/// a <x _|> b
+/// a <x a|> b
+/// ```
 pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char)
@@ -317,7 +477,14 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFn
     }
 }
 
-/// To do.
+/// After an attribute name, before an attribute initializer, the end of the
+/// tag, or whitespace.
+///
+/// ```markdown
+/// a <x a|> b
+/// a <x a|=b> b
+/// a <x a|="c"> b
+/// ```
 pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -335,7 +502,13 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> S
     }
 }
 
-/// To do.
+/// Before an unquoted, double quoted, or single quoted attribute value,
+/// allowing whitespace.
+///
+/// ```markdown
+/// a <x a=|b> b
+/// a <x a=|"c"> b
+/// ```
 pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),
@@ -362,7 +535,12 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) ->
     }
 }
 
-/// To do.
+/// In a double or single quoted attribute value.
+///
+/// ```markdown
+/// a <x a="|"> b
+/// a <x a='|'> b
+/// ```
 pub fn tag_open_attribute_value_quoted(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -396,7 +574,30 @@ pub fn tag_open_attribute_value_quoted(
     }
 }
 
-/// To do.
+/// In an unquoted attribute value.
+///
+/// ```markdown
+/// a <x a=b|c> b
+/// ```
+pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None),
+        Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => {
+            tag_open_between(tokenizer, code)
+        }
+        Code::Char(_) => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None)
+        }
+    }
+}
+
+/// After a double or single quoted attribute value, before whitespace or the
+/// end of the tag.
+///
+/// ```markdown
+/// a <x a="b"|> b
+/// ```
 pub fn tag_open_attribute_value_quoted_after(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -409,23 +610,34 @@ pub fn tag_open_attribute_value_quoted_after(
     }
 }
 
-/// To do.
-pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+/// In certain circumstances of a complete tag where only an `>` is allowed.
+///
+/// ```markdown
+/// a <x a="b"|> b
+/// a <!--xx--|> b
+/// a <x /|> b
+/// ```
+pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
-        Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None),
-        Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '>') => {
-            tag_open_between(tokenizer, code)
-        }
-        Code::Char(_) => {
+        Code::Char('>') => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None)
+            tokenizer.exit(TokenType::HtmlTextData);
+            tokenizer.exit(TokenType::HtmlText);
+            (State::Ok, None)
         }
+        _ => (State::Nok, None),
     }
 }
 
-/// To do.
-// We can’t have blank lines in content, so no need to worry about empty
-// tokens.
+/// At an allowed line ending.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a|
+/// b--> b
+/// ```
 pub fn at_line_ending(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -446,6 +658,15 @@ pub fn at_line_ending(
     }
 }
 
+/// After a line ending.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a
+/// |b--> b
+/// ```
 pub fn after_line_ending(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -457,6 +678,15 @@ pub fn after_line_ending(
 }
 
+/// After a line ending, after indent.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a
+/// |b--> b
+/// ```
 pub fn after_line_ending_prefix(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -465,16 +695,3 @@ pub fn after_line_ending_prefix(
     tokenizer.enter(TokenType::HtmlTextData);
     return_state(tokenizer, code)
 }
-
-/// To do.
-pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::Char('>') => {
-            tokenizer.consume(code);
-            tokenizer.exit(TokenType::HtmlTextData);
-            tokenizer.exit(TokenType::HtmlText);
-            (State::Ok, None)
-        }
-        _ => (State::Nok, None),
-    }
-}
diff --git a/src/content/text.rs b/src/content/text.rs
index 3db82f5..73c2d55 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -7,7 +7,7 @@
 //!
 //! * [Autolink][crate::construct::autolink]
 //! * Attention
-//! * [HTML (text)][crate::construct::html-text]
+//! * [HTML (text)][crate::construct::html_text]
 //! * Hard break escape
 //! * Code (text)
 //! * Line ending
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index ee826b8..e004f7b 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -7,6 +7,7 @@ use crate::tokenizer::{
 use crate::util::{slice_codes, Span};
 use std::collections::HashMap;
 
+/// To do.
 pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
     let mut events = events;
     let mut index = 0;
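
Editor's note, not part of the commit: the new docs for `cdata_open_inside` describe matching the expected `CDATA[` buffer one character per state-function step, failing on the first mismatch and never backtracking (the same point the html (flow) docs make about closing conditions). The following standalone Rust sketch models that idea with hypothetical types; it does not use the crate's real `Tokenizer`, `Code`, or `StateFnResult` API.

```rust
/// A minimal sketch of the buffer-matching idea behind `cdata_open_inside`:
/// after `<![`, the parser expects the exact sequence `CDATA[`, consuming one
/// character per step. The types are illustrative stand-ins only.
#[derive(Debug, PartialEq)]
enum State {
    /// Still expecting more of the buffer; `index` is how much matched so far.
    Expecting { index: usize },
    /// The whole buffer matched; the real parser would move on to `cdata`.
    Ok,
    /// A character did not match, so this is not a CDATA section.
    Nok,
}

/// Feed one character to the matcher, mirroring one state-function step.
fn step(expected: &[char], state: State, char: char) -> State {
    match state {
        State::Expecting { index } if char == expected[index] => {
            if index + 1 == expected.len() {
                // All of `CDATA[` has been seen.
                State::Ok
            } else {
                // Stay in the matching state, waiting for the next character.
                State::Expecting { index: index + 1 }
            }
        }
        // Any mismatch (or a step after failure) stays failed.
        _ => State::Nok,
    }
}

fn main() {
    let expected: Vec<char> = "CDATA[".chars().collect();

    // `<![CDATA[` matches after six steps.
    let mut state = State::Expecting { index: 0 };
    for char in "CDATA[".chars() {
        state = step(&expected, state, char);
    }
    assert_eq!(state, State::Ok);

    // `<![CDX...` fails on the mismatching `X`.
    let mut state = State::Expecting { index: 0 };
    for char in "CDX".chars() {
        state = step(&expected, state, char);
    }
    assert_eq!(state, State::Nok);
}
```

Carrying the index in the state rather than re-scanning the input keeps each step O(1), which is why, as the added html (flow) docs note, the parser never has to backtrack even when a closing condition is never satisfied.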