author    | Titus Wormer <tituswormer@gmail.com> | 2022-06-14 12:26:23 +0200
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-14 12:26:23 +0200
commit    | 82aca5003acba36a62b0032860af09f65c91ddae (patch)
tree      | 97b3153efd27f838731ac57e580e23739455a9dd
parent    | ef644f4def7d5cad3fb5307ec5e00fc7b0b025ff (diff)
Add docs for html (text)
-rw-r--r-- | readme.md                  | 14
-rw-r--r-- | src/construct/html_flow.rs | 29
-rw-r--r-- | src/construct/html_text.rs | 327
-rw-r--r-- | src/content/text.rs        | 2
-rw-r--r-- | src/subtokenize.rs         | 1

5 files changed, 303 insertions, 70 deletions
diff --git a/readme.md b/readme.md
--- a/readme.md
+++ b/readme.md
@@ -68,6 +68,15 @@ cargo doc --document-private-items
 ### Small things
 
+- [ ] (3) Fix deep subtokenization
+- [ ] (1) Add docs on bnf
+- [ ] (1) Add docs to subtokenize
+- [ ] (1) Add module docs to content
+- [ ] (1) Add module docs to parser
+- [ ] (1) Reorganize to split util
+- [ ] (1) Add examples to `CompileOptions` docs
+- [ ] (1) Add overview docs on how everything works
+- [ ] (1) Move safe protocols to constants
 - [ ] (1) Parse initial and final whitespace of paragraphs (in text)
 - [ ] (3) Clean compiler
 - [ ] (1) Use preferred line ending style in markdown
@@ -109,7 +118,7 @@ cargo doc --document-private-items
 - [x] heading (atx)
 - [ ] (1) heading (setext)
 - [x] html (flow)
-- [ ] html (text)
+- [x] html (text)
 - [ ] (3) label end
 - [ ] (3) label start (image)
 - [ ] (3) label start (link)
@@ -141,7 +150,7 @@ cargo doc --document-private-items
   - [x] character reference
   - [ ] code (text)
   - [ ] hard break escape
-  - [ ] html (text)
+  - [x] html (text)
   - [ ] label end
   - [ ] label start (image)
   - [ ] label start (link)
@@ -155,6 +164,7 @@ cargo doc --document-private-items
   one content type that also are another content type
 - [x] (3) Encode urls
 - [x] (1) Optionally remove dangerous protocols when compiling
+- [x] (1) Add docs to html (text)
 
 ### Extensions
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index a1b686b..da4517d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -24,7 +24,7 @@
 //! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" ) "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`')
 //!
 //! whitespace ::= 1*space_or_tab
-//! whitespace_optional ::= [ space_or_tab ]
+//! whitespace_optional ::= [ whitespace ]
 //! line ::= code - eol
 //! eol ::= '\r' | '\r\n' | '\n'
 //! space_or_tab ::= ' ' | '\t'
@@ -39,6 +39,11 @@
 //! result in invalid HTML, in that it allows things that wouldn’t work or
 //! wouldn’t work well in HTML, such as mismatched tags.
 //!
+//! Interestingly, most of the productions above have a clear opening and
+//! closing condition (raw, comment, instruction, declaration, cdata), but the
+//! closing condition does not need to be satisfied.
+//! In this case, the parser never has to backtrack.
+//!
 //! Because the **basic** and **complete** productions in the grammar form with
 //! a tag, followed by more stuff, and stop at a blank line, it is possible to
 //! interleave (a word for switching between languages) markdown and HTML
@@ -59,8 +64,8 @@
 //! The **complete** production of HTML (flow) is not allowed to interrupt
 //! content.
 //! That means that a blank line is needed between a paragraph and it.
-//! However, HTML (text) has a similar production, which will typically kick-in
-//! instead.
+//! However, [HTML (text)][html_text] has a similar production, which will
+//! typically kick-in instead.
 //!
 //! The list of tag names allowed in the **raw** production are defined in
 //! [`HTML_RAW_NAMES`][html_raw_names].
@@ -81,11 +86,10 @@
 //! * [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
 //!
 //! [flow]: crate::content::flow
+//! [html_text]: crate::construct::html_text
 //! [html_raw_names]: crate::constant::HTML_RAW_NAMES
 //! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
 //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
-//!
-//! <!-- To do: link html (text) -->
 
 use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
 use crate::construct::{blank_line::start as blank_line, partial_whitespace::start as whitespace};
@@ -146,6 +150,7 @@ struct Info {
 /// ```markdown
 /// |<x />
 /// ```
+///
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::HtmlFlow);
     tokenizer.enter(TokenType::HtmlFlowData);
@@ -188,8 +193,8 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 ///
 /// ```markdown
 /// <|x />
-/// <|!doctype />
-/// <|!--xxx--/>
+/// <|!doctype>
+/// <|!--xxx-->
 /// ```
 fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
     match code {
@@ -197,7 +202,7 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
             tokenizer.consume(code);
             (
                 State::Fn(Box::new(|tokenizer, code| {
-                    declaration_start(tokenizer, info, code)
+                    declaration_open(tokenizer, info, code)
                 })),
                 None,
             )
@@ -238,11 +243,11 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
 /// After `<!`, so inside a declaration, comment, or CDATA.
 ///
 /// ```markdown
-/// <!|doctype />
-/// <!|--xxx--/>
+/// <!|doctype>
+/// <!|--xxx-->
 /// <!|[CDATA[>&<]]>
 /// ```
-fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
             tokenizer.consume(code);
@@ -287,7 +292,7 @@ fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> State
 /// After `<!-`, inside a comment, before another `-`.
 ///
 /// ```markdown
-/// <!-|-xxx--/>
+/// <!-|-xxx-->
 /// ```
 fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
     match code {
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index da5a018..95fb8c3 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -1,4 +1,53 @@
-//! To do.
+//! HTML (text) is a construct that occurs in the [text][] content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! html_text ::= comment | instruction | declaration | cdata | tag_close | tag_open
+//!
+//! ; Restriction: the text is not allowed to start with `>`, `->`, or to contain `--`.
+//! comment ::= '<!--' *code '-->'
+//! instruction ::= '<?' *code '?>'
+//! declaration ::= '<!' ascii_alphabetic *code '>'
+//! ; Restriction: the text is not allowed to contain `]]`.
+//! cdata ::= '<![CDATA[' *code ']]>'
+//! tag_close ::= '</' tag_name whitespace_optional '>'
+//! opening_tag ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
+//!
+//! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
+//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
+//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
+//! attribute_value ::= '"' *( code - '"' ) '"' | "'" *( code - "'" ) "'" | 1*( code - space_or_tab - eol - '"' - "'" - '/' - '<' - '=' - '>' - '`')
+//!
+//! ; Note: blank lines can never occur in `text`.
+//! whitespace ::= 1*space_or_tab | [ *space_or_tab eol *space_or_tab ]
+//! whitespace_optional ::= [ whitespace ]
+//! eol ::= '\r' | '\r\n' | '\n'
+//! space_or_tab ::= ' ' | '\t'
+//! ```
+//!
+//! The grammar for HTML in markdown does not resemble the rules of parsing
+//! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
+//! spec][html-parsing].
+//! See the related flow construct [HTML (flow)][html_flow] for more info.
+//!
+//! Because the **tag open** and **tag close** productions in the grammar form
+//! with just tags instead of complete elements, it is possible to interleave
+//! (a word for switching between languages) markdown and HTML together.
+//! For example:
+//!
+//! ```markdown
+//! This is equivalent to <code>*emphasised* code</code>.
+//! ```
+//!
+//! ## References
+//!
+//! * [`html-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-text.js)
+//! * [*§ 6.6 Raw HTML* in `CommonMark`](https://spec.commonmark.org/0.30/#raw-html)
+//!
+//! [text]: crate::content::text
+//! [html_flow]: crate::construct::html_flow
+//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
 use crate::construct::partial_whitespace::start as whitespace;
 use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
@@ -15,7 +64,13 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     (State::Fn(Box::new(open)), None)
 }
 
-/// To do.
+/// After `<`, before a tag name or other stuff.
+///
+/// ```markdown
+/// a <|x /> b
+/// a <|!doctype> b
+/// a <|!--xxx--/> b
+/// ```
 pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('!') => {
@@ -38,19 +93,25 @@ pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `<!`, so inside a declaration, comment, or CDATA.
+///
+/// ```markdown
+/// a <!|doctype> b
+/// a <!|--xxx--> b
+/// a <!|[CDATA[>&<]]> b
+/// ```
 pub fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(comment_open)), None)
+            (State::Fn(Box::new(comment_open_inside)), None)
         }
         Code::Char('[') => {
             tokenizer.consume(code);
             let buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
             (
                 State::Fn(Box::new(|tokenizer, code| {
-                    cdata_open(tokenizer, code, buffer, 0)
+                    cdata_open_inside(tokenizer, code, buffer, 0)
                 })),
                 None,
             )
@@ -63,8 +124,12 @@ pub fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
-pub fn comment_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+/// After `<!-`, inside a comment, before another `-`.
+///
+/// ```markdown
+/// a <!-|-xxx--> b
+/// ```
+pub fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
             tokenizer.consume(code);
@@ -74,7 +139,18 @@ pub fn comment_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `<!--`, inside a comment
+///
+/// > **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as
+/// > empty comments.
+/// > This is prohibited in html (text).
+/// > See: <https://github.com/commonmark/commonmark-spec/issues/712>.
+///
+/// ```markdown
+/// a <!--|xxx--> b
+/// ```
+///
+/// [html_flow]: crate::construct::html_flow
 pub fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('>') => (State::Nok, None),
@@ -86,7 +162,18 @@ pub fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `<!---`, inside a comment
+///
+/// > **Note**: [html (flow)][html_flow] does allow `<!--->` as an empty
+/// > comment.
+/// > This is prohibited in html (text).
+/// > See: <https://github.com/commonmark/commonmark-spec/issues/712>.
+///
+/// ```markdown
+/// a <!---|xxx--> b
+/// ```
+///
+/// [html_flow]: crate::construct::html_flow
 pub fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('>') => (State::Nok, None),
@@ -94,7 +181,12 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResul
     }
 }
 
-/// To do.
+/// In a comment.
+///
+/// ```markdown
+/// a <!--|xxx--> b
+/// a <!--x|xx--> b
+/// ```
 pub fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Nok, None),
@@ -112,7 +204,12 @@ pub fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In a comment, after `-`.
+///
+/// ```markdown
+/// a <!--xxx-|-> b
+/// a <!--xxx-|yyy--> b
+/// ```
 pub fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('-') => {
@@ -123,8 +220,16 @@ pub fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
-pub fn cdata_open(
+/// After `<![`, inside CDATA, expecting `CDATA[`.
+///
+/// ```markdown
+/// a <![|CDATA[>&<]]> b
+/// a <![CD|ATA[>&<]]> b
+/// a <![CDA|TA[>&<]]> b
+/// a <![CDAT|A[>&<]]> b
+/// a <![CDATA|[>&<]]> b
+/// ```
+pub fn cdata_open_inside(
     tokenizer: &mut Tokenizer,
     code: Code,
     buffer: Vec<char>,
@@ -139,7 +244,7 @@ pub fn cdata_open(
         } else {
             (
                 State::Fn(Box::new(move |tokenizer, code| {
-                    cdata_open(tokenizer, code, buffer, index + 1)
+                    cdata_open_inside(tokenizer, code, buffer, index + 1)
                 })),
                 None,
             )
@@ -149,7 +254,11 @@ pub fn cdata_open(
     }
 }
 
-/// To do.
+/// In CDATA.
+///
+/// ```markdown
+/// a <![CDATA[|>&<]]> b
+/// ```
 pub fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Nok, None),
@@ -167,7 +276,11 @@ pub fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In CDATA, after `]`.
+///
+/// ```markdown
+/// a <![CDATA[>&<]|]> b
+/// ```
 pub fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(']') => {
@@ -178,7 +291,11 @@ pub fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In CDATA, after `]]`.
+///
+/// ```markdown
+/// a <![CDATA[>&<]]|> b
+/// ```
 pub fn cdata_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('>') => end(tokenizer, code),
@@ -187,7 +304,11 @@ pub fn cdata_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In a declaration.
+///
+/// ```markdown
+/// a <!a|b> b
+/// ```
 pub fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('>') => end(tokenizer, code),
@@ -201,7 +322,12 @@ pub fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In an instruction.
+///
+/// ```markdown
+/// a <?|ab?> b
+/// a <?a|b?> b
+/// ```
 pub fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Nok, None),
@@ -219,7 +345,12 @@ pub fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In an instruction, after `?`.
+///
+/// ```markdown
+/// a <?aa?|> b
+/// a <?aa?|bb?> b
+/// ```
 pub fn instruction_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char('>') => end(tokenizer, code),
@@ -227,7 +358,11 @@ pub fn instruction_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
+/// After `</`, in a closing tag, before a tag name.
+///
+/// ```markdown
+/// a </|x> b
+/// ```
 pub fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char) if char.is_ascii_alphabetic() => {
@@ -238,7 +373,12 @@ pub fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// After `</x`, in a tag name.
+///
+/// ```markdown
+/// a </x|> b
+/// a </x|y> b
+/// ```
 pub fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
@@ -249,7 +389,12 @@ pub fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In a closing tag, after the tag name.
+///
+/// ```markdown
+/// a </x| > b
+/// a </xy |> b
+/// ```
 pub fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -263,14 +408,17 @@ pub fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
+/// After `<x`, in an opening tag name.
+///
+/// ```markdown
+/// a <x|> b
+/// ```
 pub fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
             tokenizer.consume(code);
             (State::Fn(Box::new(tag_open)), None)
         }
-
         Code::CarriageReturnLineFeed
         | Code::VirtualSpace
         | Code::Char('\r' | '\n' | '\t' | ' ' | '/' | '>') => tag_open_between(tokenizer, code),
@@ -278,7 +426,13 @@ pub fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// To do.
+/// In an opening tag, after the tag name.
+///
+/// ```markdown
+/// a <x| y> b
+/// a <x |y="z"> b
+/// a <x |/> b
+/// ```
 pub fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -300,7 +454,13 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
-/// To do.
+/// In an attribute name.
+///
+/// ```markdown
+/// a <x :|> b
+/// a <x _|> b
+/// a <x a|> b
+/// ```
 pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char)
@@ -317,7 +477,14 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFn
     }
 }
 
-/// To do.
+/// After an attribute name, before an attribute initializer, the end of the
+/// tag, or whitespace.
+///
+/// ```markdown
+/// a <x a|> b
+/// a <x a|=b> b
+/// a <x a|="c"> b
+/// ```
 pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -335,7 +502,13 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> S
     }
 }
 
-/// To do.
+/// Before an unquoted, double quoted, or single quoted attribute value,
+/// allowing whitespace.
+///
+/// ```markdown
+/// a <x a=|b> b
+/// a <x a=|"c"> b
+/// ```
 pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),
@@ -362,7 +535,12 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) ->
     }
 }
 
-/// To do.
+/// In a double or single quoted attribute value.
+///
+/// ```markdown
+/// a <x a="|"> b
+/// a <x a='|'> b
+/// ```
 pub fn tag_open_attribute_value_quoted(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -396,7 +574,30 @@ pub fn tag_open_attribute_value_quoted(
     }
 }
 
-/// To do.
+/// In an unquoted attribute value.
+///
+/// ```markdown
+/// a <x a=b|c> b
+/// ```
+pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None),
+        Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => {
+            tag_open_between(tokenizer, code)
+        }
+        Code::Char(_) => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None)
+        }
+    }
+}
+
+/// After a double or single quoted attribute value, before whitespace or the
+/// end of the tag.
+///
+/// ```markdown
+/// a <x a="b"|> b
+/// ```
 pub fn tag_open_attribute_value_quoted_after(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -409,23 +610,34 @@ pub fn tag_open_attribute_value_quoted_after(
     }
 }
 
-/// To do.
-pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+/// In certain circumstances of a complete tag where only an `>` is allowed.
+///
+/// ```markdown
+/// a <x a="b"|> b
+/// a <!--xx--|> b
+/// a <x /|> b
+/// ```
+pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
-        Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None),
-        Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '>') => {
-            tag_open_between(tokenizer, code)
-        }
-        Code::Char(_) => {
+        Code::Char('>') => {
             tokenizer.consume(code);
-            (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None)
+            tokenizer.exit(TokenType::HtmlTextData);
+            tokenizer.exit(TokenType::HtmlText);
+            (State::Ok, None)
         }
+        _ => (State::Nok, None),
     }
 }
 
-/// To do.
-// We can’t have blank lines in content, so no need to worry about empty
-// tokens.
+/// At an allowed line ending.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a|
+/// b--> b
+/// ```
 pub fn at_line_ending(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -446,6 +658,15 @@ pub fn at_line_ending(
     }
 }
 
+/// After a line ending.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a
+/// |b--> b
+/// ```
 pub fn after_line_ending(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -457,6 +678,15 @@ pub fn after_line_ending(
 }
 
+/// After a line ending, after indent.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a
+/// |b--> b
+/// ```
 pub fn after_line_ending_prefix(
     tokenizer: &mut Tokenizer,
     code: Code,
@@ -465,16 +695,3 @@ pub fn after_line_ending_prefix(
     tokenizer.enter(TokenType::HtmlTextData);
     return_state(tokenizer, code)
 }
-
-/// To do.
-pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::Char('>') => {
-            tokenizer.consume(code);
-            tokenizer.exit(TokenType::HtmlTextData);
-            tokenizer.exit(TokenType::HtmlText);
-            (State::Ok, None)
-        }
-        _ => (State::Nok, None),
-    }
-}
diff --git a/src/content/text.rs b/src/content/text.rs
index 3db82f5..73c2d55 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -7,7 +7,7 @@
 //!
 //! * [Autolink][crate::construct::autolink]
 //! * Attention
-//! * [HTML (text)][crate::construct::html-text]
+//! * [HTML (text)][crate::construct::html_text]
 //! * Hard break escape
 //! * Code (text)
 //! * Line ending
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index ee826b8..e004f7b 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -7,6 +7,7 @@ use crate::tokenizer::{
 use crate::util::{slice_codes, Span};
 use std::collections::HashMap;
 
+/// To do.
 pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
     let mut events = events;
     let mut index = 0;
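
Editor's note, not part of the commit: the new docs for `cdata_open_inside` describe matching the expected `CDATA[` buffer one character per state-function step, failing on the first mismatch and never backtracking (the same point the html (flow) docs make about closing conditions). The following standalone Rust sketch models that idea with hypothetical types; it does not use the crate's real `Tokenizer`, `Code`, or `StateFnResult` API.

```rust
/// A minimal sketch of the buffer-matching idea behind `cdata_open_inside`:
/// after `<![`, the parser expects the exact sequence `CDATA[`, consuming one
/// character per step. The types are illustrative stand-ins only.
#[derive(Debug, PartialEq)]
enum State {
    /// Still expecting more of the buffer; `index` is how much matched so far.
    Expecting { index: usize },
    /// The whole buffer matched; the real parser would move on to `cdata`.
    Ok,
    /// A character did not match, so this is not a CDATA section.
    Nok,
}

/// Feed one character to the matcher, mirroring one state-function step.
fn step(expected: &[char], state: State, char: char) -> State {
    match state {
        State::Expecting { index } if char == expected[index] => {
            if index + 1 == expected.len() {
                // All of `CDATA[` has been seen.
                State::Ok
            } else {
                // Stay in the matching state, waiting for the next character.
                State::Expecting { index: index + 1 }
            }
        }
        // Any mismatch (or a step after failure) stays failed.
        _ => State::Nok,
    }
}

fn main() {
    let expected: Vec<char> = "CDATA[".chars().collect();

    // `<![CDATA[` matches after six steps.
    let mut state = State::Expecting { index: 0 };
    for char in "CDATA[".chars() {
        state = step(&expected, state, char);
    }
    assert_eq!(state, State::Ok);

    // `<![CDX...` fails on the mismatching `X`.
    let mut state = State::Expecting { index: 0 };
    for char in "CDX".chars() {
        state = step(&expected, state, char);
    }
    assert_eq!(state, State::Nok);
}
```

Carrying the index in the state rather than re-scanning the input keeps each step O(1), which is why, as the added html (flow) docs note, the parser never has to backtrack even when a closing condition is never satisfied.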