aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-14 12:26:23 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-14 12:26:23 +0200
commit82aca5003acba36a62b0032860af09f65c91ddae (patch)
tree97b3153efd27f838731ac57e580e23739455a9dd
parentef644f4def7d5cad3fb5307ec5e00fc7b0b025ff (diff)
downloadmarkdown-rs-82aca5003acba36a62b0032860af09f65c91ddae.tar.gz
markdown-rs-82aca5003acba36a62b0032860af09f65c91ddae.tar.bz2
markdown-rs-82aca5003acba36a62b0032860af09f65c91ddae.zip
Add docs for html (text)
-rw-r--r--readme.md14
-rw-r--r--src/construct/html_flow.rs29
-rw-r--r--src/construct/html_text.rs327
-rw-r--r--src/content/text.rs2
-rw-r--r--src/subtokenize.rs1
5 files changed, 303 insertions, 70 deletions
diff --git a/readme.md b/readme.md
index 829d132..8335644 100644
--- a/readme.md
+++ b/readme.md
@@ -68,6 +68,15 @@ cargo doc --document-private-items
### Small things
+- [ ] (3) Fix deep subtokenization
+- [ ] (1) Add docs on bnf
+- [ ] (1) Add docs to subtokenize
+- [ ] (1) Add module docs to content
+- [ ] (1) Add module docs to parser
+- [ ] (1) Reorganize to split util
+- [ ] (1) Add examples to `CompileOptions` docs
+- [ ] (1) Add overview docs on how everything works
+- [ ] (1) Move safe protocols to constants
- [ ] (1) Parse initial and final whitespace of paragraphs (in text)
- [ ] (3) Clean compiler
- [ ] (1) Use preferred line ending style in markdown
@@ -109,7 +118,7 @@ cargo doc --document-private-items
- [x] heading (atx)
- [ ] (1) heading (setext)
- [x] html (flow)
-- [ ] html (text)
+- [x] html (text)
- [ ] (3) label end
- [ ] (3) label start (image)
- [ ] (3) label start (link)
@@ -141,7 +150,7 @@ cargo doc --document-private-items
- [x] character reference
- [ ] code (text)
- [ ] hard break escape
- - [ ] html (text)
+ - [x] html (text)
- [ ] label end
- [ ] label start (image)
- [ ] label start (link)
@@ -155,6 +164,7 @@ cargo doc --document-private-items
one content type that also are another content type
- [x] (3) Encode urls
- [x] (1) Optionally remove dangerous protocols when compiling
+- [x] (1) Add docs to html (text)
### Extensions
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index a1b686b..da4517d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -24,7 +24,7 @@
//! attribute_value ::= '"' *( line - '"' ) '"' | "'" *( line - "'" ) "'" | 1*( line - space_or_tab - '"' - "'" - '/' - '<' - '=' - '>' - '`')
//!
//! whitespace ::= 1*space_or_tab
-//! whitespace_optional ::= [ space_or_tab ]
+//! whitespace_optional ::= [ whitespace ]
//! line ::= code - eol
//! eol ::= '\r' | '\r\n' | '\n'
//! space_or_tab ::= ' ' | '\t'
@@ -39,6 +39,11 @@
//! result in invalid HTML, in that it allows things that wouldn’t work or
//! wouldn’t work well in HTML, such as mismatched tags.
//!
+//! Interestingly, most of the productions above have a clear opening and
+//! closing condition (raw, comment, instruction, declaration, cdata), but the
+//! closing condition does not need to be satisfied.
+//! In this case, the parser never has to backtrack.
+//!
//! Because the **basic** and **complete** productions in the grammar form with
//! a tag, followed by more stuff, and stop at a blank line, it is possible to
//! interleave (a word for switching between languages) markdown and HTML
@@ -59,8 +64,8 @@
//! The **complete** production of HTML (flow) is not allowed to interrupt
//! content.
//! That means that a blank line is needed between a paragraph and it.
-//! However, HTML (text) has a similar production, which will typically kick-in
-//! instead.
+//! However, [HTML (text)][html_text] has a similar production, which will
+//! typically kick in instead.
//!
//! The list of tag names allowed in the **raw** production are defined in
//! [`HTML_RAW_NAMES`][html_raw_names].
@@ -81,11 +86,10 @@
//! * [*§ 4.6 HTML blocks* in `CommonMark`](https://spec.commonmark.org/0.30/#html-blocks)
//!
//! [flow]: crate::content::flow
+//! [html_text]: crate::construct::html_text
//! [html_raw_names]: crate::constant::HTML_RAW_NAMES
//! [html_block_names]: crate::constant::HTML_BLOCK_NAMES
//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
-//!
-//! <!-- To do: link html (text) -->
use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
use crate::construct::{blank_line::start as blank_line, partial_whitespace::start as whitespace};
@@ -146,6 +150,7 @@ struct Info {
/// ```markdown
/// |<x />
/// ```
+///
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::HtmlFlow);
tokenizer.enter(TokenType::HtmlFlowData);
@@ -188,8 +193,8 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
///
/// ```markdown
/// <|x />
-/// <|!doctype />
-/// <|!--xxx--/>
+/// <|!doctype>
+/// <|!--xxx-->
/// ```
fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
match code {
@@ -197,7 +202,7 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
tokenizer.consume(code);
(
State::Fn(Box::new(|tokenizer, code| {
- declaration_start(tokenizer, info, code)
+ declaration_open(tokenizer, info, code)
})),
None,
)
@@ -238,11 +243,11 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
/// After `<!`, so inside a declaration, comment, or CDATA.
///
/// ```markdown
-/// <!|doctype />
-/// <!|--xxx--/>
+/// <!|doctype>
+/// <!|--xxx-->
/// <!|[CDATA[>&<]]>
/// ```
-fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
match code {
Code::Char('-') => {
tokenizer.consume(code);
@@ -287,7 +292,7 @@ fn declaration_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> State
/// After `<!-`, inside a comment, before another `-`.
///
/// ```markdown
-/// <!-|-xxx--/>
+/// <!-|-xxx-->
/// ```
fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
match code {
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index da5a018..95fb8c3 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -1,4 +1,53 @@
-//! To do.
+//! HTML (text) is a construct that occurs in the [text][] content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! html_text ::= comment | instruction | declaration | cdata | tag_close | tag_open
+//!
+//! ; Restriction: the text is not allowed to start with `>`, `->`, or to contain `--`.
+//! comment ::= '<!--' *code '-->'
+//! instruction ::= '<?' *code '?>'
+//! declaration ::= '<!' ascii_alphabetic *code '>'
+//! ; Restriction: the text is not allowed to contain `]]`.
+//! cdata ::= '<![CDATA[' *code ']]>'
+//! tag_close ::= '</' tag_name whitespace_optional '>'
+//! tag_open ::= '<' tag_name *( whitespace attribute ) [ whitespace_optional '/' ] whitespace_optional '>'
+//!
+//! tag_name ::= ascii_alphabetic *( '-' | ascii_alphanumeric )
+//! attribute ::= attribute_name [ whitespace_optional '=' whitespace_optional attribute_value ]
+//! attribute_name ::= ( ':' | '_' | ascii_alphabetic ) *( '-' | '.' | ':' | '_' | ascii_alphanumeric )
+//! attribute_value ::= '"' *( code - '"' ) '"' | "'" *( code - "'" ) "'" | 1*( code - space_or_tab - eol - '"' - "'" - '/' - '<' - '=' - '>' - '`')
+//!
+//! ; Note: blank lines can never occur in `text`.
+//! whitespace ::= 1*space_or_tab | [ *space_or_tab eol *space_or_tab ]
+//! whitespace_optional ::= [ whitespace ]
+//! eol ::= '\r' | '\r\n' | '\n'
+//! space_or_tab ::= ' ' | '\t'
+//! ```
+//!
+//! The grammar for HTML in markdown does not resemble the rules of parsing
+//! HTML according to the [*§ 13.2 Parsing HTML documents* in the HTML
+//! spec][html-parsing].
+//! See the related flow construct [HTML (flow)][html_flow] for more info.
+//!
+//! Because the **tag open** and **tag close** productions in the grammar form
+//! with just tags instead of complete elements, it is possible to interleave
+//! (a word for switching between languages) markdown and HTML together.
+//! For example:
+//!
+//! ```markdown
+//! This is equivalent to <code>*emphasised* code</code>.
+//! ```
+//!
+//! ## References
+//!
+//! * [`html-text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/html-text.js)
+//! * [*§ 6.6 Raw HTML* in `CommonMark`](https://spec.commonmark.org/0.30/#raw-html)
+//!
+//! [text]: crate::content::text
+//! [html_flow]: crate::construct::html_flow
+//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
use crate::construct::partial_whitespace::start as whitespace;
use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
@@ -15,7 +64,13 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
(State::Fn(Box::new(open)), None)
}
-/// To do.
+/// After `<`, before a tag name or other stuff.
+///
+/// ```markdown
+/// a <|x /> b
+/// a <|!doctype> b
+/// a <|!--xxx--> b
+/// ```
pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('!') => {
@@ -38,19 +93,25 @@ pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// After `<!`, so inside a declaration, comment, or CDATA.
+///
+/// ```markdown
+/// a <!|doctype> b
+/// a <!|--xxx--> b
+/// a <!|[CDATA[>&<]]> b
+/// ```
pub fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('-') => {
tokenizer.consume(code);
- (State::Fn(Box::new(comment_open)), None)
+ (State::Fn(Box::new(comment_open_inside)), None)
}
Code::Char('[') => {
tokenizer.consume(code);
let buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
(
State::Fn(Box::new(|tokenizer, code| {
- cdata_open(tokenizer, code, buffer, 0)
+ cdata_open_inside(tokenizer, code, buffer, 0)
})),
None,
)
@@ -63,8 +124,12 @@ pub fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
}
}
-/// To do.
-pub fn comment_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+/// After `<!-`, inside a comment, before another `-`.
+///
+/// ```markdown
+/// a <!-|-xxx--> b
+/// ```
+pub fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('-') => {
tokenizer.consume(code);
@@ -74,7 +139,18 @@ pub fn comment_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// After `<!--`, inside a comment.
+///
+/// > **Note**: [html (flow)][html_flow] does allow `<!-->` or `<!--->` as
+/// > empty comments.
+/// > This is prohibited in html (text).
+/// > See: <https://github.com/commonmark/commonmark-spec/issues/712>.
+///
+/// ```markdown
+/// a <!--|xxx--> b
+/// ```
+///
+/// [html_flow]: crate::construct::html_flow
pub fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::Char('>') => (State::Nok, None),
@@ -86,7 +162,18 @@ pub fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// After `<!---`, inside a comment.
+///
+/// > **Note**: [html (flow)][html_flow] does allow `<!--->` as an empty
+/// > comment.
+/// > This is prohibited in html (text).
+/// > See: <https://github.com/commonmark/commonmark-spec/issues/712>.
+///
+/// ```markdown
+/// a <!---|xxx--> b
+/// ```
+///
+/// [html_flow]: crate::construct::html_flow
pub fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::Char('>') => (State::Nok, None),
@@ -94,7 +181,12 @@ pub fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResul
}
}
-/// To do.
+/// In a comment.
+///
+/// ```markdown
+/// a <!--|xxx--> b
+/// a <!--x|xx--> b
+/// ```
pub fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Nok, None),
@@ -112,7 +204,12 @@ pub fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In a comment, after `-`.
+///
+/// ```markdown
+/// a <!--xxx-|-> b
+/// a <!--xxx-|yyy--> b
+/// ```
pub fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('-') => {
@@ -123,8 +220,16 @@ pub fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
-pub fn cdata_open(
+/// After `<![`, inside CDATA, expecting `CDATA[`.
+///
+/// ```markdown
+/// a <![|CDATA[>&<]]> b
+/// a <![CD|ATA[>&<]]> b
+/// a <![CDA|TA[>&<]]> b
+/// a <![CDAT|A[>&<]]> b
+/// a <![CDATA|[>&<]]> b
+/// ```
+pub fn cdata_open_inside(
tokenizer: &mut Tokenizer,
code: Code,
buffer: Vec<char>,
@@ -139,7 +244,7 @@ pub fn cdata_open(
} else {
(
State::Fn(Box::new(move |tokenizer, code| {
- cdata_open(tokenizer, code, buffer, index + 1)
+ cdata_open_inside(tokenizer, code, buffer, index + 1)
})),
None,
)
@@ -149,7 +254,11 @@ pub fn cdata_open(
}
}
-/// To do.
+/// In CDATA.
+///
+/// ```markdown
+/// a <![CDATA[|>&<]]> b
+/// ```
pub fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Nok, None),
@@ -167,7 +276,11 @@ pub fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In CDATA, after `]`.
+///
+/// ```markdown
+/// a <![CDATA[>&<]|]> b
+/// ```
pub fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char(']') => {
@@ -178,7 +291,11 @@ pub fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In CDATA, after `]]`.
+///
+/// ```markdown
+/// a <![CDATA[>&<]]|> b
+/// ```
pub fn cdata_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('>') => end(tokenizer, code),
@@ -187,7 +304,11 @@ pub fn cdata_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In a declaration.
+///
+/// ```markdown
+/// a <!a|b> b
+/// ```
pub fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::Char('>') => end(tokenizer, code),
@@ -201,7 +322,12 @@ pub fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In an instruction.
+///
+/// ```markdown
+/// a <?|ab?> b
+/// a <?a|b?> b
+/// ```
pub fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Nok, None),
@@ -219,7 +345,12 @@ pub fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In an instruction, after `?`.
+///
+/// ```markdown
+/// a <?aa?|> b
+/// a <?aa?|bb?> b
+/// ```
pub fn instruction_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char('>') => end(tokenizer, code),
@@ -227,7 +358,11 @@ pub fn instruction_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
}
}
-/// To do.
+/// After `</`, in a closing tag, before a tag name.
+///
+/// ```markdown
+/// a </|x> b
+/// ```
pub fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char(char) if char.is_ascii_alphabetic() => {
@@ -238,7 +373,12 @@ pub fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// After `</x`, in a tag name.
+///
+/// ```markdown
+/// a </x|> b
+/// a </x|y> b
+/// ```
pub fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
@@ -249,7 +389,12 @@ pub fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In a closing tag, after the tag name.
+///
+/// ```markdown
+/// a </x| > b
+/// a </xy |> b
+/// ```
pub fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -263,14 +408,17 @@ pub fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
}
}
-/// To do.
+/// After `<x`, in an opening tag name.
+///
+/// ```markdown
+/// a <x|> b
+/// ```
pub fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
tokenizer.consume(code);
(State::Fn(Box::new(tag_open)), None)
}
-
Code::CarriageReturnLineFeed
| Code::VirtualSpace
| Code::Char('\r' | '\n' | '\t' | ' ' | '/' | '>') => tag_open_between(tokenizer, code),
@@ -278,7 +426,13 @@ pub fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// To do.
+/// In an opening tag, after the tag name.
+///
+/// ```markdown
+/// a <x| y> b
+/// a <x |y="z"> b
+/// a <x |/> b
+/// ```
pub fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -300,7 +454,13 @@ pub fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
}
}
-/// To do.
+/// In an attribute name.
+///
+/// ```markdown
+/// a <x :|> b
+/// a <x _|> b
+/// a <x a|> b
+/// ```
pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::Char(char)
@@ -317,7 +477,14 @@ pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFn
}
}
-/// To do.
+/// After an attribute name, before an attribute initializer, the end of the
+/// tag, or whitespace.
+///
+/// ```markdown
+/// a <x a|> b
+/// a <x a|=b> b
+/// a <x a|="c"> b
+/// ```
pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
@@ -335,7 +502,13 @@ pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> S
}
}
-/// To do.
+/// Before an unquoted, double quoted, or single quoted attribute value,
+/// allowing whitespace.
+///
+/// ```markdown
+/// a <x a=|b> b
+/// a <x a=|"c"> b
+/// ```
pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),
@@ -362,7 +535,12 @@ pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) ->
}
}
-/// To do.
+/// In a double or single quoted attribute value.
+///
+/// ```markdown
+/// a <x a="|"> b
+/// a <x a='|'> b
+/// ```
pub fn tag_open_attribute_value_quoted(
tokenizer: &mut Tokenizer,
code: Code,
@@ -396,7 +574,30 @@ pub fn tag_open_attribute_value_quoted(
}
}
-/// To do.
+/// In an unquoted attribute value.
+///
+/// ```markdown
+/// a <x a=b|c> b
+/// ```
+pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None),
+ Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => {
+ tag_open_between(tokenizer, code)
+ }
+ Code::Char(_) => {
+ tokenizer.consume(code);
+ (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None)
+ }
+ }
+}
+
+/// After a double or single quoted attribute value, before whitespace or the
+/// end of the tag.
+///
+/// ```markdown
+/// a <x a="b"|> b
+/// ```
pub fn tag_open_attribute_value_quoted_after(
tokenizer: &mut Tokenizer,
code: Code,
@@ -409,23 +610,34 @@ pub fn tag_open_attribute_value_quoted_after(
}
}
-/// To do.
-pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+/// In certain circumstances of a complete tag where only an `>` is allowed.
+///
+/// ```markdown
+/// a <x a="b"|> b
+/// a <!--xx--|> b
+/// a <x /|> b
+/// ```
+pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
- Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None),
- Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '>') => {
- tag_open_between(tokenizer, code)
- }
- Code::Char(_) => {
+ Code::Char('>') => {
tokenizer.consume(code);
- (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None)
+ tokenizer.exit(TokenType::HtmlTextData);
+ tokenizer.exit(TokenType::HtmlText);
+ (State::Ok, None)
}
+ _ => (State::Nok, None),
}
}
-/// To do.
-// We can’t have blank lines in content, so no need to worry about empty
-// tokens.
+/// At an allowed line ending.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a|
+/// b--> b
+/// ```
pub fn at_line_ending(
tokenizer: &mut Tokenizer,
code: Code,
@@ -446,6 +658,15 @@ pub fn at_line_ending(
}
}
+/// After a line ending.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a
+/// |b--> b
+/// ```
pub fn after_line_ending(
tokenizer: &mut Tokenizer,
code: Code,
@@ -457,6 +678,15 @@ pub fn after_line_ending(
)(tokenizer, code)
}
+/// After a line ending, after indent.
+///
+/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > empty tokens.
+///
+/// ```markdown
+/// a <!--a
+/// |b--> b
+/// ```
pub fn after_line_ending_prefix(
tokenizer: &mut Tokenizer,
code: Code,
@@ -465,16 +695,3 @@ pub fn after_line_ending_prefix(
tokenizer.enter(TokenType::HtmlTextData);
return_state(tokenizer, code)
}
-
-/// To do.
-pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::Char('>') => {
- tokenizer.consume(code);
- tokenizer.exit(TokenType::HtmlTextData);
- tokenizer.exit(TokenType::HtmlText);
- (State::Ok, None)
- }
- _ => (State::Nok, None),
- }
-}
diff --git a/src/content/text.rs b/src/content/text.rs
index 3db82f5..73c2d55 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -7,7 +7,7 @@
//!
//! * [Autolink][crate::construct::autolink]
//! * Attention
-//! * [HTML (text)][crate::construct::html-text]
+//! * [HTML (text)][crate::construct::html_text]
//! * Hard break escape
//! * Code (text)
//! * Line ending
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index ee826b8..e004f7b 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -7,6 +7,7 @@ use crate::tokenizer::{
use crate::util::{slice_codes, Span};
use std::collections::HashMap;
+/// To do.
pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
let mut events = events;
let mut index = 0;