author | Titus Wormer <tituswormer@gmail.com> | 2022-06-17 17:45:50 +0200
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-17 17:45:50 +0200
commit | 24fec22e912c1aa2569e95683ca95edf1aafce8b (patch)
tree | d4b680ce042b7e1a6884f59f01a29087704f3378
parent | 60ea2fd3a09f10fa28bf48575736b47afebf3221 (diff)
Add support for definitions
* Add definitions
* Add partials for label, destination, title
* Add `go`, to attempt something and, on `ok`, continue with something else
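The new `go` helper is easiest to see in isolation: attempt one state function and, only when it ends in `ok`, continue with a next one. Below is a minimal, self-contained sketch of that idea using simplified stand-in types; the real `go` in `src/tokenizer.rs` works on `Code` units, boxes continuation functions, and captures/reverts tokenizer state instead.

```rust
#[derive(Debug, PartialEq)]
enum State {
    Ok,
    Nok,
}

// Simplified stand-in: a state function consumes a prefix of the input
// and reports success plus how many bytes it used.
type StateFn = Box<dyn Fn(&str) -> (State, usize)>;

/// Attempt `first`; only on `Ok`, continue with `then` on the rest.
/// On `Nok` the whole composition is `Nok` (the caller reverts).
fn go(first: StateFn, then: StateFn) -> StateFn {
    Box::new(move |input| match first(input) {
        (State::Ok, used) => {
            let (state, more) = then(&input[used..]);
            (state, used + more)
        }
        (State::Nok, _) => (State::Nok, 0),
    })
}

/// A tiny state function: require exactly one expected character.
fn char_fn(expected: char) -> StateFn {
    Box::new(move |input| {
        if input.starts_with(expected) {
            (State::Ok, expected.len_utf8())
        } else {
            (State::Nok, 0)
        }
    })
}

fn main() {
    // Mirrors how `definition::start` chains `label` into `label_after`.
    let pair = go(char_fn('['), char_fn(']'));
    assert_eq!(pair("[]").0, State::Ok);
    assert_eq!(pair("[x").0, State::Nok);
}
```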
readme.md | 17
src/compiler.rs | 29
src/constant.rs | 3
src/construct/definition.rs | 318
src/construct/mod.rs | 8
src/construct/partial_destination.rs | 154
src/construct/partial_label.rs | 100
src/construct/partial_title.rs | 136
src/content/content.rs | 49
src/content/flow.rs | 24
src/content/string.rs | 2
src/parser.rs | 1
src/tokenizer.rs | 82
tests/character_escape.rs | 2
tests/character_reference.rs | 2
tests/definition.rs | 446
16 files changed, 1294 insertions, 79 deletions
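For a sense of the behavior this commit pins down, the two assertions below are taken directly from the new `tests/definition.rs` further down: a definition on its own compiles to nothing, and a blank line before the destination breaks the construct, so everything stays paragraphs (link references themselves are still a to-do at this point).

```rust
extern crate micromark;
use micromark::micromark;

fn main() {
    // A valid definition contributes no output on its own.
    assert_eq!(micromark("[foo]: /url"), "");

    // A blank line before the destination is not part of a definition,
    // so the input compiles as three paragraphs instead.
    assert_eq!(
        micromark("[foo]:\n\n/url\n\n[foo]"),
        "<p>[foo]:</p>\n<p>/url</p>\n<p>[foo]</p>"
    );
}
```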
@@ -49,8 +49,8 @@ cargo doc --document-private-items - [ ] (8) Can content (and to a lesser extent string and text) operate more performantly than checking whether other flow constructs start a line, before exiting and actually attempting flow constructs? -- [ ] (5) Figure out definitions and sharing those identifiers, and references - before definitions +- [ ] (5) Figure out sharing definition and identifiers, and references before + definitions - [ ] (3) Interrupting: sometimes flow can or cannot start depending on the previous construct (typically paragraph) - [ ] (5) Containers: this will be rather messy, and depends a lot on how @@ -66,6 +66,11 @@ cargo doc --document-private-items ### Small things +- [ ] (1) Add docs to partials +- [ ] (1) Remove all `pub fn`s from constructs, except for start +- [ ] (1) Remove `content` content type, as it is no longer needed +- [ ] (1) Connect `ChunkString` in label, destination, title +- [ ] (1) Add support for line endings in `string` - [ ] (1) Add docs to subtokenize - [ ] (1) Add module docs to content - [ ] (1) Add module docs to parser @@ -108,7 +113,7 @@ cargo doc --document-private-items - [x] code (indented) - [x] (1) code (text) - [ ] (3) content -- [ ] (3) definition +- [x] definition - [x] hard break (escape) - [x] hard break (trailing) - [x] heading (atx) @@ -127,17 +132,17 @@ cargo doc --document-private-items - [ ] (8) container - [ ] block quote - [ ] list -- [x] (1) flow +- [x] flow - [x] blank line - [x] code (fenced) - [x] code (indented) - [x] content + - [x] definition - [x] heading (atx) - [x] heading (setext) - [x] html (flow) - [x] thematic break -- [ ] (3) content - - [ ] definition +- [x] content - [x] paragraph - [ ] (5) text - [ ] attention (strong, emphasis) (text) diff --git a/src/compiler.rs b/src/compiler.rs index 9941fa5..be5d0fe 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -128,6 +128,18 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CodeTextSequence | TokenType::Content | TokenType::Data + | TokenType::DefinitionLabel + | TokenType::DefinitionLabelMarker + | TokenType::DefinitionLabelData + | TokenType::DefinitionMarker + | TokenType::DefinitionDestination + | TokenType::DefinitionDestinationLiteral + | TokenType::DefinitionDestinationLiteralMarker + | TokenType::DefinitionDestinationRaw + | TokenType::DefinitionDestinationString + | TokenType::DefinitionTitle + | TokenType::DefinitionTitleMarker + | TokenType::DefinitionTitleString | TokenType::HardBreakEscape | TokenType::HardBreakEscapeMarker | TokenType::HardBreakTrailing @@ -148,6 +160,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St } TokenType::CodeFencedFenceInfo | TokenType::CodeFencedFenceMeta + | TokenType::Definition | TokenType::HeadingAtxText | TokenType::HeadingSetextText => { buffer(buffers); @@ -201,6 +214,18 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CodeIndentedPrefixWhitespace | TokenType::CodeTextSequence | TokenType::Content + | TokenType::DefinitionLabel + | TokenType::DefinitionLabelMarker + | TokenType::DefinitionLabelData + | TokenType::DefinitionMarker + | TokenType::DefinitionDestination + | TokenType::DefinitionDestinationLiteral + | TokenType::DefinitionDestinationLiteralMarker + | TokenType::DefinitionDestinationRaw + | TokenType::DefinitionDestinationString + | TokenType::DefinitionTitle + | TokenType::DefinitionTitleMarker + | TokenType::DefinitionTitleString | 
TokenType::HardBreakEscapeMarker | TokenType::HardBreakTrailingSpace | TokenType::HeadingSetext @@ -353,6 +378,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St TokenType::CodeTextLineEnding => { buf_tail_mut(buffers).push(" ".to_string()); } + TokenType::Definition => { + resume(buffers); + slurp_one_line_ending = true; + } TokenType::HardBreakEscape | TokenType::HardBreakTrailing => { buf_tail_mut(buffers).push("<br />".to_string()); } diff --git a/src/constant.rs b/src/constant.rs index ff9e62e..1f833c2 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -72,6 +72,9 @@ pub const HEADING_ATX_OPENING_FENCE_SIZE_MAX: usize = 6; /// [code_fenced]: crate::construct::code_fenced pub const CODE_FENCED_SEQUENCE_SIZE_MIN: usize = 3; +/// To safeguard performance, labels are capped at a large number: `999`. +pub const LINK_REFERENCE_SIZE_MAX: usize = 999; + /// List of HTML tag names that form the **raw** production of /// [HTML (flow)][html_flow]. /// diff --git a/src/construct/definition.rs b/src/construct/definition.rs new file mode 100644 index 0000000..e540b44 --- /dev/null +++ b/src/construct/definition.rs @@ -0,0 +1,318 @@ +//! Definition is a construct that occurs in the [flow] content type. +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! definition ::= label ':' whitespace destination [ whitespace title ] [ space_or_tab ] +//! +//! ; Restriction: maximum `999` codes allowed between brackets. +//! ; Restriction: no blank lines. +//! ; Restriction: at least 1 non-space and non-eol code must exist. +//! label ::= '[' *( label_text | label_escape ) ']' +//! label_text ::= code - '[' - '\\' - ']' +//! label_escape ::= '\\' [ '[' | '\\' | ']' ] +//! +//! destination ::= destination_enclosed | destination_raw +//! destination_enclosed ::= '<' *( destination_enclosed_text | destination_enclosed_escape ) '>' +//! destination_enclosed_text ::= code - '<' - '\\' - eol +//! destination_enclosed_escape ::= '\\' [ '<' | '\\' | '>' ] +//! destination_raw ::= 1*( destination_raw_text | destination_raw_escape ) +//! ; Restriction: unbalanced `)` characters are not allowed. +//! destination_raw_text ::= code - '\\' - ascii_control - space_or_tab - eol +//! destination_raw_escape ::= '\\' [ '(' | ')' | '\\' ] +//! +//! ; Restriction: no blank lines. +//! ; Restriction: markers must match (in case of `(` with `)`). +//! title ::= marker [ *( code - '\\' | '\\' [ marker ] ) ] marker +//! marker ::= '"' | '\'' | '(' +//! +//! whitespace ::= eol *whitespace | 1*space_or_tab [ eol *whitespace ] +//! space_or_tab ::= ' ' | '\t' +//! ``` +//! +//! Definitions in markdown do not, on their own, relate to anything in HTML. +//! When connected with a link (reference), they together relate to the `<a>` +//! element in HTML. +//! The definition forms its `href`, and optionally `title`, attributes. +//! See [*§ 4.5.1 The `a` element*][html] in the HTML spec for more info. +//! +//! The `label`, `destination`, and `title` parts are interpreted as the +//! [string][] content type. +//! That means that character escapes and character references are allowed. +//! +//! ## References +//! +//! * [`definition.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/definition.js) +//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions) +//! +//! [flow]: crate::content::flow +//! [string]: crate::content::string +//!
[html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! +//! <!-- To do: link link (reference) --> +//! +//! <!-- To do: describe how references and definitions match --> + +use crate::construct::{ + partial_destination::start as destination, partial_label::start as label, + partial_title::start as title, partial_whitespace::start as whitespace, +}; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// At the start of a definition. +/// +/// ```markdown +/// |[a]: b "c" +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('[') => { + tokenizer.enter(TokenType::Definition); + tokenizer.go(label, label_after)(tokenizer, code) + } + _ => (State::Nok, None), + } +} + +/// After the label of a definition. +/// +/// ```markdown +/// [a]|: b "c" +/// ``` +pub fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // To do: get the identifier: + // identifier = normalizeIdentifier( + // self.sliceSerialize(self.events[self.events.length - 1][1]).slice(1, -1) + // ) + + match code { + Code::Char(':') => { + tokenizer.enter(TokenType::DefinitionMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionMarker); + (State::Fn(Box::new(marker_after)), None) + } + _ => (State::Nok, None), + } +} + +/// After the marker of a definition. +/// +/// ```markdown +/// [a]:| b "c" +/// +/// [a]:| ␊ +/// b "c" +/// ``` +pub fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt( + |t, c| whitespace(t, c, TokenType::Whitespace), + |_ok| Box::new(marker_after_optional_whitespace), + )(tokenizer, code) +} + +/// After the marker, after whitespace. +/// +/// ```markdown +/// [a]: |b "c" +/// +/// [a]: |␊ +/// b "c" +/// ``` +pub fn marker_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter(TokenType::LineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::LineEnding); + (State::Fn(Box::new(marker_after_optional_line_ending)), None) + } + _ => destination_before(tokenizer, code), + } +} + +/// After the marker, after a line ending. +/// +/// ```markdown +/// [a]: +/// | b "c" +/// ``` +pub fn marker_after_optional_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt( + |t, c| whitespace(t, c, TokenType::Whitespace), + |_ok| Box::new(destination_before), + )(tokenizer, code) +} + +/// Before a destination. +/// +/// ```markdown +/// [a]: |b "c" +/// +/// [a]: +/// |b "c" +/// ``` +pub fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let event = tokenizer.events.last().unwrap(); + // Blank line not ok. + let char_nok = matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ); + + if !char_nok + && (event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace) + { + tokenizer.go(destination, destination_after)(tokenizer, code) + } else { + (State::Nok, None) + } +} + +/// After a destination. +/// +/// ```markdown +/// [a]: b| "c" +/// +/// [a]: b| ␊ +/// "c" +/// ``` +pub fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt(title_before, |_ok| Box::new(after))(tokenizer, code) +} + +/// After a definition. 
+/// +/// ```markdown +/// [a]: b| +/// [a]: b "c"| +/// ``` +pub fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt( + |t, c| whitespace(t, c, TokenType::Whitespace), + |_ok| Box::new(after_whitespace), + )(tokenizer, code) +} + +/// After a definition, after optional whitespace. +/// +/// ```markdown +/// [a]: b | +/// [a]: b "c"| +/// ``` +pub fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.exit(TokenType::Definition); + (State::Ok, Some(vec![code])) + } + _ => (State::Nok, None), + } +} + +/// After a destination, presumably before a title. +/// +/// ```markdown +/// [a]: b| "c" +/// +/// [a]: b| ␊ +/// "c" +/// ``` +pub fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt( + |t, c| whitespace(t, c, TokenType::Whitespace), + |_ok| Box::new(title_before_after_optional_whitespace), + )(tokenizer, code) +} + +/// Before a title, after optional whitespace. +/// +/// ```markdown +/// [a]: b |"c" +/// +/// [a]: b |␊ +/// "c" +/// ``` +pub fn title_before_after_optional_whitespace( + tokenizer: &mut Tokenizer, + code: Code, +) -> StateFnResult { + match code { + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter(TokenType::LineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::LineEnding); + ( + State::Fn(Box::new(title_before_after_optional_line_ending)), + None, + ) + } + _ => title_before_marker(tokenizer, code), + } +} + +/// Before a title, after a line ending. +/// +/// ```markdown +/// [a]: b␊ +/// | "c" +/// ``` +pub fn title_before_after_optional_line_ending( + tokenizer: &mut Tokenizer, + code: Code, +) -> StateFnResult { + tokenizer.attempt( + |t, c| whitespace(t, c, TokenType::Whitespace), + |_ok| Box::new(title_before_marker), + )(tokenizer, code) +} + +/// Before a title, after a line ending. +/// +/// ```markdown +/// [a]: b␊ +/// | "c" +/// ``` +pub fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let event = tokenizer.events.last().unwrap(); + + if event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace { + tokenizer.go(title, title_after)(tokenizer, code) + } else { + (State::Nok, None) + } +} + +/// After a title. +/// +/// ```markdown +/// [a]: b "c"| +/// +/// [a]: b␊ +/// "c"| +/// ``` +pub fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt( + |t, c| whitespace(t, c, TokenType::Whitespace), + |_ok| Box::new(title_after_after_optional_whitespace), + )(tokenizer, code) +} + +/// After a title, after optional whitespace. +/// +/// ```markdown +/// [a]: b "c"| +/// +/// [a]: b "c" | +/// ``` +pub fn title_after_after_optional_whitespace( + _tokenizer: &mut Tokenizer, + code: Code, +) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + (State::Ok, Some(vec![code])) + } + _ => (State::Nok, None), + } +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index ca1149f..fb79f68 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -16,7 +16,7 @@ //! The following constructs are found in markdown: //! //! * attention (strong, emphasis) (text) -//! * [autolink][autolink] +//! * [autolink][] //! * [blank line][blank_line] //! * block quote //! * [character escape][character_escape] @@ -25,7 +25,7 @@ //! * [code (indented)][code_indented] //! 
* [code (text)][code_text] //! * content -//! * definition +//! * [definition][] //! * [hard break (escape)][hard_break_escape] //! * [hard break (trailing)][hard_break_trailing] //! * [heading (atx)][heading_atx] @@ -61,11 +61,15 @@ pub mod character_reference; pub mod code_fenced; pub mod code_indented; pub mod code_text; +pub mod definition; pub mod hard_break_escape; pub mod hard_break_trailing; pub mod heading_atx; pub mod heading_setext; pub mod html_flow; pub mod html_text; +pub mod partial_destination; +pub mod partial_label; +pub mod partial_title; pub mod partial_whitespace; pub mod thematic_break; diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs new file mode 100644 index 0000000..8cf5b77 --- /dev/null +++ b/src/construct/partial_destination.rs @@ -0,0 +1,154 @@ +// To do: pass token types in. + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('<') => { + tokenizer.enter(TokenType::DefinitionDestination); + tokenizer.enter(TokenType::DefinitionDestinationLiteral); + tokenizer.enter(TokenType::DefinitionDestinationLiteralMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionDestinationLiteralMarker); + (State::Fn(Box::new(enclosed_before)), None) + } + Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(')') => { + (State::Nok, None) + } + Code::Char(char) if char.is_ascii_control() => (State::Nok, None), + Code::Char(_) => { + tokenizer.enter(TokenType::DefinitionDestination); + tokenizer.enter(TokenType::DefinitionDestinationRaw); + tokenizer.enter(TokenType::DefinitionDestinationString); + // To do: link. + tokenizer.enter(TokenType::ChunkString); + raw(tokenizer, code, 0) + } + } +} + +/// To do. +fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if let Code::Char('>') = code { + tokenizer.enter(TokenType::DefinitionDestinationLiteralMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionDestinationLiteralMarker); + tokenizer.exit(TokenType::DefinitionDestinationLiteral); + tokenizer.exit(TokenType::DefinitionDestination); + (State::Ok, None) + } else { + tokenizer.enter(TokenType::DefinitionDestinationString); + // To do: link. + tokenizer.enter(TokenType::ChunkString); + enclosed(tokenizer, code) + } +} + +/// To do. +fn enclosed(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('>') => { + tokenizer.exit(TokenType::ChunkString); + tokenizer.exit(TokenType::DefinitionDestinationString); + enclosed_before(tokenizer, code) + } + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '<') => { + (State::Nok, None) + } + Code::Char('\\') => { + tokenizer.consume(code); + (State::Fn(Box::new(enclosed_escape)), None) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(enclosed)), None) + } + } +} + +/// To do. +fn enclosed_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('<' | '>' | '\\') => { + tokenizer.consume(code); + (State::Fn(Box::new(enclosed)), None) + } + _ => enclosed(tokenizer, code), + } +} + +/// To do. +// To do: these arms can be improved? +fn raw(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult { + // To do: configurable. 
+ let limit = usize::MAX; + + match code { + Code::Char('(') if balance >= limit => (State::Nok, None), + Code::Char('(') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| raw(t, c, balance + 1))), + None, + ) + } + Code::Char(')') if balance == 0 => { + tokenizer.exit(TokenType::ChunkString); + tokenizer.exit(TokenType::DefinitionDestinationString); + tokenizer.exit(TokenType::DefinitionDestinationRaw); + tokenizer.exit(TokenType::DefinitionDestination); + (State::Ok, Some(vec![code])) + } + Code::Char(')') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| raw(t, c, balance - 1))), + None, + ) + } + Code::None + | Code::CarriageReturnLineFeed + | Code::VirtualSpace + | Code::Char('\t' | '\r' | '\n' | ' ') + if balance > 0 => + { + (State::Nok, None) + } + Code::None + | Code::CarriageReturnLineFeed + | Code::VirtualSpace + | Code::Char('\t' | '\r' | '\n' | ' ') => { + tokenizer.exit(TokenType::ChunkString); + tokenizer.exit(TokenType::DefinitionDestinationString); + tokenizer.exit(TokenType::DefinitionDestinationRaw); + tokenizer.exit(TokenType::DefinitionDestination); + (State::Ok, Some(vec![code])) + } + Code::Char(char) if char.is_ascii_control() => (State::Nok, None), + Code::Char('\\') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| raw_escape(t, c, balance))), + None, + ) + } + Code::Char(_) => { + tokenizer.consume(code); + (State::Fn(Box::new(move |t, c| raw(t, c, balance))), None) + } + } +} + +/// To do. +fn raw_escape(tokenizer: &mut Tokenizer, code: Code, balance: usize) -> StateFnResult { + match code { + Code::Char('(' | ')' | '\\') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| raw(t, c, balance))), + None, + ) + } + _ => raw(tokenizer, code, balance), + } +} diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs new file mode 100644 index 0000000..c772c56 --- /dev/null +++ b/src/construct/partial_label.rs @@ -0,0 +1,100 @@ +// To do: pass token types in. + +use crate::constant::LINK_REFERENCE_SIZE_MAX; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// To do. +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('[') => { + tokenizer.enter(TokenType::DefinitionLabel); + tokenizer.enter(TokenType::DefinitionLabelMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionLabelMarker); + tokenizer.enter(TokenType::DefinitionLabelData); + (State::Fn(Box::new(|t, c| at_break(t, c, false, 0))), None) + } + // To do: allow? + _ => unreachable!("expected `[` at start of label"), + } +} + +/// To do. +fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { + match code { + Code::None | Code::Char('[') => (State::Nok, None), + Code::Char(']') if !data => (State::Nok, None), + _ if size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None), + Code::Char(']') => { + tokenizer.exit(TokenType::DefinitionLabelData); + tokenizer.enter(TokenType::DefinitionLabelMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionLabelMarker); + tokenizer.exit(TokenType::DefinitionLabel); + (State::Ok, None) + } + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter(TokenType::LineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::LineEnding); + ( + State::Fn(Box::new(move |t, c| at_break(t, c, data, size))), + None, + ) + } + _ => { + tokenizer.enter(TokenType::ChunkString); + // To do: link.
+ label(tokenizer, code, data, size) + } + } +} + +/// To do. +fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '[' | ']') => { + tokenizer.exit(TokenType::ChunkString); + at_break(tokenizer, code, data, size) + } + _ if size > LINK_REFERENCE_SIZE_MAX => { + tokenizer.exit(TokenType::ChunkString); + at_break(tokenizer, code, data, size) + } + Code::VirtualSpace | Code::Char('\t' | ' ') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| label(t, c, data, size + 1))), + None, + ) + } + Code::Char('\\') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| escape(t, c, true, size + 1))), + None, + ) + } + Code::Char(_) => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))), + None, + ) + } + } +} + +/// To do. +fn escape(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { + match code { + Code::Char('[' | '\\' | ']') => { + tokenizer.consume(code); + ( + State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))), + None, + ) + } + _ => label(tokenizer, code, data, size), + } +} diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs new file mode 100644 index 0000000..4c7b527 --- /dev/null +++ b/src/construct/partial_title.rs @@ -0,0 +1,136 @@ +// To do: pass token types in. + +use crate::construct::partial_whitespace::start as whitespace; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Type of marker around a title. +#[derive(Debug, Clone, PartialEq)] +enum TitleKind { + /// In a parenthesised (`(` and `)`) title. + Paren, + /// In a double quoted (`"`) title. + Double, + /// In a single quoted (`'`) title. + Single, +} + +fn kind_to_marker(kind: &TitleKind) -> char { + match kind { + TitleKind::Double => '"', + TitleKind::Single => '\'', + TitleKind::Paren => ')', + } +} + +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let kind = match code { + Code::Char('"') => Some(TitleKind::Double), + Code::Char('\'') => Some(TitleKind::Single), + Code::Char('(') => Some(TitleKind::Paren), + _ => None, + }; + + if let Some(kind) = kind { + tokenizer.enter(TokenType::DefinitionTitle); + tokenizer.enter(TokenType::DefinitionTitleMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionTitleMarker); + (State::Fn(Box::new(|t, c| at_first_break(t, c, kind))), None) + } else { + (State::Nok, None) + } +} + +/// To do. +fn at_first_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult { + match code { + Code::Char(char) if char == kind_to_marker(&kind) => { + tokenizer.enter(TokenType::DefinitionTitleMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionTitleMarker); + tokenizer.exit(TokenType::DefinitionTitle); + (State::Ok, None) + } + _ => { + tokenizer.enter(TokenType::DefinitionTitleString); + at_break(tokenizer, code, kind) + } + } +} + +/// To do.
+fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult { + match code { + Code::Char(char) if char == kind_to_marker(&kind) => { + tokenizer.exit(TokenType::DefinitionTitleString); + at_first_break(tokenizer, code, kind) + } + Code::None => (State::Nok, None), + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter(TokenType::LineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::LineEnding); + ( + State::Fn(Box::new(|t, c| at_break_line_start(t, c, kind))), + None, + ) + } + _ => { + // To do: link. + tokenizer.enter(TokenType::ChunkString); + title(tokenizer, code, kind) + } + } +} + +fn at_break_line_start(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult { + tokenizer.attempt( + |t, c| whitespace(t, c, TokenType::Whitespace), + |_ok| Box::new(|t, c| at_break_line_begin(t, c, kind)), + )(tokenizer, code) +} + +fn at_break_line_begin(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult { + match code { + // Blank line not allowed. + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None), + _ => at_break(tokenizer, code, kind), + } +} + +/// To do. +fn title(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult { + match code { + Code::Char(char) if char == kind_to_marker(&kind) => { + tokenizer.exit(TokenType::ChunkString); + at_break(tokenizer, code, kind) + } + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.exit(TokenType::ChunkString); + at_break(tokenizer, code, kind) + } + Code::Char('\\') => { + tokenizer.consume(code); + (State::Fn(Box::new(|t, c| escape(t, c, kind))), None) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(|t, c| title(t, c, kind))), None) + } + } +} + +/// To do. +fn escape(tokenizer: &mut Tokenizer, code: Code, kind: TitleKind) -> StateFnResult { + match code { + Code::Char(char) if char == kind_to_marker(&kind) => { + tokenizer.consume(code); + (State::Fn(Box::new(move |t, c| title(t, c, kind))), None) + } + Code::Char('\\') => { + tokenizer.consume(code); + (State::Fn(Box::new(move |t, c| title(t, c, kind))), None) + } + _ => title(tokenizer, code, kind), + } +} diff --git a/src/content/content.rs b/src/content/content.rs index 4ca69ee..86bc290 100644 --- a/src/content/content.rs +++ b/src/content/content.rs @@ -16,10 +16,9 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; -/// Before content. +/// Before a paragraph. /// /// ```markdown -/// |[x]: y /// |asd /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { @@ -27,48 +26,10 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { unreachable!("expected non-eol/eof"); } - _ => after_definitions(tokenizer, code) - // To do: definition. - // _ => tokenizer.attempt(definition, |ok| { - // Box::new(if ok { - // a - // } else { - // b - // }) - // })(tokenizer, code), - } -} - -/// Before a paragraph. -/// -/// ```markdown -/// |asd -/// ``` -fn after_definitions(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::None => (State::Ok, None), - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - unreachable!("to do: handle eol after definition"); - } - _ => paragraph_initial(tokenizer, code), - } -} - -/// Before a paragraph. 
-/// -/// ```markdown -/// |asd -/// ``` -fn paragraph_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::None => (State::Ok, None), - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - unreachable!("to do: handle eol after definition"); - } _ => { tokenizer.enter(TokenType::Paragraph); tokenizer.enter(TokenType::ChunkText); - data(tokenizer, code, tokenizer.events.len() - 1) + inside(tokenizer, code, tokenizer.events.len() - 1) } } } @@ -79,7 +40,7 @@ fn paragraph_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// |\& /// |qwe /// ``` -fn data(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult { +fn inside(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult { match code { Code::None => { tokenizer.exit(TokenType::ChunkText); @@ -94,14 +55,14 @@ fn data(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFn tokenizer.events[previous_index].next = Some(next_index); tokenizer.events[next_index].previous = Some(previous_index); ( - State::Fn(Box::new(move |t, c| data(t, c, next_index))), + State::Fn(Box::new(move |t, c| inside(t, c, next_index))), None, ) } _ => { tokenizer.consume(code); ( - State::Fn(Box::new(move |t, c| data(t, c, previous_index))), + State::Fn(Box::new(move |t, c| inside(t, c, previous_index))), None, ) } diff --git a/src/content/flow.rs b/src/content/flow.rs index d7509d7..3fab523 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -13,6 +13,7 @@ //! * [Blank line][crate::construct::blank_line] //! * [Code (fenced)][crate::construct::code_fenced] //! * [Code (indented)][crate::construct::code_indented] +//! * [Definition][crate::construct::definition] //! * [Heading (atx)][crate::construct::heading_atx] //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] @@ -23,9 +24,10 @@ use crate::constant::TAB_SIZE; use crate::construct::{ blank_line::start as blank_line, code_fenced::start as code_fenced, - code_indented::start as code_indented, heading_atx::start as heading_atx, - heading_setext::start as heading_setext, html_flow::start as html_flow, - partial_whitespace::start as whitespace, thematic_break::start as thematic_break, + code_indented::start as code_indented, definition::start as definition, + heading_atx::start as heading_atx, heading_setext::start as heading_setext, + html_flow::start as html_flow, partial_whitespace::start as whitespace, + thematic_break::start as thematic_break, }; use crate::subtokenize::subtokenize; use crate::tokenizer::{Code, Event, Point, State, StateFnResult, TokenType, Tokenizer}; @@ -96,6 +98,7 @@ fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Ok, None), + // To do: should all flow just start before the prefix? 
_ => tokenizer.attempt_3(code_indented, code_fenced, html_flow, |ok| { Box::new(if ok { after } else { before }) })(tokenizer, code), @@ -145,9 +148,13 @@ pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// |*** /// ``` pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.attempt_3(heading_atx, thematic_break, heading_setext, |ok| { - Box::new(if ok { after } else { content_before }) - })(tokenizer, code) + tokenizer.attempt_4( + heading_atx, + thematic_break, + definition, + heading_setext, + |ok| Box::new(if ok { after } else { content_before }), + )(tokenizer, code) } /// Before content. @@ -156,9 +163,7 @@ pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResu /// |qwe /// ``` /// -// To do: -// - Multiline -// - One or more definitions. +// To do: we don’t need content anymore in `micromark-rs` it seems? fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { @@ -247,6 +252,7 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None), // To do: If code is disabled, indented lines are part of the content. _ if prefix >= TAB_SIZE => (State::Ok, None), + // To do: definitions, setext headings, etc? _ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| { let result = if ok { (State::Nok, None) diff --git a/src/content/string.rs b/src/content/string.rs index 25d8582..e8134c4 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -15,6 +15,8 @@ use crate::construct::{ }; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +// To do: line endings? + /// Before string. /// /// ```markdown diff --git a/src/parser.rs b/src/parser.rs index 5648942..250ff0c 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,6 +1,5 @@ //! Turn a string of markdown into events. // To do: this should start with `containers`, when they’re done. -// To do: definitions and such will mean more data has to be passed around. use crate::content::flow::flow; use crate::tokenizer::{as_codes, Code, Event, Point}; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index fc9e177..9884986 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -50,6 +50,19 @@ pub enum TokenType { CodeTextData, Content, Data, + Definition, + DefinitionLabel, + DefinitionLabelMarker, + DefinitionLabelData, + DefinitionMarker, + DefinitionDestination, + DefinitionDestinationLiteral, + DefinitionDestinationLiteralMarker, + DefinitionDestinationRaw, + DefinitionDestinationString, + DefinitionTitle, + DefinitionTitleMarker, + DefinitionTitleString, HardBreakEscape, HardBreakEscapeMarker, HardBreakTrailing, @@ -350,6 +363,39 @@ impl Tokenizer { self.stack.truncate(previous.stack_len); } + /// To do. + pub fn go( + &mut self, + state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + ok: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + ) -> Box<StateFn> { + // To do: could we *not* capture? + // As this state can return `nok`, it must be wrapped in a higher attempt, + // which has captured things and will revert on `nok` already? 
+ let previous = self.capture(); + + attempt_impl( + state, + vec![], + |result: (Vec<Code>, Vec<Code>), is_ok, tokenizer: &mut Tokenizer| { + let codes = if is_ok { result.1 } else { result.0 }; + log::debug!( + "go: {:?}, codes: {:?}, at {:?}", + is_ok, + codes, + tokenizer.point + ); + + if is_ok { + tokenizer.feed(&codes, ok, false) + } else { + tokenizer.free(previous); + (State::Nok, None) + } + }, + ) + } + /// Check if `state` and its future states are successful or not. /// /// This captures the current state of the tokenizer, returns a wrapped @@ -461,6 +507,27 @@ impl Tokenizer { ) } + pub fn attempt_4( + &mut self, + a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, + done: impl FnOnce(bool) -> Box<StateFn> + 'static, + ) -> Box<StateFn> { + self.call_multiple( + false, + Some(Box::new(a)), + Some(Box::new(b)), + Some(Box::new(c)), + Some(Box::new(d)), + None, + None, + None, + done, + ) + } + #[allow(clippy::too_many_arguments, clippy::many_single_char_names)] pub fn attempt_7( &mut self, @@ -537,8 +604,6 @@ impl Tokenizer { let mut state = State::Fn(Box::new(start)); let mut index = 0; - println!("feed: {:?} {:?}", codes, drain); - self.consumed = true; while index < codes.len() { @@ -702,19 +767,6 @@ pub fn as_codes(value: &str) -> Vec<Code> { fn check_statefn_result(result: StateFnResult) -> StateFnResult { let (state, mut remainder) = result; - match state { - State::Nok | State::Fn(_) => { - if let Some(ref x) = remainder { - assert_eq!( - x.len(), - 0, - "expected `None` to be passed back as remainder from `State::Nok`, `State::Fn`" - ); - } - } - State::Ok => {} - } - // Remove an eof. // For convencience, feeding back an eof is allowed, but cleaned here. // Most states handle eof and eol in the same branch, and hence pass diff --git a/tests/character_escape.rs b/tests/character_escape.rs index 9e2a5c8..ba94ab3 100644 --- a/tests/character_escape.rs +++ b/tests/character_escape.rs @@ -67,7 +67,7 @@ fn character_escape() { // "should escape in resource and title" // ); - // To do: definition. + // To do: link (reference). // assert_eq!( // micromark("[foo]: /bar\\* \"ti\\*tle\"\n\n[foo]"), // "<p><a href=\"/bar*\" title=\"ti*tle\">foo</a></p>", diff --git a/tests/character_reference.rs b/tests/character_reference.rs index e351088..bcd0aca 100644 --- a/tests/character_reference.rs +++ b/tests/character_reference.rs @@ -61,7 +61,7 @@ fn character_reference() { // "should support character references in resource URLs and titles" // ); - // To do: definition. + // To do: link (resource). // assert_eq!( // micromark("[foo]: /föö \"föö\"\n\n[foo]"), // "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>", diff --git a/tests/definition.rs b/tests/definition.rs new file mode 100644 index 0000000..f0869a3 --- /dev/null +++ b/tests/definition.rs @@ -0,0 +1,446 @@ +extern crate micromark; +use micromark::{micromark, micromark_with_options, CompileOptions}; + +const DANGER: &CompileOptions = &CompileOptions { + allow_dangerous_html: true, + allow_dangerous_protocol: true, +}; + +#[test] +fn definition() { + // To do: link (reference). 
+ // assert_eq!( + // micromark("[foo]: /url \"title\"\n\n[foo]"), + // "<p><a href=\"/url\" title=\"title\">foo</a></p>", + // "should support link definitions" + // ); + + assert_eq!( + micromark("[foo]:\n\n/url\n\n[foo]"), + "<p>[foo]:</p>\n<p>/url</p>\n<p>[foo]</p>", + "should not support blank lines before destination" + ); + + // To do: link (reference). + // assert_eq!( + // micromark(" [foo]: \n /url \n 'the title' \n\n[foo]"), + // "<p><a href=\"/url\" title=\"the title\">foo</a></p>", + // "should support whitespace and line endings in definitions" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"), + // "<p><a href=\"my_(url)\" title=\"title (with parens)\">Foo*bar]</a></p>", + // "should support complex definitions (1)" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[Foo bar]:\n<my url>\n'title'\n\n[Foo bar]"), + // "<p><a href=\"my%20url\" title=\"title\">Foo bar</a></p>", + // "should support complex definitions (2)" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: /url '\ntitle\nline1\nline2\n'\n\n[foo]"), + // "<p><a href=\"/url\" title=\"\ntitle\nline1\nline2\n\">foo</a></p>", + // "should support line endings in titles" + // ); + + // To do: some bug + // assert_eq!( + // micromark("[foo]: /url 'title\n\nwith blank line'\n\n[foo]"), + // "<p>[foo]: /url 'title</p>\n<p>with blank line'</p>\n<p>[foo]</p>", + // "should not support blank lines in titles" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]:\n/url\n\n[foo]"), + // "<p><a href=\"/url\">foo</a></p>", + // "should support definitions w/o title" + // ); + + assert_eq!( + micromark("[foo]:\n\n[foo]"), + "<p>[foo]:</p>\n<p>[foo]</p>", + "should not support definitions w/o destination" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: <>\n\n[foo]"), + // "<p><a href=\"\">foo</a></p>", + // "should support definitions w/ explicit empty destinations" + // ); + + assert_eq!( + micromark_with_options("[foo]: <bar>(baz)\n\n[foo]", DANGER), + "<p>[foo]: <bar>(baz)</p>\n<p>[foo]</p>", + "should not support definitions w/ no whitespace between destination and title" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: /url\\bar\\*baz \"foo\\\"bar\\baz\"\n\n[foo]"), + // "<p><a href=\"/url%5Cbar*baz\" title=\"foo"bar\\baz\">foo</a></p>", + // "should support character escapes in destinations and titles" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]\n\n[foo]: url"), + // "<p><a href=\"url\">foo</a></p>\n", + // "should support a link before a definition" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: first\n[foo]: second\n\n[foo]"), + // "<p><a href=\"first\">foo</a></p>", + // "should match w/ the first definition" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[FOO]: /url\n\n[Foo]"), + // "<p><a href=\"/url\">Foo</a></p>", + // "should match w/ case-insensitive (1)" + // ); + + // To do: link (reference). 
+ // assert_eq!( + // micromark("[ΑΓΩ]: /φου\n\n[αγω]"), + // "<p><a href=\"/%CF%86%CE%BF%CF%85\">αγω</a></p>", + // "should match w/ case-insensitive (2)" + // ); + + assert_eq!( + micromark("[foo]: /url"), + "", + "should not contribute anything w/o reference (1)" + ); + + assert_eq!( + micromark("[\nfoo\n]: /url\nbar"), + "<p>bar</p>", + "should not contribute anything w/o reference (2)" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: /url \"title\" \n\n[foo]"), + // "<p><a href=\"/url\" title=\"title\">foo</a></p>", + // "should support whitespace after title" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: /url\n\"title\" \n\n[foo]"), + // "<p><a href=\"/url\" title=\"title\">foo</a></p>", + // "should support whitespace after title on a separate line" + // ); + + assert_eq!( + micromark("[foo]: /url \"title\" ok"), + "<p>[foo]: /url "title" ok</p>", + "should not support non-whitespace content after definitions (1)" + ); + + assert_eq!( + micromark("[foo]: /url\n\"title\" ok"), + "<p>"title" ok</p>", + "should not support non-whitespace content after definitions (2)" + ); + + assert_eq!( + micromark(" [foo]: /url \"title\"\n\n[foo]"), + "<pre><code>[foo]: /url "title"\n</code></pre>\n<p>[foo]</p>", + "should prefer indented code over definitions" + ); + + assert_eq!( + micromark("```\n[foo]: /url\n```\n\n[foo]"), + "<pre><code>[foo]: /url\n</code></pre>\n<p>[foo]</p>", + "should not support definitions in fenced code" + ); + + assert_eq!( + micromark("Foo\n[bar]: /baz\n\n[bar]"), + "<p>Foo\n[bar]: /baz</p>\n<p>[bar]</p>", + "should not support definitions in paragraphs" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("# [Foo]\n[foo]: /url\n> bar"), + // "<h1><a href=\"/url\">Foo</a></h1>\n<blockquote>\n<p>bar</p>\n</blockquote>", + // "should not support definitions in headings" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: /url\nbar\n===\n[foo]"), + // "<h1>bar</h1>\n<p><a href=\"/url\">foo</a></p>", + // "should support setext headings after definitions" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[foo]: /url\n===\n[foo]"), + // "<p>===\n<a href=\"/url\">foo</a></p>", + // "should not support setext heading underlines after definitions" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark( + // "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]" + // ), + // "<p><a href=\"/foo-url\" title=\"foo\">foo</a>,\n<a href=\"/bar-url\" title=\"bar\">bar</a>,\n<a href=\"/baz-url\">baz</a></p>", + // "should support definitions after definitions" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("> [foo]: /url\n\n[foo]"), + // "<blockquote>\n</blockquote>\n<p><a href=\"/url\">foo</a></p>", + // "should support definitions in block quotes" + // ); + + // Extra + // To do: link (reference). + // assert_eq!( + // micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."), + // "<p>Link: <a href=\"example.com\">[+]</a>.</p>", + // "should match w/ character escapes" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: \\\" \\(\\)\\\"\n\n[x]"), + // "<p><a href=\"%22%20()%22\">x</a></p>", + // "should support character escapes & references in unenclosed destinations" + // ); + + // To do: link (reference). 
+ // assert_eq!( + // micromark("[x]: <\\> \\+\\>>\n\n[x]"), + // "<p><a href=\"%3E%20+%3E\">x</a></p>", + // "should support character escapes & references in enclosed destinations" + // ); + + assert_eq!( + micromark("[x]: <\n\n[x]"), + "<p>[x]: <</p>\n<p>[x]</p>", + "should not support a line ending at start of enclosed destination" + ); + + assert_eq!( + micromark("[x]: <x\n\n[x]"), + "<p>[x]: <x</p>\n<p>[x]</p>", + "should not support a line ending in enclosed destination" + ); + + assert_eq!( + micromark("[x]: \u{000b}a\n\n[x]"), + "<p>[x]: \u{000b}a</p>\n<p>[x]</p>", + "should not support ascii control characters at the start of destination" + ); + + assert_eq!( + micromark("[x]: a\u{000b}b\n\n[x]"), + "<p>[x]: a\u{000b}b</p>\n<p>[x]</p>", + "should not support ascii control characters in destination" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: <\u{000b}a>\n\n[x]"), + // "<p><a href=\"%0Ba\">x</a></p>", + // "should support ascii control characters at the start of enclosed destination" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: <a\u{000b}b>\n\n[x]"), + // "<p><a href=\"a%0Bb\">x</a></p>", + // "should support ascii control characters in enclosed destinations" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: a \"\\\"\"\n\n[x]"), + // "<p><a href=\"a\" title=\""\">x</a></p>", + // "should support character escapes at the start of a title" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: a \"\\\"\"\n\n[x]"), + // "<p><a href=\"a\" title=\"\"\">x</a></p>", + // "should support double quoted titles" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: a '\"'\n\n[x]"), + // "<p><a href=\"a\" title=\""\">x</a></p>", + // "should support double quoted titles" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: a (\"\")\n\n[x]"), + // "<p><a href=\"a\" title=\""\"\">x</a></p>", + // "should support paren enclosed titles" + // ); + + assert_eq!( + micromark("[x]: a(()\n\n[x]"), + "<p>[x]: a(()</p>\n<p>[x]</p>", + "should not support more opening than closing parens in the destination" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: a(())\n\n[x]"), + // "<p><a href=\"a(())\">x</a></p>", + // "should support balanced opening and closing parens in the destination" + // ); + + assert_eq!( + micromark("[x]: a())\n\n[x]"), + "<p>[x]: a())</p>\n<p>[x]</p>", + "should not support more closing than opening parens in the destination" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: a \t\n\n[x]"), + // "<p><a href=\"a\">x</a></p>", + // "should support trailing whitespace after a destination" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]: a \"\"X \t\n\n[x]"), + // "<p><a href=\"a\" title=\"\"X>x</a></p>", + // "should support trailing whitespace after a destination" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[&©&]: example.com/&©& \"&©&\"\n\n[&©&]"), + // "<p><a href=\"example.com/&%C2%A9&\" title=\"&©&\">&©&</a></p>", + // "should support character references in definitions" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[x]:\nexample.com\n\n[x]"), + // "<p><a href=\"example.com\">x</a></p>", + // "should support a line ending before a destination" + // ); + + // To do: link (reference). 
+ // assert_eq!( + // micromark("[x]: \t\nexample.com\n\n[x]"), + // "<p><a href=\"example.com\">x</a></p>", + // "should support whitespace before a destination" + // ); + + // See: <https://github.com/commonmark/commonmark.js/issues/192> + // To do: link (reference). + // assert_eq!( + // micromark("[x]: <> \"\"\n[][x]"), + // "<p><a href=\"\"></a></p>", + // "should ignore an empty title" + // ); + + assert_eq!( + micromark_with_options("[a]\n\n[a]: <b<c>", DANGER), + "<p>[a]</p>\n<p>[a]: <b<c></p>", + "should not support a less than in an enclosed destination" + ); + + assert_eq!( + micromark("[a]\n\n[a]: b(c"), + "<p>[a]</p>\n<p>[a]: b(c</p>", + "should not support an extra left paren (`(`) in a raw destination" + ); + + assert_eq!( + micromark("[a]\n\n[a]: b)c"), + "<p>[a]</p>\n<p>[a]: b)c</p>", + "should not support an extra right paren (`)`) in a raw destination" + ); + + assert_eq!( + micromark("[a]\n\n[a]: b)c"), + "<p>[a]</p>\n<p>[a]: b)c</p>", + "should not support an extra right paren (`)`) in a raw destination" + ); + + // To do: link (reference). + // assert_eq!( + // micromark("[a]\n\n[a]: a(1(2(3(4()))))b"), + // "<p><a href=\"a(1(2(3(4()))))b\">a</a></p>\n", + // "should support 4 or more sets of parens in a raw destination (link resources don’t)" + // ); + + assert_eq!( + micromark("[a]\n\n[a]: aaa)"), + "<p>[a]</p>\n<p>[a]: aaa)</p>", + "should not support a final (unbalanced) right paren in a raw destination" + ); + + assert_eq!( + micromark("[a]\n\n[a]: aaa) \"a\""), + "<p>[a]</p>\n<p>[a]: aaa) "a"</p>", + "should not support a final (unbalanced) right paren in a raw destination “before” a title" + ); + + // To do: link (reference). + // assert_eq!( + // micromark(" [a]: b \"c\"\n [d]: e\n [f]: g \"h\"\n [i]: j\n\t[k]: l (m)\n\t n [k] o"), + // "<p>n <a href=\"l\" title=\"m\">k</a> o</p>", + // "should support subsequent indented definitions" + // ); + + // To do: link (reference). + // assert_eq!( + // micromark("[a\n b]: c\n\n[a\n b]"), + // "<p><a href=\"c\">a\nb</a></p>", + // "should support line prefixes in definition labels" + // ); + + assert_eq!( + micromark("[a]: )\n\n[a]"), + "<p>[a]: )</p>\n<p>[a]</p>", + "should not support definitions w/ only a closing paren as a raw destination" + ); + + assert_eq!( + micromark("[a]: )b\n\n[a]"), + "<p>[a]: )b</p>\n<p>[a]</p>", + "should not support definitions w/ closing paren + more text as a raw destination" + ); + + assert_eq!( + micromark("[a]: b)\n\n[a]"), + "<p>[a]: b)</p>\n<p>[a]</p>", + "should not support definitions w/ text + a closing paren as a raw destination" + ); + + // To do: support turning off things. + // assert_eq!( + // micromark("[foo]: /url \"title\"", { + // extensions: [{disable: {null: ["definition"]}}] + // }), + // "<p>[foo]: /url "title"</p>", + // "should support turning off definitions" + // ); +} |
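The matching half of definitions is still open in this commit: `label_after` carries a to-do to extract and normalize the identifier (micromark's `normalizeIdentifier`), which is also why the case-insensitive tests above are commented out. CommonMark matches labels case-insensitively with internal whitespace collapsed; below is a hedged sketch of that normalization — the function name and the exact case folding are assumptions for illustration, not code from this commit.

```rust
/// Normalize a link label for matching: trim, collapse runs of
/// whitespace to a single space, and case-fold.
/// Caveat: CommonMark calls for Unicode case folding; `to_lowercase`
/// is a simplification that differs for characters such as `ẞ`.
fn normalize_identifier(label: &str) -> String {
    label
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase()
}

fn main() {
    // `[ΑΓΩ]` and `[αγω]` (see the commented-out test above) should match.
    assert_eq!(normalize_identifier("ΑΓΩ"), normalize_identifier("αγω"));
    assert_eq!(normalize_identifier("Foo \n  bar"), "foo bar");
}
```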