From 5403261e8213f68633a09fc3e9bc2e6e2cd777b2 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 18 Jul 2022 16:31:14 +0200 Subject: Add support for turning off constructs --- build.rs | 14 +- examples/lib.rs | 2 +- readme.md | 7 +- src/construct/attention.rs | 2 +- src/construct/autolink.rs | 2 +- src/construct/block_quote.rs | 20 +- src/construct/character_escape.rs | 2 +- src/construct/character_reference.rs | 2 +- src/construct/code_fenced.rs | 34 +- src/construct/code_indented.rs | 2 +- src/construct/code_text.rs | 6 +- src/construct/definition.rs | 6 +- src/construct/hard_break_escape.rs | 2 +- src/construct/hard_break_trailing.rs | 2 +- src/construct/heading_atx.rs | 15 +- src/construct/heading_setext.rs | 10 +- src/construct/html_flow.rs | 33 +- src/construct/html_text.rs | 2 +- src/construct/label_end.rs | 2 +- src/construct/label_start_image.rs | 2 +- src/construct/label_start_link.rs | 2 +- src/construct/list.rs | 15 +- src/construct/thematic_break.rs | 15 +- src/content/string.rs | 1 + src/content/text.rs | 1 + src/lib.rs | 232 +- src/parser.rs | 5 +- tests/attention.rs | 42 +- tests/autolink.rs | 40 +- tests/block_quote.rs | 24 +- tests/character_escape.rs | 36 +- tests/character_reference.rs | 38 +- tests/code_fenced.rs | 22 +- tests/code_indented.rs | 140 +- tests/code_text.rs | 36 +- tests/commonmark.rs | 4576 ++++++++++++++++++++++++---------- tests/definition.rs | 40 +- tests/hard_break_escape.rs | 22 +- tests/hard_break_trailing.rs | 22 +- tests/heading_atx.rs | 22 +- tests/heading_setext.rs | 22 +- tests/html_flow.rs | 362 +-- tests/html_text.rs | 165 +- tests/image.rs | 25 +- tests/link_reference.rs | 56 +- tests/link_resource.rs | 22 +- tests/list.rs | 39 +- tests/misc_dangerous_html.rs | 14 +- tests/misc_default_line_ending.rs | 6 +- tests/misc_line_ending.rs | 30 +- tests/misc_tabs.rs | 13 +- tests/thematic_break.rs | 22 +- 52 files changed, 4345 insertions(+), 1929 deletions(-) diff --git a/build.rs b/build.rs index a4947f5..52de3ac 100644 --- a/build.rs +++ b/build.rs @@ -53,7 +53,7 @@ async fn commonmark() { format!("{}\n", parts[1]) }; - let test = format!(" assert_eq!(\n micromark_with_options(r###\"{}\"###, DANGER),\n r###\"{}\"###,\n r###\"{} ({})\"###\n);", input, output, section, number); + let test = format!(" assert_eq!(\n micromark_with_options(\n r###\"{}\"###,\n &danger\n ),\n r###\"{}\"###,\n r###\"{} ({})\"###\n);", input, output, section, number); cases.push(test); @@ -70,15 +70,15 @@ async fn commonmark() { extern crate micromark; use micromark::{{micromark_with_options, Options}}; -const DANGER: &Options = &Options {{ - allow_dangerous_html: true, - allow_dangerous_protocol: true, - default_line_ending: None, -}}; - #[rustfmt::skip] #[test] fn commonmark() {{ + let danger = Options {{ + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..Options::default() + }}; + {} }} ", diff --git a/examples/lib.rs b/examples/lib.rs index 718e400..b1869bb 100644 --- a/examples/lib.rs +++ b/examples/lib.rs @@ -17,7 +17,7 @@ fn main() { &Options { allow_dangerous_html: true, allow_dangerous_protocol: true, - default_line_ending: None + ..Options::default() } ) ); diff --git a/readme.md b/readme.md index 03a1a9f..d5641df 100644 --- a/readme.md +++ b/readme.md @@ -45,7 +45,6 @@ cargo doc --document-private-items - [ ] (5) There’s a lot of rust-related choosing whether to pass (mutable) references or whatever around that should be refactored -- [ ] (1) Support turning off constructs - [ ] (5) Figure out extensions ### All the things @@ -59,18 +58,15 @@ cargo doc --document-private-items #### Refactor - [ ] (1) Use `edit_map` in `subtokenize` (needs to support links in edits) -- [ ] (1) Add list of void tokens, assert that there’s nothing between them - [ ] (1) Improve `interrupt`, `concrete`, `lazy` fields somehow? #### Parse -- [ ] (3) Turn off things (enable every test for these) - [ ] (3) Make tokens extendable for extensions? #### Test - [ ] (1) Make sure positional info is perfect -- [ ] (3) Use `commonmark` tests - [ ] (3) Share a bunch of tests with `micromark-js` #### Misc @@ -209,3 +205,6 @@ important. - [x] (3) Add support for lazy lines - [x] (5) Containers! - [x] (3) Check subtokenizer unraveling is ok +- [x] (1) Add list of void tokens, check that they’re void +- [x] (3) Use `commonmark` tests +- [x] (3) Add support for turning off constructs diff --git a/src/construct/attention.rs b/src/construct/attention.rs index 86c9249..3e15f9a 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -175,7 +175,7 @@ struct Sequence { /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char('*' | '_') => { + Code::Char('*' | '_') if tokenizer.parse_state.constructs.attention => { tokenizer.enter(Token::AttentionSequence); inside(tokenizer, code, MarkerKind::from_code(code)) } diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index fe8f380..0ef4607 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -115,7 +115,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char('<') => { + Code::Char('<') if tokenizer.parse_state.constructs.autolink => { tokenizer.enter(Token::Autolink); tokenizer.enter(Token::AutolinkMarker); tokenizer.consume(code); diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs index 6c3f270..a8a8fa8 100644 --- a/src/construct/block_quote.rs +++ b/src/construct/block_quote.rs @@ -45,8 +45,16 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + if tokenizer.parse_state.constructs.block_quote { + tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) + } else { + (State::Nok, None) + } } /// Start of block quote, after whitespace, before `>`. @@ -73,8 +81,12 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ^ /// ``` pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), cont_before)(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + tokenizer.go(space_or_tab_min_max(0, max), cont_before)(tokenizer, code) } /// After whitespace, before `>`. diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs index 811adcf..f171d38 100644 --- a/src/construct/character_escape.rs +++ b/src/construct/character_escape.rs @@ -44,7 +44,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char('\\') => { + Code::Char('\\') if tokenizer.parse_state.constructs.character_escape => { tokenizer.enter(Token::CharacterEscape); tokenizer.enter(Token::CharacterEscapeMarker); tokenizer.consume(code); diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index 544458a..ce7cd31 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -138,7 +138,7 @@ struct Info { /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char('&') => { + Code::Char('&') if tokenizer.parse_state.constructs.character_reference => { tokenizer.enter(Token::CharacterReference); tokenizer.enter(Token::CharacterReferenceMarker); tokenizer.consume(code); diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index d5001e7..49bcae3 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -189,10 +189,18 @@ struct Info { /// | ~~~ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.enter(Token::CodeFenced); - tokenizer.enter(Token::CodeFencedFence); - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before_sequence_open)(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + if tokenizer.parse_state.constructs.code_fenced { + tokenizer.enter(Token::CodeFenced); + tokenizer.enter(Token::CodeFencedFence); + tokenizer.go(space_or_tab_min_max(0, max), before_sequence_open)(tokenizer, code) + } else { + (State::Nok, None) + } } /// Inside the opening fence, after an optional prefix, before a sequence. @@ -445,10 +453,20 @@ fn close_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu /// ^ /// ``` fn close_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { - tokenizer.enter(Token::CodeFencedFence); - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), |t, c| { - close_before(t, c, info) - })(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + + if tokenizer.parse_state.constructs.code_fenced { + tokenizer.enter(Token::CodeFencedFence); + tokenizer.go(space_or_tab_min_max(0, max), |t, c| { + close_before(t, c, info) + })(tokenizer, code) + } else { + (State::Nok, None) + } } /// In a closing fence, after optional whitespace, before sequence. diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 6778b62..eb0811b 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -62,7 +62,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // Do not interrupt paragraphs. - if tokenizer.interrupt { + if tokenizer.interrupt || !tokenizer.parse_state.constructs.code_indented { (State::Nok, None) } else { tokenizer.enter(Token::CodeIndented); diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index 5e40d03..eb143ba 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -99,8 +99,10 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('`') - if tokenizer.previous != Code::Char('`') - || (len > 0 && tokenizer.events[len - 1].token_type == Token::CharacterEscape) => + if tokenizer.parse_state.constructs.code_text + && (tokenizer.previous != Code::Char('`') + || (len > 0 + && tokenizer.events[len - 1].token_type == Token::CharacterEscape)) => { tokenizer.enter(Token::CodeText); tokenizer.enter(Token::CodeTextSequence); diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 6ce3a04..231011f 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -120,12 +120,12 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { == Token::Definition; // Do not interrupt paragraphs (but do follow definitions). - if tokenizer.interrupt && !definition_before { - (State::Nok, None) - } else { + if (!tokenizer.interrupt || definition_before) && tokenizer.parse_state.constructs.definition { tokenizer.enter(Token::Definition); // Note: arbitrary whitespace allowed even if code (indented) is on. tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) + } else { + (State::Nok, None) } } diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs index de8afe6..191ef67 100644 --- a/src/construct/hard_break_escape.rs +++ b/src/construct/hard_break_escape.rs @@ -52,7 +52,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char('\\') => { + Code::Char('\\') if tokenizer.parse_state.constructs.hard_break_escape => { tokenizer.enter(Token::HardBreakEscape); tokenizer.enter(Token::HardBreakEscapeMarker); tokenizer.consume(code); diff --git a/src/construct/hard_break_trailing.rs b/src/construct/hard_break_trailing.rs index d83bf60..88c668a 100644 --- a/src/construct/hard_break_trailing.rs +++ b/src/construct/hard_break_trailing.rs @@ -53,7 +53,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char(' ') => { + Code::Char(' ') if tokenizer.parse_state.constructs.hard_break_trailing => { tokenizer.enter(Token::HardBreakTrailing); tokenizer.enter(Token::HardBreakTrailingSpace); tokenizer.consume(code); diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 8947f64..68a6be7 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -67,9 +67,18 @@ use crate::util::edit_map::EditMap; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.enter(Token::HeadingAtx); - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + + if tokenizer.parse_state.constructs.heading_atx { + tokenizer.enter(Token::HeadingAtx); + tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) + } else { + (State::Nok, None) + } } /// Start of a heading (atx), after whitespace. diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index deab558..7cd259b 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -117,6 +117,11 @@ impl Kind { /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; let paragraph_before = !tokenizer.events.is_empty() && tokenizer.events[skip_opt_back( &tokenizer.events, @@ -127,9 +132,8 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { == Token::Paragraph; // Require a paragraph before and do not allow on a lazy line. - if paragraph_before && !tokenizer.lazy { - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) + if paragraph_before && !tokenizer.lazy && tokenizer.parse_state.constructs.heading_setext { + tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) } else { (State::Nok, None) } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 822b9dd..1255081 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -204,18 +204,27 @@ struct Info { /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.enter(Token::HtmlFlow); - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go( - space_or_tab_with_options(SpaceOrTabOptions { - kind: Token::HtmlFlowData, - min: 0, - max: TAB_SIZE - 1, - connect: false, - content_type: None, - }), - before, - )(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + + if tokenizer.parse_state.constructs.html_flow { + tokenizer.enter(Token::HtmlFlow); + tokenizer.go( + space_or_tab_with_options(SpaceOrTabOptions { + kind: Token::HtmlFlowData, + min: 0, + max, + connect: false, + content_type: None, + }), + before, + )(tokenizer, code) + } else { + (State::Nok, None) + } } /// After optional whitespace, before `<`. diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index be1f1fe..db00551 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -66,7 +66,7 @@ use crate::util::codes::parse; /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - if Code::Char('<') == code { + if Code::Char('<') == code && tokenizer.parse_state.constructs.html_text { tokenizer.enter(Token::HtmlText); tokenizer.enter(Token::HtmlTextData); tokenizer.consume(code); diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index e232cbe..2ac2500 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -184,7 +184,7 @@ struct Info { /// > | [a] b /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - if Code::Char(']') == code { + if Code::Char(']') == code && tokenizer.parse_state.constructs.label_end { let mut label_start_index: Option = None; let mut index = tokenizer.label_start_stack.len(); diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index b4e0433..fd7a42d 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -40,7 +40,7 @@ use crate::tokenizer::{Code, LabelStart, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char('!') => { + Code::Char('!') if tokenizer.parse_state.constructs.label_start_image => { tokenizer.enter(Token::LabelImage); tokenizer.enter(Token::LabelImageMarker); tokenizer.consume(code); diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index 7e8e511..aeaa4eb 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -39,7 +39,7 @@ use crate::tokenizer::{Code, LabelStart, State, StateFnResult, Tokenizer}; /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char('[') => { + Code::Char('[') if tokenizer.parse_state.constructs.label_start_link => { let start = tokenizer.events.len(); tokenizer.enter(Token::LabelLink); tokenizer.enter(Token::LabelMarker); diff --git a/src/construct/list.rs b/src/construct/list.rs index fce8f00..5fd0849 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -138,9 +138,18 @@ impl Kind { /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.enter(Token::ListItem); - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + + if tokenizer.parse_state.constructs.list { + tokenizer.enter(Token::ListItem); + tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) + } else { + (State::Nok, None) + } } /// Start of list item, after whitespace. diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index f0b6052..48fb838 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -135,9 +135,18 @@ struct Info { /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.enter(Token::ThematicBreak); - // To do: allow arbitrary when code (indented) is turned off. - tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) + let max = if tokenizer.parse_state.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }; + + if tokenizer.parse_state.constructs.thematic_break { + tokenizer.enter(Token::ThematicBreak); + tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer, code) + } else { + (State::Nok, None) + } } /// Start of a thematic break, after whitespace. diff --git a/src/content/string.rs b/src/content/string.rs index cc8ee53..f2650df 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -19,6 +19,7 @@ use crate::construct::{ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; const MARKERS: [Code; 5] = [ + // To do: build this vec based on whether they are enabled? Code::VirtualSpace, // `whitespace` Code::Char('\t'), // `whitespace` Code::Char(' '), // `whitespace` diff --git a/src/content/text.rs b/src/content/text.rs index cf630f1..f797b11 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -58,6 +58,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { Code::None => (State::Ok, None), _ => tokenizer.attempt_n( vec![ + // To do: build this vec based on whether they are enabled? Box::new(attention), Box::new(autolink), Box::new(character_escape), diff --git a/src/lib.rs b/src/lib.rs index ff8e938..224f8d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -75,8 +75,193 @@ impl LineEnding { } } +/// Control which constructs are enabled. +/// +/// Not all constructs can be configured. +/// Notably, blank lines and paragraphs cannot be turned off. +#[allow(clippy::struct_excessive_bools)] +#[derive(Clone, Debug)] +pub struct Constructs { + /// Attention. + /// + /// ```markdown + /// > | a *b* c **d**. + /// ^^^ ^^^^^ + /// ``` + pub attention: bool, + /// Autolink. + /// + /// ```markdown + /// > | a b . + /// ^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^ + /// ``` + pub autolink: bool, + /// Block quote. + /// + /// ```markdown + /// > | > a + /// ^^^ + /// ``` + pub block_quote: bool, + /// Character escape. + /// + /// ```markdown + /// > | a \* b + /// ^^ + /// ``` + pub character_escape: bool, + /// Character reference. + /// + /// ```markdown + /// > | a & b + /// ^^^^^ + /// ``` + pub character_reference: bool, + /// Code (indented). + /// + /// ```markdown + /// > | a + /// ^^^^^ + /// ``` + pub code_indented: bool, + /// Code (fenced). + /// + /// ```markdown + /// > | ~~~js + /// ^^^^^ + /// > | console.log(1) + /// ^^^^^^^^^^^^^^ + /// > | ~~~ + /// ^^^ + /// ``` + pub code_fenced: bool, + /// Code (text). + /// + /// ```markdown + /// > | a `b` c + /// ^^^ + /// ``` + pub code_text: bool, + /// Definition. + /// + /// ```markdown + /// > | [a]: b "c" + /// ^^^^^^^^^^ + /// ``` + pub definition: bool, + /// Hard break (escape). + /// + /// ```markdown + /// > | a\ + /// ^ + /// | b + /// ``` + pub hard_break_escape: bool, + /// Hard break (trailing). + /// + /// ```markdown + /// > | a␠␠ + /// ^^ + /// | b + /// ``` + pub hard_break_trailing: bool, + /// Heading (atx). + /// + /// ```markdown + /// > | # a + /// ^^^ + /// ``` + pub heading_atx: bool, + /// Heading (setext). + /// + /// ```markdown + /// > | a + /// ^^ + /// > | == + /// ^^ + /// ``` + pub heading_setext: bool, + /// HTML (flow). + /// + /// ```markdown + /// > |
+ /// ^^^^^ + /// ``` + pub html_flow: bool, + /// HTML (text). + /// + /// ```markdown + /// > | a c + /// ^^^ + /// ``` + pub html_text: bool, + /// Label start (image). + /// + /// ```markdown + /// > | a ![b](c) d + /// ^^ + /// ``` + pub label_start_image: bool, + /// Label start (link). + /// + /// ```markdown + /// > | a [b](c) d + /// ^ + /// ``` + pub label_start_link: bool, + /// Label end. + /// + /// ```markdown + /// > | a [b](c) d + /// ^^^^ + /// ``` + pub label_end: bool, + /// List. + /// + /// ```markdown + /// > | * a + /// ^^^ + /// ``` + pub list: bool, + /// Thematic break. + /// + /// ```markdown + /// > | *** + /// ^^^ + /// ``` + pub thematic_break: bool, +} + +impl Default for Constructs { + /// `CommonMark`. + fn default() -> Self { + Self { + attention: true, + autolink: true, + block_quote: true, + character_escape: true, + character_reference: true, + code_indented: true, + code_fenced: true, + code_text: true, + definition: true, + hard_break_escape: true, + hard_break_trailing: true, + heading_atx: true, + heading_setext: true, + html_flow: true, + html_text: true, + label_start_image: true, + label_start_link: true, + label_end: true, + list: true, + thematic_break: true, + } + } +} + /// Configuration (optional). -#[derive(Default, Debug)] +#[derive(Clone, Debug, Default)] pub struct Options { /// Whether to allow (dangerous) HTML. /// The default is `false`, you can turn it on to `true` for trusted @@ -99,8 +284,7 @@ pub struct Options { /// "Hi, venus!", /// &Options { /// allow_dangerous_html: true, - /// allow_dangerous_protocol: false, - /// default_line_ending: None, + /// ..Options::default() /// } /// ), /// "

Hi, venus!

" @@ -128,9 +312,8 @@ pub struct Options { /// micromark_with_options( /// "", /// &Options { - /// allow_dangerous_html: false, /// allow_dangerous_protocol: true, - /// default_line_ending: None, + /// ..Options::default() /// } /// ), /// "

javascript:alert(1)

" @@ -166,15 +349,46 @@ pub struct Options { /// micromark_with_options( /// "> a", /// &Options { - /// allow_dangerous_html: false, - /// allow_dangerous_protocol: false, /// default_line_ending: Some(LineEnding::CarriageReturnLineFeed), + /// ..Options::default() /// } /// ), /// "
\r\n

a

\r\n
" /// ); /// ``` + // To do: use `default`? pub default_line_ending: Option, + + /// Which constructs to enable and disable. + /// The default is to follow `CommonMark`. + /// + /// ## Examples + /// + /// ```rust + /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// + /// // micromark follows CommonMark by default: + /// assert_eq!( + /// micromark(" indented code?"), + /// "
indented code?\n
" + /// ); + /// + /// // Pass `constructs` to choose what to enable and disable: + /// assert_eq!( + /// micromark_with_options( + /// " indented code?", + /// &Options { + /// constructs: Constructs { + /// code_indented: false, + /// ..Constructs::default() + /// }, + /// ..Options::default() + /// } + /// ), + /// "

indented code?

" + /// ); + /// ``` + pub constructs: Constructs, } /// Turn markdown into HTML. @@ -203,13 +417,13 @@ pub fn micromark(value: &str) -> String { /// let result = micromark_with_options("
\n\n# Hello, world!\n\n
", &Options { /// allow_dangerous_html: true, /// allow_dangerous_protocol: true, -/// default_line_ending: None, +/// ..Options::default() /// }); /// /// assert_eq!(result, "
\n

Hello, world!

\n
"); /// ``` #[must_use] pub fn micromark_with_options(value: &str, options: &Options) -> String { - let (events, result) = parse(value); + let (events, result) = parse(value, options); compile(&events, &result.codes, options) } diff --git a/src/parser.rs b/src/parser.rs index 725f326..409e812 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3,6 +3,7 @@ use crate::content::document::document; use crate::tokenizer::{Code, Event, Point}; use crate::util::codes::parse as parse_codes; +use crate::{Constructs, Options}; use std::collections::HashSet; /// Information needed, in all content types, when parsing markdown. @@ -11,6 +12,7 @@ use std::collections::HashSet; /// It also references the input value as [`Code`][]s. #[derive(Debug)] pub struct ParseState { + pub constructs: Constructs, /// List of codes. pub codes: Vec, /// Set of defined identifiers. @@ -20,8 +22,9 @@ pub struct ParseState { /// Turn a string of markdown into events. /// /// Passes the codes back so the compiler can access the source. -pub fn parse(value: &str) -> (Vec, ParseState) { +pub fn parse(value: &str, options: &Options) -> (Vec, ParseState) { let mut parse_state = ParseState { + constructs: options.constructs.clone(), codes: parse_codes(value), definitions: HashSet::new(), }; diff --git a/tests/attention.rs b/tests/attention.rs index 1d30dd4..9a3e2fe 100644 --- a/tests/attention.rs +++ b/tests/attention.rs @@ -1,14 +1,14 @@ extern crate micromark; -use micromark::{micromark, micromark_with_options, Options}; - -const DANGER: &Options = &Options { - allow_dangerous_html: true, - allow_dangerous_protocol: true, - default_line_ending: None, -}; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn attention() { + let danger = Options { + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..Options::default() + }; + // Rule 1. assert_eq!( micromark("*foo bar*"), @@ -764,25 +764,25 @@ fn attention() { ); assert_eq!( - micromark_with_options("*", DANGER), + micromark_with_options("*", &danger), "

*

", "should not end inside HTML" ); assert_eq!( - micromark_with_options("*", DANGER), + micromark_with_options("*", &danger), "

*

", "should not end emphasis inside HTML" ); assert_eq!( - micromark_with_options("**", DANGER), + micromark_with_options("**", &danger), "

**

", "should not end strong inside HTML (1)" ); assert_eq!( - micromark_with_options("__", DANGER), + micromark_with_options("__", &danger), "

__

", "should not end strong inside HTML (2)" ); @@ -811,10 +811,18 @@ fn attention() { "should not end strong emphasis inside autolinks (2)" ); - // To do: turning things off. - // assert_eq!( - // micromark("*a*", {extensions: [{disable: {null: ["attention"]}}]}), - // "

*a*

", - // "should support turning off attention" - // ); + assert_eq!( + micromark_with_options( + "*a*", + &Options { + constructs: Constructs { + attention: false, + ..Constructs::default() + }, + ..Options::default() + } + ), + "

*a*

", + "should support turning off attention" + ); } diff --git a/tests/autolink.rs b/tests/autolink.rs index 7396c7a..b6258e6 100644 --- a/tests/autolink.rs +++ b/tests/autolink.rs @@ -1,14 +1,14 @@ extern crate micromark; -use micromark::{micromark, micromark_with_options, Options}; - -const DANGER: &Options = &Options { - allow_dangerous_html: true, - allow_dangerous_protocol: true, - default_line_ending: None, -}; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn autolink() { + let danger = Options { + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..Options::default() + }; + assert_eq!( micromark(""), "

http://foo.bar.baz

", @@ -40,7 +40,7 @@ fn autolink() { ); assert_eq!( - micromark_with_options("", DANGER), + micromark_with_options("", &danger), "

a+b+c:d

", "should support protocol autolinks w/ incorrect URIs (1, danger)" ); @@ -52,7 +52,7 @@ fn autolink() { ); assert_eq!( - micromark_with_options("", DANGER), + micromark_with_options("", &danger), "

made-up-scheme://foo,bar

", "should support protocol autolinks w/ incorrect URIs (2, danger)" ); @@ -64,7 +64,7 @@ fn autolink() { ); assert_eq!( - micromark_with_options("", DANGER), + micromark_with_options("", &danger), "

localhost:5001/foo

", "should support protocol autolinks w/ incorrect URIs (4)" ); @@ -246,10 +246,18 @@ fn autolink() { "should not support a dash before a dot in email autolinks" ); - // To do: turning things off. - // assert_eq!( - // micromark("", {extensions: [{disable: {null: ["autolink"]}}]}), - // "

<a@b.co>

", - // "should support turning off autolinks" - // ); + assert_eq!( + micromark_with_options( + "", + &Options { + constructs: Constructs { + autolink: false, + ..Constructs::default() + }, + ..Options::default() + } + ), + "

<a@b.co>

", + "should support turning off autolinks" + ); } diff --git a/tests/block_quote.rs b/tests/block_quote.rs index 06bd49a..13af078 100644 --- a/tests/block_quote.rs +++ b/tests/block_quote.rs @@ -1,5 +1,5 @@ extern crate micromark; -use micromark::micromark; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn block_quote() { @@ -165,12 +165,18 @@ fn block_quote() { "should support 5 spaces for indented code, not 4" ); - // To do: turning things off. - // assert_eq!( - // micromark("> # a\n> b\n> c", { - // extensions: [{disable: {null: ["blockQuote"]}}] - // }), - // "

> # a\n> b\n> c

", - // "should support turning off block quotes" - // ); + assert_eq!( + micromark_with_options( + "> # a\n> b\n> c", + &Options { + constructs: Constructs { + block_quote: false, + ..Constructs::default() + }, + ..Options::default() + } + ), + "

> # a\n> b\n> c

", + "should support turning off block quotes" + ); } diff --git a/tests/character_escape.rs b/tests/character_escape.rs index d27f20a..8cd170d 100644 --- a/tests/character_escape.rs +++ b/tests/character_escape.rs @@ -1,14 +1,14 @@ extern crate micromark; -use micromark::{micromark, micromark_with_options, Options}; - -const DANGER: &Options = &Options { - allow_dangerous_html: true, - allow_dangerous_protocol: true, - default_line_ending: None, -}; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn character_escape() { + let danger = Options { + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..Options::default() + }; + assert_eq!( micromark( "\\!\\\"\\#\\$\\%\\&\\'\\(\\)\\*\\+\\,\\-\\.\\/\\:\\;\\<\\=\\>\\?\\@\\[\\\\\\]\\^\\_\\`\\{\\|\\}\\~" @@ -56,7 +56,7 @@ fn character_escape() { ); assert_eq!( - micromark_with_options("", DANGER), + micromark_with_options("", &danger), "", "should not escape in flow html" ); @@ -79,10 +79,18 @@ fn character_escape() { "should escape in fenced code info" ); - // To do: turning things off - // assert_eq!( - // micromark("\\> a", {extensions: [{disable: {null: ["characterEscape"]}}]}), - // "

\\> a

", - // "should support turning off character escapes" - // ); + assert_eq!( + micromark_with_options( + "\\> a", + &Options { + constructs: Constructs { + character_escape: false, + ..Constructs::default() + }, + ..Options::default() + } + ), + "

\\> a

", + "should support turning off character escapes" + ); } diff --git a/tests/character_reference.rs b/tests/character_reference.rs index ef2ba0d..c41f47d 100644 --- a/tests/character_reference.rs +++ b/tests/character_reference.rs @@ -1,11 +1,5 @@ extern crate micromark; -use micromark::{micromark, micromark_with_options, Options}; - -const DANGER: &Options = &Options { - allow_dangerous_html: true, - allow_dangerous_protocol: true, - default_line_ending: None, -}; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn character_reference() { @@ -50,7 +44,13 @@ fn character_reference() { ); assert_eq!( - micromark_with_options("
", DANGER), + micromark_with_options( + "", + &Options { + allow_dangerous_html: true, + ..Options::default() + } + ), "", "should not care about character references in html" ); @@ -188,12 +188,18 @@ fn character_reference() { "should not support the other characters inside a hexademical" ); - // To do: turning things off. - // assert_eq!( - // micromark("&", { - // extensions: [{disable: {null: ["characterReferences"]}}] - // }), - // "

&

", - // "should support turning off character references" - // ); + assert_eq!( + micromark_with_options( + "&", + &Options { + constructs: Constructs { + character_reference: false, + ..Constructs::default() + }, + ..Options::default() + } + ), + "

&amp;

", + "should support turning off character references" + ); } diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs index fa9ed5f..f251952 100644 --- a/tests/code_fenced.rs +++ b/tests/code_fenced.rs @@ -1,5 +1,5 @@ extern crate micromark; -use micromark::micromark; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn code_fenced() { @@ -250,10 +250,18 @@ fn code_fenced() { "should not support lazyness (3)" ); - // To do: turning things off. - // assert_eq!( - // micromark("```", {extensions: [{disable: {null: ["codeFenced"]}}]}), - // "

```

", - // "should support turning off code (fenced)" - // ); + assert_eq!( + micromark_with_options( + "```", + &Options { + constructs: Constructs { + code_fenced: false, + ..Constructs::default() + }, + ..Options::default() + } + ), + "

```

", + "should support turning off code (fenced)" + ); } diff --git a/tests/code_indented.rs b/tests/code_indented.rs index 6735954..cb316e7 100644 --- a/tests/code_indented.rs +++ b/tests/code_indented.rs @@ -1,5 +1,5 @@ extern crate micromark; -use micromark::micromark; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn code_indented() { @@ -117,75 +117,71 @@ fn code_indented() { "should not support lazyness (7)" ); - // To do: turning things off. - // assert_eq!( - // micromark(" a", {extensions: [{disable: {null: ["codeIndented"]}}]}), - // "

a

", - // "should support turning off code (indented, 1)" - // ); - - // assert_eq!( - // micromark("> a\n b", { - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "
\n

a\nb

\n
", - // "should support turning off code (indented, 2)" - // ); - - // assert_eq!( - // micromark("- a\n b", { - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "
    \n
  • a\nb
  • \n
", - // "should support turning off code (indented, 3)" - // ); - - // assert_eq!( - // micromark("- a\n - b", { - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "
    \n
  • a\n
      \n
    • b
    • \n
    \n
  • \n
", - // "should support turning off code (indented, 4)" - // ); - - // assert_eq!( - // micromark("- a\n - b", { - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "
    \n
  • a\n
      \n
    • b
    • \n
    \n
  • \n
", - // "should support turning off code (indented, 5)" - // ); - - // assert_eq!( - // micromark("```\na\n ```", { - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "
a\n
", - // "should support turning off code (indented, 6)" - // ); - - // assert_eq!( - // micromark("a ", { - // allowDangerousHtml: true, - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "

a

", - // "should support turning off code (indented, 7)" - // ); - - // assert_eq!( - // micromark("- Foo\n---", { - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "
    \n
  • Foo
  • \n
\n
", - // "should support turning off code (indented, 8)" - // ); - - // assert_eq!( - // micromark("- Foo\n ---", { - // extensions: [{disable: {null: ["codeIndented"]}}] - // }), - // "
    \n
  • \n

    Foo

    \n
  • \n
", - // "should support turning off code (indented, 9)" - // ); + let off = Options { + constructs: Constructs { + code_indented: false, + ..Constructs::default() + }, + ..Options::default() + }; + + assert_eq!( + micromark_with_options(" a", &off), + "

a

", + "should support turning off code (indented, 1)" + ); + + assert_eq!( + micromark_with_options("> a\n b", &off), + "
\n

a\nb

\n
", + "should support turning off code (indented, 2)" + ); + + assert_eq!( + micromark_with_options("- a\n b", &off), + "
    \n
  • a\nb
  • \n
", + "should support turning off code (indented, 3)" + ); + + assert_eq!( + micromark_with_options("- a\n - b", &off), + "
    \n
  • a\n
      \n
    • b
    • \n
    \n
  • \n
", + "should support turning off code (indented, 4)" + ); + + assert_eq!( + micromark_with_options("- a\n - b", &off), + "
    \n
  • a\n
      \n
    • b
    • \n
    \n
  • \n
", + "should support turning off code (indented, 5)" + ); + + assert_eq!( + micromark_with_options("```\na\n ```", &off), + "
a\n
", + "should support turning off code (indented, 6)" + ); + + assert_eq!( + micromark_with_options( + "a ", + &Options { + allow_dangerous_html: true, + ..off.clone() + } + ), + "

a

", + "should support turning off code (indented, 7)" + ); + + assert_eq!( + micromark_with_options("- Foo\n---", &off), + "
    \n
  • Foo
  • \n
\n
", + "should support turning off code (indented, 8)" + ); + + assert_eq!( + micromark_with_options("- Foo\n ---", &off), + "
    \n
  • \n

    Foo

    \n
  • \n
", + "should support turning off code (indented, 9)" + ); } diff --git a/tests/code_text.rs b/tests/code_text.rs index 054d8e2..834b831 100644 --- a/tests/code_text.rs +++ b/tests/code_text.rs @@ -1,14 +1,14 @@ extern crate micromark; -use micromark::{micromark, micromark_with_options, Options}; - -const DANGER: &Options = &Options { - allow_dangerous_html: true, - allow_dangerous_protocol: false, - default_line_ending: None, -}; +use micromark::{micromark, micromark_with_options, Constructs, Options}; #[test] fn code_text() { + let danger = Options { + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..Options::default() + }; + assert_eq!( micromark("`foo`"), "

foo

", @@ -106,7 +106,7 @@ fn code_text() { ); assert_eq!( - micromark_with_options("
`", DANGER), + micromark_with_options("`", &danger), "

`

", "should have same precedence as HTML (2)" ); @@ -154,10 +154,18 @@ fn code_text() { "should support an escaped initial grave accent" ); - // To do: turning things off. - // assert_eq!( - // micromark("`a`", {extensions: [{disable: {null: ["codeText"]}}]}), - // "

`a`

", - // "should support turning off code (text)" - // ); + assert_eq!( + micromark_with_options( + "`a`", + &Options { + constructs: Constructs { + code_text: false, + ..Constructs::default() + }, + ..Options::default() + } + ), + "

`a`

", + "should support turning off code (text)" + ); } diff --git a/tests/commonmark.rs b/tests/commonmark.rs index 59908a2..95871a4 100644 --- a/tests/commonmark.rs +++ b/tests/commonmark.rs @@ -6,18 +6,21 @@ extern crate micromark; use micromark::{micromark_with_options, Options}; -const DANGER: &Options = &Options { - allow_dangerous_html: true, - allow_dangerous_protocol: true, - default_line_ending: None, -}; - #[rustfmt::skip] #[test] fn commonmark() { + let danger = Options { + allow_dangerous_html: true, + allow_dangerous_protocol: true, + ..Options::default() + }; + assert_eq!( - micromark_with_options(r###" foo baz bim -"###, DANGER), + micromark_with_options( + r###" foo baz bim +"###, + &danger + ), r###"
foo	baz		bim
 
"###, @@ -25,8 +28,11 @@ fn commonmark() { ); assert_eq!( - micromark_with_options(r###" foo baz bim -"###, DANGER), + micromark_with_options( + r###" foo baz bim +"###, + &danger + ), r###"
foo	baz		bim
 
"###, @@ -34,9 +40,12 @@ fn commonmark() { ); assert_eq!( - micromark_with_options(r###" a a + micromark_with_options( + r###" a a ὐ a -"###, DANGER), +"###, + &danger + ), r###"
a	a
 ὐ	a
 
@@ -45,10 +54,13 @@ fn commonmark() { ); assert_eq!( - micromark_with_options(r###" - foo + micromark_with_options( + r###" - foo bar -"###, DANGER), +"###, + &danger + ), r###"