diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-30 16:35:13 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-30 16:35:13 +0200 |
commit | be62b2e29a61774100f676cfdd9b100cadf1905f (patch) | |
tree | 4349e259fc0150526dc32242b92d85218091fca5 | |
parent | 13588776d65601a41ddfce85f618e8aaa55951cc (diff) | |
download | markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.tar.gz markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.tar.bz2 markdown-rs-be62b2e29a61774100f676cfdd9b100cadf1905f.zip |
Add support for trimming whitespace around string, text
This commit introduces trimming initial and final whitespace around the
whole string or text, or around line endings inside that string or text.
* Add `register_resolver_before`, to run resolvers earlier than others,
used for labels
* Add resolver to merge adjacent `data` events: `data` is the most frequent
token that occurs, and several of them can occur right after each other.
In `micromark-js` this sped up parsing a lot
* Fix a bug where a token consisting of only a virtual space was rejected as an empty token
* Refactor to enable all turned off whitespace tests
-rw-r--r-- | readme.md | 5 | ||||
-rw-r--r-- | src/construct/label_end.rs | 2 | ||||
-rw-r--r-- | src/construct/label_start_image.rs | 2 | ||||
-rw-r--r-- | src/construct/label_start_link.rs | 2 | ||||
-rw-r--r-- | src/construct/mod.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_data.rs | 51 | ||||
-rw-r--r-- | src/construct/partial_whitespace.rs | 56 | ||||
-rw-r--r-- | src/content/string.rs | 17 | ||||
-rw-r--r-- | src/content/text.rs | 22 | ||||
-rw-r--r-- | src/tokenizer.rs | 9 | ||||
-rw-r--r-- | tests/code_indented.rs | 11 | ||||
-rw-r--r-- | tests/definition.rs | 11 | ||||
-rw-r--r-- | tests/hard_break_escape.rs | 11 | ||||
-rw-r--r-- | tests/hard_break_trailing.rs | 74 | ||||
-rw-r--r-- | tests/heading_atx.rs | 11 | ||||
-rw-r--r-- | tests/heading_setext.rs | 35 | ||||
-rw-r--r-- | tests/html_flow.rs | 11 | ||||
-rw-r--r-- | tests/image.rs | 11 | ||||
-rw-r--r-- | tests/link_reference.rs | 24 | ||||
-rw-r--r-- | tests/link_resource.rs | 11 | ||||
-rw-r--r-- | tests/misc_soft_break.rs | 11 | ||||
-rw-r--r-- | tests/misc_tabs.rs | 30 | ||||
-rw-r--r-- | tests/thematic_break.rs | 11 |
23 files changed, 264 insertions, 166 deletions
@@ -138,10 +138,6 @@ cargo doc --document-private-items #### Parse -- [ ] (1) Parse initial and final space_or_tab of paragraphs (in text)\ - test (`code_indented`, `definition`, `hard_break_escape`, `hard_break_trailing`, - `heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`, - `misc_tabs`, `thematic_break`) - [ ] (3) Interrupting (html flow complete, definition + code_indented) - [ ] (5) attention\ test (`character_reference`, `hard_break_escape`, `hard_break_trailing`, @@ -281,3 +277,4 @@ important. - [x] (1) Add docs to Image, Link, and other media tokens - [x] (1) Add docs on resolver, clean feed - [x] (3) Clean compiler +- [x] (1) Parse initial and final space_or_tab of paragraphs (in string, text) diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 888355b..0da12b8 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -510,7 +510,7 @@ fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { info.media.end.1 = tokenizer.events.len() - 1; tokenizer.media_list.push(info.media); - tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); (State::Ok, Some(vec![code])) } diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 7725334..a45205a 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -67,7 +67,7 @@ pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { balanced: false, inactive: false, }); - tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); (State::Ok, None) } _ => (State::Nok, None), diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index 46d7c9c..6c4d7ae 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ 
-49,7 +49,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { balanced: false, inactive: false, }); - tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); (State::Ok, None) } _ => (State::Nok, None), diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 8565b2f..9e3dfb0 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -44,6 +44,7 @@ //! * [label][partial_label] //! * [space or tab][partial_space_or_tab] //! * [title][partial_title] +//! * [whitespace][partial_whitespace] //! //! Each construct maintained here is explained with a BNF diagram. //! For example, the docs for [character escape][character_escape] contain: @@ -83,4 +84,5 @@ pub mod partial_destination; pub mod partial_label; pub mod partial_space_or_tab; pub mod partial_title; +pub mod partial_whitespace; pub mod thematic_break; diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index d83787a..9f99570 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -8,7 +8,8 @@ // To do: pass token types in? -use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer}; +use crate::util::edit_map::EditMap; /// At the beginning of data. 
/// @@ -39,7 +40,10 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnRe tokenizer.exit(TokenType::LineEnding); (State::Fn(Box::new(|t, c| at_break(t, c, stop))), None) } - _ if stop.contains(&code) => (State::Ok, Some(vec![code])), + _ if stop.contains(&code) => { + tokenizer.register_resolver("data".to_string(), Box::new(resolve)); + (State::Ok, Some(vec![code])) + } _ => { tokenizer.enter(TokenType::Data); data(tokenizer, code, stop) @@ -67,3 +71,46 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult (State::Fn(Box::new(|t, c| data(t, c, stop))), None) } } + +/// Merge adjacent data events. +pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { + let mut edit_map = EditMap::new(); + let len = tokenizer.events.len(); + let mut index = 0; + + // Loop through events and merge adjacent data events. + while index < len { + let event = &tokenizer.events[index]; + + if event.event_type == EventType::Enter && event.token_type == TokenType::Data { + let exit_index = index + 1; + let mut exit_far_index = exit_index; + + // Find multiple `data` events. + while exit_far_index + 1 < len + && tokenizer.events[exit_far_index + 1].token_type == TokenType::Data + { + exit_far_index += 2; + } + + if exit_far_index > exit_index { + edit_map.add(exit_index, exit_far_index - exit_index, vec![]); + + // Change positional info. + let exit_far = &tokenizer.events[exit_far_index]; + let point_end = exit_far.point.clone(); + let index_end = exit_far.index; + let exit = &mut tokenizer.events[exit_index]; + exit.point = point_end; + exit.index = index_end; + index = exit_far_index; + + continue; + } + } + + index += 1; + } + + edit_map.consume(&mut tokenizer.events) +} diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs new file mode 100644 index 0000000..9a7a54d --- /dev/null +++ b/src/construct/partial_whitespace.rs @@ -0,0 +1,56 @@ +//! 
Trailing whitespace occurs in [string][] and [text][]. +//! +//! It occurs at the start or end of the whole, or around line endings. +//! This whitespace is ignored +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: the start and end here count as an eol. +//! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab +//! ``` +//! +//! ## References +//! +//! * [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js) +//! +//! [string]: crate::content::string +//! [text]: crate::content::text + +use super::partial_space_or_tab::space_or_tab; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +/// Parse initial or final whitespace. +pub fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.go( + // Nothing if there’s no whitespace. + space_or_tab(), + if matches!( + tokenizer.previous, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { + // If there’s whitespace, and we were at an eol/eof, `ok` + ok + } else { + // If there’s whitespace, and we were not at an eol/eof, there must be one here. + at_eol + }, + )(tokenizer, code) +} + +/// After whitespace, at an eol/eof. +fn at_eol(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { + ok(tokenizer, code) + } else { + (State::Nok, None) + } +} + +/// Fine. 
+fn ok(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + (State::Ok, Some(vec![code])) +} diff --git a/src/content/string.rs b/src/content/string.rs index 53e88b1..cc8ee53 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -14,13 +14,16 @@ use crate::construct::{ character_escape::start as character_escape, character_reference::start as character_reference, - partial_data::start as data, + partial_data::start as data, partial_whitespace::whitespace, }; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; -const MARKERS: [Code; 2] = [ - Code::Char('&'), // `character_reference` - Code::Char('\\'), // `character_escape` +const MARKERS: [Code; 5] = [ + Code::VirtualSpace, // `whitespace` + Code::Char('\t'), // `whitespace` + Code::Char(' '), // `whitespace` + Code::Char('&'), // `character_reference` + Code::Char('\\'), // `character_escape` ]; /// Before string. @@ -34,7 +37,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Ok, None), _ => tokenizer.attempt_n( - vec![Box::new(character_reference), Box::new(character_escape)], + vec![ + Box::new(character_reference), + Box::new(character_escape), + Box::new(whitespace), + ], |ok| Box::new(if ok { start } else { before_data }), )(tokenizer, code), } diff --git a/src/content/text.rs b/src/content/text.rs index 183072e..c3f4e1b 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -24,18 +24,21 @@ use crate::construct::{ hard_break_trailing::start as hard_break_trailing, html_text::start as html_text, label_end::start as label_end, label_start_image::start as label_start_image, label_start_link::start as label_start_link, partial_data::start as data, + partial_whitespace::whitespace, }; use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; -const MARKERS: [Code; 8] = [ - Code::Char(' '), // `hard_break_trailing` - Code::Char('!'), // `label_start_image` - Code::Char('&'), // `character_reference` - 
Code::Char('<'), // `autolink`, `html_text` - Code::Char('['), // `label_start_link` - Code::Char('\\'), // `character_escape`, `hard_break_escape` - Code::Char(']'), // `label_end` - Code::Char('`'), // `code_text` +const MARKERS: [Code; 10] = [ + Code::VirtualSpace, // `whitespace` + Code::Char('\t'), // `whitespace` + Code::Char(' '), // `hard_break_trailing`, `whitespace` + Code::Char('!'), // `label_start_image` + Code::Char('&'), // `character_reference` + Code::Char('<'), // `autolink`, `html_text` + Code::Char('['), // `label_start_link` + Code::Char('\\'), // `character_escape`, `hard_break_escape` + Code::Char(']'), // `label_end` + Code::Char('`'), // `code_text` ]; /// Before text. @@ -62,6 +65,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { Box::new(label_end), Box::new(label_start_image), Box::new(label_start_link), + Box::new(whitespace), ], |ok| Box::new(if ok { start } else { before_data }), )(tokenizer, code), diff --git a/src/tokenizer.rs b/src/tokenizer.rs index fe69366..817c1de 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1796,6 +1796,13 @@ impl<'a> Tokenizer<'a> { } } + pub fn register_resolver_before(&mut self, id: String, resolver: Box<Resolver>) { + if !self.resolver_ids.contains(&id) { + self.resolver_ids.push(id); + self.resolvers.insert(0, resolver); + } + } + /// Prepare for a next code to get consumed. 
fn expect(&mut self, code: Code) { assert!(self.consumed, "expected previous character to be consumed"); @@ -1901,7 +1908,7 @@ impl<'a> Tokenizer<'a> { let point = self.point.clone(); assert!( - current_token != previous.token_type || previous.point != point, + current_token != previous.token_type || previous.index != self.index, "expected non-empty token" ); diff --git a/tests/code_indented.rs b/tests/code_indented.rs index a7afb21..0190497 100644 --- a/tests/code_indented.rs +++ b/tests/code_indented.rs @@ -40,12 +40,11 @@ fn code_indented() { "should support blank lines in indented code (3)" ); - // To do: trimming paragraphs. - // assert_eq!( - // micromark("Foo\n bar"), - // "<p>Foo\nbar</p>", - // "should not support interrupting paragraphs" - // ); + assert_eq!( + micromark("Foo\n bar"), + "<p>Foo\nbar</p>", + "should not support interrupting paragraphs" + ); assert_eq!( micromark(" foo\nbar"), diff --git a/tests/definition.rs b/tests/definition.rs index 3edf687..ba4e384 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -382,12 +382,11 @@ fn definition() { // "should support subsequent indented definitions" // ); - // To do: trim whitespace in paragraphs. - // assert_eq!( - // micromark("[a\n b]: c\n\n[a\n b]"), - // "<p><a href=\"c\">a\nb</a></p>", - // "should support line prefixes in definition labels" - // ); + assert_eq!( + micromark("[a\n b]: c\n\n[a\n b]"), + "<p><a href=\"c\">a\nb</a></p>", + "should support line prefixes in definition labels" + ); assert_eq!( micromark("[a]: )\n\n[a]"), diff --git a/tests/hard_break_escape.rs b/tests/hard_break_escape.rs index 2e3a3ba..c4f6f1d 100644 --- a/tests/hard_break_escape.rs +++ b/tests/hard_break_escape.rs @@ -9,12 +9,11 @@ fn hard_break_escape() { "should support a backslash to form a hard break" ); - // To do: trimming whitespace in paragraphs. 
- // assert_eq!( - // micromark("foo\\\n bar"), - // "<p>foo<br />\nbar</p>", - // "should support leading spaces after an escape hard break" - // ); + assert_eq!( + micromark("foo\\\n bar"), + "<p>foo<br />\nbar</p>", + "should support leading spaces after an escape hard break" + ); // To do: attention. // assert_eq!( diff --git a/tests/hard_break_trailing.rs b/tests/hard_break_trailing.rs index 6c29020..0dbbbdb 100644 --- a/tests/hard_break_trailing.rs +++ b/tests/hard_break_trailing.rs @@ -15,12 +15,11 @@ fn hard_break_trailing() { "should support multiple trailing spaces" ); - // To do: trimming whitespace in paragraphs. - // assert_eq!( - // micromark("foo \n bar"), - // "<p>foo<br />\nbar</p>", - // "should support leading spaces after a trailing hard break" - // ); + assert_eq!( + micromark("foo \n bar"), + "<p>foo<br />\nbar</p>", + "should support leading spaces after a trailing hard break" + ); // To do: attention. // assert_eq!( @@ -42,12 +41,11 @@ fn hard_break_trailing() { "should not support trailing hard breaks in code" ); - // To do: trimming whitespace in paragraphs. - // assert_eq!( - // micromark("foo "), - // "<p>foo</p>", - // "should not support trailing hard breaks at the end of a paragraph" - // ); + assert_eq!( + micromark("foo "), + "<p>foo</p>", + "should not support trailing hard breaks at the end of a paragraph" + ); assert_eq!( micromark("### foo "), @@ -55,26 +53,23 @@ fn hard_break_trailing() { "should not support trailing hard breaks at the end of a heading" ); - // To do: trimming whitespace in paragraphs. - // assert_eq!( - // micromark("aaa \t\nbb"), - // "<p>aaa\nbb</p>", - // "should support a mixed line suffix (1)" - // ); + assert_eq!( + micromark("aaa \t\nbb"), + "<p>aaa\nbb</p>", + "should support a mixed line suffix (1)" + ); - // To do: trimming whitespace in paragraphs. 
- // assert_eq!( - // micromark("aaa\t \nbb"), - // "<p>aaa\nbb</p>", - // "should support a mixed line suffix (2)" - // ); + assert_eq!( + micromark("aaa\t \nbb"), + "<p>aaa\nbb</p>", + "should support a mixed line suffix (2)" + ); - // To do: trimming whitespace in paragraphs. - // assert_eq!( - // micromark("aaa \t \nbb"), - // "<p>aaa\nbb</p>", - // "should support a mixed line suffix (3)" - // ); + assert_eq!( + micromark("aaa \t \nbb"), + "<p>aaa\nbb</p>", + "should support a mixed line suffix (3)" + ); assert_eq!( micromark("aaa\0 \nbb"), @@ -82,12 +77,11 @@ fn hard_break_trailing() { "should support a hard break after a replacement character" ); - // To do: trimming whitespace in paragraphs. - // assert_eq!( - // micromark("aaa\0\t\nbb"), - // "<p>aaa�\nbb</p>", - // "should support a line suffix after a replacement character" - // ); + assert_eq!( + micromark("aaa\0\t\nbb"), + "<p>aaa�\nbb</p>", + "should support a line suffix after a replacement character" + ); // To do: attention. // assert_eq!( @@ -96,28 +90,28 @@ fn hard_break_trailing() { // "should support a hard break after a span" // ); - // To do: attention, trimming whitespace in paragraphs. + // To do: attention. // assert_eq!( // micromark("*a*\t\nbb"), // "<p><em>a</em>\nbb</p>", // "should support a line suffix after a span" // ); - // To do: attention, trimming whitespace in paragraphs. + // To do: attention. // assert_eq!( // micromark("*a* \t\nbb"), // "<p><em>a</em>\nbb</p>", // "should support a mixed line suffix after a span (1)" // ); - // To do: attention, trimming whitespace in paragraphs. + // To do: attention. // assert_eq!( // micromark("*a*\t \nbb"), // "<p><em>a</em>\nbb</p>", // "should support a mixed line suffix after a span (2)" // ); - // To do: attention, trimming whitespace in paragraphs. + // To do: attention. 
// assert_eq!( // micromark("*a* \t \nbb"), // "<p><em>a</em>\nbb</p>", diff --git a/tests/heading_atx.rs b/tests/heading_atx.rs index 67351ec..2548056 100644 --- a/tests/heading_atx.rs +++ b/tests/heading_atx.rs @@ -99,12 +99,11 @@ fn heading_atx() { "should not support four initial spaces" ); - // To do: trimming paragraphs. - // assert_eq!( - // micromark("foo\n # bar"), - // "<p>foo\n# bar</p>", - // "should not support four initial spaces when interrupting" - // ); + assert_eq!( + micromark("foo\n # bar"), + "<p>foo\n# bar</p>", + "should not support four initial spaces when interrupting" + ); assert_eq!( micromark("## foo ##"), diff --git a/tests/heading_setext.rs b/tests/heading_setext.rs index ecf22a8..e7ee9ff 100644 --- a/tests/heading_setext.rs +++ b/tests/heading_setext.rs @@ -24,7 +24,7 @@ fn heading_setext() { // "should support line endings in setext headings" // ); - // To do: attention, trim. + // To do: attention. // assert_eq!( // micromark(" Foo *bar\nbaz*\t\n===="), // "<h1>Foo <em>bar\nbaz</em></h1>", @@ -85,19 +85,17 @@ fn heading_setext() { "should support whitespace before underline" ); - // To do: trim paragraphs. - // assert_eq!( - // micromark("Foo\n ="), - // "<p>Foo\n=</p>", - // "should not support too much whitespace before underline (1)" - // ); + assert_eq!( + micromark("Foo\n ="), + "<p>Foo\n=</p>", + "should not support too much whitespace before underline (1)" + ); - // To do: trim paragraphs. - // assert_eq!( - // micromark("Foo\n\t="), - // "<p>Foo\n=</p>", - // "should not support too much whitespace before underline (2)" - // ); + assert_eq!( + micromark("Foo\n\t="), + "<p>Foo\n=</p>", + "should not support too much whitespace before underline (2)" + ); assert_eq!( micromark("Foo\n= ="), @@ -111,12 +109,11 @@ fn heading_setext() { "should not support whitespace in the underline (2)" ); - // To do: trim setext. 
- // assert_eq!( - // micromark("Foo \n-----"), - // "<h2>Foo</h2>", - // "should not support a hard break w/ spaces at the end" - // ); + assert_eq!( + micromark("Foo \n-----"), + "<h2>Foo</h2>", + "should not support a hard break w/ spaces at the end" + ); assert_eq!( micromark("Foo\\\n-----"), diff --git a/tests/html_flow.rs b/tests/html_flow.rs index d942642..455c5b8 100644 --- a/tests/html_flow.rs +++ b/tests/html_flow.rs @@ -814,12 +814,11 @@ fn html_flow_7_complete() { "should not support a line ending directly after a tag name" ); - // To do: trimming paragraphs. - // assert_eq!( - // micromark_with_options("<x ", DANGER), - // "<p><x</p>", - // "should not support an eof after a space directly after a tag name" - // ); + assert_eq!( + micromark_with_options("<x ", DANGER), + "<p><x</p>", + "should not support an eof after a space directly after a tag name" + ); assert_eq!( micromark_with_options("<x/", DANGER), diff --git a/tests/image.rs b/tests/image.rs index 68b9717..6db6d75 100644 --- a/tests/image.rs +++ b/tests/image.rs @@ -102,12 +102,11 @@ fn image() { "should support case-insensitive labels" ); - // To do: trim paragraphs. 
- // assert_eq!( - // micromark("[foo]: /url \"title\"\n\n![foo] \n[]"), - // "<p><img src=\"/url\" alt=\"foo\" title=\"title\" />\n[]</p>", - // "should not support whitespace between sets of brackets" - // ); + assert_eq!( + micromark("[foo]: /url \"title\"\n\n![foo] \n[]"), + "<p><img src=\"/url\" alt=\"foo\" title=\"title\" />\n[]</p>", + "should not support whitespace between sets of brackets" + ); assert_eq!( micromark("[foo]: /url \"title\"\n\n![foo]"), diff --git a/tests/link_reference.rs b/tests/link_reference.rs index 0904995..372bea5 100644 --- a/tests/link_reference.rs +++ b/tests/link_reference.rs @@ -66,7 +66,7 @@ fn link_reference() { ); assert_eq!( - micromark_with_options("[ref]: /uri\n\n[foo <bar attr=\"][ref]\">", &DANGER), + micromark_with_options("[ref]: /uri\n\n[foo <bar attr=\"][ref]\">", DANGER), "<p>[foo <bar attr=\"][ref]\"></p>", "should prefer HTML over link references" ); @@ -161,12 +161,11 @@ fn link_reference() { "should not support empty references" ); - // To do: trimming whitespace. - // assert_eq!( - // micromark("[\n ]: /uri\n\n[\n ]"), - // "<p>[\n]: /uri</p>\n<p>[\n]</p>", - // "should not support blank references" - // ); + assert_eq!( + micromark("[\n ]: /uri\n\n[\n ]"), + "<p>[\n]: /uri</p>\n<p>[\n]</p>", + "should not support blank references" + ); assert_eq!( micromark("[foo]: /url \"title\"\n\n[foo][]"), @@ -187,12 +186,11 @@ fn link_reference() { "should match references to definitions case-insensitively" ); - // To do: trimming whitespace. 
- // assert_eq!( - // micromark("[foo]: /url \"title\"\n\n[foo] \n[]"), - // "<p><a href=\"/url\" title=\"title\">foo</a>\n[]</p>", - // "should not support whitespace between label and collaped reference" - // ); + assert_eq!( + micromark("[foo]: /url \"title\"\n\n[foo] \n[]"), + "<p><a href=\"/url\" title=\"title\">foo</a>\n[]</p>", + "should not support whitespace between label and collaped reference" + ); assert_eq!( micromark("[foo]: /url \"title\"\n\n[foo]"), diff --git a/tests/link_resource.rs b/tests/link_resource.rs index d75736e..7761569 100644 --- a/tests/link_resource.rs +++ b/tests/link_resource.rs @@ -443,12 +443,11 @@ fn link_resource() { "should not support 33 or more sets of parens" ); - // To do: trim whitespace in string? - // assert_eq!( - // micromark("[a](b \"\n c\")"), - // "<p><a href=\"b\" title=\"\nc\">a</a></p>", - // "should support an eol at the start of a title" - // ); + assert_eq!( + micromark("[a](b \"\n c\")"), + "<p><a href=\"b\" title=\"\nc\">a</a></p>", + "should support an eol at the start of a title" + ); assert_eq!( micromark("[a](b( \"c\")"), diff --git a/tests/misc_soft_break.rs b/tests/misc_soft_break.rs index 1704ec2..50dedc1 100644 --- a/tests/misc_soft_break.rs +++ b/tests/misc_soft_break.rs @@ -9,10 +9,9 @@ fn soft_break() { "should support line endings" ); - // To do: trim whitespace. - // assert_eq!( - // micromark("foo \n baz"), - // "<p>foo\nbaz</p>", - // "should trim spaces around line endings" - // ); + assert_eq!( + micromark("foo \n baz"), + "<p>foo\nbaz</p>", + "should trim spaces around line endings" + ); } diff --git a/tests/misc_tabs.rs b/tests/misc_tabs.rs index 568172e..e82738d 100644 --- a/tests/misc_tabs.rs +++ b/tests/misc_tabs.rs @@ -204,22 +204,20 @@ fn tabs_text() { "should support a space starting, and a tab ending, code" ); - // To do: trim trailing whitespace. - // // Note: CM does not strip it in this case. 
- // // However, that should be a bug there: makes more sense to remove it like - // // trailing spaces. - // assert_eq!( - // micromark("x\t\ny"), - // "<p>x\ny</p>", - // "should support a trailing tab at a line ending in a paragraph" - // ); - - // To do: trim trailing whitespace. - // assert_eq!( - // micromark("x\n\ty"), - // "<p>x\ny</p>", - // "should support an initial tab after a line ending in a paragraph" - // ); + // Note: CM does not strip it in this case. + // However, that should be a bug there: makes more sense to remove it like + // trailing spaces. + assert_eq!( + micromark("x\t\ny"), + "<p>x\ny</p>", + "should support a trailing tab at a line ending in a paragraph" + ); + + assert_eq!( + micromark("x\n\ty"), + "<p>x\ny</p>", + "should support an initial tab after a line ending in a paragraph" + ); assert_eq!( micromark("x[\ty](z)"), diff --git a/tests/thematic_break.rs b/tests/thematic_break.rs index cbc84e0..e71ae22 100644 --- a/tests/thematic_break.rs +++ b/tests/thematic_break.rs @@ -63,12 +63,11 @@ fn thematic_break() { "should not support thematic breaks w/ 4 spaces" ); - // To do: trimming paragraphs. - // assert_eq!( - // micromark("Foo\n ***"), - // "<p>Foo\n***</p>", - // "should not support thematic breaks w/ 4 spaces as paragraph continuation" - // ); + assert_eq!( + micromark("Foo\n ***"), + "<p>Foo\n***</p>", + "should not support thematic breaks w/ 4 spaces as paragraph continuation" + ); assert_eq!( micromark("_____________________________________"), |