diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-29 17:15:17 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-29 17:15:17 +0200 |
commit | a056a7b3716bd4cc78e47e64f7d735c5bd5b82e6 (patch) | |
tree | 18395a3438441f5ddeca984fab0db8c9f8133a2c | |
parent | fa37356074c1bc3a0f74b6f6a22e038b7842ff4d (diff) | |
download | markdown-rs-a056a7b3716bd4cc78e47e64f7d735c5bd5b82e6.tar.gz markdown-rs-a056a7b3716bd4cc78e47e64f7d735c5bd5b82e6.tar.bz2 markdown-rs-a056a7b3716bd4cc78e47e64f7d735c5bd5b82e6.zip |
Fix a bunch of bugs with definitions, references
* Fix bug where omitting whitespace after `:` was not allowed; whitespace there is optional
* Fix bug where escapes in labels did not work due to typo
* Fix to prefer first definition
* Fix whitespace after definitions
* Fix matching by normalizing identifiers
* Fix references being output as data
Diffstat (limited to '')
-rw-r--r-- | readme.md | 6 | ||||
-rw-r--r-- | src/compiler.rs | 37 | ||||
-rw-r--r-- | src/construct/definition.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_label.rs | 2 | ||||
-rw-r--r-- | tests/definition.rs | 148 |
5 files changed, 92 insertions, 103 deletions
@@ -143,15 +143,15 @@ cargo doc --document-private-items #### Parse - [ ] (1) Parse initial and final space_or_tab of paragraphs (in text)\ - test (`code_indented`, `hard_break_escape`, `hard_break_trailing`, + test (`code_indented`, `definition`, `hard_break_escape`, `hard_break_trailing`, `heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`, `misc_tabs`, `thematic_break`) -- [ ] (3) Interrupting (html flow complete) +- [ ] (3) Interrupting (html flow complete, definition + code_indented) - [ ] (5) attention\ test (`character_reference`, `hard_break_escape`, `hard_break_trailing`, `heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\ - [ ] (8) block quote\ - test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`, + test (`code_fenced`, `definition`, `code_indented`, `heading_atx`, `heading_setext`, `html_flow`, `misc_default_line_ending`, `thematic_break`) - [ ] (8) list\ test (`character_reference`, `code_indented`, `heading_setext`, diff --git a/src/compiler.rs b/src/compiler.rs index 3dd6ae4..bb2359e 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -2,6 +2,7 @@ use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC}; use crate::construct::character_reference::Kind as CharacterReferenceKind; use crate::tokenizer::{Code, Event, EventType, TokenType}; +use crate::util::normalize_identifier::normalize_identifier; use crate::util::{ decode_character_reference::{decode_named, decode_numeric}, encode::encode, @@ -431,16 +432,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { TokenType::DefinitionDestinationString, on_enter_definition_destination_string, ); + enter_map.insert(TokenType::ReferenceString, on_enter_buffer); enter_map.insert(TokenType::DefinitionLabelString, on_enter_buffer); enter_map.insert(TokenType::DefinitionTitleString, on_enter_buffer); let mut exit_map: Map = HashMap::new(); exit_map.insert(TokenType::Label, on_exit_label); exit_map.insert(TokenType::LabelText, 
on_exit_label_text); - exit_map.insert( - TokenType::ReferenceString, - on_exit_reference_destination_string, - ); + exit_map.insert(TokenType::ReferenceString, on_exit_reference_string); exit_map.insert( TokenType::ResourceDestinationString, on_exit_resource_destination_string, @@ -525,11 +524,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { &exit_map }; - println!( - "handle {:?}:{:?} ({:?})", - event.event_type, event.token_type, index - ); - if let Some(func) = map.get(&event.token_type) { func(context, event); } @@ -561,8 +555,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { index += 1; } - println!("xxx: {:?}", definition_indices); - index = 0; let jump_default = (events.len(), events.len()); let mut definition_index = 0; @@ -572,12 +564,12 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { while index < events.len() { if index == jump.0 { - println!("jump {:?}", jump); index = jump.1 + 1; definition_index += 1; jump = definition_indices .get(definition_index) .unwrap_or(&jump_default); + context.slurp_one_line_ending = true; } else { handle(&mut context, index); index += 1; @@ -683,7 +675,9 @@ fn on_exit_label_text(context: &mut CompileContext, _event: &Event) { )); } -fn on_exit_reference_destination_string(context: &mut CompileContext, _event: &Event) { +fn on_exit_reference_string(context: &mut CompileContext, _event: &Event) { + // Drop stuff. 
+ context.resume(); let media = context.media_stack.last_mut().unwrap(); media.reference_id = Some(serialize( context.codes, @@ -720,7 +714,10 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) { // context.tags = is_in_image; let media = context.media_stack.pop().unwrap(); - let id = media.reference_id.or(media.label_id); + let id = media + .reference_id + .or(media.label_id) + .map(|id| normalize_identifier(&id)); let label = media.label.unwrap(); let definition = id.and_then(|id| context.definitions.get(&id)); let destination = if let Some(definition) = definition { @@ -734,8 +731,6 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) { &media.title }; - println!("media: {:?} {:?}", destination, title); - let destination = if let Some(destination) = destination { destination.clone() } else { @@ -1047,8 +1042,7 @@ fn on_exit_definition_label_string(context: &mut CompileContext, _event: &Event) // Discard label, use the source content instead. context.resume(); let definition = context.media_stack.last_mut().unwrap(); - // To do: put this on `reference_id` instead? 
- definition.label_id = Some(serialize( + definition.reference_id = Some(serialize( context.codes, &from_exit_event(context.events, context.index), false, @@ -1063,13 +1057,14 @@ fn on_exit_definition_title_string(context: &mut CompileContext, _event: &Event) fn on_exit_definition(context: &mut CompileContext, _event: &Event) { let definition = context.media_stack.pop().unwrap(); - let label_id = definition.label_id.unwrap(); + let reference_id = normalize_identifier(&definition.reference_id.unwrap()); let destination = definition.destination; let title = definition.title; context.resume(); + context .definitions - .insert(label_id, Definition { destination, title }); - context.slurp_one_line_ending = true; + .entry(reference_id) + .or_insert(Definition { destination, title }); } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 5e80a93..aca22a6 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -149,7 +149,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(TokenType::DefinitionMarker); ( State::Fn(Box::new( - tokenizer.go(space_or_tab_one_line_ending(), destination_before), + tokenizer.attempt_opt(space_or_tab_one_line_ending(), destination_before), )), None, ) diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 2e8e950..1e4d7f2 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -181,7 +181,7 @@ fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult info.size += 1; (State::Fn(Box::new(|t, c| label(t, c, info))), None) } - Code::Char('/') => { + Code::Char('\\') => { tokenizer.consume(code); info.size += 1; if !info.data { diff --git a/tests/definition.rs b/tests/definition.rs index c112a96..3edf687 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -27,12 +27,17 @@ fn definition() { "should support whitespace and line endings in definitions" ); - // To do: some bug. 
- // assert_eq!( - // micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"), - // "<p><a href=\"my_(url)\" title=\"title (with parens)\">Foo*bar]</a></p>", - // "should support complex definitions (1)" - // ); + assert_eq!( + micromark("[a]:b 'c'\n\n[a]"), + "<p><a href=\"b\" title=\"c\">a</a></p>", + "should support no whitespace after `:` in definitions" + ); + + assert_eq!( + micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"), + "<p><a href=\"my_(url)\" title=\"title (with parens)\">Foo*bar]</a></p>", + "should support complex definitions (1)" + ); assert_eq!( micromark("[Foo bar]:\n<my url>\n'title'\n\n[Foo bar]"), @@ -82,33 +87,29 @@ fn definition() { "should support character escapes in destinations and titles" ); - // Some bug. - // assert_eq!( - // micromark("[foo]\n\n[foo]: url"), - // "<p><a href=\"url\">foo</a></p>\n", - // "should support a link before a definition" - // ); + assert_eq!( + micromark("[foo]\n\n[foo]: url"), + "<p><a href=\"url\">foo</a></p>\n", + "should support a link before a definition" + ); - // Some bug. - // assert_eq!( - // micromark("[foo]: first\n[foo]: second\n\n[foo]"), - // "<p><a href=\"first\">foo</a></p>", - // "should match w/ the first definition" - // ); + assert_eq!( + micromark("[foo]: first\n[foo]: second\n\n[foo]"), + "<p><a href=\"first\">foo</a></p>", + "should match w/ the first definition" + ); - // Some bug. - // assert_eq!( - // micromark("[FOO]: /url\n\n[Foo]"), - // "<p><a href=\"/url\">Foo</a></p>", - // "should match w/ case-insensitive (1)" - // ); + assert_eq!( + micromark("[FOO]: /url\n\n[Foo]"), + "<p><a href=\"/url\">Foo</a></p>", + "should match w/ case-insensitive (1)" + ); - // Some bug. 
- // assert_eq!( - // micromark("[ΑΓΩ]: /φου\n\n[αγω]"), - // "<p><a href=\"/%CF%86%CE%BF%CF%85\">αγω</a></p>", - // "should match w/ case-insensitive (2)" - // ); + assert_eq!( + micromark("[ΑΓΩ]: /φου\n\n[αγω]"), + "<p><a href=\"/%CF%86%CE%BF%CF%85\">αγω</a></p>", + "should match w/ case-insensitive (2)" + ); assert_eq!( micromark("[foo]: /url"), @@ -183,14 +184,13 @@ fn definition() { "should not support setext heading underlines after definitions" ); - // To do: some bug. - // assert_eq!( - // micromark( - // "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]" - // ), - // "<p><a href=\"/foo-url\" title=\"foo\">foo</a>,\n<a href=\"/bar-url\" title=\"bar\">bar</a>,\n<a href=\"/baz-url\">baz</a></p>", - // "should support definitions after definitions" - // ); + assert_eq!( + micromark( + "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]" + ), + "<p><a href=\"/foo-url\" title=\"foo\">foo</a>,\n<a href=\"/bar-url\" title=\"bar\">bar</a>,\n<a href=\"/baz-url\">baz</a></p>", + "should support definitions after definitions" + ); // To do: block quote. // assert_eq!( @@ -200,12 +200,11 @@ fn definition() { // ); // Extra - // To do: some bug. - // assert_eq!( - // micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."), - // "<p>Link: <a href=\"example.com\">[+]</a>.</p>", - // "should match w/ character escapes" - // ); + assert_eq!( + micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."), + "<p>Link: <a href=\"example.com\">[+]</a>.</p>", + "should match w/ character escapes" + ); assert_eq!( micromark("[x]: \\\" \\(\\)\\\"\n\n[x]"), @@ -261,25 +260,23 @@ fn definition() { "should support character escapes at the start of a title" ); - // To do: some bug. 
- // assert_eq!( - // micromark("[x]: a \"\\\"\"\n\n[x]"), - // "<p><a href=\"a\" title=\"\"\">x</a></p>", - // "should support double quoted titles" - // ); + assert_eq!( + micromark("[x]: a \"'\"\n\n[x]"), + "<p><a href=\"a\" title=\"'\">x</a></p>", + "should support double quoted titles" + ); assert_eq!( micromark("[x]: a '\"'\n\n[x]"), "<p><a href=\"a\" title=\""\">x</a></p>", - "should support double quoted titles" + "should support single quoted titles" ); - // To do: some bug. - // assert_eq!( - // micromark("[x]: a (\"\")\n\n[x]"), - // "<p><a href=\"a\" title=\""\"\">x</a></p>", - // "should support paren enclosed titles" - // ); + assert_eq!( + micromark("[x]: a (\"')\n\n[x]"), + "<p><a href=\"a\" title=\""'\">x</a></p>", + "should support paren enclosed titles" + ); assert_eq!( micromark("[x]: a(()\n\n[x]"), @@ -305,12 +302,11 @@ fn definition() { "should support trailing whitespace after a destination" ); - // To do: some bug. - // assert_eq!( - // micromark("[x]: a \"\"X \t\n\n[x]"), - // "<p><a href=\"a\" title=\"\"X>x</a></p>", - // "should support trailing whitespace after a destination" - // ); + assert_eq!( + micromark("[x]: a \"X\" \t\n\n[x]"), + "<p><a href=\"a\" title=\"X\">x</a></p>", + "should support trailing whitespace after a title" + ); assert_eq!( micromark("[&©&]: example.com/&©& \"&©&\"\n\n[&©&]"), @@ -331,12 +327,11 @@ fn definition() { ); // See: <https://github.com/commonmark/commonmark.js/issues/192> - // To do: some bug. - // assert_eq!( - // micromark("[x]: <> \"\"\n[][x]"), - // "<p><a href=\"\"></a></p>", - // "should ignore an empty title" - // ); + assert_eq!( + micromark("[x]: <> \"\"\n[][x]"), + "<p><a href=\"\"></a></p>", + "should ignore an empty title" + ); assert_eq!( micromark_with_options("[a]\n\n[a]: <b<c>", DANGER), @@ -362,12 +357,11 @@ fn definition() { "should not support an extra right paren (`)`) in a raw destination" ); - // To do: some bug. 
- // assert_eq!( - // micromark("[a]\n\n[a]: a(1(2(3(4()))))b"), - // "<p><a href=\"a(1(2(3(4()))))b\">a</a></p>\n", - // "should support 4 or more sets of parens in a raw destination (link resources don’t)" - // ); + assert_eq!( + micromark("[a]\n\n[a]: a(1(2(3(4()))))b"), + "<p><a href=\"a(1(2(3(4()))))b\">a</a></p>\n", + "should support 4 or more sets of parens in a raw destination (link resources don’t)" + ); assert_eq!( micromark("[a]\n\n[a]: aaa)"), @@ -381,14 +375,14 @@ fn definition() { "should not support a final (unbalanced) right paren in a raw destination “before” a title" ); - // To do: some bug. + // To do: do not let code (indented) interrupt definitions. // assert_eq!( // micromark(" [a]: b \"c\"\n [d]: e\n [f]: g \"h\"\n [i]: j\n\t[k]: l (m)\n\t n [k] o"), // "<p>n <a href=\"l\" title=\"m\">k</a> o</p>", // "should support subsequent indented definitions" // ); - // To do: some bug. + // To do: trim whitespace in paragraphs. // assert_eq!( // micromark("[a\n b]: c\n\n[a\n b]"), // "<p><a href=\"c\">a\nb</a></p>", |