From a056a7b3716bd4cc78e47e64f7d735c5bd5b82e6 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Wed, 29 Jun 2022 17:15:17 +0200 Subject: Fix a bunch of bugs with definitions, references * Fix bug where whitespace after `:` was not allowed, it is * Fix bug where escapes in labels did not work due to typo * Fix to prefer first definition * Fix whitespace after definitions * Fix matching by adding normalizing * Fix reference from being output as data --- readme.md | 6 +- src/compiler.rs | 37 +++++------ src/construct/definition.rs | 2 +- src/construct/partial_label.rs | 2 +- tests/definition.rs | 148 ++++++++++++++++++++--------------------- 5 files changed, 92 insertions(+), 103 deletions(-) diff --git a/readme.md b/readme.md index 16f81d9..7c70905 100644 --- a/readme.md +++ b/readme.md @@ -143,15 +143,15 @@ cargo doc --document-private-items #### Parse - [ ] (1) Parse initial and final space_or_tab of paragraphs (in text)\ - test (`code_indented`, `hard_break_escape`, `hard_break_trailing`, + test (`code_indented`, `definition`, `hard_break_escape`, `hard_break_trailing`, `heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`, `misc_tabs`, `thematic_break`) -- [ ] (3) Interrupting (html flow complete) +- [ ] (3) Interrupting (html flow complete, definition + code_indented) - [ ] (5) attention\ test (`character_reference`, `hard_break_escape`, `hard_break_trailing`, `heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\ - [ ] (8) block quote\ - test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`, + test (`code_fenced`, `definition`, `code_indented`, `heading_atx`, `heading_setext`, `html_flow`, `misc_default_line_ending`, `thematic_break`) - [ ] (8) list\ test (`character_reference`, `code_indented`, `heading_setext`, diff --git a/src/compiler.rs b/src/compiler.rs index 3dd6ae4..bb2359e 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -2,6 +2,7 @@ use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC}; use crate::construct::character_reference::Kind as CharacterReferenceKind; use crate::tokenizer::{Code, Event, EventType, TokenType}; +use crate::util::normalize_identifier::normalize_identifier; use crate::util::{ decode_character_reference::{decode_named, decode_numeric}, encode::encode, @@ -431,16 +432,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { TokenType::DefinitionDestinationString, on_enter_definition_destination_string, ); + enter_map.insert(TokenType::ReferenceString, on_enter_buffer); enter_map.insert(TokenType::DefinitionLabelString, on_enter_buffer); enter_map.insert(TokenType::DefinitionTitleString, on_enter_buffer); let mut exit_map: Map = HashMap::new(); exit_map.insert(TokenType::Label, on_exit_label); exit_map.insert(TokenType::LabelText, on_exit_label_text); - exit_map.insert( - TokenType::ReferenceString, - on_exit_reference_destination_string, - ); + exit_map.insert(TokenType::ReferenceString, on_exit_reference_string); exit_map.insert( TokenType::ResourceDestinationString, on_exit_resource_destination_string, @@ -525,11 +524,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { &exit_map }; - println!( - "handle {:?}:{:?} ({:?})", - event.event_type, event.token_type, index - ); - if let Some(func) = map.get(&event.token_type) { func(context, event); } @@ -561,8 +555,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { index += 1; } - println!("xxx: {:?}", definition_indices); - index = 0; let jump_default = (events.len(), events.len()); let mut definition_index = 0; @@ -572,12 +564,12 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { while index < events.len() { if index == jump.0 { - println!("jump {:?}", jump); index = jump.1 + 1; definition_index += 1; jump = definition_indices .get(definition_index) .unwrap_or(&jump_default); + context.slurp_one_line_ending = true; } else { handle(&mut context, index); index += 1; @@ -683,7 +675,9 @@ fn on_exit_label_text(context: &mut CompileContext, _event: &Event) { )); } -fn on_exit_reference_destination_string(context: &mut CompileContext, _event: &Event) { +fn on_exit_reference_string(context: &mut CompileContext, _event: &Event) { + // Drop stuff. + context.resume(); let media = context.media_stack.last_mut().unwrap(); media.reference_id = Some(serialize( context.codes, @@ -720,7 +714,10 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) { // context.tags = is_in_image; let media = context.media_stack.pop().unwrap(); - let id = media.reference_id.or(media.label_id); + let id = media + .reference_id + .or(media.label_id) + .map(|id| normalize_identifier(&id)); let label = media.label.unwrap(); let definition = id.and_then(|id| context.definitions.get(&id)); let destination = if let Some(definition) = definition { @@ -734,8 +731,6 @@ fn on_exit_media(context: &mut CompileContext, _event: &Event) { &media.title }; - println!("media: {:?} {:?}", destination, title); - let destination = if let Some(destination) = destination { destination.clone() } else { @@ -1047,8 +1042,7 @@ fn on_exit_definition_label_string(context: &mut CompileContext, _event: &Event) // Discard label, use the source content instead. context.resume(); let definition = context.media_stack.last_mut().unwrap(); - // To do: put this on `reference_id` instead? - definition.label_id = Some(serialize( + definition.reference_id = Some(serialize( context.codes, &from_exit_event(context.events, context.index), false, @@ -1063,13 +1057,14 @@ fn on_exit_definition_title_string(context: &mut CompileContext, _event: &Event) fn on_exit_definition(context: &mut CompileContext, _event: &Event) { let definition = context.media_stack.pop().unwrap(); - let label_id = definition.label_id.unwrap(); + let reference_id = normalize_identifier(&definition.reference_id.unwrap()); let destination = definition.destination; let title = definition.title; context.resume(); + context .definitions - .insert(label_id, Definition { destination, title }); - context.slurp_one_line_ending = true; + .entry(reference_id) + .or_insert(Definition { destination, title }); } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 5e80a93..aca22a6 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -149,7 +149,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.exit(TokenType::DefinitionMarker); ( State::Fn(Box::new( - tokenizer.go(space_or_tab_one_line_ending(), destination_before), + tokenizer.attempt_opt(space_or_tab_one_line_ending(), destination_before), )), None, ) diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 2e8e950..1e4d7f2 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -181,7 +181,7 @@ fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult info.size += 1; (State::Fn(Box::new(|t, c| label(t, c, info))), None) } - Code::Char('/') => { + Code::Char('\\') => { tokenizer.consume(code); info.size += 1; if !info.data { diff --git a/tests/definition.rs b/tests/definition.rs index c112a96..3edf687 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -27,12 +27,17 @@ fn definition() { "should support whitespace and line endings in definitions" ); - // To do: some bug. - // assert_eq!( - // micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"), - // "

Foo*bar]

", - // "should support complex definitions (1)" - // ); + assert_eq!( + micromark("[a]:b 'c'\n\n[a]"), + "

a

", + "should support no whitespace after `:` in definitions" + ); + + assert_eq!( + micromark("[Foo*bar\\]]:my_(url) 'title (with parens)'\n\n[Foo*bar\\]]"), + "

Foo*bar]

", + "should support complex definitions (1)" + ); assert_eq!( micromark("[Foo bar]:\n\n'title'\n\n[Foo bar]"), @@ -82,33 +87,29 @@ fn definition() { "should support character escapes in destinations and titles" ); - // Some bug. - // assert_eq!( - // micromark("[foo]\n\n[foo]: url"), - // "

foo

\n", - // "should support a link before a definition" - // ); + assert_eq!( + micromark("[foo]\n\n[foo]: url"), + "

foo

\n", + "should support a link before a definition" + ); - // Some bug. - // assert_eq!( - // micromark("[foo]: first\n[foo]: second\n\n[foo]"), - // "

foo

", - // "should match w/ the first definition" - // ); + assert_eq!( + micromark("[foo]: first\n[foo]: second\n\n[foo]"), + "

foo

", + "should match w/ the first definition" + ); - // Some bug. - // assert_eq!( - // micromark("[FOO]: /url\n\n[Foo]"), - // "

Foo

", - // "should match w/ case-insensitive (1)" - // ); + assert_eq!( + micromark("[FOO]: /url\n\n[Foo]"), + "

Foo

", + "should match w/ case-insensitive (1)" + ); - // Some bug. - // assert_eq!( - // micromark("[ΑΓΩ]: /φου\n\n[αγω]"), - // "

αγω

", - // "should match w/ case-insensitive (2)" - // ); + assert_eq!( + micromark("[ΑΓΩ]: /φου\n\n[αγω]"), + "

αγω

", + "should match w/ case-insensitive (2)" + ); assert_eq!( micromark("[foo]: /url"), @@ -183,14 +184,13 @@ fn definition() { "should not support setext heading underlines after definitions" ); - // To do: some bug. - // assert_eq!( - // micromark( - // "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]" - // ), - // "

foo,\nbar,\nbaz

", - // "should support definitions after definitions" - // ); + assert_eq!( + micromark( + "[foo]: /foo-url \"foo\"\n[bar]: /bar-url\n \"bar\"\n[baz]: /baz-url\n\n[foo],\n[bar],\n[baz]" + ), + "

foo,\nbar,\nbaz

", + "should support definitions after definitions" + ); // To do: block quote. // assert_eq!( @@ -200,12 +200,11 @@ fn definition() { // ); // Extra - // To do: some bug. - // assert_eq!( - // micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."), - // "

Link: [+].

", - // "should match w/ character escapes" - // ); + assert_eq!( + micromark("[\\[\\+\\]]: example.com\n\nLink: [\\[\\+\\]]."), + "

Link: [+].

", + "should match w/ character escapes" + ); assert_eq!( micromark("[x]: \\\" \\(\\)\\\"\n\n[x]"), @@ -261,25 +260,23 @@ fn definition() { "should support character escapes at the start of a title" ); - // To do: some bug. - // assert_eq!( - // micromark("[x]: a \"\\\"\"\n\n[x]"), - // "

x

", - // "should support double quoted titles" - // ); + assert_eq!( + micromark("[x]: a \"'\"\n\n[x]"), + "

x

", + "should support double quoted titles" + ); assert_eq!( micromark("[x]: a '\"'\n\n[x]"), "

x

", - "should support double quoted titles" + "should support single quoted titles" ); - // To do: some bug. - // assert_eq!( - // micromark("[x]: a (\"\")\n\n[x]"), - // "

x

", - // "should support paren enclosed titles" - // ); + assert_eq!( + micromark("[x]: a (\"')\n\n[x]"), + "

x

", + "should support paren enclosed titles" + ); assert_eq!( micromark("[x]: a(()\n\n[x]"), @@ -305,12 +302,11 @@ fn definition() { "should support trailing whitespace after a destination" ); - // To do: some bug. - // assert_eq!( - // micromark("[x]: a \"\"X \t\n\n[x]"), - // "

x

", - // "should support trailing whitespace after a destination" - // ); + assert_eq!( + micromark("[x]: a \"X\" \t\n\n[x]"), + "

x

", + "should support trailing whitespace after a title" + ); assert_eq!( micromark("[&©&]: example.com/&©& \"&©&\"\n\n[&©&]"), @@ -331,12 +327,11 @@ fn definition() { ); // See: - // To do: some bug. - // assert_eq!( - // micromark("[x]: <> \"\"\n[][x]"), - // "

", - // "should ignore an empty title" - // ); + assert_eq!( + micromark("[x]: <> \"\"\n[][x]"), + "

", + "should ignore an empty title" + ); assert_eq!( micromark_with_options("[a]\n\n[a]: ", DANGER), @@ -362,12 +357,11 @@ fn definition() { "should not support an extra right paren (`)`) in a raw destination" ); - // To do: some bug. - // assert_eq!( - // micromark("[a]\n\n[a]: a(1(2(3(4()))))b"), - // "

a

\n", - // "should support 4 or more sets of parens in a raw destination (link resources don’t)" - // ); + assert_eq!( + micromark("[a]\n\n[a]: a(1(2(3(4()))))b"), + "

a

\n", + "should support 4 or more sets of parens in a raw destination (link resources don’t)" + ); assert_eq!( micromark("[a]\n\n[a]: aaa)"), @@ -381,14 +375,14 @@ fn definition() { "should not support a final (unbalanced) right paren in a raw destination “before” a title" ); - // To do: some bug. + // To do: do not let code (indented) interrupt definitions. // assert_eq!( // micromark(" [a]: b \"c\"\n [d]: e\n [f]: g \"h\"\n [i]: j\n\t[k]: l (m)\n\t n [k] o"), // "

n k o

", // "should support subsequent indented definitions" // ); - // To do: some bug. + // To do: trim whitespace in paragraphs. // assert_eq!( // micromark("[a\n b]: c\n\n[a\n b]"), // "

a\nb

", -- cgit