From 57c3cda9f98e70a9f614a22eb6d518051cc60b19 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 12 Sep 2022 17:18:30 +0200 Subject: Fix gfm email autolink literals overlapping --- src/construct/gfm_autolink_literal.rs | 26 +++++++++++++------------- src/event.rs | 2 +- tests/fuzz.rs | 19 +++++++++++++++++-- 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs index c25f04c..c4ef637 100644 --- a/src/construct/gfm_autolink_literal.rs +++ b/src/construct/gfm_autolink_literal.rs @@ -636,14 +636,14 @@ pub fn resolve(tokenizer: &mut Tokenizer) { let mut replace = Vec::new(); let mut point = tokenizer.events[index - 1].point.clone(); let start_index = point.index; - let mut start = 0; + let mut min = 0; while byte_index < bytes.len() { if bytes[byte_index] == b'@' { let mut range = (0, 0, Name::GfmAutolinkLiteralEmail); - if let Some(start) = peek_bytes_atext(bytes, byte_index) { - let (start, kind) = peek_protocol(bytes, start); + if let Some(start) = peek_bytes_atext(bytes, min, byte_index) { + let (start, kind) = peek_protocol(bytes, min, start); if let Some(end) = peek_bytes_email_domain( bytes, @@ -658,8 +658,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) { byte_index = range.1; // If there is something between the last link - // (or the start) and this link. - if start != range.0 { + // (or `min`) and this link. + if min != range.0 { replace.push(Event { kind: Kind::Enter, name: Name::Data, @@ -691,7 +691,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { point: point.clone(), link: None, }); - start = range.1; + min = range.1; } } @@ -699,7 +699,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } // If there was a link, and we have more bytes left. - if start != 0 && start < bytes.len() { + if min != 0 && min < bytes.len() { replace.push(Event { kind: Kind::Enter, name: Name::Data, @@ -739,13 +739,13 @@ pub fn resolve(tokenizer: &mut Tokenizer) { /// ^-- from /// ^-- to /// ``` -fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option { +fn peek_bytes_atext(bytes: &[u8], min: usize, end: usize) -> Option { let mut index = end; // Take simplified atext. // See `email_atext` in `autolink.rs` for a similar algorithm. // Source: . - while index > 0 + while index > min && matches!(bytes[index - 1], b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z') { index -= 1; @@ -755,7 +755,7 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option { // The reference code is a bit weird, but that’s what it results in. // Source: . // Other than slash, every preceding character is allowed. - if index == end || (index > 0 && bytes[index - 1] == b'/') { + if index == end || (index > min && bytes[index - 1] == b'/') { None } else { Some(index) @@ -772,14 +772,14 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option { /// ^-- from /// ^-- to /// ``` -fn peek_protocol(bytes: &[u8], end: usize) -> (usize, Name) { +fn peek_protocol(bytes: &[u8], min: usize, end: usize) -> (usize, Name) { let mut index = end; - if index > 0 && bytes[index - 1] == b':' { + if index > min && bytes[index - 1] == b':' { index -= 1; // Take alphanumerical. - while index > 0 && matches!(bytes[index - 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') { + while index > min && matches!(bytes[index - 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') { index -= 1; } diff --git a/src/event.rs b/src/event.rs index 6ea52fb..de3f95f 100644 --- a/src/event.rs +++ b/src/event.rs @@ -3424,7 +3424,7 @@ impl Point { /// point, to `index.` pub fn shift_to(&self, bytes: &[u8], index: usize) -> Point { let mut next = self.clone(); - debug_assert!(index > next.index, "expect"); + debug_assert!(index > next.index, "expected to shift forward"); while next.index < index { match bytes[next.index] { diff --git a/tests/fuzz.rs b/tests/fuzz.rs index 3cc1066..126032a 100644 --- a/tests/fuzz.rs +++ b/tests/fuzz.rs @@ -1,5 +1,5 @@ extern crate micromark; -use micromark::micromark; +use micromark::{micromark, micromark_with_options, Constructs, Options}; use pretty_assertions::assert_eq; #[test] @@ -7,7 +7,22 @@ fn fuzz() -> Result<(), String> { assert_eq!( micromark("[\n~\na\n-\n\n"), "

[\n~\na

\n
    \n
  • \n
\n", - "1" + "1: label, blank lines, and code" + ); + + assert_eq!( + // The first link is stopped by the `+` (so it’s `a@b.c`), but the next + // link overlaps it (`b.c+d@e.f`). + micromark_with_options( + "a@b.c+d@e.f", + &Options { + constructs: Constructs::gfm(), + gfm_tagfilter: true, + ..Options::default() + } + )?, + "

a@b.c+d@e.f

", + "2: gfm: email autolink literals running into each other" ); Ok(()) -- cgit