From 57c3cda9f98e70a9f614a22eb6d518051cc60b19 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Mon, 12 Sep 2022 17:18:30 +0200
Subject: Fix gfm email autolink literals overlapping

---
 src/construct/gfm_autolink_literal.rs | 26 +++++++++++++-------------
 src/event.rs                          |  2 +-
 tests/fuzz.rs                         | 19 +++++++++++++++++--
 3 files changed, 31 insertions(+), 16 deletions(-)

diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs
index c25f04c..c4ef637 100644
--- a/src/construct/gfm_autolink_literal.rs
+++ b/src/construct/gfm_autolink_literal.rs
@@ -636,14 +636,14 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                 let mut replace = Vec::new();
                 let mut point = tokenizer.events[index - 1].point.clone();
                 let start_index = point.index;
-                let mut start = 0;
+                let mut min = 0;
 
                 while byte_index < bytes.len() {
                     if bytes[byte_index] == b'@' {
                         let mut range = (0, 0, Name::GfmAutolinkLiteralEmail);
 
-                        if let Some(start) = peek_bytes_atext(bytes, byte_index) {
-                            let (start, kind) = peek_protocol(bytes, start);
+                        if let Some(start) = peek_bytes_atext(bytes, min, byte_index) {
+                            let (start, kind) = peek_protocol(bytes, min, start);
 
                             if let Some(end) = peek_bytes_email_domain(
                                 bytes,
@@ -658,8 +658,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                             byte_index = range.1;
 
                             // If there is something between the last link
-                            // (or the start) and this link.
-                            if start != range.0 {
+                            // (or `min`) and this link.
+                            if min != range.0 {
                                 replace.push(Event {
                                     kind: Kind::Enter,
                                     name: Name::Data,
@@ -691,7 +691,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                                 point: point.clone(),
                                 link: None,
                             });
-                            start = range.1;
+                            min = range.1;
                         }
                     }
 
@@ -699,7 +699,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                 }
 
                 // If there was a link, and we have more bytes left.
-                if start != 0 && start < bytes.len() {
+                if min != 0 && min < bytes.len() {
                     replace.push(Event {
                         kind: Kind::Enter,
                         name: Name::Data,
@@ -739,13 +739,13 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
 ///              ^-- from
 ///       ^-- to
 /// ```
-fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> {
+fn peek_bytes_atext(bytes: &[u8], min: usize, end: usize) -> Option<usize> {
     let mut index = end;
 
     // Take simplified atext.
     // See `email_atext` in `autolink.rs` for a similar algorithm.
     // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L301>.
-    while index > 0
+    while index > min
         && matches!(bytes[index - 1], b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z')
     {
         index -= 1;
@@ -755,7 +755,7 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> {
     // The reference code is a bit weird, but that’s what it results in.
     // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L307>.
     // Other than slash, every preceding character is allowed.
-    if index == end || (index > 0 && bytes[index - 1] == b'/') {
+    if index == end || (index > min && bytes[index - 1] == b'/') {
         None
     } else {
         Some(index)
@@ -772,14 +772,14 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> {
 ///              ^-- from
 ///       ^-- to
 /// ```
-fn peek_protocol(bytes: &[u8], end: usize) -> (usize, Name) {
+fn peek_protocol(bytes: &[u8], min: usize, end: usize) -> (usize, Name) {
     let mut index = end;
 
-    if index > 0 && bytes[index - 1] == b':' {
+    if index > min && bytes[index - 1] == b':' {
         index -= 1;
 
         // Take alphanumerical.
-        while index > 0 && matches!(bytes[index - 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') {
+        while index > min && matches!(bytes[index - 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') {
             index -= 1;
         }
 
diff --git a/src/event.rs b/src/event.rs
index 6ea52fb..de3f95f 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -3424,7 +3424,7 @@ impl Point {
     /// point, to `index.`
     pub fn shift_to(&self, bytes: &[u8], index: usize) -> Point {
         let mut next = self.clone();
-        debug_assert!(index > next.index, "expect");
+        debug_assert!(index > next.index, "expected to shift forward");
 
         while next.index < index {
             match bytes[next.index] {
diff --git a/tests/fuzz.rs b/tests/fuzz.rs
index 3cc1066..126032a 100644
--- a/tests/fuzz.rs
+++ b/tests/fuzz.rs
@@ -1,5 +1,5 @@
 extern crate micromark;
-use micromark::micromark;
+use micromark::{micromark, micromark_with_options, Constructs, Options};
 use pretty_assertions::assert_eq;
 
 #[test]
@@ -7,7 +7,22 @@ fn fuzz() -> Result<(), String> {
     assert_eq!(
         micromark("[\n~\na\n-\n\n"),
         "<p>[\n~\na</p>\n<ul>\n<li></li>\n</ul>\n",
-        "1"
+        "1: label, blank lines, and code"
+    );
+
+    assert_eq!(
+        // The first link is stopped by the `+` (so it’s `a@b.c`), but the next
+        // link used to reach back into it (`b.c+d@e.f`); it must be `+d@e.f`.
+        micromark_with_options(
+            "a@b.c+d@e.f",
+            &Options {
+                constructs: Constructs::gfm(),
+                gfm_tagfilter: true,
+                ..Options::default()
+            }
+        )?,
+        "<p><a href=\"mailto:a@b.c\">a@b.c</a><a href=\"mailto:+d@e.f\">+d@e.f</a></p>",
+        "2: gfm: email autolink literals running into each other"
     );
 
     Ok(())
-- 
cgit