From 25e267afbc0789ea36508d45c3ea3545b84223bb Mon Sep 17 00:00:00 2001
From: Titus Wormer 
Date: Thu, 18 Aug 2022 18:33:10 +0200
Subject: Add support for GFM autolink literals
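
GFM autolink literals turn bare URLs (`http://`, `https://`), `www.` domains,
and email addresses in text into links, without the angle brackets that
CommonMark autolinks require. The construct is implemented as a resolver over
the `text` content, is off by default, and is turned on with
`Constructs::gfm()` (or `constructs.gfm_autolink_literal`). For example, as
also added to `examples/lib.rs`:

    micromark_with_options(
        "Just a link! https://example.com.",
        &Options {
            constructs: Constructs::gfm(),
            ..Options::default()
        }
    )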
---
 Untitled.txt                          |  38 ++--
 examples/lib.rs                       |  14 +-
 readme.md                             |   2 +-
 src/compiler.rs                       | 107 ++++++----
 src/construct/attention.rs            |  70 +------
 src/construct/gfm_autolink_literal.rs | 382 ++++++++++++++++++++++++++++++++++
 src/construct/mod.rs                  |   9 +-
 src/construct/partial_data.rs         |   2 -
 src/construct/string.rs               |   7 +-
 src/construct/text.rs                 |  12 +-
 src/event.rs                          | 269 ++++++++++++++++--------
 src/lib.rs                            |  21 ++
 src/util/classify_character.rs        |  72 +++++++
 src/util/mod.rs                       |   1 +
 tests/gfm_autolink_literal.rs         | 256 +++++++++++++++++++++++
 15 files changed, 1040 insertions(+), 222 deletions(-)
 create mode 100644 src/construct/gfm_autolink_literal.rs
 create mode 100644 src/util/classify_character.rs
 create mode 100644 tests/gfm_autolink_literal.rs
diff --git a/Untitled.txt b/Untitled.txt
index 8238cf7..ca56d67 100644
--- a/Untitled.txt
+++ b/Untitled.txt
@@ -7,26 +7,22 @@ micromark.js: `atLineEnding` in html (text) should always eat arbitrary whitespa
 // ---------------------
 // Useful helper:
 
-let mut index = 0;
-let mut balance = 0;
-println!("before: {:?}", events.len());
-while index < events.len() {
-    let event = &events[index];
-    if event.event_type == EventType::Exit {
-        balance -= 1;
+    let mut index = 0;
+    let mut balance = 0;
+    println!("before: {:?}", tokenizer.events.len());
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+        if event.kind == Kind::Exit {
+            balance -= 1;
+        }
+        let prefix = String::from_utf8(vec![b' '; balance * 2]).unwrap();
+        println!(
+            "ev:  {}{:?}:{:?} ({:?}): {:?}",
+            prefix, event.kind, event.name, index, event.link,
+        );
+        if event.kind == Kind::Enter {
+            balance += 1;
+        }
+        index += 1;
     }
-    let prefix = String::from_utf8(vec![b' '; balance * 2]).unwrap();
-    println!(
-        "ev:  {}{:?}:{:?} ({:?}): {:?}",
-        prefix,
-        event.kind,
-        event.name,
-        index,
-        event.link,
-   );
-    if event.event_type == EventType::Enter {
-        balance += 1;
-    }
-    index += 1;
-}
 ```
diff --git a/examples/lib.rs b/examples/lib.rs
index 94c2c58..62d7ee4 100644
--- a/examples/lib.rs
+++ b/examples/lib.rs
@@ -22,7 +22,19 @@ fn main() {
         )
     );
 
-    // Support extensions that are not in CommonMark.
+    // Support GFM extensions.
+    println!(
+        "{}",
+        micromark_with_options(
+            "Just a link! https://example.com.",
+            &Options {
+                constructs: Constructs::gfm(),
+                ..Options::default()
+            }
+        )
+    );
+
+    // Support other extensions that are not in CommonMark.
     println!(
         "{:?}",
         micromark_with_options(
diff --git a/readme.md b/readme.md
index 4143e39..f1c33f8 100644
--- a/readme.md
+++ b/readme.md
@@ -46,7 +46,7 @@ important.
 
 - [x] (1) frontmatter (yaml, toml) (flow)
       — [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter)
-- [ ] (3) autolink literal (GFM) (text)
+- [x] (3) autolink literal (GFM) (text)
       — [`micromark-extension-gfm-autolink-literal`](https://github.com/micromark/micromark-extension-gfm-autolink-literal)
 - [ ] (3) footnote (GFM) (flow, text)
       — [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
diff --git a/src/compiler.rs b/src/compiler.rs
index bb08745..ac68504 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -369,6 +369,9 @@ fn exit(context: &mut CompileContext) {
         Name::DefinitionTitleString => on_exit_definition_title_string(context),
         Name::Emphasis => on_exit_emphasis(context),
         Name::Frontmatter => on_exit_frontmatter(context),
+        Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context),
+        Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context),
+        Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context),
         Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context),
         Name::HeadingAtx => on_exit_heading_atx(context),
         Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context),
@@ -647,47 +650,28 @@ fn on_enter_strong(context: &mut CompileContext) {
 
 /// Handle [`Exit`][Kind::Exit]:[`AutolinkEmail`][Name::AutolinkEmail].
 fn on_exit_autolink_email(context: &mut CompileContext) {
-    let slice = Slice::from_position(
-        context.bytes,
-        &Position::from_exit_event(context.events, context.index),
+    generate_autolink(
+        context,
+        Some("mailto:"),
+        Slice::from_position(
+            context.bytes,
+            &Position::from_exit_event(context.events, context.index),
+        )
+        .as_str(),
     );
-    let value = slice.as_str();
-
-    if !context.image_alt_inside {
-        context.push("");
-    }
-
-    context.push(&encode(value, context.encode_html));
-
-    if !context.image_alt_inside {
-        context.push("");
-    }
 }
 
 /// Handle [`Exit`][Kind::Exit]:[`AutolinkProtocol`][Name::AutolinkProtocol].
 fn on_exit_autolink_protocol(context: &mut CompileContext) {
-    let slice = Slice::from_position(
-        context.bytes,
-        &Position::from_exit_event(context.events, context.index),
+    generate_autolink(
+        context,
+        None,
+        Slice::from_position(
+            context.bytes,
+            &Position::from_exit_event(context.events, context.index),
+        )
+        .as_str(),
     );
-    let value = slice.as_str();
-
-    if !context.image_alt_inside {
-        context.push("");
-    }
-
-    context.push(&encode(value, context.encode_html));
-
-    if !context.image_alt_inside {
-        context.push("");
-    }
 }
 
 /// Handle [`Exit`][Kind::Exit]:{[`HardBreakEscape`][Name::HardBreakEscape],[`HardBreakTrailing`][Name::HardBreakTrailing]}.
@@ -927,6 +911,37 @@ fn on_exit_frontmatter(context: &mut CompileContext) {
     context.slurp_one_line_ending = true;
 }
 
+/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralProtocol`][Name::GfmAutolinkLiteralProtocol].
+fn on_exit_gfm_autolink_literal_protocol(context: &mut CompileContext) {
+    generate_autolink(
+        context,
+        None,
+        Slice::from_position(
+            context.bytes,
+            &Position::from_exit_event(context.events, context.index),
+        )
+        .as_str(),
+    );
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralWww`][Name::GfmAutolinkLiteralWww].
+fn on_exit_gfm_autolink_literal_www(context: &mut CompileContext) {
+    generate_autolink(
+        context,
+        Some("http://"),
+        Slice::from_position(
+            context.bytes,
+            &Position::from_exit_event(context.events, context.index),
+        )
+        .as_str(),
+    );
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail].
+fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) {
+    on_exit_autolink_email(context);
+}
+
 /// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx].
 fn on_exit_heading_atx(context: &mut CompileContext) {
     let rank = context
@@ -1244,3 +1259,25 @@ fn on_exit_thematic_break(context: &mut CompileContext) {
     context.line_ending_if_needed();
     context.push("
");
 }
+
+/// Generate an autolink (used by unicode autolinks and GFM autolink literals).
+fn generate_autolink(context: &mut CompileContext, protocol: Option<&str>, value: &str) {
+    if !context.image_alt_inside {
+        context.push("");
+    }
+
+    context.push(&encode(value, context.encode_html));
+
+    if !context.image_alt_inside {
+        context.push("");
+    }
+}
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 8df0f61..ef960d4 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -62,42 +62,10 @@ use crate::event::{Event, Kind, Name, Point};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
-use crate::unicode::PUNCTUATION;
+use crate::util::classify_character::{classify_opt, Kind as CharacterKind};
 use crate::util::slice::Slice;
 use alloc::{string::String, vec, vec::Vec};
 
-/// Character code kinds.
-#[derive(Debug, PartialEq)]
-enum CharacterKind {
-    /// Whitespace.
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | **a_b_ c**.
-    ///    ^      ^    ^
-    /// ```
-    Whitespace,
-    /// Punctuation.
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | **a_b_ c**.
-    ///     ^^ ^ ^    ^
-    /// ```
-    Punctuation,
-    /// Everything else.
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | **a_b_ c**.
-    ///       ^ ^  ^
-    /// ```
-    Other,
-}
-
 /// Attentention sequence that we can take markers from.
 #[derive(Debug)]
 struct Sequence {
@@ -192,8 +160,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                 let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
                     .head()
                     .unwrap();
-                let before = classify_character(char_before);
-                let after = classify_character(char_after);
+                let before = classify_opt(char_before);
+                let after = classify_opt(char_after);
                 let open = after == CharacterKind::Other
                     || (after == CharacterKind::Punctuation && before != CharacterKind::Other);
                 // To do: GFM strikethrough?
@@ -429,35 +397,3 @@ fn match_sequences(
 
     next
 }
-
-/// Classify whether a character code represents whitespace, punctuation, or
-/// something else.
-///
-/// Used for attention (emphasis, strong), whose sequences can open or close
-/// based on the class of surrounding characters.
-///
-/// > 👉 **Note** that eof (`None`) is seen as whitespace.
-///
-/// ## References
-///
-/// *   [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
-fn classify_character(char: Option<char>) -> CharacterKind {
-    if let Some(char) = char {
-        // Unicode whitespace.
-        if char.is_whitespace() {
-            CharacterKind::Whitespace
-        }
-        // Unicode punctuation.
-        else if PUNCTUATION.contains(&char) {
-            CharacterKind::Punctuation
-        }
-        // Everything else.
-        else {
-            CharacterKind::Other
-        }
-    }
-    // EOF.
-    else {
-        CharacterKind::Whitespace
-    }
-}
diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs
new file mode 100644
index 0000000..7fdeb01
--- /dev/null
+++ b/src/construct/gfm_autolink_literal.rs
@@ -0,0 +1,382 @@
+//! To do.
+
+use crate::event::{Event, Kind, Name};
+use crate::tokenizer::Tokenizer;
+use crate::util::classify_character::{classify, Kind as CharacterKind};
+use crate::util::slice::{Position, Slice};
+use alloc::vec::Vec;
+extern crate std;
+use core::str;
+
+// To do: doc all functions.
+
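+/// Resolve: find and mark GFM autolink literals.
+///
+/// Runs after `text` is parsed: it scans `Data` events that are not inside a
+/// link for protocol (`http://`, `https://`), `www.`, and email candidates,
+/// and splits the matching byte ranges off into their own events.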
+pub fn resolve(tokenizer: &mut Tokenizer) {
+    tokenizer.map.consume(&mut tokenizer.events);
+
+    let mut index = 0;
+    let mut links = 0;
+
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+
+        if event.kind == Kind::Enter {
+            if event.name == Name::Link {
+                links += 1;
+            }
+        } else {
+            if event.name == Name::Data && links == 0 {
+                let slice = Slice::from_position(
+                    tokenizer.parse_state.bytes,
+                    &Position::from_exit_event(&tokenizer.events, index),
+                );
+                let bytes = slice.bytes;
+                let mut byte_index = 0;
+                let mut replace = Vec::new();
+                let mut point = tokenizer.events[index - 1].point.clone();
+                let start_index = point.index;
+                let mut start = 0;
+
+                while byte_index < bytes.len() {
+                    if matches!(bytes[byte_index], b'H' | b'h' | b'W' | b'w' | b'@') {
+                        if let Some(autolink) = peek(bytes, byte_index) {
+                            byte_index = autolink.1;
+
+                            // If there is something between the last link
+                            // (or the start) and this link.
+                            if start != autolink.0 {
+                                replace.push(Event {
+                                    kind: Kind::Enter,
+                                    name: Name::Data,
+                                    point: point.clone(),
+                                    link: None,
+                                });
+                                point = point.shift_to(
+                                    tokenizer.parse_state.bytes,
+                                    start_index + autolink.0,
+                                );
+                                replace.push(Event {
+                                    kind: Kind::Exit,
+                                    name: Name::Data,
+                                    point: point.clone(),
+                                    link: None,
+                                });
+                            }
+
+                            // Add the link.
+                            replace.push(Event {
+                                kind: Kind::Enter,
+                                name: autolink.2.clone(),
+                                point: point.clone(),
+                                link: None,
+                            });
+                            point = point
+                                .shift_to(tokenizer.parse_state.bytes, start_index + autolink.1);
+                            replace.push(Event {
+                                kind: Kind::Exit,
+                                name: autolink.2.clone(),
+                                point: point.clone(),
+                                link: None,
+                            });
+                            start = autolink.1;
+                        }
+                    }
+
+                    byte_index += 1;
+                }
+
+                // If there was a link, and we have more bytes left.
+                if start != 0 && start < bytes.len() {
+                    replace.push(Event {
+                        kind: Kind::Enter,
+                        name: Name::Data,
+                        point: point.clone(),
+                        link: None,
+                    });
+                    replace.push(Event {
+                        kind: Kind::Exit,
+                        name: Name::Data,
+                        point: event.point.clone(),
+                        link: None,
+                    });
+                }
+
+                // If there were links.
+                if !replace.is_empty() {
+                    tokenizer.map.add(index - 1, 2, replace);
+                }
+            }
+
+            if event.name == Name::Link {
+                links -= 1;
+            }
+        }
+
+        index += 1;
+    }
+}
+
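+/// Check whether a marker byte (`h`, `H`, `w`, `W`, or `@`) at `index` starts
+/// an autolink literal (for email, the `@` sits inside it).
+///
+/// Returns the byte positions where the literal starts and ends, and the
+/// event name to use for it.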
+fn peek(bytes: &[u8], index: usize) -> Option<(usize, usize, Name)> {
+    // Protocol.
+    if let Some(protocol_end) = peek_protocol(bytes, index) {
+        if let Some(domain_end) = peek_domain(bytes, protocol_end, true) {
+            let end = truncate(bytes, protocol_end, domain_end);
+
+            // Cannot be empty.
+            if end != protocol_end {
+                return Some((index, end, Name::GfmAutolinkLiteralProtocol));
+            }
+        }
+    }
+
+    // Www.
+    if peek_www(bytes, index).is_some() {
+        // Note: we discard the `www.` we just parsed; we now parse the whole thing, `www.` included, as a domain.
+        let domain_end = peek_domain(bytes, index, false).unwrap_or(index);
+        let end = truncate(bytes, index, domain_end);
+        return Some((index, end, Name::GfmAutolinkLiteralWww));
+    }
+
+    // Email.
+    if bytes[index] == b'@' {
+        if let Some(start) = peek_atext(bytes, index) {
+            if let Some(end) = peek_email_domain(bytes, index + 1) {
+                let end = truncate(bytes, start, end);
+                return Some((start, end, Name::GfmAutolinkLiteralEmail));
+            }
+        }
+    }
+
+    None
+}
+
+/// Move past `http://`, `https://`, case-insensitive.
+fn peek_protocol(bytes: &[u8], mut index: usize) -> Option<usize> {
+    // `http`
+    if index + 3 < bytes.len()
+        && matches!(bytes[index], b'H' | b'h')
+        && matches!(bytes[index + 1], b'T' | b't')
+        && matches!(bytes[index + 2], b'T' | b't')
+        && matches!(bytes[index + 3], b'P' | b'p')
+    {
+        index += 4;
+
+        // `s`, optional.
+        if index + 1 < bytes.len() && matches!(bytes[index], b'S' | b's') {
+            index += 1;
+        }
+
+        // `://`
+        if index + 3 < bytes.len()
+            && bytes[index] == b':'
+            && bytes[index + 1] == b'/'
+            && bytes[index + 2] == b'/'
+        {
+            return Some(index + 3);
+        }
+    }
+
+    None
+}
+
+/// Move past `www.`, case-insensitive.
+fn peek_www(bytes: &[u8], index: usize) -> Option<usize> {
+    // `www.`
+    if index + 3 < bytes.len()
+        // Source: .
+        && (index == 0 || matches!(bytes[index - 1], b'\t' | b'\n' | b'\r' | b' ' | b'(' | b'*' | b'_' | b'~'))
+        && matches!(bytes[index], b'W' | b'w')
+        && matches!(bytes[index + 1], b'W' | b'w')
+        && matches!(bytes[index + 2], b'W' | b'w')
+        && bytes[index + 3] == b'.'
+    {
+        Some(index + 4)
+    } else {
+        None
+    }
+}
+
+/// Move past `example.com`.
+fn peek_domain(bytes: &[u8], start: usize, allow_short: bool) -> Option<usize> {
+    let mut dots = false;
+    let mut penultime = false;
+    let mut last = false;
+    // To do: expose this from slice?
+    // To do: do it ourselves? , , , .
+    let char_indices = str::from_utf8(&bytes[start..])
+        .unwrap()
+        .char_indices()
+        .collect::<Vec<_>>();
+    let mut index = 0;
+
+    while index < char_indices.len() {
+        match char_indices[index].1 {
+            '_' => last = true,
+            '.' => {
+                penultime = last;
+                last = false;
+                dots = true;
+            }
+            '-' => {}
+            // Source: .
+            char if classify(char) == CharacterKind::Other => {}
+            _ => break,
+        }
+
+        index += 1;
+    }
+
+    // No underscores allowed in last two parts.
+    // A valid domain needs to have at least one dot.
+    if penultime || last || (!allow_short && !dots) {
+        None
+    } else {
+        // Now peek past `/path?search#hash` (anything except whitespace).
+        while index < char_indices.len() {
+            if classify(char_indices[index].1) == CharacterKind::Whitespace {
+                break;
+            }
+
+            index += 1;
+        }
+
+        Some(if index == char_indices.len() {
+            bytes.len()
+        } else {
+            start + char_indices[index].0
+        })
+    }
+}
+
+/// Move back past `contact`.
+fn peek_atext(bytes: &[u8], end: usize) -> Option<usize> {
+    let mut index = end;
+
+    // Take simplified atext.
+    // See `email_atext` in `autolink.rs` for a similar algorithm.
+    // Source: .
+    while index > 0
+        && matches!(bytes[index - 1], b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z')
+    {
+        index -= 1;
+    }
+
+    // Do not allow a slash “inside” atext.
+    // The reference code is a bit weird, but that’s what it results in.
+    // Source: .
+    // Other than slash, every preceding character is allowed.
+    if index == end || (index > 0 && bytes[index - 1] == b'/') {
+        None
+    } else {
+        Some(index)
+    }
+}
+
+/// Move past `example.com`.
+fn peek_email_domain(bytes: &[u8], start: usize) -> Option<usize> {
+    let mut index = start;
+    let mut dot = false;
+
+    // Move past “domain”.
+    // The reference code is a bit overly complex as it handles the `@`, of which there may be just one.
+    // Source: 
+    while index < bytes.len() {
+        match bytes[index] {
+            // Alphanumerical, `-`, and `_`.
+            b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z' => {}
+            // Dot followed by alphanumerical (not `-` or `_`).
+            b'.' if index + 1 < bytes.len()
+                && matches!(bytes[index + 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') =>
+            {
+                dot = true;
+            }
+            _ => break,
+        }
+
+        index += 1;
+    }
+
+    // Domain must not be empty, must include a dot, and must end in alphabetical or `.`.
+    // Source: .
+    if index > start && dot && matches!(bytes[index - 1], b'.' | b'A'..=b'Z' | b'a'..=b'z') {
+        Some(index)
+    } else {
+        None
+    }
+}
+
+/// Split trailing stuff from a URL.
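+///
+/// That is: stop the URL at a `<`, move back over trailing punctuation and
+/// over trailing (non-empty) character references, and then hand closing
+/// parens back to the URL while it contains unbalanced opening parens.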
+fn truncate(bytes: &[u8], start: usize, mut end: usize) -> usize {
+    let mut index = start;
+
+    // Source: 
+    while index < end {
+        if bytes[index] == b'<' {
+            end = index;
+            break;
+        }
+        index += 1;
+    }
+
+    let mut split = end;
+
+    // Move before trailing punctuation.
+    while split > start {
+        match bytes[split - 1] {
+            b'!' | b'"' | b'&' | b'\'' | b')' | b',' | b'.' | b':' | b'<' | b'>' | b'?' | b']'
+            | b'}' => {}
+            // Source: .
+            // Note: we can’t move across actual references, because those have been parsed already.
+            b';' => {
+                let mut new_split = split - 1;
+                // Move back past alphabeticals.
+                while new_split > start && matches!(bytes[new_split - 1], b'A'..=b'Z' | b'a'..=b'z')
+                {
+                    new_split -= 1;
+                }
+
+                // Nonempty character reference:
+                if new_split > start && bytes[new_split - 1] == b'&' && new_split < split - 1 {
+                    split = new_split - 1;
+                    continue;
+                }
+
+                // Otherwise it’s just a `;`.
+            }
+            _ => break,
+        }
+        split -= 1;
+    }
+
+    // If there was trailing punctuation, try to balance parens.
+    if split != end {
+        let mut open = 0;
+        let mut close = 0;
+        let mut paren_index = start;
+
+        // Count parens in `url` (not in trail).
+        while paren_index < split {
+            match bytes[paren_index] {
+                b'(' => open += 1,
+                b')' => close += 1,
+                _ => {}
+            }
+
+            paren_index += 1;
+        }
+
+        let mut trail_index = split;
+
+        // If there are more opening than closing parens, try to balance them
+        // from the trail.
+        while open > close && trail_index < end {
+            if bytes[trail_index] == b')' {
+                split = trail_index;
+                close += 1;
+            }
+
+            trail_index += 1;
+        }
+    }
+
+    split
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1c1c6f7..ba1a0b3 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -28,7 +28,7 @@
 //! For example, [code (fenced)][code_fenced] and
 //! [code (indented)][code_indented] are considered different constructs.
 //!
-//! The following constructs are found in markdown:
+//! The following constructs are found in markdown (CommonMark):
 //!
 //! *   [attention (strong, emphasis)][attention]
 //! *   [autolink][]
@@ -40,7 +40,6 @@
 //! *   [code (indented)][code_indented]
 //! *   [code (text)][code_text]
 //! *   [definition][]
-//! *   [frontmatter][]
 //! *   [hard break (escape)][hard_break_escape]
 //! *   [heading (atx)][heading_atx]
 //! *   [heading (setext)][heading_setext]
@@ -56,6 +55,11 @@
 //! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
 //! > [whitespace][partial_whitespace].
 //!
+//! The following constructs are extensions found in markdown:
+//!
+//! *   [frontmatter][]
+//! *   [gfm autolink literal][gfm_autolink_literal]
+//!
 //! There are also several small subroutines typically used in different places:
 //!
 //! *   [bom][partial_bom]
@@ -141,6 +145,7 @@ pub mod definition;
 pub mod document;
 pub mod flow;
 pub mod frontmatter;
+pub mod gfm_autolink_literal;
 pub mod hard_break_escape;
 pub mod heading_atx;
 pub mod heading_setext;
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index bc6d7f4..b6f1f47 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -7,7 +7,6 @@
 //! [text]: crate::construct::text
 
 use crate::event::{Kind, Name};
-use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use alloc::vec;
@@ -51,7 +50,6 @@ pub fn at_break(tokenizer: &mut Tokenizer) -> State {
         }
     }
 
-    tokenizer.register_resolver_before(ResolveName::Data);
     State::Ok
 }
 
diff --git a/src/construct/string.rs b/src/construct/string.rs
index 698a51d..dba1ac1 100644
--- a/src/construct/string.rs
+++ b/src/construct/string.rs
@@ -27,7 +27,6 @@ const MARKERS: [u8; 2] = [b'&', b'\\'];
 ///        ^
 /// ````
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.register_resolver(ResolveName::String);
     tokenizer.tokenize_state.markers = &MARKERS;
     State::Retry(StateName::StringBefore)
 }
@@ -40,7 +39,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ````
 pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None => State::Ok,
+        None => {
+            tokenizer.register_resolver(ResolveName::Data);
+            tokenizer.register_resolver(ResolveName::String);
+            State::Ok
+        }
         Some(b'&') => {
             tokenizer.attempt(
                 State::Next(StateName::StringBefore),
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 5c13dba..06ba378 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -20,6 +20,7 @@
 //! > 👉 **Note**: for performance reasons, hard break (trailing) is formed by
 //! > [whitespace][crate::construct::partial_whitespace].
 
+use crate::construct::gfm_autolink_literal::resolve as resolve_gfm_autolink_literal;
 use crate::construct::partial_whitespace::resolve_whitespace;
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
@@ -45,7 +46,6 @@ const MARKERS: [u8; 9] = [
 ///     ^
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    tokenizer.register_resolver(ResolveName::Text);
     tokenizer.tokenize_state.markers = &MARKERS;
     State::Retry(StateName::TextBefore)
 }
@@ -58,7 +58,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn before(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        None => State::Ok,
+        None => {
+            tokenizer.register_resolver(ResolveName::Data);
+            tokenizer.register_resolver(ResolveName::Text);
+            State::Ok
+        }
         Some(b'!') => {
             tokenizer.attempt(
                 State::Next(StateName::TextBefore),
@@ -170,4 +174,8 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
         tokenizer.parse_state.constructs.hard_break_trailing,
         true,
     );
+
+    if tokenizer.parse_state.constructs.gfm_autolink_literal {
+        resolve_gfm_autolink_literal(tokenizer);
+    }
 }
diff --git a/src/event.rs b/src/event.rs
index f2f8ae1..169fdb5 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -878,6 +878,148 @@ pub enum Name {
     ///      ^
     /// ```
     EmphasisText,
+    // To do: sort.
+    /// Whole frontmatter.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [document content][crate::construct::document]
+    /// *   **Content model**:
+    ///     [`FrontmatterFence`][Name::FrontmatterFence],
+    ///     [`FrontmatterChunk`][Name::FrontmatterChunk],
+    ///     [`LineEnding`][Name::LineEnding]
+    /// *   **Construct**:
+    ///     [`frontmatter`][crate::construct::frontmatter]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | ---
+    ///     ^^^
+    /// > | title: Neptune
+    ///     ^^^^^^^^^^^^^^
+    /// > | ---
+    ///     ^^^
+    /// ```
+    Frontmatter,
+    /// Frontmatter chunk.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [`Frontmatter`][Name::Frontmatter]
+    /// *   **Content model**:
+    ///     void
+    /// *   **Construct**:
+    ///     [`frontmatter`][crate::construct::frontmatter]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    ///   | ---
+    /// > | title: Neptune
+    ///     ^^^^^^^^^^^^^^
+    ///   | ---
+    /// ```
+    FrontmatterChunk,
+    /// Frontmatter fence.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [`Frontmatter`][Name::Frontmatter]
+    /// *   **Content model**:
+    ///     [`FrontmatterSequence`][Name::FrontmatterSequence],
+    ///     [`SpaceOrTab`][Name::SpaceOrTab]
+    /// *   **Construct**:
+    ///     [`frontmatter`][crate::construct::frontmatter]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | ---
+    ///     ^^^
+    ///   | title: Neptune
+    /// > | ---
+    ///     ^^^
+    /// ```
+    FrontmatterFence,
+    /// Frontmatter sequence.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [`FrontmatterFence`][Name::FrontmatterFence]
+    /// *   **Content model**:
+    ///     void
+    /// *   **Construct**:
+    ///     [`frontmatter`][crate::construct::frontmatter]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | ---
+    ///     ^^^
+    ///   | title: Neptune
+    /// > | ---
+    ///     ^^^
+    /// ```
+    FrontmatterSequence,
+    /// GFM extension: email autolink.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [text content][crate::construct::text]
+    /// *   **Content model**:
+    ///     void.
+    /// *   **Construct**:
+    ///     [`gfm_autolink_literal`][crate::construct::gfm_autolink_literal]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | context@example.com
+    ///     ^^^^^^^^^^^^^^^^^^^
+    /// ```
+    GfmAutolinkLiteralEmail,
+    /// GFM extension: autolink w/ protocol.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [text content][crate::construct::text]
+    /// *   **Content model**:
+    ///     void.
+    /// *   **Construct**:
+    ///     [`gfm_autolink_literal`][crate::construct::gfm_autolink_literal]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | https://example.com
+    ///     ^^^^^^^^^^^^^^^^^^^
+    /// ```
+    GfmAutolinkLiteralProtocol,
+    /// GFM extension: autolink w/ www.
+    ///
+    /// ## Info
+    ///
+    /// *   **Context**:
+    ///     [text content][crate::construct::text]
+    /// *   **Content model**:
+    ///     void.
+    /// *   **Construct**:
+    ///     [`gfm_autolink_literal`][crate::construct::gfm_autolink_literal]
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | www.example.com
+    ///     ^^^^^^^^^^^^^^^
+    /// ```
+    GfmAutolinkLiteralWww,
     /// Whole hard break (escape).
     ///
     /// ## Info
@@ -1832,98 +1974,10 @@ pub enum Name {
     ///     ^ ^ ^
     /// ```
     ThematicBreakSequence,
-
-    /// Whole frontmatter.
-    ///
-    /// ## Info
-    ///
-    /// *   **Context**:
-    ///     [document content][crate::construct::document]
-    /// *   **Content model**:
-    ///     [`FrontmatterFence`][Name::FrontmatterFence],
-    ///     [`FrontmatterChunk`][Name::FrontmatterChunk],
-    ///     [`LineEnding`][Name::LineEnding]
-    /// *   **Construct**:
-    ///     [`frontmatter`][crate::construct::frontmatter]
-    ///
-    /// ## Example
-    ///
-    /// ````markdown
-    /// > | ---
-    ///     ^^^
-    /// > | title: Neptune
-    ///     ^^^^^^^^^^^^^^
-    /// > | ---
-    ///     ^^^
-    /// ````
-    Frontmatter,
-    /// Frontmatter chunk.
-    ///
-    /// ## Info
-    ///
-    /// *   **Context**:
-    ///     [`Frontmatter`][Name::Frontmatter]
-    /// *   **Content model**:
-    ///     void
-    /// *   **Construct**:
-    ///     [`frontmatter`][crate::construct::frontmatter]
-    ///
-    /// ## Example
-    ///
-    /// ````markdown
-    ///   | ---
-    /// > | title: Neptune
-    ///     ^^^^^^^^^^^^^^
-    ///   | ---
-    /// ````
-    FrontmatterChunk,
-    /// Frontmatter fence.
-    ///
-    /// ## Info
-    ///
-    /// *   **Context**:
-    ///     [`Frontmatter`][Name::Frontmatter]
-    /// *   **Content model**:
-    ///     [`FrontmatterSequence`][Name::FrontmatterSequence],
-    ///     [`SpaceOrTab`][Name::SpaceOrTab]
-    /// *   **Construct**:
-    ///     [`frontmatter`][crate::construct::frontmatter]
-    ///
-    /// ## Example
-    ///
-    /// ````markdown
-    /// > | ---
-    ///     ^^^
-    ///   | title: Neptune
-    /// > | ---
-    ///     ^^^
-    /// ````
-    FrontmatterFence,
-    /// Frontmatter sequence.
-    ///
-    /// ## Info
-    ///
-    /// *   **Context**:
-    ///     [`FrontmatterFence`][Name::FrontmatterFence]
-    /// *   **Content model**:
-    ///     void
-    /// *   **Construct**:
-    ///     [`frontmatter`][crate::construct::frontmatter]
-    ///
-    /// ## Example
-    ///
-    /// ````markdown
-    /// > | ---
-    ///     ^^^
-    ///   | title: Neptune
-    /// > | ---
-    ///     ^^^
-    /// ````
-    FrontmatterSequence,
 }
 
 /// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 43] = [
+pub const VOID_EVENTS: [Name; 46] = [
     Name::AttentionSequence,
     Name::AutolinkEmail,
     Name::AutolinkMarker,
@@ -1949,6 +2003,9 @@ pub const VOID_EVENTS: [Name; 43] = [
     Name::DefinitionTitleMarker,
     Name::EmphasisSequence,
     Name::FrontmatterChunk,
+    Name::GfmAutolinkLiteralEmail,
+    Name::GfmAutolinkLiteralProtocol,
+    Name::GfmAutolinkLiteralWww,
     Name::FrontmatterSequence,
     Name::HardBreakEscape,
     Name::HardBreakTrailing,
@@ -2013,6 +2070,40 @@ pub struct Point {
     pub vs: usize,
 }
 
+impl Point {
+    /// Create a new point that is shifted from the current point, which must
+    /// be earlier and close by, to `index`.
+    // To do: tabs.
+    pub fn shift_to(&self, bytes: &[u8], index: usize) -> Point {
+        let mut next = self.clone();
+        debug_assert!(index > next.index, "expect");
+
+        while next.index < index {
+            match bytes[next.index] {
+                b'\n' | b'\r' => unreachable!("cannot move past line endings"),
+                b'\t' => {
+                    unreachable!("to do: tab")
+                    // let remainder = next.column % TAB_SIZE;
+                    // let vs = if remainder == 0 {
+                    //     0
+                    // } else {
+                    //     TAB_SIZE - remainder
+                    // };
+
+                    // next.index += 1;
+                    // next.column += 1 + vs;
+                }
+                _ => {
+                    next.index += 1;
+                    next.column += 1;
+                }
+            }
+        }
+
+        next
+    }
+}
+
 /// Event kinds.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub enum Kind {
diff --git a/src/lib.rs b/src/lib.rs
index 00f1c5c..ba257db 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -166,6 +166,13 @@ pub struct Constructs {
     ///     ^^^
     /// ````
     pub frontmatter: bool,
+    /// GFM: autolink literal.
+    ///
+    /// ```markdown
+    /// > | https://example.com
+    ///     ^^^^^^^^^^^^^^^^^^^
+    /// ```
+    pub gfm_autolink_literal: bool,
     /// Hard break (escape).
     ///
     /// ```markdown
@@ -263,6 +270,7 @@ impl Default for Constructs {
             code_text: true,
             definition: true,
             frontmatter: false,
+            gfm_autolink_literal: false,
             hard_break_escape: true,
             hard_break_trailing: true,
             heading_atx: true,
@@ -278,6 +286,19 @@ impl Default for Constructs {
     }
 }
 
+impl Constructs {
+    /// GFM.
+    ///
+    /// This turns on `CommonMark` + GFM.
+    #[must_use]
+    pub fn gfm() -> Self {
+        Self {
+            gfm_autolink_literal: true,
+            ..Self::default()
+        }
+    }
+}
+
 /// Configuration (optional).
 #[derive(Clone, Debug, Default)]
 pub struct Options {
diff --git a/src/util/classify_character.rs b/src/util/classify_character.rs
new file mode 100644
index 0000000..b938502
--- /dev/null
+++ b/src/util/classify_character.rs
@@ -0,0 +1,72 @@
+//! Utilities to classify characters as whitespace, punctuation, or rest.
+
+use crate::unicode::PUNCTUATION;
+
+/// Character kinds.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Kind {
+    /// Whitespace.
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | **a_b_ c**.
+    ///    ^      ^    ^
+    /// ```
+    Whitespace,
+    /// Punctuation.
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | **a_b_ c**.
+    ///     ^^ ^ ^    ^
+    /// ```
+    Punctuation,
+    /// Everything else.
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | **a_b_ c**.
+    ///       ^ ^  ^
+    /// ```
+    Other,
+}
+
+/// Classify whether a character code represents whitespace, punctuation, or
+/// something else.
+///
+/// Used for attention (emphasis, strong), whose sequences can open or close
+/// based on the class of surrounding characters.
+///
+/// > 👉 **Note** that eof (`None`) is seen as whitespace.
+///
+/// ## References
+///
+/// *   [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
+pub fn classify(char: char) -> Kind {
+    // Unicode whitespace.
+    if char.is_whitespace() {
+        Kind::Whitespace
+    }
+    // Unicode punctuation.
+    else if PUNCTUATION.contains(&char) {
+        Kind::Punctuation
+    }
+    // Everything else.
+    else {
+        Kind::Other
+    }
+}
+
+/// Like [`classify`], but supports eof as whitespace.
+pub fn classify_opt(char_opt: Option<char>) -> Kind {
+    if let Some(char) = char_opt {
+        classify(char)
+    }
+    // EOF.
+    else {
+        Kind::Whitespace
+    }
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index f51845c..022c7d6 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,5 +1,6 @@
 //! Utilities used when processing markdown.
 
+pub mod classify_character;
 pub mod decode_character_reference;
 pub mod edit_map;
 pub mod encode;
diff --git a/tests/gfm_autolink_literal.rs b/tests/gfm_autolink_literal.rs
new file mode 100644
index 0000000..9551751
--- /dev/null
+++ b/tests/gfm_autolink_literal.rs
@@ -0,0 +1,256 @@
+extern crate micromark;
+use micromark::{micromark, micromark_with_options, Constructs, Options};
+use pretty_assertions::assert_eq;
+
+#[test]
+fn gfm_autolink_literal() {
+    let gfm = Options {
+        constructs: Constructs::gfm(),
+        ..Options::default()
+    };
+
+    assert_eq!(
+        micromark("https://example.com"),
+        "https://example.com
",
+        "should ignore protocol urls by default"
+    );
+    assert_eq!(
+        micromark("www.example.com"),
+        "www.example.com
",
+        "should ignore www urls by default"
+    );
+    assert_eq!(
+        micromark("user@example.com"),
+        "user@example.com
",
+        "should ignore email urls by default"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://example.com", &gfm),
+        "https://example.com
",
+        "should support protocol urls if enabled"
+    );
+    assert_eq!(
+        micromark_with_options("www.example.com", &gfm),
+        "www.example.com
",
+        "should support www urls if enabled"
+    );
+    assert_eq!(
+        micromark_with_options("user@example.com", &gfm),
+        "user@example.com
",
+        "should support email urls if enabled"
+    );
+
+    assert_eq!(
+        micromark_with_options("user@example.com", &gfm),
+        "user@example.com
",
+        "should support a closing paren at TLD (email)"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.a.)", &gfm),
+        "www.a.)
",
+        "should support a closing paren at TLD (www)"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.a b", &gfm),
+        "www.a b
",
+        "should support no TLD"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.a/b c", &gfm),
+        "www.a/b c
",
+        "should support a path instead of TLD"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.�a", &gfm),
+        "www.�a
",
+        "should support a replacement character in a domain"
+    );
+
+    assert_eq!(
+        micromark_with_options("http://點看.com", &gfm),
+        "http://點看.com
",
+        "should support non-ascii characters in a domain (http)"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.點看.com", &gfm),
+        "www.點看.com
",
+        "should support non-ascii characters in a domain (www)"
+    );
+
+    assert_eq!(
+        micromark_with_options("點看@example.com", &gfm),
+        "點看@example.com
",
+        "should *not* support non-ascii characters in atext (email)"
+    );
+
+    assert_eq!(
+        micromark_with_options("example@點看.com", &gfm),
+        "example@點看.com
",
+        "should *not* support non-ascii characters in a domain (email)"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.a.com/點看", &gfm),
+        "www.a.com/點看
",
+        "should support non-ascii characters in a path"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.-a.b", &gfm),
+        "www.-a.b
",
+        "should support a dash to start a domain"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.$", &gfm),
+        "www.$
",
+        "should support a dollar as a domain name"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.a..b.c", &gfm),
+        "www.a..b.c
",
+        "should support adjacent dots in a domain name"
+    );
+
+    assert_eq!(
+        micromark_with_options("www.a&a;", &gfm),
+        "www.a&a;
",
+        "should support named character references in domains"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://a.bc/d/e/).", &gfm),
+        "https://a.bc/d/e/).
",
+        "should support a closing paren and period after a path"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://a.bc/d/e/.)", &gfm),
+        "https://a.bc/d/e/.)
",
+        "should support a period and closing paren after a path"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://a.bc).", &gfm),
+        "https://a.bc).
",
+        "should support a closing paren and period after a domain"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://a.bc.)", &gfm),
+        "https://a.bc.)
",
+        "should support a period and closing paren after a domain"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://a.bc).d", &gfm),
+        "https://a.bc).d
",
+        "should support a closing paren and period in a path"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://a.bc.)d", &gfm),
+        "https://a.bc.)d
",
+        "should support a period and closing paren in a path"
+    );
+
+    assert_eq!(
+        micromark_with_options("https://a.bc/))d", &gfm),
+        "https://a.bc/))d
",
+        "should support two closing parens in a path"
+    );
+
+    assert_eq!(
+        micromark_with_options("ftp://a/b/c.txt", &gfm),
+        "ftp://a/b/c.txt
",
+        "should not support ftp links"
+    );
+
+    // Note: GH comments/issues/PRs do not link this, but Gists/readmes do.
+    // Fixing it would mean deviating from `cmark-gfm`:
+    // Source: .
+    // assert_eq!(
+    //     micromark_with_options(",www.example.com", &gfm),
+    //     ",www.example.com
",
+    //     "should support www links after Unicode punctuation",
+    // );
+
+    assert_eq!(
+        micromark_with_options(",https://example.com", &gfm),
+        ",https://example.com
",
+        "should support http links after Unicode punctuation"
+    );
+
+    assert_eq!(
+        micromark_with_options(",example@example.com", &gfm),
+        ",example@example.com
",
+        "should support email links after Unicode punctuation"
+    );
+
+    assert_eq!(
+        micromark_with_options(
+            "http://user:password@host:port/path?key=value#fragment",
+            &gfm
+        ),
+        "http://user:password@host:port/path?key=value#fragment
",
+        "should not link character reference for `:`"
+    );
+
+    assert_eq!(
+        micromark_with_options("http://example.com/abhttp://example.com/ab<cd
",
+        "should stop domains/paths at `<`"
+    );
+
+    assert_eq!(
+        micromark_with_options(
+            r###"
+[ www.example.com
+
+[ https://example.com
+
+[ contact@example.com
+
+[ www.example.com ]
+
+[ https://example.com ]
+
+[ contact@example.com ]
+
+[ www.example.com ](#)
+
+[ https://example.com ](#)
+
+[ contact@example.com ](#)
+
+
+
+
+
+
+"###,
+            &gfm
+        ),
+        r###"[ www.example.com
+[ https://example.com
+[ contact@example.com
+[ www.example.com ]
+[ https://example.com ]
+[ contact@example.com ]
+ www.example.com 
+ https://example.com 
+ contact@example.com 
+
+
+
+"###,
+        "should interplay with brackets, links, and images"
+    );
+}
-- 
cgit