From 1ba9f2c632fb6c9e57f8ad2213894d4f1235677d Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 5 Sep 2022 16:06:02 +0200 Subject: Add support for `mailto:`, `xmpp:` protocols --- src/compiler.rs | 38 ++++++++++++++++++--- src/construct/gfm_autolink_literal.rs | 63 +++++++++++++++++++++++++++++------ src/construct/raw_flow.rs | 3 +- src/event.rs | 36 ++++++++++++++++++++ tests/gfm_autolink_literal.rs | 43 ++++++++++++++++++++++++ 5 files changed, 167 insertions(+), 16 deletions(-) diff --git a/src/compiler.rs b/src/compiler.rs index 0ea1638..b271768 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -430,9 +430,11 @@ fn exit(context: &mut CompileContext) { Name::DefinitionTitleString => on_exit_definition_title_string(context), Name::Emphasis => on_exit_emphasis(context), Name::Frontmatter => on_exit_frontmatter(context), + Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context), + Name::GfmAutolinkLiteralMailto => on_exit_gfm_autolink_literal_mailto(context), Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context), Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context), - Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context), + Name::GfmAutolinkLiteralXmpp => on_exit_gfm_autolink_literal_xmpp(context), Name::GfmFootnoteCall => on_exit_gfm_footnote_call(context), Name::GfmFootnoteDefinitionLabelString => { on_exit_gfm_footnote_definition_label_string(context); @@ -1146,6 +1148,34 @@ fn on_exit_frontmatter(context: &mut CompileContext) { context.slurp_one_line_ending = true; } +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail]. 
+fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) { + generate_autolink( + context, + Some("mailto:"), + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + true, + ); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralMailto`][Name::GfmAutolinkLiteralMailto]. +fn on_exit_gfm_autolink_literal_mailto(context: &mut CompileContext) { + generate_autolink( + context, + None, + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + true, + ); +} + /// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralProtocol`][Name::GfmAutolinkLiteralProtocol]. fn on_exit_gfm_autolink_literal_protocol(context: &mut CompileContext) { generate_autolink( @@ -1174,11 +1204,11 @@ fn on_exit_gfm_autolink_literal_www(context: &mut CompileContext) { ); } -/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail]. -fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) { +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralXmpp`][Name::GfmAutolinkLiteralXmpp]. +fn on_exit_gfm_autolink_literal_xmpp(context: &mut CompileContext) { generate_autolink( context, - Some("mailto:"), + None, Slice::from_position( context.bytes, &Position::from_exit_event(context.events, context.index), diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs index 038330c..62f18ef 100644 --- a/src/construct/gfm_autolink_literal.rs +++ b/src/construct/gfm_autolink_literal.rs @@ -122,15 +122,20 @@ //! //! ## Tokens //! +//! * [`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail] +//! * [`GfmAutolinkLiteralMailto`][Name::GfmAutolinkLiteralMailto] //! * [`GfmAutolinkLiteralProtocol`][Name::GfmAutolinkLiteralProtocol] //! * [`GfmAutolinkLiteralWww`][Name::GfmAutolinkLiteralWww] -//! * [`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail] +//! 
* [`GfmAutolinkLiteralXmpp`][Name::GfmAutolinkLiteralXmpp] //! //! ## References //! //! * [`micromark-extension-gfm-autolink-literal`](https://github.com/micromark/micromark-extension-gfm-autolink-literal) //! * [*§ 6.9 Autolinks (extension)* in `GFM`](https://github.github.com/gfm/#autolinks-extension-) //! +//! > 👉 **Note**: `mailto:` and `xmpp:` protocols before email autolinks were +//! > added in `cmark-gfm@0.29.0.gfm.5` and are as of yet undocumented. +//! //! [text]: crate::construct::text //! [definition]: crate::construct::definition //! [attention]: crate::construct::attention @@ -644,12 +649,17 @@ pub fn resolve(tokenizer: &mut Tokenizer) { while byte_index < bytes.len() { if bytes[byte_index] == b'@' { - let mut range = (0, 0); + let mut range = (0, 0, Name::GfmAutolinkLiteralEmail); if let Some(start) = peek_bytes_atext(bytes, byte_index) { - if let Some(end) = peek_bytes_email_domain(bytes, byte_index + 1) { - let end = peek_bytes_truncate(bytes, start, end); - range = (start, end); + let (start, kind) = peek_protocol(bytes, start); + + if let Some(end) = peek_bytes_email_domain( + bytes, + byte_index + 1, + kind == Name::GfmAutolinkLiteralXmpp, + ) { + range = (start, peek_bytes_truncate(bytes, start, end), kind); } } @@ -678,7 +688,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { // Add the link. replace.push(Event { kind: Kind::Enter, - name: Name::GfmAutolinkLiteralEmail, + name: range.2.clone(), point: point.clone(), link: None, }); @@ -686,7 +696,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) { point.shift_to(tokenizer.parse_state.bytes, start_index + range.1); replace.push(Event { kind: Kind::Exit, - name: Name::GfmAutolinkLiteralEmail, + name: range.2.clone(), point: point.clone(), link: None, }); @@ -728,8 +738,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) { } } -// To do: add `xmpp`, `mailto` support. - /// Move back past atext. 
/// /// Moving back is only used when post processing text: so for the email address @@ -763,6 +771,40 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> { } } +/// Move back past a `mailto:` or `xmpp:` protocol. +/// +/// Moving back is only used when post processing text: so for the email address +/// algorithm. +/// +/// ```markdown +/// > | a mailto:contact@example.org b +/// ^-- from +/// ^-- to +/// ``` +fn peek_protocol(bytes: &[u8], end: usize) -> (usize, Name) { + let mut index = end; + + if index > 0 && bytes[index - 1] == b':' { + index -= 1; + + // Take alphanumerical. + while index > 0 && matches!(bytes[index - 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') { + index -= 1; + } + + let slice = Slice::from_indices(bytes, index, end - 1); + let name = slice.as_str().to_ascii_lowercase(); + + if name == "xmpp" { + return (index, Name::GfmAutolinkLiteralXmpp); + } else if name == "mailto" { + return (index, Name::GfmAutolinkLiteralMailto); + } + } + + (end, Name::GfmAutolinkLiteralEmail) +} + /// Move past email domain. /// /// Peeking like this only used when post processing text: so for the email @@ -773,7 +815,7 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> { /// ^-- from /// ^-- to /// ``` -fn peek_bytes_email_domain(bytes: &[u8], start: usize) -> Option<usize> { +fn peek_bytes_email_domain(bytes: &[u8], start: usize, xmpp: bool) -> Option<usize> { let mut index = start; let mut dot = false; @@ -784,6 +826,7 @@ fn peek_bytes_email_domain(bytes: &[u8], start: usize) -> Option<usize> { match bytes[index] { // Alphanumerical, `-`, and `_`. b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z' => {} + b'/' if xmpp => {} // Dot followed by alphanumerical (not `-` or `_`). b'.' 
if index + 1 < bytes.len() && matches!(bytes[index + 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => diff --git a/src/construct/raw_flow.rs b/src/construct/raw_flow.rs index 395d0ae..105a031 100644 --- a/src/construct/raw_flow.rs +++ b/src/construct/raw_flow.rs @@ -252,8 +252,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.size += 1; tokenizer.consume(); State::Next(StateName::RawFlowSequenceOpen) - } - else if tokenizer.tokenize_state.size + } else if tokenizer.tokenize_state.size < (if tokenizer.tokenize_state.marker == b'$' { MATH_FLOW_SEQUENCE_SIZE_MIN } else { diff --git a/src/event.rs b/src/event.rs index 458bd19..fad2c64 100644 --- a/src/event.rs +++ b/src/event.rs @@ -986,6 +986,24 @@ pub enum Name { /// ^^^^^^^^^^^^^^^^^^^ /// ``` GfmAutolinkLiteralEmail, + /// GFM extension: email autolink w/ explicit `mailto`. + /// + /// ## Info + /// + /// * **Context**: + /// [text content][crate::construct::text] + /// * **Content model**: + /// void. + /// * **Construct**: + /// [`gfm_autolink_literal`][crate::construct::gfm_autolink_literal] + /// + /// ## Example + /// + /// ```markdown + /// > | mailto:context@example.com + /// ^^^^^^^^^^^^^^^^^^^^^^^^^^ + /// ``` + GfmAutolinkLiteralMailto, /// GFM extension: autolink w/ protocol. /// /// ## Info @@ -1022,6 +1040,24 @@ pub enum Name { /// ^^^^^^^^^^^^^^^ /// ``` GfmAutolinkLiteralWww, + /// GFM extension: email autolink w/ explicit `xmpp`. + /// + /// ## Info + /// + /// * **Context**: + /// [text content][crate::construct::text] + /// * **Content model**: + /// void. + /// * **Construct**: + /// [`gfm_autolink_literal`][crate::construct::gfm_autolink_literal] + /// + /// ## Example + /// + /// ```markdown + /// > | xmpp:a@b.c/d + /// ^^^^^^^^^^^^ + /// ``` + GfmAutolinkLiteralXmpp, /// GFM extension: whole footnote call. 
/// /// ## Info diff --git a/tests/gfm_autolink_literal.rs b/tests/gfm_autolink_literal.rs index 2e84e6d..0c646b6 100644 --- a/tests/gfm_autolink_literal.rs +++ b/tests/gfm_autolink_literal.rs @@ -225,6 +225,49 @@ fn gfm_autolink_literal() { "should stop domains/paths at `<`" ); + assert_eq!( + micromark_with_options( + r###" +mailto:scyther@pokemon.com + +This is a mailto:scyther@pokemon.com + +mailto:scyther@pokemon.com. + +mmmmailto:scyther@pokemon.com + +mailto:scyther@pokemon.com/ + +mailto:scyther@pokemon.com/message + +mailto:scyther@pokemon.com/mailto:beedrill@pokemon.com + +xmpp:scyther@pokemon.com + +xmpp:scyther@pokemon.com. + +xmpp:scyther@pokemon.com/message + +xmpp:scyther@pokemon.com/message. + +Email me at:scyther@pokemon.com"###, + &gfm + ), + r###"

mailto:scyther@pokemon.com

+

This is a mailto:scyther@pokemon.com

+

mailto:scyther@pokemon.com.

+

mmmmailto:scyther@pokemon.com

+

mailto:scyther@pokemon.com/

+

mailto:scyther@pokemon.com/message

+

mailto:scyther@pokemon.com/mailto:beedrill@pokemon.com

+

xmpp:scyther@pokemon.com

+

xmpp:scyther@pokemon.com.

+

xmpp:scyther@pokemon.com/message

+

xmpp:scyther@pokemon.com/message.

+

Email me at:scyther@pokemon.com

"###, + "should support `mailto:` and `xmpp:` protocols" + ); + assert_eq!( micromark_with_options( r###" -- cgit