aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-05 16:06:02 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-05 16:06:02 +0200
commit1ba9f2c632fb6c9e57f8ad2213894d4f1235677d (patch)
treedcf50fb7277239cda442000db150d910bfe9964a /src/construct
parent57673d17336371ca5a98cd8d0ce2b01137b43431 (diff)
downloadmarkdown-rs-1ba9f2c632fb6c9e57f8ad2213894d4f1235677d.tar.gz
markdown-rs-1ba9f2c632fb6c9e57f8ad2213894d4f1235677d.tar.bz2
markdown-rs-1ba9f2c632fb6c9e57f8ad2213894d4f1235677d.zip
Add support for `mailto:`, `xmpp:` protocols
Diffstat (limited to 'src/construct')
-rw-r--r--src/construct/gfm_autolink_literal.rs63
-rw-r--r--src/construct/raw_flow.rs3
2 files changed, 54 insertions, 12 deletions
diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs
index 038330c..62f18ef 100644
--- a/src/construct/gfm_autolink_literal.rs
+++ b/src/construct/gfm_autolink_literal.rs
@@ -122,15 +122,20 @@
//!
//! ## Tokens
//!
+//! * [`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail]
+//! * [`GfmAutolinkLiteralMailto`][Name::GfmAutolinkLiteralMailto]
//! * [`GfmAutolinkLiteralProtocol`][Name::GfmAutolinkLiteralProtocol]
//! * [`GfmAutolinkLiteralWww`][Name::GfmAutolinkLiteralWww]
-//! * [`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail]
+//! * [`GfmAutolinkLiteralXmpp`][Name::GfmAutolinkLiteralXmpp]
//!
//! ## References
//!
//! * [`micromark-extension-gfm-autolink-literal`](https://github.com/micromark/micromark-extension-gfm-autolink-literal)
//! * [*§ 6.9 Autolinks (extension)* in `GFM`](https://github.github.com/gfm/#autolinks-extension-)
//!
+//! > 👉 **Note**: `mailto:` and `xmpp:` protocols before email autolinks were
+//! > added in `cmark-gfm@0.29.0.gfm.5` and are as of yet undocumented.
+//!
//! [text]: crate::construct::text
//! [definition]: crate::construct::definition
//! [attention]: crate::construct::attention
@@ -644,12 +649,17 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
while byte_index < bytes.len() {
if bytes[byte_index] == b'@' {
- let mut range = (0, 0);
+ let mut range = (0, 0, Name::GfmAutolinkLiteralEmail);
if let Some(start) = peek_bytes_atext(bytes, byte_index) {
- if let Some(end) = peek_bytes_email_domain(bytes, byte_index + 1) {
- let end = peek_bytes_truncate(bytes, start, end);
- range = (start, end);
+ let (start, kind) = peek_protocol(bytes, start);
+
+ if let Some(end) = peek_bytes_email_domain(
+ bytes,
+ byte_index + 1,
+ kind == Name::GfmAutolinkLiteralXmpp,
+ ) {
+ range = (start, peek_bytes_truncate(bytes, start, end), kind);
}
}
@@ -678,7 +688,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
// Add the link.
replace.push(Event {
kind: Kind::Enter,
- name: Name::GfmAutolinkLiteralEmail,
+ name: range.2.clone(),
point: point.clone(),
link: None,
});
@@ -686,7 +696,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
point.shift_to(tokenizer.parse_state.bytes, start_index + range.1);
replace.push(Event {
kind: Kind::Exit,
- name: Name::GfmAutolinkLiteralEmail,
+ name: range.2.clone(),
point: point.clone(),
link: None,
});
@@ -728,8 +738,6 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
}
}
-// To do: add `xmpp`, `mailto` support.
-
/// Move back past atext.
///
/// Moving back is only used when post processing text: so for the email address
@@ -763,6 +771,40 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> {
}
}
+/// Move back past a `mailto:` or `xmpp:` protocol.
+///
+/// Moving back is only used when post processing text: so for the email address
+/// algorithm.
+///
+/// ```markdown
+/// > | a mailto:contact@example.org b
+/// ^-- from
+/// ^-- to
+/// ```
+fn peek_protocol(bytes: &[u8], end: usize) -> (usize, Name) {
+ let mut index = end;
+
+ if index > 0 && bytes[index - 1] == b':' {
+ index -= 1;
+
+ // Take alphanumerical.
+ while index > 0 && matches!(bytes[index - 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') {
+ index -= 1;
+ }
+
+ let slice = Slice::from_indices(bytes, index, end - 1);
+ let name = slice.as_str().to_ascii_lowercase();
+
+ if name == "xmpp" {
+ return (index, Name::GfmAutolinkLiteralXmpp);
+ } else if name == "mailto" {
+ return (index, Name::GfmAutolinkLiteralMailto);
+ }
+ }
+
+ (end, Name::GfmAutolinkLiteralEmail)
+}
+
/// Move past email domain.
///
/// Peeking like this only used when post processing text: so for the email
@@ -773,7 +815,7 @@ fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> {
/// ^-- from
/// ^-- to
/// ```
-fn peek_bytes_email_domain(bytes: &[u8], start: usize) -> Option<usize> {
+fn peek_bytes_email_domain(bytes: &[u8], start: usize, xmpp: bool) -> Option<usize> {
let mut index = start;
let mut dot = false;
@@ -784,6 +826,7 @@ fn peek_bytes_email_domain(bytes: &[u8], start: usize) -> Option<usize> {
match bytes[index] {
// Alphanumerical, `-`, and `_`.
b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'_' | b'a'..=b'z' => {}
+ b'/' if xmpp => {}
// Dot followed by alphanumerical (not `-` or `_`).
b'.' if index + 1 < bytes.len()
&& matches!(bytes[index + 1], b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') =>
diff --git a/src/construct/raw_flow.rs b/src/construct/raw_flow.rs
index 395d0ae..105a031 100644
--- a/src/construct/raw_flow.rs
+++ b/src/construct/raw_flow.rs
@@ -252,8 +252,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.size += 1;
tokenizer.consume();
State::Next(StateName::RawFlowSequenceOpen)
- }
- else if tokenizer.tokenize_state.size
+ } else if tokenizer.tokenize_state.size
< (if tokenizer.tokenize_state.marker == b'$' {
MATH_FLOW_SEQUENCE_SIZE_MIN
} else {