From efdf90959f78d1582da312bffbefaabb79f264b7 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Mon, 13 Jun 2022 12:37:25 +0200
Subject: Add autolinks
---
readme.md | 5 +-
src/compiler.rs | 30 ++++-
src/constant.rs | 17 +++
src/construct/autolink.rs | 327 ++++++++++++++++++++++++++++++++++++++++++++++
src/construct/mod.rs | 1 +
src/content/text.rs | 9 +-
src/tokenizer.rs | 5 +
tests/autolink.rs | 247 ++++++++++++++++++++++++++++++++++
8 files changed, 633 insertions(+), 8 deletions(-)
create mode 100644 src/construct/autolink.rs
create mode 100644 tests/autolink.rs
diff --git a/readme.md b/readme.md
index 527170d..26035c4 100644
--- a/readme.md
+++ b/readme.md
@@ -68,6 +68,7 @@ cargo doc --document-private-items
### Small things
+- [ ] (3) Encode urls
- [ ] (1) Parse initial and final whitespace of paragraphs (in text)
- [ ] (3) Clean compiler
- [ ] (1) Optionally remove dangerous protocols when compiling
@@ -96,7 +97,7 @@ cargo doc --document-private-items
### Constructs
- [ ] (5) attention (strong, emphasis) (text)
-- [ ] (1) autolink
+- [x] autolink
- [x] blank line
- [ ] (5) block quote
- [x] character escape
@@ -137,7 +138,7 @@ cargo doc --document-private-items
- [x] paragraph
- [ ] (5) text
- [ ] attention (strong, emphasis) (text)
- - [ ] autolink
+ - [x] autolink
- [x] character escape
- [x] character reference
- [ ] code (text)
diff --git a/src/compiler.rs b/src/compiler.rs
index 48983b6..df26f1b 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -89,7 +89,11 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
| TokenType::CharacterReferenceMarkerNumeric
| TokenType::CharacterReferenceMarkerHexadecimal
| TokenType::CharacterReferenceMarkerSemi
- | TokenType::CharacterReferenceValue => {}
+ | TokenType::CharacterReferenceValue
+ | TokenType::Autolink
+ | TokenType::AutolinkMarker
+ | TokenType::AutolinkProtocol
+ | TokenType::AutolinkEmail => {}
#[allow(unreachable_patterns)]
_ => {
unreachable!("unhandled `enter` of TokenType {:?}", token_type)
@@ -108,7 +112,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
| TokenType::CharacterEscape
| TokenType::CharacterEscapeMarker
| TokenType::CharacterReference
- | TokenType::CharacterReferenceMarkerSemi => {}
+ | TokenType::CharacterReferenceMarkerSemi
+ | TokenType::Autolink
+ | TokenType::AutolinkMarker => {}
TokenType::HtmlFlow => {
ignore_encode = false;
}
@@ -229,6 +235,26 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
atx_opening_sequence_size = None;
atx_heading_buffer = None;
}
+ TokenType::AutolinkProtocol => {
+ let slice = slice_serialize(codes, &get_span(events, index), false);
+ let buf = buf_tail_mut(buffers);
+ // To do: options.allowDangerousProtocol ? undefined : protocolHref
+ // let url = sanitize_uri(slice);
+ let url = encode(&slice);
+ buf.push(format!("", url));
+ buf.push(encode(&slice));
+ buf.push("".to_string());
+ }
+ TokenType::AutolinkEmail => {
+ let slice = slice_serialize(codes, &get_span(events, index), false);
+ let buf = buf_tail_mut(buffers);
+ // To do: options.allowDangerousProtocol ? undefined : protocolHref
+ // let url = sanitize_uri(slice);
+ let url = encode(&slice);
+ buf.push(format!("", url));
+ buf.push(encode(&slice));
+ buf.push("".to_string());
+ }
TokenType::ThematicBreak => {
buf_tail_mut(buffers).push("
".to_string());
}
diff --git a/src/constant.rs b/src/constant.rs
index 332fdaf..c98c24d 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -27,6 +27,23 @@
/// [code_indented]: crate::construct::code_indented
pub const TAB_SIZE: usize = 4;
+/// The number of characters allowed in a protocol of an [autolink][].
+///
+/// The protocol part is the `xxx` in `<xxx://example.com>`.
+/// 32 characters is fine, 33 is too many.
+///
+/// [autolink]: crate::construct::autolink
+pub const AUTOLINK_SCHEME_SIZE_MAX: usize = 32;
+
+/// The number of characters allowed in a domain of an email [autolink][].
+///
+/// There can be multiple “domains”.
+/// A domain part is each `xxx` in `<example@xxx.xxx.xxx>`.
+/// 63 characters is fine, 64 is too many.
+///
+/// [autolink]: crate::construct::autolink
+pub const AUTOLINK_DOMAIN_SIZE_MAX: usize = 63;
+
/// The number of markers needed for a [thematic break][thematic_break] to form.
///
/// Like many things in markdown, the number is `3`.
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
new file mode 100644
index 0000000..24f2c20
--- /dev/null
+++ b/src/construct/autolink.rs
@@ -0,0 +1,327 @@
+//! Autolinks are a construct that occurs in the [text][] content type.
+//!
+//! It forms with the following BNF:
+//!
+//! ```bnf
+//! autolink ::= '<' ( url | email ) '>'
+//!
+//! url ::= ascii_alphabetic 1*31( '+' | '-' | '.' | ascii_alphanumeric ) ':' *( code - ascii_control - '\r' - '\n' - ' ')
+//! email ::= 1*ascii_atext '@' domain *('.' domain)
+//! ; Restriction: up to (including) 63 characters are allowed in each domain.
+//! domain ::= ascii_alphanumeric *( ascii_alphanumeric | 1*'-' ascii_alphanumeric )
+//! ascii_atext ::= ascii_alphanumeric | '#' .. '\'' | '*' | '+' | '-' .. '/' | '=' | '?' | '^' .. '`' | '{' .. '~'
+//! ```
+//!
+//! Autolinks relate to the `<a>` element in HTML.
+//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info.
+//! When an email autolink is used (so, without a protocol), the string
+//! `mailto:` is prepended before the email, when generating the `href`
+//! attribute of the hyperlink.
+//!
+//! The maximum allowed size of a scheme is `32` (inclusive), which is defined
+//! in [`AUTOLINK_SCHEME_SIZE_MAX`][autolink_scheme_size_max].
+//! The maximum allowed size of a domain is `63` (inclusive), which is defined
+//! in [`AUTOLINK_DOMAIN_SIZE_MAX`][autolink_domain_size_max].
+//!
+//! The grammar for autolinks is quite strict and requires ASCII to be used
+//! (without, for example, spaces).
+//! To use non-ascii characters and otherwise impossible characters, in URLs,
+//! you can use percent encoding:
+//!
+//! ```markdown
+//! <https://example.com/alpha%20bravo>
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p><a href="https://example.com/alpha%20bravo">https://example.com/alpha%20bravo</a></p>
+//! ```
+//!
+//! Interestingly, there are a couple of things that are valid autolinks in
+//! markdown but in HTML would be valid tags, such as `<svg:rect>` and
+//! `<xml:lang/>`.
+//! However, because CommonMark employs a naïve HTML parsing algorithm, those
+//! are not considered HTML.
+//!
+//! While CommonMark restricts links from occurring in other links in the case
+//! of bracketed links, this restriction is not in place for autolinks inside
+//! autolinks:
+//!
+//! ```markdown
+//! [<https://example.com>](#)
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p><a href="#"><a href="https://example.com">https://example.com</a></a></p>
+//! ```
+//!
+//! The generated output, in this case, is invalid according to HTML.
+//! When a browser sees that markup, it will instead parse it as:
+//!
+//! ```html
+//! <p><a href="#"></a><a href="https://example.com">https://example.com</a></p>
+//! ```
+//!
+//! ## References
+//!
+//! * [`autolink.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/autolink.js)
+//! * [*§ 6.4 Autolinks* in `CommonMark`](https://spec.commonmark.org/0.30/#autolinks)
+//!
+//! [text]: crate::content::text
+//! [autolink_scheme_size_max]: crate::constant::AUTOLINK_SCHEME_SIZE_MAX
+//! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX
+//! [html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
+//!
+//!
+
+use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of an autolink.
+///
+/// Anything other than `<` is rejected with `State::Nok`.
+/// On `<`, the `Autolink` token is opened, the `<` is wrapped in an
+/// `AutolinkMarker` token, and an `AutolinkProtocol` token is opened before
+/// handing over to [`open`].
+///
+/// ```markdown
+/// a|<https://example.com>b
+/// a|<user@example.com>b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if !matches!(code, Code::Char('<')) {
+        return (State::Nok, None);
+    }
+
+    tokenizer.enter(TokenType::Autolink);
+    tokenizer.enter(TokenType::AutolinkMarker);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::AutolinkMarker);
+    tokenizer.enter(TokenType::AutolinkProtocol);
+    (State::Fn(Box::new(open)), None)
+}
+
+/// After `<`, before the protocol.
+///
+/// An ASCII letter is consumed and may start either a scheme or an email
+/// local part; any other atext character can only belong to an email.
+/// Everything else is rejected with `State::Nok`.
+///
+/// ```markdown
+/// a<|https://example.com>b
+/// a<|user@example.com>b
+/// ```
+pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if let Code::Char(char) = code {
+        if char.is_ascii_alphabetic() {
+            tokenizer.consume(code);
+            return (State::Fn(Box::new(scheme_or_email_atext)), None);
+        }
+
+        if is_ascii_atext(char) {
+            return email_atext(tokenizer, code);
+        }
+    }
+
+    (State::Nok, None)
+}
+
+/// After the first character of the protocol or email name.
+///
+/// If the second character could still be part of either a scheme or an
+/// email local part (`+`, `-`, `.`, or an ASCII alphanumeric), parsing stays
+/// ambiguous and continues in [`scheme_inside_or_email_atext`] with a size
+/// of `1`; otherwise only an email is still possible.
+///
+/// ```markdown
+/// a<h|ttps://example.com>b
+/// a<u|ser@example.com>b
+/// ```
+pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        // Characters that are valid in both a scheme and email atext.
+        Code::Char(char) if matches!(char, '+' | '-' | '.') || char.is_ascii_alphanumeric() => {
+            scheme_inside_or_email_atext(tokenizer, code, 1)
+        }
+        _ => email_atext(tokenizer, code),
+    }
+}
+
+/// Inside an ambiguous protocol or email name.
+///
+/// `size` is the number of characters consumed so far.
+/// A `:` settles the ambiguity in favor of a URL.
+/// While `size` is below `AUTOLINK_SCHEME_SIZE_MAX`, characters valid in both
+/// a scheme and email atext are consumed and the state is re-entered with
+/// `size + 1`.
+/// Anything else is handed to [`email_atext`].
+///
+/// ```markdown
+/// a<ht|tps://example.com>b
+/// a<us|er@example.com>b
+/// ```
+pub fn scheme_inside_or_email_atext(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    size: usize,
+) -> StateFnResult {
+    match code {
+        Code::Char(':') => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(url_inside)), None)
+        }
+        // Still ambiguous: valid in a scheme (within its size limit) and in atext.
+        Code::Char(char)
+            if size < AUTOLINK_SCHEME_SIZE_MAX
+                && (matches!(char, '+' | '-' | '.') || char.is_ascii_alphanumeric()) =>
+        {
+            tokenizer.consume(code);
+            let next_size = size + 1;
+            (
+                State::Fn(Box::new(move |t, c| {
+                    scheme_inside_or_email_atext(t, c, next_size)
+                })),
+                None,
+            )
+        }
+        _ => email_atext(tokenizer, code),
+    }
+}
+
+/// Inside a URL, after the protocol.
+///
+/// Consumes URL characters until the closing `>`, at which point the
+/// `AutolinkProtocol` token is closed and [`end`] takes over.
+/// The attempt fails with `State::Nok` on `Code::None`,
+/// `Code::CarriageReturnLineFeed`, `Code::VirtualSpace`, a space, an ASCII
+/// control character, or a `<`: CommonMark does not allow those in the
+/// absolute URI of an autolink.
+///
+/// ```markdown
+/// a<https:|//example.com>b
+/// ```
+pub fn url_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            tokenizer.exit(TokenType::AutolinkProtocol);
+            end(tokenizer, code)
+        }
+        // Neither a space nor another `<` may occur inside the URL.
+        Code::Char('<' | ' ') => (State::Nok, None),
+        Code::Char(char) if char.is_ascii_control() => (State::Nok, None),
+        Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace => (State::Nok, None),
+        Code::Char(_) => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(url_inside)), None)
+        }
+    }
+}
+
+/// Inside email atext.
+///
+/// An `@` ends the local part and moves on to the domain (starting with a
+/// label size of `0`); further atext characters are consumed; anything else
+/// is rejected with `State::Nok`.
+///
+/// ```markdown
+/// a<user.na|me@example.com>b
+/// ```
+pub fn email_atext(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let char = match code {
+        Code::Char(char) => char,
+        _ => return (State::Nok, None),
+    };
+
+    if char == '@' {
+        tokenizer.consume(code);
+        (
+            State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))),
+            None,
+        )
+    } else if is_ascii_atext(char) {
+        tokenizer.consume(code);
+        (State::Fn(Box::new(email_atext)), None)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// After an at-sign or a dot in the label.
+///
+/// A label must start with an ASCII alphanumeric, which is handed (together
+/// with `size`) to [`email_value`]; anything else is rejected with
+/// `State::Nok`.
+///
+/// ```markdown
+/// a<user.name@|example.com>b
+/// a<user.name@example.|com>b
+/// ```
+pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    if matches!(code, Code::Char(char) if char.is_ascii_alphanumeric()) {
+        email_value(tokenizer, code, size)
+    } else {
+        (State::Nok, None)
+    }
+}
+
+/// In the label, where `.` and `>` are allowed.
+///
+/// A `.` starts a new label (size reset to `0`).
+/// A `>` closes the token that is currently open as `AutolinkProtocol` and
+/// relabels both of its events as `AutolinkEmail` before finishing in
+/// [`end`].
+/// Any other code is handled by [`email_value`] with the current `size`.
+///
+/// ```markdown
+/// a<user.name@ex|ample.com>b
+/// ```
+pub fn email_label(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    match code {
+        Code::Char('>') => {
+            // The latest event is taken to be the `enter` of the open token;
+            // the `exit` emitted below is expected to land directly after it.
+            let enter_index = tokenizer.events.len() - 1;
+            let exit_index = enter_index + 1;
+            tokenizer.exit(TokenType::AutolinkProtocol);
+            // This turned out to be an email, not a protocol: relabel both events.
+            tokenizer.events[enter_index].token_type = TokenType::AutolinkEmail;
+            tokenizer.events[exit_index].token_type = TokenType::AutolinkEmail;
+            end(tokenizer, code)
+        }
+        Code::Char('.') => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))),
+                None,
+            )
+        }
+        _ => email_value(tokenizer, code, size),
+    }
+}
+
+/// In the label, where `.` and `>` are *not* allowed.
+///
+/// Though, this is also used in `email_label` to parse other values.
+///
+/// `size` is the number of characters already consumed in the current label.
+/// While it is below `AUTOLINK_DOMAIN_SIZE_MAX`, a `-` is consumed and this
+/// state is re-entered (so a label cannot end on a dash), and an ASCII
+/// alphanumeric is consumed and continues in [`email_label`]; both pass on
+/// `size + 1`.
+/// Everything else is rejected with `State::Nok`.
+///
+/// ```markdown
+/// a<user.name@ex-|ample.com>b
+/// ```
+pub fn email_value(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+    let within_limit = size < AUTOLINK_DOMAIN_SIZE_MAX;
+
+    match code {
+        Code::Char('-') if within_limit => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |t, c| email_value(t, c, size + 1))),
+                None,
+            )
+        }
+        Code::Char(char) if within_limit && char.is_ascii_alphanumeric() => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |t, c| email_label(t, c, size + 1))),
+                None,
+            )
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// At the `>`.
+///
+/// Wraps the `>` in an `AutolinkMarker` token, closes the `Autolink` token,
+/// and reports `State::Ok`.
+///
+/// # Panics
+///
+/// Panics (via `unreachable!`) when called with anything other than `>`.
+///
+/// ```markdown
+/// a<https://example.com|>b
+/// a<user@example.com|>b
+/// ```
+pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if let Code::Char('>') = code {
+        tokenizer.enter(TokenType::AutolinkMarker);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::AutolinkMarker);
+        tokenizer.exit(TokenType::Autolink);
+        (State::Ok, None)
+    } else {
+        unreachable!("expected `>` at `end`")
+    }
+}
+
+/// Check whether the character code represents an ASCII atext.
+///
+/// Accepted are ASCII alphanumerics (see [`is_ascii_alphanumeric`][]) and the
+/// characters in the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027
+/// APOSTROPHE (`'`), U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D
+/// DASH (`-`) to U+002F SLASH (`/`) (which includes U+002E DOT (`.`)), U+003D
+/// EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E CARET (`^`) to U+0060
+/// GRAVE ACCENT (`` ` ``), and U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE
+/// (`~`).
+///
+/// Note that U+0021 EXCLAMATION MARK (`!`) is *not* accepted.
+///
+/// See:
+/// **\[RFC5322]**:
+/// [Internet Message Format](https://tools.ietf.org/html/rfc5322).
+/// P. Resnick.
+/// IETF.
+///
+/// [`is_ascii_alphanumeric`]: char::is_ascii_alphanumeric
+fn is_ascii_atext(x: char) -> bool {
+    x.is_ascii_alphanumeric()
+        || matches!(x, '#'..='\'' | '*' | '+' | '-'..='/' | '=' | '?' | '^'..='`' | '{'..='~')
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index d671db6..0bc8746 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -1,5 +1,6 @@
//! Constructs found in markdown.
+pub mod autolink;
pub mod blank_line;
pub mod character_escape;
pub mod character_reference;
diff --git a/src/content/text.rs b/src/content/text.rs
index 2c93b18..a7b40e7 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -5,7 +5,7 @@
//!
//! The constructs found in text are:
//!
-//! * Autolink
+//! * [Autolink][crate::construct::autolink]
//! * Attention
//! * HTML (text)
//! * Hard break escape
@@ -17,7 +17,8 @@
//! * [Character reference][crate::construct::character_reference]
use crate::construct::{
- character_escape::start as character_escape, character_reference::start as character_reference,
+ autolink::start as autolink, character_escape::start as character_escape,
+ character_reference::start as character_reference,
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -33,7 +34,7 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Ok, None),
- _ => tokenizer.attempt_2(character_reference, character_escape, |ok| {
+ _ => tokenizer.attempt_3(character_reference, character_escape, autolink, |ok| {
Box::new(if ok { start } else { before_data })
})(tokenizer, code),
}
@@ -68,7 +69,7 @@ fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
(State::Ok, None)
}
// To do: somehow get these markers from constructs.
- Code::Char('&' | '\\') => {
+ Code::Char('&' | '\\' | '<') => {
tokenizer.exit(TokenType::Data);
start(tokenizer, code)
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 4d235ed..4c1caa4 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -20,6 +20,11 @@ use std::collections::HashMap;
// To do: document each variant.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenType {
+ Autolink,
+ AutolinkMarker,
+ AutolinkProtocol,
+ AutolinkEmail,
+
AtxHeading,
AtxHeadingSequence,
AtxHeadingWhitespace,
diff --git a/tests/autolink.rs b/tests/autolink.rs
new file mode 100644
index 0000000..fc49dcb
--- /dev/null
+++ b/tests/autolink.rs
@@ -0,0 +1,247 @@
+extern crate micromark;
+use micromark::micromark;
+
+#[test]
+fn autolink() {
+ assert_eq!(
+ micromark("```\n<\n >\n```"),
+ "<\n >\n
",
+ "should support fenced code w/ grave accents"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "http://foo.bar.baz
",
+ "should support protocol autolinks (1)"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "http://foo.bar.baz/test?q=hello&id=22&boolean
",
+ "should support protocol autolinks (2)"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "irc://foo.bar:2233/baz
",
+ "should support protocol autolinks w/ non-HTTP schemes"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "MAILTO:FOO@BAR.BAZ
",
+ "should support protocol autolinks in uppercase"
+ );
+
+ // To do: safety.
+ // assert_eq!(
+ // micromark("", {allowDangerousProtocol: true}),
+ // "a+b+c:d
",
+ // "should support protocol autolinks w/ incorrect URIs (1)"
+ // );
+
+ // To do: safety.
+ // assert_eq!(
+ // micromark("", {allowDangerousProtocol: true}),
+ // "made-up-scheme://foo,bar
",
+ // "should support protocol autolinks w/ incorrect URIs (2)"
+ // );
+
+ assert_eq!(
+ micromark(""),
+ "http://../
",
+ "should support protocol autolinks w/ incorrect URIs (3)"
+ );
+
+ // To do: safety.
+ // assert_eq!(
+ // micromark("", {allowDangerousProtocol: true}),
+ // "localhost:5001/foo
",
+ // "should support protocol autolinks w/ incorrect URIs (4)"
+ // );
+
+ assert_eq!(
+ micromark(""),
+ "<http://foo.bar/baz bim>
",
+ "should not support protocol autolinks w/ spaces"
+ );
+
+ // To do: encode urls.
+ // assert_eq!(
+ // micromark(""),
+ // "http://example.com/\\[\\
",
+ // "should not support character escapes in protocol autolinks"
+ // );
+
+ assert_eq!(
+ micromark(""),
+ "foo@bar.example.com
",
+ "should support email autolinks (1)"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "foo+special@Bar.baz-bar0.com
",
+ "should support email autolinks (2)"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "a@b.c
",
+ "should support email autolinks (3)"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<foo+@bar.example.com>
",
+ "should not support character escapes in email autolinks"
+ );
+
+ assert_eq!(
+ micromark("<>"),
+ "<>
",
+ "should not support empty autolinks"
+ );
+
+ assert_eq!(
+ micromark("< http://foo.bar >"),
+ "< http://foo.bar >
",
+ "should not support autolinks w/ space"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<m:abc>
",
+ "should not support autolinks w/ a single character for a scheme"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<foo.bar.baz>
",
+ "should not support autolinks w/o a colon or at sign"
+ );
+
+ assert_eq!(
+ micromark("http://example.com"),
+ "http://example.com
",
+ "should not support protocol autolinks w/o angle brackets"
+ );
+
+ assert_eq!(
+ micromark("foo@bar.example.com"),
+ "foo@bar.example.com
",
+ "should not support email autolinks w/o angle brackets"
+ );
+
+ // Extra:
+ assert_eq!(
+ micromark("<*@example.com>"),
+ "*@example.com
",
+ "should support autolinks w/ atext (1)"
+ );
+ assert_eq!(
+ micromark(""),
+ "a*@example.com
",
+ "should support autolinks w/ atext (2)"
+ );
+ assert_eq!(
+ micromark(""),
+ "aa*@example.com
",
+ "should support autolinks w/ atext (3)"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<aaa©@example.com>
",
+ "should support non-atext in email autolinks local part (1)"
+ );
+ assert_eq!(
+ micromark(""),
+ "<a*a©@example.com>
",
+ "should support non-atext in email autolinks local part (2)"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<asd@.example.com>
",
+ "should not support a dot after an at sign in email autolinks"
+ );
+ assert_eq!(
+ micromark(""),
+ "<asd@e..xample.com>
",
+ "should not support a dot after another dot in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(
+ ""
+ ),
+ "asd@012345678901234567890123456789012345678901234567890123456789012
",
+ "should support 63 character in email autolinks domains"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<asd@0123456789012345678901234567890123456789012345678901234567890123>
",
+ "should not support 64 character in email autolinks domains"
+ );
+
+ assert_eq!(
+ micromark(
+ ""
+ ),
+ "asd@012345678901234567890123456789012345678901234567890123456789012.a
",
+ "should support a TLD after a 63 character domain in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<asd@0123456789012345678901234567890123456789012345678901234567890123.a>
",
+ "should not support a TLD after a 64 character domain in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(
+ ""
+ ),
+ "asd@a.012345678901234567890123456789012345678901234567890123456789012
",
+ "should support a 63 character TLD in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<asd@a.0123456789012345678901234567890123456789012345678901234567890123>
",
+ "should not support a 64 character TLD in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<asd@-example.com>
",
+ "should not support a dash after `@` in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "asd@e-xample.com
",
+ "should support a dash after other domain characters in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "asd@e--xample.com
",
+ "should support a dash after another dash in email autolinks"
+ );
+
+ assert_eq!(
+ micromark(""),
+ "<asd@example-.com>
",
+ "should not support a dash before a dot in email autolinks"
+ );
+
+ // To do: extensions.
+ // assert_eq!(
+ // micromark("", {extensions: [{disable: {null: ["autolink"]}}]}),
+ // "<a@b.co>
",
+ // "should support turning off autolinks"
+ // );
+}
--
cgit