diff options
Diffstat (limited to '')
| -rw-r--r-- | readme.md | 5 | ||||
| -rw-r--r-- | src/compiler.rs | 30 | ||||
| -rw-r--r-- | src/constant.rs | 17 | ||||
| -rw-r--r-- | src/construct/autolink.rs | 327 | ||||
| -rw-r--r-- | src/construct/mod.rs | 1 | ||||
| -rw-r--r-- | src/content/text.rs | 9 | ||||
| -rw-r--r-- | src/tokenizer.rs | 5 | ||||
| -rw-r--r-- | tests/autolink.rs | 247 | 
8 files changed, 633 insertions, 8 deletions
| @@ -68,6 +68,7 @@ cargo doc --document-private-items  ### Small things +- [ ] (3) Encode urls  - [ ] (1) Parse initial and final whitespace of paragraphs (in text)  - [ ] (3) Clean compiler  - [ ] (1) Optionally remove dangerous protocols when compiling @@ -96,7 +97,7 @@ cargo doc --document-private-items  ### Constructs  - [ ] (5) attention (strong, emphasis) (text) -- [ ] (1) autolink +- [x] autolink  - [x] blank line  - [ ] (5) block quote  - [x] character escape @@ -137,7 +138,7 @@ cargo doc --document-private-items    - [x] paragraph  - [ ] (5) text    - [ ] attention (strong, emphasis) (text) -  - [ ] autolink +  - [x] autolink    - [x] character escape    - [x] character reference    - [ ] code (text) diff --git a/src/compiler.rs b/src/compiler.rs index 48983b6..df26f1b 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -89,7 +89,11 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  | TokenType::CharacterReferenceMarkerNumeric                  | TokenType::CharacterReferenceMarkerHexadecimal                  | TokenType::CharacterReferenceMarkerSemi -                | TokenType::CharacterReferenceValue => {} +                | TokenType::CharacterReferenceValue +                | TokenType::Autolink +                | TokenType::AutolinkMarker +                | TokenType::AutolinkProtocol +                | TokenType::AutolinkEmail => {}                  #[allow(unreachable_patterns)]                  _ => {                      unreachable!("unhandled `enter` of TokenType {:?}", token_type) @@ -108,7 +112,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                  | TokenType::CharacterEscape                  | TokenType::CharacterEscapeMarker                  | TokenType::CharacterReference -                | TokenType::CharacterReferenceMarkerSemi => {} +                | TokenType::CharacterReferenceMarkerSemi +                | TokenType::Autolink +        
        | TokenType::AutolinkMarker => {}                  TokenType::HtmlFlow => {                      ignore_encode = false;                  } @@ -229,6 +235,26 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St                      atx_opening_sequence_size = None;                      atx_heading_buffer = None;                  } +                TokenType::AutolinkProtocol => { +                    let slice = slice_serialize(codes, &get_span(events, index), false); +                    let buf = buf_tail_mut(buffers); +                    // To do: options.allowDangerousProtocol ? undefined : protocolHref +                    // let url = sanitize_uri(slice); +                    let url = encode(&slice); +                    buf.push(format!("<a href=\"{}\">", url)); +                    buf.push(encode(&slice)); +                    buf.push("</a>".to_string()); +                } +                TokenType::AutolinkEmail => { +                    let slice = slice_serialize(codes, &get_span(events, index), false); +                    let buf = buf_tail_mut(buffers); +                    // To do: options.allowDangerousProtocol ? undefined : protocolHref +                    // let url = sanitize_uri(slice); +                    let url = encode(&slice); +                    buf.push(format!("<a href=\"mailto:{}\">", url)); +                    buf.push(encode(&slice)); +                    buf.push("</a>".to_string()); +                }                  TokenType::ThematicBreak => {                      buf_tail_mut(buffers).push("<hr />".to_string());                  } diff --git a/src/constant.rs b/src/constant.rs index 332fdaf..c98c24d 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -27,6 +27,23 @@  /// [code_indented]: crate::construct::code_indented  pub const TAB_SIZE: usize = 4; +/// The number of characters allowed in a protocol of an [autolink][]. 
+/// +/// The protocol part is the `xxx` in `<xxx://example.com>`. +/// 32 characters is fine, 33 is too many. +/// +/// [autolink]: crate::construct::autolink +pub const AUTOLINK_SCHEME_SIZE_MAX: usize = 32; + +/// The number of characters allowed in a domain of an email [autolink][]. +/// +/// There can be multiple “domains”. +/// A domain part is each `xxx` in `<example@xxx.xxx.xxx>`. +/// 63 characters is fine, 64 is too many. +/// +/// [autolink]: crate::construct::autolink +pub const AUTOLINK_DOMAIN_SIZE_MAX: usize = 63; +  /// The number of markers needed for a [thematic break][thematic_break] to form.  ///  /// Like many things in markdown, the number is `3`. diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs new file mode 100644 index 0000000..24f2c20 --- /dev/null +++ b/src/construct/autolink.rs @@ -0,0 +1,327 @@ +//! Autolinks are a construct that occurs in the [text][] content type. +//! +//! It forms with the following BNF: +//! +//! ```bnf +//! autolink ::= '<' ( url | email ) '>' +//! +//! url ::= ascii_alphabetic 1*31( '+' | '-' | '.' | ascii_alphanumeric ) ':' *( code - ascii_control - '\r' - '\n' - ' ') +//! email ::= 1*ascii_atext '@' domain *('.' domain) +//! ; Restriction: up to (and including) 63 characters are allowed in each domain. +//! domain ::= ascii_alphanumeric *( ascii_alphanumeric | 1*'-' ascii_alphanumeric ) +//! ascii_atext ::= ascii_alphanumeric | '#' .. '\'' | '*' | '+' | '-' .. '/' | '=' | '?' | '^' .. '`' | '{' .. '~' +//! ``` +//! +//! Autolinks relate to the `<a>` element in HTML. +//! See [*§ 4.5.1 The `a` element*][html-a] in the HTML spec for more info. +//! When an email autolink is used (so, without a protocol), the string +//! `mailto:` is prepended before the email, when generating the `href` +//! attribute of the hyperlink. +//! +//! The maximum allowed size of a scheme is `32` (inclusive), which is defined +//! in [`AUTOLINK_SCHEME_SIZE_MAX`][autolink_scheme_size_max]. +//! 
The maximum allowed size of a domain is `63` (inclusive), which is defined +//! in [`AUTOLINK_DOMAIN_SIZE_MAX`][autolink_domain_size_max]. +//! +//! The grammar for autolinks is quite strict and requires ASCII to be used +//! (without, for example, spaces). +//! To use non-ascii characters and otherwise impossible characters, in URLs, +//! you can use percent encoding: +//! +//! ```markdown +//! <https://example.com/alpha%20bravo> +//! ``` +//! +//! Yields: +//! +//! ```html +//! <p><a href="https://example.com/alpha%20bravo">https://example.com/alpha%20bravo</a></p> +//! ``` +//! +//! Interestingly, there are a couple of things that are valid autolinks in +//! markdown but in HTML would be valid tags, such as `<svg:rect>` and +//! `<xml:lang/>`. +//! However, because CommonMark employs a naïve HTML parsing algorithm, those +//! are not considered HTML. +//! +//! While CommonMark restricts links from occurring in other links in the case +//! of bracketed links, this restriction is not in place for autolinks inside +//! autolinks: +//! +//! ```markdown +//! [<https://example.com>](#) +//! ``` +//! +//! Yields: +//! +//! ```html +//! <p><a href="#"><a href="https://example.com">https://example.com</a></a></p> +//! ``` +//! +//! The generated output, in this case, is invalid according to HTML. +//! When a browser sees that markup, it will instead parse it as: +//! +//! ```html +//! <p><a href="#"></a><a href="https://example.com">https://example.com</a></p> +//! ``` +//! +//! ## References +//! +//! *   [`autolink.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/autolink.js) +//! *   [*§ 6.4 Autolinks* in `CommonMark`](https://spec.commonmark.org/0.30/#autolinks) +//! +//! [text]: crate::content::text +//! [autolink_scheme_size_max]: crate::constant::AUTOLINK_SCHEME_SIZE_MAX +//! [autolink_domain_size_max]: crate::constant::AUTOLINK_DOMAIN_SIZE_MAX +//! 
[html-a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! +//! <!-- To do: link to `encode` --> + +use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX}; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Start of an autolink. +/// +/// ```markdown +/// a|<https://example.com>b +/// a|<user@example.com>b +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char('<') => { +            tokenizer.enter(TokenType::Autolink); +            tokenizer.enter(TokenType::AutolinkMarker); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::AutolinkMarker); +            tokenizer.enter(TokenType::AutolinkProtocol); +            (State::Fn(Box::new(open)), None) +        } +        _ => (State::Nok, None), +    } +} + +/// After `<`, before the protocol. +/// +/// ```markdown +/// a<|https://example.com>b +/// a<|user@example.com>b +/// ``` +pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char(char) if char.is_ascii_alphabetic() => { +            tokenizer.consume(code); +            (State::Fn(Box::new(scheme_or_email_atext)), None) +        } +        Code::Char(char) if is_ascii_atext(char) => email_atext(tokenizer, code), +        _ => (State::Nok, None), +    } +} + +/// After the first character of the protocol or email name. +/// +/// ```markdown +/// a<h|ttps://example.com>b +/// a<u|ser@example.com>b +/// ``` +pub fn scheme_or_email_atext(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    // Whether this character can be both a protocol and email atext. 
+    let unknown = match code { +        Code::Char('+' | '-' | '.') => true, +        Code::Char(char) if char.is_ascii_alphanumeric() => true, +        _ => false, +    }; + +    if unknown { +        scheme_inside_or_email_atext(tokenizer, code, 1) +    } else { +        email_atext(tokenizer, code) +    } +} + +/// Inside an ambiguous protocol or email name. +/// +/// ```markdown +/// a<ht|tps://example.com>b +/// a<us|er@example.com>b +/// ``` +pub fn scheme_inside_or_email_atext( +    tokenizer: &mut Tokenizer, +    code: Code, +    size: usize, +) -> StateFnResult { +    if let Code::Char(':') = code { +        tokenizer.consume(code); +        (State::Fn(Box::new(url_inside)), None) +    } else { +        // Whether this character can be both a protocol and email atext. +        let unknown = match code { +            Code::Char('+' | '-' | '.') if size < AUTOLINK_SCHEME_SIZE_MAX => true, +            Code::Char(char) if char.is_ascii_alphanumeric() && size < AUTOLINK_SCHEME_SIZE_MAX => { +                true +            } +            _ => false, +        }; + +        if unknown { +            tokenizer.consume(code); +            ( +                State::Fn(Box::new(move |t, c| { +                    scheme_inside_or_email_atext(t, c, size + 1) +                })), +                None, +            ) +        } else { +            email_atext(tokenizer, code) +        } +    } +} + +/// Inside a URL, after the protocol. 
+/// +/// ```markdown +/// a<https:|//example.com>b +/// ``` +pub fn url_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char('>') => { +            tokenizer.exit(TokenType::AutolinkProtocol); +            end(tokenizer, code) +        } +        Code::Char(char) if char.is_ascii_control() => (State::Nok, None), +        Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(' ') => { +            (State::Nok, None) +        } +        Code::Char(_) => { +            tokenizer.consume(code); +            (State::Fn(Box::new(url_inside)), None) +        } +    } +} + +/// Inside email atext. +/// +/// ```markdown +/// a<user.na|me@example.com>b +/// ``` +pub fn email_atext(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char('@') => { +            tokenizer.consume(code); +            ( +                State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))), +                None, +            ) +        } +        Code::Char(char) if is_ascii_atext(char) => { +            tokenizer.consume(code); +            (State::Fn(Box::new(email_atext)), None) +        } +        _ => (State::Nok, None), +    } +} + +/// After an at-sign or a dot in the label. +/// +/// ```markdown +/// a<user.name@|example.com>b +/// a<user.name@example.|com>b +/// ``` +pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { +    match code { +        Code::Char(char) if char.is_ascii_alphanumeric() => email_value(tokenizer, code, size), +        _ => (State::Nok, None), +    } +} + +/// In the label, where `.` and `>` are allowed. 
+/// +/// ```markdown +/// a<user.name@ex|ample.com>b +/// ``` +pub fn email_label(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { +    match code { +        Code::Char('.') => { +            tokenizer.consume(code); +            ( +                State::Fn(Box::new(|t, c| email_at_sign_or_dot(t, c, 0))), +                None, +            ) +        } +        Code::Char('>') => { +            let tail_index = tokenizer.events.len(); +            let head_index = tokenizer.events.len() - 1; +            tokenizer.exit(TokenType::AutolinkProtocol); +            // Change the token type. +            tokenizer.events[head_index].token_type = TokenType::AutolinkEmail; +            tokenizer.events[tail_index].token_type = TokenType::AutolinkEmail; +            end(tokenizer, code) +        } +        _ => email_value(tokenizer, code, size), +    } +} + +/// In the label, where `.` and `>` are *not* allowed. +/// +/// Though, this is also used in `email_label` to parse other values. +/// +/// ```markdown +/// a<user.name@ex-|ample.com>b +/// ``` +pub fn email_value(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult { +    let ok = match code { +        Code::Char('-') if size < AUTOLINK_DOMAIN_SIZE_MAX => true, +        Code::Char(char) if char.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => true, +        _ => false, +    }; + +    if ok { +        tokenizer.consume(code); +        let func = if let Code::Char('-') = code { +            email_value +        } else { +            email_label +        }; +        (State::Fn(Box::new(move |t, c| func(t, c, size + 1))), None) +    } else { +        (State::Nok, None) +    } +} + +/// At the `>`. 
+/// +/// ```markdown +/// a<https://example.com|>b +/// a<user@example.com|>b +/// ``` +pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char('>') => { +            tokenizer.enter(TokenType::AutolinkMarker); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::AutolinkMarker); +            tokenizer.exit(TokenType::Autolink); +            (State::Ok, None) +        } +        _ => unreachable!("expected `>` at `end`"), +    } +} + +/// Check whether the character code represents an ASCII atext. +/// +/// atext is an ASCII alphanumeric (see [`is_ascii_alphanumeric`][]), or a character in +/// the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`), +/// U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F +/// SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E +/// CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE +/// (`{`) to U+007E TILDE (`~`). +/// +/// See: +/// **\[RFC5322]**: +/// [Internet Message Format](https://tools.ietf.org/html/rfc5322). +/// P. Resnick. +/// IETF. +/// +/// [`is_ascii_alphanumeric`]: char::is_ascii_alphanumeric +fn is_ascii_atext(x: char) -> bool { +    matches!(x, '#'..='\'' | '*' | '+' | '-'..='9' | '=' | '?' | 'A'..='Z' | '^'..='~') +} diff --git a/src/construct/mod.rs b/src/construct/mod.rs index d671db6..0bc8746 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -1,5 +1,6 @@  //! Constructs found in markdown. +pub mod autolink;  pub mod blank_line;  pub mod character_escape;  pub mod character_reference; diff --git a/src/content/text.rs b/src/content/text.rs index 2c93b18..a7b40e7 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -5,7 +5,7 @@  //!  //! The constructs found in text are:  //! -//! *   Autolink +//! *   [Autolink][crate::construct::autolink]  //! *   Attention  //! *   HTML (text)  //! *   Hard break escape @@ -17,7 +17,8 @@  //! 
*   [Character reference][crate::construct::character_reference]  use crate::construct::{ -    character_escape::start as character_escape, character_reference::start as character_reference, +    autolink::start as autolink, character_escape::start as character_escape, +    character_reference::start as character_reference,  };  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -33,7 +34,7 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None => (State::Ok, None), -        _ => tokenizer.attempt_2(character_reference, character_escape, |ok| { +        _ => tokenizer.attempt_3(character_reference, character_escape, autolink, |ok| {              Box::new(if ok { start } else { before_data })          })(tokenizer, code),      } @@ -68,7 +69,7 @@ fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              (State::Ok, None)          }          // To do: somehow get these markers from constructs. -        Code::Char('&' | '\\') => { +        Code::Char('&' | '\\' | '<') => {              tokenizer.exit(TokenType::Data);              start(tokenizer, code)          } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 4d235ed..4c1caa4 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -20,6 +20,11 @@ use std::collections::HashMap;  // To do: document each variant.  
#[derive(Debug, Clone, PartialEq)]  pub enum TokenType { +    Autolink, +    AutolinkMarker, +    AutolinkProtocol, +    AutolinkEmail, +      AtxHeading,      AtxHeadingSequence,      AtxHeadingWhitespace, diff --git a/tests/autolink.rs b/tests/autolink.rs new file mode 100644 index 0000000..fc49dcb --- /dev/null +++ b/tests/autolink.rs @@ -0,0 +1,247 @@ +extern crate micromark; +use micromark::micromark; + +#[test] +fn autolink() { +    assert_eq!( +        micromark("```\n<\n >\n```"), +        "<pre><code><\n >\n</code></pre>", +        "should support fenced code w/ grave accents" +    ); + +    assert_eq!( +        micromark("<http://foo.bar.baz>"), +        "<p><a href=\"http://foo.bar.baz\">http://foo.bar.baz</a></p>", +        "should support protocol autolinks (1)" +    ); + +    assert_eq!( +    micromark("<http://foo.bar.baz/test?q=hello&id=22&boolean>"), +    "<p><a href=\"http://foo.bar.baz/test?q=hello&id=22&boolean\">http://foo.bar.baz/test?q=hello&id=22&boolean</a></p>", +    "should support protocol autolinks (2)" +  ); + +    assert_eq!( +        micromark("<irc://foo.bar:2233/baz>"), +        "<p><a href=\"irc://foo.bar:2233/baz\">irc://foo.bar:2233/baz</a></p>", +        "should support protocol autolinks w/ non-HTTP schemes" +    ); + +    assert_eq!( +        micromark("<MAILTO:FOO@BAR.BAZ>"), +        "<p><a href=\"MAILTO:FOO@BAR.BAZ\">MAILTO:FOO@BAR.BAZ</a></p>", +        "should support protocol autolinks in uppercase" +    ); + +    // To do: safety. +    // assert_eq!( +    //   micromark("<a+b+c:d>", {allowDangerousProtocol: true}), +    //   "<p><a href=\"a+b+c:d\">a+b+c:d</a></p>", +    //   "should support protocol autolinks w/ incorrect URIs (1)" +    // ); + +    // To do: safety. 
+    // assert_eq!( +    //   micromark("<made-up-scheme://foo,bar>", {allowDangerousProtocol: true}), +    //   "<p><a href=\"made-up-scheme://foo,bar\">made-up-scheme://foo,bar</a></p>", +    //   "should support protocol autolinks w/ incorrect URIs (2)" +    // ); + +    assert_eq!( +        micromark("<http://../>"), +        "<p><a href=\"http://../\">http://../</a></p>", +        "should support protocol autolinks w/ incorrect URIs (3)" +    ); + +    // To do: safety. +    // assert_eq!( +    //   micromark("<localhost:5001/foo>", {allowDangerousProtocol: true}), +    //   "<p><a href=\"localhost:5001/foo\">localhost:5001/foo</a></p>", +    //   "should support protocol autolinks w/ incorrect URIs (4)" +    // ); + +    assert_eq!( +        micromark("<http://foo.bar/baz bim>"), +        "<p><http://foo.bar/baz bim></p>", +        "should not support protocol autolinks w/ spaces" +    ); + +    // To do: encode urls. +    // assert_eq!( +    //     micromark("<http://example.com/\\[\\>"), +    //     "<p><a href=\"http://example.com/%5C%5B%5C\">http://example.com/\\[\\</a></p>", +    //     "should not support character escapes in protocol autolinks" +    // ); + +    assert_eq!( +        micromark("<foo@bar.example.com>"), +        "<p><a href=\"mailto:foo@bar.example.com\">foo@bar.example.com</a></p>", +        "should support email autolinks (1)" +    ); + +    assert_eq!( +        micromark("<foo+special@Bar.baz-bar0.com>"), +        "<p><a href=\"mailto:foo+special@Bar.baz-bar0.com\">foo+special@Bar.baz-bar0.com</a></p>", +        "should support email autolinks (2)" +    ); + +    assert_eq!( +        micromark("<a@b.c>"), +        "<p><a href=\"mailto:a@b.c\">a@b.c</a></p>", +        "should support email autolinks (3)" +    ); + +    assert_eq!( +        micromark("<foo\\+@bar.example.com>"), +        "<p><foo+@bar.example.com></p>", +        "should not support character escapes in email autolinks" +    ); + +    assert_eq!( +        
micromark("<>"), +        "<p><></p>", +        "should not support empty autolinks" +    ); + +    assert_eq!( +        micromark("< http://foo.bar >"), +        "<p>< http://foo.bar ></p>", +        "should not support autolinks w/ space" +    ); + +    assert_eq!( +        micromark("<m:abc>"), +        "<p><m:abc></p>", +        "should not support autolinks w/ a single character for a scheme" +    ); + +    assert_eq!( +        micromark("<foo.bar.baz>"), +        "<p><foo.bar.baz></p>", +        "should not support autolinks w/o a colon or at sign" +    ); + +    assert_eq!( +        micromark("http://example.com"), +        "<p>http://example.com</p>", +        "should not support protocol autolinks w/o angle brackets" +    ); + +    assert_eq!( +        micromark("foo@bar.example.com"), +        "<p>foo@bar.example.com</p>", +        "should not support email autolinks w/o angle brackets" +    ); + +    // Extra: +    assert_eq!( +        micromark("<*@example.com>"), +        "<p><a href=\"mailto:*@example.com\">*@example.com</a></p>", +        "should support autolinks w/ atext (1)" +    ); +    assert_eq!( +        micromark("<a*@example.com>"), +        "<p><a href=\"mailto:a*@example.com\">a*@example.com</a></p>", +        "should support autolinks w/ atext (2)" +    ); +    assert_eq!( +        micromark("<aa*@example.com>"), +        "<p><a href=\"mailto:aa*@example.com\">aa*@example.com</a></p>", +        "should support autolinks w/ atext (3)" +    ); + +    assert_eq!( +        micromark("<aaa©@example.com>"), +        "<p><aaa©@example.com></p>", +        "should support non-atext in email autolinks local part (1)" +    ); +    assert_eq!( +        micromark("<a*a©@example.com>"), +        "<p><a*a©@example.com></p>", +        "should support non-atext in email autolinks local part (2)" +    ); + +    assert_eq!( +        micromark("<asd@.example.com>"), +        "<p><asd@.example.com></p>", +        "should not support a dot after an at sign in 
email autolinks" +    ); +    assert_eq!( +        micromark("<asd@e..xample.com>"), +        "<p><asd@e..xample.com></p>", +        "should not support a dot after another dot in email autolinks" +    ); + +    assert_eq!( +    micromark( +      "<asd@012345678901234567890123456789012345678901234567890123456789012>" +    ), +    "<p><a href=\"mailto:asd@012345678901234567890123456789012345678901234567890123456789012\">asd@012345678901234567890123456789012345678901234567890123456789012</a></p>", +    "should support 63 character in email autolinks domains" +  ); + +    assert_eq!( +        micromark("<asd@0123456789012345678901234567890123456789012345678901234567890123>"), +        "<p><asd@0123456789012345678901234567890123456789012345678901234567890123></p>", +        "should not support 64 character in email autolinks domains" +    ); + +    assert_eq!( +    micromark( +      "<asd@012345678901234567890123456789012345678901234567890123456789012.a>" +    ), +    "<p><a href=\"mailto:asd@012345678901234567890123456789012345678901234567890123456789012.a\">asd@012345678901234567890123456789012345678901234567890123456789012.a</a></p>", +    "should support a TLD after a 63 character domain in email autolinks" +  ); + +    assert_eq!( +        micromark("<asd@0123456789012345678901234567890123456789012345678901234567890123.a>"), +        "<p><asd@0123456789012345678901234567890123456789012345678901234567890123.a></p>", +        "should not support a TLD after a 64 character domain in email autolinks" +    ); + +    assert_eq!( +    micromark( +      "<asd@a.012345678901234567890123456789012345678901234567890123456789012>" +    ), +    "<p><a href=\"mailto:asd@a.012345678901234567890123456789012345678901234567890123456789012\">asd@a.012345678901234567890123456789012345678901234567890123456789012</a></p>", +    "should support a 63 character TLD in email autolinks" +  ); + +    assert_eq!( +        
micromark("<asd@a.0123456789012345678901234567890123456789012345678901234567890123>"), +        "<p><asd@a.0123456789012345678901234567890123456789012345678901234567890123></p>", +        "should not support a 64 character TLD in email autolinks" +    ); + +    assert_eq!( +        micromark("<asd@-example.com>"), +        "<p><asd@-example.com></p>", +        "should not support a dash after `@` in email autolinks" +    ); + +    assert_eq!( +        micromark("<asd@e-xample.com>"), +        "<p><a href=\"mailto:asd@e-xample.com\">asd@e-xample.com</a></p>", +        "should support a dash after other domain characters in email autolinks" +    ); + +    assert_eq!( +        micromark("<asd@e--xample.com>"), +        "<p><a href=\"mailto:asd@e--xample.com\">asd@e--xample.com</a></p>", +        "should support a dash after another dash in email autolinks" +    ); + +    assert_eq!( +        micromark("<asd@example-.com>"), +        "<p><asd@example-.com></p>", +        "should not support a dash before a dot in email autolinks" +    ); + +    // To do: extensions. +    // assert_eq!( +    //   micromark("<a@b.co>", {extensions: [{disable: {null: ["autolink"]}}]}), +    //   "<p><a@b.co></p>", +    //   "should support turning off autolinks" +    // ); +} | 
