aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-05 15:03:24 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-05 15:03:24 +0200
commit3d00bf57a225369120fd98bee36f65a541260da1 (patch)
tree65780bdbc880f06ba3c92d8c5dbddbdd00ccb92e
parent16de10fe2395002644d685fdfcf76823346d1cc4 (diff)
downloadmarkdown-rs-3d00bf57a225369120fd98bee36f65a541260da1.tar.gz
markdown-rs-3d00bf57a225369120fd98bee36f65a541260da1.tar.bz2
markdown-rs-3d00bf57a225369120fd98bee36f65a541260da1.zip
Fix to implement GFM autolink literals exactly
Diffstat (limited to '')
-rw-r--r--src/compiler.rs37
-rw-r--r--src/construct/gfm_autolink_literal.rs848
-rw-r--r--src/construct/gfm_table.rs2
-rw-r--r--src/construct/text.rs20
-rw-r--r--src/state.rs60
-rw-r--r--tests/gfm_autolink_literal.rs2442
6 files changed, 3245 insertions, 164 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 681ec00..0ea1638 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -871,6 +871,7 @@ fn on_exit_autolink_email(context: &mut CompileContext) {
&Position::from_exit_event(context.events, context.index),
)
.as_str(),
+ false,
);
}
@@ -884,6 +885,7 @@ fn on_exit_autolink_protocol(context: &mut CompileContext) {
&Position::from_exit_event(context.events, context.index),
)
.as_str(),
+ false,
);
}
@@ -1154,6 +1156,7 @@ fn on_exit_gfm_autolink_literal_protocol(context: &mut CompileContext) {
&Position::from_exit_event(context.events, context.index),
)
.as_str(),
+ true,
);
}
@@ -1167,12 +1170,22 @@ fn on_exit_gfm_autolink_literal_www(context: &mut CompileContext) {
&Position::from_exit_event(context.events, context.index),
)
.as_str(),
+ true,
);
}
/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail].
fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) {
- on_exit_autolink_email(context);
+ generate_autolink(
+ context,
+ Some("mailto:"),
+ Slice::from_position(
+ context.bytes,
+ &Position::from_exit_event(context.events, context.index),
+ )
+ .as_str(),
+ true,
+ );
}
/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteCall`][Name::GfmFootnoteCall].
@@ -1822,8 +1835,24 @@ fn generate_footnote_item(context: &mut CompileContext, index: usize) {
}
/// Generate an autolink (used by unicode autolinks and GFM autolink literals).
-fn generate_autolink(context: &mut CompileContext, protocol: Option<&str>, value: &str) {
- if !context.image_alt_inside {
+fn generate_autolink(
+ context: &mut CompileContext,
+ protocol: Option<&str>,
+ value: &str,
+ is_gfm_literal: bool,
+) {
+ let mut is_in_link = false;
+ let mut index = 0;
+
+ while index < context.media_stack.len() {
+ if !context.media_stack[index].image {
+ is_in_link = true;
+ break;
+ }
+ index += 1;
+ }
+
+ if !context.image_alt_inside && (!is_in_link || !is_gfm_literal) {
context.push("<a href=\"");
let url = if let Some(protocol) = protocol {
format!("{}{}", protocol, value)
@@ -1843,7 +1872,7 @@ fn generate_autolink(context: &mut CompileContext, protocol: Option<&str>, value
context.push(&encode(value, context.encode_html));
- if !context.image_alt_inside {
+ if !context.image_alt_inside && (!is_in_link || !is_gfm_literal) {
context.push("</a>");
}
}
diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs
index 704c536..038330c 100644
--- a/src/construct/gfm_autolink_literal.rs
+++ b/src/construct/gfm_autolink_literal.rs
@@ -1,14 +1,621 @@
-//! To do.
+//! GFM: autolink literal occurs in the [text][] content type.
+//!
+//! ## Grammar
+//!
+//! Autolink literals form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! gfm_autolink_literal ::= gfm_protocol_autolink | gfm_www_autolink | gfm_email_autolink
+//!
+//! ; Restriction: the code before must be `www_autolink_before`.
+//! ; Restriction: the code after `.` must not be eof.
+//! www_autolink ::= 3('w' | 'W') '.' [domain [path]]
+//! www_autolink_before ::= eof | eol | space_or_tab | '(' | '*' | '_' | '[' | ']' | '~'
+//!
+//! ; Restriction: the code before must be `http_autolink_before`.
+//! ; Restriction: the code after the protocol must be `http_autolink_protocol_after`.
+//! http_autolink ::= ('h' | 'H') 2('t' | 'T') ('p' | 'P') ['s' | 'S'] ':' 2'/' domain [path]
+//! http_autolink_before ::= byte - ascii_alpha
+//! http_autolink_protocol_after ::= byte - eof - eol - ascii_control - unicode_whitespace - unicode_punctuation
+//!
+//! ; Restriction: the code before must be `email_autolink_before`.
+//! ; Restriction: `ascii_digit` may not occur in the last label part of the label.
+//! email_autolink ::= 1*('+' | '-' | '.' | '_' | ascii_alphanumeric) '@' 1*(1*label_segment label_dot_cont) 1*label_segment
+//! email_autolink_before ::= byte - ascii_alpha - '/'
+//!
+//! ; Restriction: `_` may not occur in the last two domain parts.
+//! domain ::= 1*(url_ampt_cont | domain_punct_cont | '-' | byte - eof - ascii_control - unicode_whitespace - unicode_punctuation)
+//! ; Restriction: must not be followed by `punct`.
+//! domain_punct_cont ::= '.' | '_'
+//! ; Restriction: must not be followed by `char_ref`.
+//! url_ampt_cont ::= '&'
+//!
+//! ; Restriction: a counter `balance = 0` is increased for every `(`, and decreased for every `)`.
+//! ; Restriction: `)` must not be `paren_at_end`.
+//! path ::= 1*(url_ampt_cont | path_punctuation_cont | '(' | ')' | byte - eof - eol - space_or_tab)
+//! ; Restriction: must not be followed by `punct`.
+//! path_punctuation_cont ::= trailing_punctuation - '<'
+//! ; Restriction: must be followed by `punct` and `balance` must be less than `0`.
+//! paren_at_end ::= ')'
+//!
+//! label_segment ::= label_dash_underscore_cont | ascii_alpha | ascii_digit
+//! ; Restriction: if followed by `punct`, the whole email autolink is invalid.
+//! label_dash_underscore_cont ::= '-' | '_'
+//! ; Restriction: must not be followed by `punct`.
+//! label_dot_cont ::= '.'
+//!
+//! punct ::= *trailing_punctuation ( byte - eof - eol - space_or_tab - '<' )
+//! char_ref ::= *ascii_alpha ';' path_end
+//! trailing_punctuation ::= '!' | '"' | '\'' | ')' | '*' | ',' | '.' | ':' | ';' | '<' | '?' | '_' | '~'
+//! ```
+//!
+//! The grammar for GFM autolink literal is very relaxed: basically anything
+//! except for whitespace is allowed after a prefix.
+//! To use whitespace characters and otherwise impossible characters, in URLs,
+//! you can use percent encoding:
+//!
+//! ```markdown
+//! https://example.com/alpha%20bravo
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p><a href="https://example.com/alpha%20bravo">https://example.com/alpha%20bravo</a></p>
+//! ```
+//!
+//! There are several cases where incorrect encoding of URLs would, in other
+//! languages, result in a parse error.
+//! In markdown, there are no errors, and URLs are normalized.
+//! In addition, many characters are percent encoded
+//! ([`sanitize_uri`][sanitize_uri]).
+//! For example:
+//!
+//! ```markdown
+//! www.a👍b%
+//! ```
+//!
+//! Yields:
+//!
+//! ```html
+//! <p><a href="http://www.a%F0%9F%91%8Db%25">www.a👍b%</a></p>
+//! ```
+//!
+//! There is a big difference between how www and protocol literals work
+//! compared to how email literals work.
+//! The first two are done when parsing, and work like anything else in
+//! markdown.
+//! But email literals are handled afterwards: when everything is parsed, we
+//! look back at the events to figure out if there were email addresses.
+//! This particularly affects how they interleave with character escapes and
+//! character references.
+//!
+//! ## HTML
+//!
+//! GFM autolink literals relate to the `<a>` element in HTML.
+//! See [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info.
+//! When an email autolink is used, the string `mailto:` is prepended when
+//! generating the `href` attribute of the hyperlink.
+//! When a www autolink is used, the string `http:` is prepended.
+//!
+//! ## Recommendation
+//!
+//! It is recommended to use labels ([label start link][label_start_link],
+//! [label end][label_end]), either with a resource or a definition
+//! ([definition][]), instead of autolink literals, as those allow relative
+//! URLs and descriptive text to explain the URL in prose.
+//!
+//! ## Bugs
+//!
+//! GitHub’s own algorithm to parse autolink literals contains three bugs.
+//! A smaller bug is left unfixed in this project for consistency.
+//! Two main bugs are not present in this project.
+//! The issues relating to autolink literals are:
+//!
+//! * [GFM autolink extension (`www.`, `https?://` parts): links don’t work when after bracket](https://github.com/github/cmark-gfm/issues/278)\
+//! fixed here ✅
+//! * [GFM autolink extension (`www.` part): uppercase does not match on issues/PRs/comments](https://github.com/github/cmark-gfm/issues/280)\
+//! fixed here ✅
+//! * [GFM autolink extension (`www.` part): the word `www` matches](https://github.com/github/cmark-gfm/issues/279)\
+//! present here for consistency
+//!
+//! ## Tokens
+//!
+//! * [`GfmAutolinkLiteralProtocol`][Name::GfmAutolinkLiteralProtocol]
+//! * [`GfmAutolinkLiteralWww`][Name::GfmAutolinkLiteralWww]
+//! * [`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail]
+//!
+//! ## References
+//!
+//! * [`micromark-extension-gfm-autolink-literal`](https://github.com/micromark/micromark-extension-gfm-autolink-literal)
+//! * [*§ 6.9 Autolinks (extension)* in `GFM`](https://github.github.com/gfm/#autolinks-extension-)
+//!
+//! [text]: crate::construct::text
+//! [definition]: crate::construct::definition
+//! [attention]: crate::construct::attention
+//! [label_start_link]: crate::construct::label_start_link
+//! [label_end]: crate::construct::label_end
+//! [sanitize_uri]: crate::util::sanitize_uri
+//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
use crate::event::{Event, Kind, Name};
+use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-use crate::util::classify_character::{classify, Kind as CharacterKind};
-use crate::util::slice::{Position, Slice};
+use crate::util::{
+ classify_character::{classify_opt, Kind as CharacterKind},
+ slice::{char_after_index, Position, Slice},
+};
use alloc::vec::Vec;
-use core::str;
-// To do: doc al functions.
+/// Start of protocol autolink literal.
+///
+/// ```markdown
+/// > | https://example.com/a?b#c
+/// ^
+/// ```
+pub fn protocol_start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer
+ .parse_state
+ .options
+ .constructs
+ .gfm_autolink_literal &&
+ matches!(tokenizer.current, Some(b'H' | b'h'))
+ // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L214>.
+ && !matches!(tokenizer.previous, Some(b'A'..=b'Z' | b'a'..=b'z'))
+ {
+ tokenizer.enter(Name::GfmAutolinkLiteralProtocol);
+ tokenizer.attempt(
+ State::Next(StateName::GfmAutolinkLiteralProtocolAfter),
+ State::Nok,
+ );
+ tokenizer.attempt(
+ State::Next(StateName::GfmAutolinkLiteralDomainInside),
+ State::Nok,
+ );
+ tokenizer.tokenize_state.start = tokenizer.point.index;
+ State::Retry(StateName::GfmAutolinkLiteralProtocolPrefixInside)
+ } else {
+ State::Nok
+ }
+}
+
+/// After a protocol autolink literal.
+///
+/// ```markdown
+/// > | https://example.com/a?b#c
+/// ^
+/// ```
+pub fn protocol_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.exit(Name::GfmAutolinkLiteralProtocol);
+ State::Ok
+}
+
+/// In protocol.
+///
+/// ```markdown
+/// > | https://example.com/a?b#c
+/// ^^^^^
+/// ```
+pub fn protocol_prefix_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'A'..=b'Z' | b'a'..=b'z')
+ // `5` is size of `https`
+ if tokenizer.point.index - tokenizer.tokenize_state.start < 5 =>
+ {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralProtocolPrefixInside)
+ }
+ Some(b':') => {
+ let slice = Slice::from_indices(
+ tokenizer.parse_state.bytes,
+ tokenizer.tokenize_state.start,
+ tokenizer.point.index,
+ );
+ let name = slice.as_str().to_ascii_lowercase();
+
+ tokenizer.tokenize_state.start = 0;
+
+ if name == "http" || name == "https" {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralProtocolSlashesInside)
+ } else {
+ State::Nok
+ }
+ }
+ _ => {
+ tokenizer.tokenize_state.start = 0;
+ State::Nok
+ }
+ }
+}
+
+/// In protocol slashes.
+///
+/// ```markdown
+/// > | https://example.com/a?b#c
+/// ^^
+/// ```
+pub fn protocol_slashes_inside(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer.current == Some(b'/') {
+ tokenizer.consume();
+ if tokenizer.tokenize_state.size == 0 {
+ tokenizer.tokenize_state.size += 1;
+ State::Next(StateName::GfmAutolinkLiteralProtocolSlashesInside)
+ } else {
+ tokenizer.tokenize_state.size = 0;
+ State::Ok
+ }
+ } else {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ }
+}
+/// Start of www autolink literal.
+///
+/// ```markdown
+/// > | www.example.com/a?b#c
+/// ^
+/// ```
+pub fn www_start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer
+ .parse_state
+ .options
+ .constructs
+ .gfm_autolink_literal &&
+ matches!(tokenizer.current, Some(b'W' | b'w'))
+ // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
+ && matches!(tokenizer.previous, None | Some(b'\t' | b'\n' | b' ' | b'(' | b'*' | b'_' | b'[' | b']' | b'~'))
+ {
+ tokenizer.enter(Name::GfmAutolinkLiteralWww);
+ tokenizer.attempt(
+ State::Next(StateName::GfmAutolinkLiteralWwwAfter),
+ State::Nok,
+ );
+ // Note: we *check*, so we can discard the `www.` we parsed.
+ // If it worked, we consider it as a part of the domain.
+ tokenizer.check(
+ State::Next(StateName::GfmAutolinkLiteralDomainInside),
+ State::Nok,
+ );
+ State::Retry(StateName::GfmAutolinkLiteralWwwPrefixInside)
+ } else {
+ State::Nok
+ }
+}
+
+/// After a www autolink literal.
+///
+/// ```markdown
+/// > | www.example.com/a?b#c
+/// ^
+/// ```
+pub fn www_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.exit(Name::GfmAutolinkLiteralWww);
+ State::Ok
+}
+
+/// In www prefix.
+///
+/// ```markdown
+/// > | www.example.com
+/// ^^^^
+/// ```
+pub fn www_prefix_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'.') if tokenizer.tokenize_state.size == 3 => {
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralWwwPrefixAfter)
+ }
+ Some(b'W' | b'w') if tokenizer.tokenize_state.size < 3 => {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralWwwPrefixInside)
+ }
+ _ => {
+ tokenizer.tokenize_state.size = 0;
+ State::Nok
+ }
+ }
+}
+
+/// After www prefix.
+///
+/// ```markdown
+/// > | www.example.com
+/// ^
+/// ```
+pub fn www_prefix_after(tokenizer: &mut Tokenizer) -> State {
+ // If there is *anything*, we can link.
+ if tokenizer.current == None {
+ State::Nok
+ } else {
+ State::Ok
+ }
+}
+
+/// In domain.
+///
+/// ```markdown
+/// > | https://example.com/a
+/// ^^^^^^^^^^^
+/// ```
+pub fn domain_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ // Check whether this marker, which is a trailing punctuation
+ // marker, is optionally followed by more trailing markers, and then
+ // followed by an end.
+ Some(b'.' | b'_') => {
+ tokenizer.check(
+ State::Next(StateName::GfmAutolinkLiteralDomainAfter),
+ State::Next(StateName::GfmAutolinkLiteralDomainAtPunctuation),
+ );
+ State::Retry(StateName::GfmAutolinkLiteralTrail)
+ }
+ // Dashes and continuation bytes are fine.
+ Some(b'-' | 0x80..=0xBF) => {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralDomainInside)
+ }
+ _ => {
+ // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
+ if byte_to_kind(
+ tokenizer.parse_state.bytes,
+ tokenizer.point.index,
+ tokenizer.current,
+ ) == CharacterKind::Other
+ {
+ tokenizer.tokenize_state.seen = true;
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralDomainInside)
+ } else {
+ State::Retry(StateName::GfmAutolinkLiteralDomainAfter)
+ }
+ }
+ }
+}
+
+/// In domain, at potential trailing punctuation, that was not trailing.
+///
+/// ```markdown
+/// > | https://example.com
+/// ^
+/// ```
+pub fn domain_at_punctuation(tokenizer: &mut Tokenizer) -> State {
+ // There is an underscore in the last segment of the domain
+ if matches!(tokenizer.current, Some(b'_')) {
+ tokenizer.tokenize_state.marker = b'_';
+ }
+ // Otherwise, it’s a `.`: save the last segment underscore in the
+ // penultimate segment slot.
+ else {
+ tokenizer.tokenize_state.marker_b = tokenizer.tokenize_state.marker;
+ tokenizer.tokenize_state.marker = 0;
+ }
+
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralDomainInside)
+}
+
+/// After domain
+///
+/// ```markdown
+/// > | https://example.com/a
+/// ^
+/// ```
+pub fn domain_after(tokenizer: &mut Tokenizer) -> State {
+ // No underscores allowed in last two segments.
+ let result = if tokenizer.tokenize_state.marker_b == b'_'
+ || tokenizer.tokenize_state.marker == b'_'
+ // At least one character must be seen.
+ || !tokenizer.tokenize_state.seen
+ // Note: GH says a dot is needed, but that’s not true:
+ // <https://github.com/github/cmark-gfm/issues/279>
+ {
+ State::Nok
+ } else {
+ State::Retry(StateName::GfmAutolinkLiteralPathInside)
+ };
+
+ tokenizer.tokenize_state.seen = false;
+ tokenizer.tokenize_state.marker = 0;
+ tokenizer.tokenize_state.marker_b = 0;
+ result
+}
+
+/// In path.
+///
+/// ```markdown
+/// > | https://example.com/a
+/// ^^
+/// ```
+pub fn path_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ // Continuation bytes are fine, we’ve already checked the first one.
+ Some(0x80..=0xBF) => {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralPathInside)
+ }
+ // Count opening parens.
+ Some(b'(') => {
+ tokenizer.tokenize_state.size += 1;
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralPathInside)
+ }
+ // Check whether this trailing punctuation marker is optionally
+ // followed by more trailing markers, and then followed
+ // by an end.
+ // If this is a paren (followed by trailing, then the end), we
+ // *continue* if we saw less closing parens than opening parens.
+ Some(
+ b'!' | b'"' | b'&' | b'\'' | b')' | b'*' | b',' | b'.' | b':' | b';' | b'<' | b'?'
+ | b']' | b'_' | b'~',
+ ) => {
+ let next = if tokenizer.current == Some(b')')
+ && tokenizer.tokenize_state.size_b < tokenizer.tokenize_state.size
+ {
+ StateName::GfmAutolinkLiteralPathAtPunctuation
+ } else {
+ StateName::GfmAutolinkLiteralPathAfter
+ };
+ tokenizer.check(
+ State::Next(next),
+ State::Next(StateName::GfmAutolinkLiteralPathAtPunctuation),
+ );
+ State::Retry(StateName::GfmAutolinkLiteralTrail)
+ }
+ _ => {
+ // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
+ if byte_to_kind(
+ tokenizer.parse_state.bytes,
+ tokenizer.point.index,
+ tokenizer.current,
+ ) == CharacterKind::Whitespace
+ {
+ State::Retry(StateName::GfmAutolinkLiteralPathAfter)
+ } else {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralPathInside)
+ }
+ }
+ }
+}
+
+/// In path, at potential trailing punctuation, that was not trailing.
+///
+/// ```markdown
+/// > | https://example.com/a"b
+/// ^
+/// ```
+pub fn path_at_punctuation(tokenizer: &mut Tokenizer) -> State {
+ // Count closing parens.
+ if tokenizer.current == Some(b')') {
+ tokenizer.tokenize_state.size_b += 1;
+ }
+
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralPathInside)
+}
+
+/// At end of path, reset parens.
+///
+/// ```markdown
+/// > | https://example.com/asd(qwe).
+/// ^
+/// ```
+pub fn path_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.size = 0;
+ tokenizer.tokenize_state.size_b = 0;
+ State::Ok
+}
+
+/// In trail of domain or path.
+///
+/// ```markdown
+/// > | https://example.com").
+/// ^
+/// ```
+pub fn trail(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ // Regular trailing punctuation.
+ Some(
+ b'!' | b'"' | b'\'' | b')' | b'*' | b',' | b'.' | b':' | b';' | b'?' | b'_' | b'~',
+ ) => {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralTrail)
+ }
+ // `&` followed by one or more alphabeticals and then a `;`, is
+ // as a whole considered as trailing punctuation.
+ // In all other cases, it is considered as continuation of the URL.
+ Some(b'&') => {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralTrailCharRefStart)
+ }
+ // `<` is an end.
+ Some(b'<') => State::Ok,
+ // Needed because we allow literals after `[`, as we fix:
+ // <https://github.com/github/cmark-gfm/issues/278>.
+ // Check that it is not followed by `(` or `[`.
+ Some(b']') => {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralTrailBracketAfter)
+ }
+ _ => {
+ // Whitespace is the end of the URL, anything else is continuation.
+ if byte_to_kind(
+ tokenizer.parse_state.bytes,
+ tokenizer.point.index,
+ tokenizer.current,
+ ) == CharacterKind::Whitespace
+ {
+ State::Ok
+ } else {
+ State::Nok
+ }
+ }
+ }
+}
+
+/// In trail, after `]`.
+///
+/// > 👉 **Note**: this deviates from `cmark-gfm` to fix a bug.
+/// > See end of <https://github.com/github/cmark-gfm/issues/278> for more.
+///
+/// ```markdown
+/// > | https://example.com](
+/// ^
+/// ```
+pub fn trail_bracket_after(tokenizer: &mut Tokenizer) -> State {
+ // Whitespace or something that could start a resource or reference is the end.
+ // Switch back to trail otherwise.
+ if matches!(
+ tokenizer.current,
+ None | Some(b'\t' | b'\n' | b' ' | b'(' | b'[')
+ ) {
+ State::Ok
+ } else {
+ State::Retry(StateName::GfmAutolinkLiteralTrail)
+ }
+}
+
+/// In character-reference like trail, after `&`.
+///
+/// ```markdown
+/// > | https://example.com&amp;).
+/// ^
+/// ```
+pub fn trail_char_ref_start(tokenizer: &mut Tokenizer) -> State {
+ if matches!(tokenizer.current, Some(b'A'..=b'Z' | b'a'..=b'z')) {
+ State::Retry(StateName::GfmAutolinkLiteralTrailCharRefInside)
+ } else {
+ State::Nok
+ }
+}
+
+/// In character-reference like trail.
+///
+/// ```markdown
+/// > | https://example.com&amp;).
+/// ^
+/// ```
+pub fn trail_char_ref_inside(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralTrailCharRefInside)
+ }
+ // Switch back to trail if this is well-formed.
+ Some(b';') => {
+ tokenizer.consume();
+ State::Next(StateName::GfmAutolinkLiteralTrail)
+ }
+ _ => State::Nok,
+ }
+}
+
+/// Resolve: postprocess text to find email autolink literals.
pub fn resolve(tokenizer: &mut Tokenizer) {
tokenizer.map.consume(&mut tokenizer.events);
@@ -36,23 +643,30 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
let mut start = 0;
while byte_index < bytes.len() {
- if matches!(bytes[byte_index], b'H' | b'h' | b'W' | b'w' | b'@') {
- if let Some(autolink) = peek(bytes, byte_index) {
- byte_index = autolink.1;
+ if bytes[byte_index] == b'@' {
+ let mut range = (0, 0);
+
+ if let Some(start) = peek_bytes_atext(bytes, byte_index) {
+ if let Some(end) = peek_bytes_email_domain(bytes, byte_index + 1) {
+ let end = peek_bytes_truncate(bytes, start, end);
+ range = (start, end);
+ }
+ }
+
+ if range.1 != 0 {
+ byte_index = range.1;
// If there is something between the last link
// (or the start) and this link.
- if start != autolink.0 {
+ if start != range.0 {
replace.push(Event {
kind: Kind::Enter,
name: Name::Data,
point: point.clone(),
link: None,
});
- point = point.shift_to(
- tokenizer.parse_state.bytes,
- start_index + autolink.0,
- );
+ point = point
+ .shift_to(tokenizer.parse_state.bytes, start_index + range.0);
replace.push(Event {
kind: Kind::Exit,
name: Name::Data,
@@ -64,19 +678,19 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
// Add the link.
replace.push(Event {
kind: Kind::Enter,
- name: autolink.2.clone(),
+ name: Name::GfmAutolinkLiteralEmail,
point: point.clone(),
link: None,
});
- point = point
- .shift_to(tokenizer.parse_state.bytes, start_index + autolink.1);
+ point =
+ point.shift_to(tokenizer.parse_state.bytes, start_index + range.1);
replace.push(Event {
kind: Kind::Exit,
- name: autolink.2.clone(),
+ name: Name::GfmAutolinkLiteralEmail,
point: point.clone(),
link: None,
});
- start = autolink.1;
+ start = range.1;
}
}
@@ -114,140 +728,19 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
}
}
-fn peek(bytes: &[u8], index: usize) -> Option<(usize, usize, Name)> {
- // Protocol.
- if let Some(protocol_end) = peek_protocol(bytes, index) {
- if let Some(domain_end) = peek_domain(bytes, protocol_end, true) {
- let end = truncate(bytes, protocol_end, domain_end);
-
- // Cannot be empty.
- if end != protocol_end {
- return Some((index, end, Name::GfmAutolinkLiteralProtocol));
- }
- }
- }
-
- // Www.
- if peek_www(bytes, index).is_some() {
- // Note: we discard the `www.` we parsed, we now try to parse it as a domain.
- let domain_end = peek_domain(bytes, index, false).unwrap_or(index);
- let end = truncate(bytes, index, domain_end);
- return Some((index, end, Name::GfmAutolinkLiteralWww));
- }
-
- // Email.
- if bytes[index] == b'@' {
- if let Some(start) = peek_atext(bytes, index) {
- if let Some(end) = peek_email_domain(bytes, index + 1) {
- let end = truncate(bytes, start, end);
- return Some((start, end, Name::GfmAutolinkLiteralEmail));
- }
- }
- }
-
- None
-}
-
-/// Move past `http://`, `https://`, case-insensitive.
-fn peek_protocol(bytes: &[u8], mut index: usize) -> Option<usize> {
- // `http`
- if index + 3 < bytes.len()
- && matches!(bytes[index], b'H' | b'h')
- && matches!(bytes[index + 1], b'T' | b't')
- && matches!(bytes[index + 2], b'T' | b't')
- && matches!(bytes[index + 3], b'P' | b'p')
- {
- index += 4;
-
- // `s`, optional.
- if index + 1 < bytes.len() && matches!(bytes[index], b'S' | b's') {
- index += 1;
- }
-
- // `://`
- if index + 3 < bytes.len()
- && bytes[index] == b':'
- && bytes[index + 1] == b'/'
- && bytes[index + 2] == b'/'
- {
- return Some(index + 3);
- }
- }
-
- None
-}
-
-/// Move past `www.`, case-insensitive.
-fn peek_www(bytes: &[u8], index: usize) -> Option<usize> {
- // `www.`
- if index + 3 < bytes.len()
- // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
- && (index == 0 || matches!(bytes[index - 1], b'\t' | b'\n' | b'\r' | b' ' | b'(' | b'*' | b'_' | b'~'))
- && matches!(bytes[index], b'W' | b'w')
- && matches!(bytes[index + 1], b'W' | b'w')
- && matches!(bytes[index + 2], b'W' | b'w')
- && bytes[index + 3] == b'.'
- {
- Some(index + 4)
- } else {
- None
- }
-}
-
-/// Move past `example.com`.
-fn peek_domain(bytes: &[u8], start: usize, allow_short: bool) -> Option<usize> {
- let mut dots = false;
- let mut penultime = false;
- let mut last = false;
- // To do: expose this from slice?
- // To do: do it ourselves? <https://github.com/commonmark/cmark/blob/8a023286198a7e408398e282f293e3b0baebb644/src/utf8.c#L150>, <https://doc.rust-lang.org/core/str/fn.next_code_point.html>, <https://www.reddit.com/r/rust/comments/4g2zu0/lazy_unicode_iterator_from_byte_iteratorslice/>, <http://bjoern.hoehrmann.de/utf-8/decoder/dfa/>.
- let char_indices = str::from_utf8(&bytes[start..])
- .unwrap()
- .char_indices()
- .collect::<Vec<_>>();
- let mut index = 0;
-
- while index < char_indices.len() {
- match char_indices[index].1 {
- '_' => last = true,
- '.' => {
- penultime = last;
- last = false;
- dots = true;
- }
- '-' => {}
- // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
- char if classify(char) == CharacterKind::Other => {}
- _ => break,
- }
-
- index += 1;
- }
-
- // No underscores allowed in last two parts.
- // A valid domain needs to have at least a dot.
- if penultime || last || (!allow_short && !dots) {
- None
- } else {
- // Now peek past `/path?search#hash` (anything except whitespace).
- while index < char_indices.len() {
- if classify(char_indices[index].1) == CharacterKind::Whitespace {
- break;
- }
-
- index += 1;
- }
-
- Some(if index == char_indices.len() {
- bytes.len()
- } else {
- start + char_indices[index].0
- })
- }
-}
-
-/// Move back past `contact`.
-fn peek_atext(bytes: &[u8], end: usize) -> Option<usize> {
+// To do: add `xmpp`, `mailto` support.
+
+/// Move back past atext.
+///
+/// Moving back is only used when post processing text: so for the email address
+/// algorithm.
+///
+/// ```markdown
+/// > | a contact@example.org b
+/// ^-- from
+/// ^-- to
+/// ```
+fn peek_bytes_atext(bytes: &[u8], end: usize) -> Option<usize> {
let mut index = end;
// Take simplified atext.
@@ -270,8 +763,17 @@ fn peek_atext(bytes: &[u8], end: usize) -> Option<usize> {
}
}
-/// Move past `example.com`.
-fn peek_email_domain(bytes: &[u8], start: usize) -> Option<usize> {
+/// Move past email domain.
+///
+/// Peeking like this is only used when post processing text: so for the email
+/// address algorithm.
+///
+/// ```markdown
+/// > | a contact@example.org b
+/// ^-- from
+/// ^-- to
+/// ```
+fn peek_bytes_email_domain(bytes: &[u8], start: usize) -> Option<usize> {
let mut index = start;
let mut dot = false;
@@ -303,8 +805,21 @@ fn peek_email_domain(bytes: &[u8], start: usize) -> Option<usize> {
}
}
-/// Split trialing stuff from a URL.
-fn truncate(bytes: &[u8], start: usize, mut end: usize) -> usize {
+/// Move back past punctuation.
+///
+/// Moving back is only used when post processing text: so for the email address
+/// algorithm.
+///
+/// This is much more complex than needed, because GH allows a lot of
+/// punctuation in the protocol and www algorithms.
+/// However, those aren’t implemented like the email algo.
+///
+/// ```markdown
+/// > | a contact@example.org”) b
+/// ^-- from
+/// ^-- to
+/// ```
+fn peek_bytes_truncate(bytes: &[u8], start: usize, mut end: usize) -> usize {
let mut index = start;
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L42>
@@ -379,3 +894,24 @@ fn truncate(bytes: &[u8], start: usize, mut end: usize) -> usize {
split
}
+
+/// Classify a byte (or `char`).
+fn byte_to_kind(bytes: &[u8], index: usize, byte: Option<u8>) -> CharacterKind {
+ match byte {
+ None => CharacterKind::Whitespace,
+ Some(byte) => {
+ if byte.is_ascii_whitespace() {
+ CharacterKind::Whitespace
+ } else if byte.is_ascii_punctuation() {
+ CharacterKind::Punctuation
+ } else if byte.is_ascii_alphanumeric() {
+ CharacterKind::Other
+ } else {
+ // Otherwise: this is either an ASCII control or a byte of a
+ // non-ASCII `char`, so look up the `char` here and classify that.
+ let char = char_after_index(bytes, index);
+ classify_opt(char)
+ }
+ }
+ }
+}
diff --git a/src/construct/gfm_table.rs b/src/construct/gfm_table.rs
index d7c2b69..27fbadf 100644
--- a/src/construct/gfm_table.rs
+++ b/src/construct/gfm_table.rs
@@ -191,7 +191,7 @@
//! This bug is not present in this project.
//! The issue relating to tables is:
//!
-//! * [GFM tables: escaped escapes are incorrectly treated as escapes](https://github.com/github/cmark-gfm/issues/277)\
+//! * [GFM tables: escaped escapes are incorrectly treated as escapes](https://github.com/github/cmark-gfm/issues/277)
//!
//! ## Tokens
//!
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 3cb0f10..0168d02 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -29,17 +29,21 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
/// Characters that can start something in text.
-const MARKERS: [u8; 11] = [
+const MARKERS: [u8; 15] = [
b'!', // `label_start_image`
b'$', // `raw_text` (math (text))
b'&', // `character_reference`
b'*', // `attention` (emphasis, strong)
b'<', // `autolink`, `html_text`
+ b'H', // `gfm_autolink_literal` (`protocol` kind)
+ b'W', // `gfm_autolink_literal` (`www.` kind)
b'[', // `label_start_link`
b'\\', // `character_escape`, `hard_break_escape`
b']', // `label_end`, `gfm_label_start_footnote`
b'_', // `attention` (emphasis, strong)
b'`', // `raw_text` (code (text))
+ b'h', // `gfm_autolink_literal` (`protocol` kind)
+ b'w', // `gfm_autolink_literal` (`www.` kind)
b'~', // `attention` (gfm strikethrough)
];
@@ -113,6 +117,20 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
);
State::Retry(StateName::AutolinkStart)
}
+ Some(b'H' | b'h') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::GfmAutolinkLiteralProtocolStart)
+ }
+ Some(b'W' | b'w') => {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::GfmAutolinkLiteralWwwStart)
+ }
Some(b'[') => {
tokenizer.attempt(
State::Next(StateName::TextBefore),
diff --git a/src/state.rs b/src/state.rs
index 5013ec8..d7c0c8a 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -310,6 +310,29 @@ pub enum Name {
StringBefore,
StringBeforeData,
+ GfmAutolinkLiteralProtocolStart,
+ GfmAutolinkLiteralProtocolAfter,
+ GfmAutolinkLiteralProtocolPrefixInside,
+ GfmAutolinkLiteralProtocolSlashesInside,
+
+ GfmAutolinkLiteralWwwStart,
+ GfmAutolinkLiteralWwwAfter,
+ GfmAutolinkLiteralWwwPrefixInside,
+ GfmAutolinkLiteralWwwPrefixAfter,
+
+ GfmAutolinkLiteralDomainInside,
+ GfmAutolinkLiteralDomainAtPunctuation,
+ GfmAutolinkLiteralDomainAfter,
+
+ GfmAutolinkLiteralPathInside,
+ GfmAutolinkLiteralPathAtPunctuation,
+ GfmAutolinkLiteralPathAfter,
+
+ GfmAutolinkLiteralTrail,
+ GfmAutolinkLiteralTrailCharRefInside,
+ GfmAutolinkLiteralTrailCharRefStart,
+ GfmAutolinkLiteralTrailBracketAfter,
+
GfmTableStart,
GfmTableHeadRowBefore,
GfmTableHeadRowStart,
@@ -686,6 +709,43 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::StringBefore => construct::string::before,
Name::StringBeforeData => construct::string::before_data,
+ Name::GfmAutolinkLiteralProtocolStart => construct::gfm_autolink_literal::protocol_start,
+ Name::GfmAutolinkLiteralProtocolAfter => construct::gfm_autolink_literal::protocol_after,
+ Name::GfmAutolinkLiteralProtocolPrefixInside => {
+ construct::gfm_autolink_literal::protocol_prefix_inside
+ }
+ Name::GfmAutolinkLiteralProtocolSlashesInside => {
+ construct::gfm_autolink_literal::protocol_slashes_inside
+ }
+
+ Name::GfmAutolinkLiteralWwwAfter => construct::gfm_autolink_literal::www_after,
+ Name::GfmAutolinkLiteralWwwStart => construct::gfm_autolink_literal::www_start,
+ Name::GfmAutolinkLiteralWwwPrefixInside => {
+ construct::gfm_autolink_literal::www_prefix_inside
+ }
+ Name::GfmAutolinkLiteralWwwPrefixAfter => construct::gfm_autolink_literal::www_prefix_after,
+ Name::GfmAutolinkLiteralDomainInside => construct::gfm_autolink_literal::domain_inside,
+ Name::GfmAutolinkLiteralDomainAtPunctuation => {
+ construct::gfm_autolink_literal::domain_at_punctuation
+ }
+ Name::GfmAutolinkLiteralDomainAfter => construct::gfm_autolink_literal::domain_after,
+
+ Name::GfmAutolinkLiteralPathInside => construct::gfm_autolink_literal::path_inside,
+ Name::GfmAutolinkLiteralPathAtPunctuation => {
+ construct::gfm_autolink_literal::path_at_punctuation
+ }
+ Name::GfmAutolinkLiteralPathAfter => construct::gfm_autolink_literal::path_after,
+ Name::GfmAutolinkLiteralTrail => construct::gfm_autolink_literal::trail,
+ Name::GfmAutolinkLiteralTrailCharRefStart => {
+ construct::gfm_autolink_literal::trail_char_ref_start
+ }
+ Name::GfmAutolinkLiteralTrailCharRefInside => {
+ construct::gfm_autolink_literal::trail_char_ref_inside
+ }
+ Name::GfmAutolinkLiteralTrailBracketAfter => {
+ construct::gfm_autolink_literal::trail_bracket_after
+ }
+
Name::GfmTableStart => construct::gfm_table::start,
Name::GfmTableHeadRowBefore => construct::gfm_table::head_row_before,
Name::GfmTableHeadRowStart => construct::gfm_table::head_row_start,
diff --git a/tests/gfm_autolink_literal.rs b/tests/gfm_autolink_literal.rs
index 9551751..2e84e6d 100644
--- a/tests/gfm_autolink_literal.rs
+++ b/tests/gfm_autolink_literal.rs
@@ -42,6 +42,22 @@ fn gfm_autolink_literal() {
);
assert_eq!(
+ micromark_with_options("[https://example.com](xxx)", &gfm),
+ "<p><a href=\"xxx\">https://example.com</a></p>",
+ "should not link protocol urls in links"
+ );
+ assert_eq!(
+ micromark_with_options("[www.example.com](xxx)", &gfm),
+ "<p><a href=\"xxx\">www.example.com</a></p>",
+ "should not link www urls in links"
+ );
+ assert_eq!(
+ micromark_with_options("[user@example.com](xxx)", &gfm),
+ "<p><a href=\"xxx\">user@example.com</a></p>",
+ "should not link email urls in links"
+ );
+
+ assert_eq!(
micromark_with_options("user@example.com", &gfm),
"<p><a href=\"mailto:user@example.com\">user@example.com</a></p>",
"should support a closing paren at TLD (email)"
@@ -174,7 +190,7 @@ fn gfm_autolink_literal() {
);
// Note: GH comments/issues/PRs do not link this, but Gists/readmes do.
- // Fixing it would mean defiating from `cmark-gfm`:
+ // Fixing it would mean deviating from `cmark-gfm`:
// Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L156>.
// assert_eq!(
// micromark_with_options(",www.example.com", &gfm),
@@ -212,6 +228,55 @@ fn gfm_autolink_literal() {
assert_eq!(
micromark_with_options(
r###"
+a www.example.com&xxx;b c
+
+a www.example.com&xxx;. b
+
+a www.example.com&xxxxxxxxx;. b
+
+a www.example.com&xxxxxxxxxx;. b
+
+a www.example.com&xxxxxxxxxxx;. b
+
+a www.example.com&xxx. b
+
+a www.example.com&#123. b
+
+a www.example.com&123. b
+
+a www.example.com&x. b
+
+a www.example.com&#1. b
+
+a www.example.com&1. b
+
+a www.example.com&. b
+
+a www.example.com& b
+"###,
+ &gfm
+ ),
+ r###"<p>a <a href="http://www.example.com&amp;xxx;b">www.example.com&amp;xxx;b</a> c</p>
+<p>a <a href="http://www.example.com">www.example.com</a>&amp;xxx;. b</p>
+<p>a <a href="http://www.example.com">www.example.com</a>&amp;xxxxxxxxx;. b</p>
+<p>a <a href="http://www.example.com">www.example.com</a>&amp;xxxxxxxxxx;. b</p>
+<p>a <a href="http://www.example.com">www.example.com</a>&amp;xxxxxxxxxxx;. b</p>
+<p>a <a href="http://www.example.com&amp;xxx">www.example.com&amp;xxx</a>. b</p>
+<p>a <a href="http://www.example.com&amp;#123">www.example.com&amp;#123</a>. b</p>
+<p>a <a href="http://www.example.com&amp;123">www.example.com&amp;123</a>. b</p>
+<p>a <a href="http://www.example.com&amp;x">www.example.com&amp;x</a>. b</p>
+<p>a <a href="http://www.example.com&amp;#1">www.example.com&amp;#1</a>. b</p>
+<p>a <a href="http://www.example.com&amp;1">www.example.com&amp;1</a>. b</p>
+<p>a <a href="http://www.example.com&amp;">www.example.com&amp;</a>. b</p>
+<p>a <a href="http://www.example.com&amp;">www.example.com&amp;</a> b</p>
+"###,
+ "should match “character references” like GitHub does"
+ );
+
+ // Note: this deviates from GFM, as <https://github.com/github/cmark-gfm/issues/278> is fixed.
+ assert_eq!(
+ micromark_with_options(
+ r###"
[ www.example.com
[ https://example.com
@@ -251,6 +316,2379 @@ fn gfm_autolink_literal() {
<p><img src="#" alt=" https://example.com " /></p>
<p><img src="#" alt=" contact@example.com " /></p>
"###,
- "should interplay with brackets, links, and images"
+ "should match interplay with brackets, links, and images, like GitHub does (but without the bugs)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"
+www.example.com/?=a(b)cccccc
+
+www.example.com/?=a(b(c)ccccc
+
+www.example.com/?=a(b(c)c)cccc
+
+www.example.com/?=a(b(c)c)c)ccc
+
+www.example.com/?q=a(business)
+
+www.example.com/?q=a(business)))
+
+(www.example.com/?q=a(business))
+
+(www.example.com/?q=a(business)
+
+www.example.com/?q=a(business)".
+
+www.example.com/?q=a(business)))
+
+(www.example.com/?q=a(business))".
+
+(www.example.com/?q=a(business)".)
+
+(www.example.com/?q=a(business)".
+"###,
+ &gfm
+ ),
+ r###"<p><a href="http://www.example.com/?=a(b)cccccc">www.example.com/?=a(b)cccccc</a></p>
+<p><a href="http://www.example.com/?=a(b(c)ccccc">www.example.com/?=a(b(c)ccccc</a></p>
+<p><a href="http://www.example.com/?=a(b(c)c)cccc">www.example.com/?=a(b(c)c)cccc</a></p>
+<p><a href="http://www.example.com/?=a(b(c)c)c)ccc">www.example.com/?=a(b(c)c)c)ccc</a></p>
+<p><a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a></p>
+<p><a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a>))</p>
+<p>(<a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a>)</p>
+<p>(<a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a></p>
+<p><a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a>&quot;.</p>
+<p><a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a>))</p>
+<p>(<a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a>)&quot;.</p>
+<p>(<a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a>&quot;.)</p>
+<p>(<a href="http://www.example.com/?q=a(business)">www.example.com/?q=a(business)</a>&quot;.</p>
+"###,
+ "should match parens like GitHub does"
+ );
+
+ // Note: this deviates from GFM.
+ // Here, the following issues are fixed:
+ // - <https://github.com/github/cmark-gfm/issues/280>
+ assert_eq!(
+ micromark_with_options(
+ r###"
+# Literal autolinks
+
+## WWW autolinks
+
+w.commonmark.org
+
+ww.commonmark.org
+
+www.commonmark.org
+
+Www.commonmark.org
+
+wWw.commonmark.org
+
+wwW.commonmark.org
+
+WWW.COMMONMARK.ORG
+
+Visit www.commonmark.org/help for more information.
+
+Visit www.commonmark.org.
+
+Visit www.commonmark.org/a.b.
+
+www.aaa.bbb.ccc_ccc
+
+www.aaa_bbb.ccc
+
+www.aaa.bbb.ccc.ddd_ddd
+
+www.aaa.bbb.ccc_ccc.ddd
+
+www.aaa.bbb_bbb.ccc.ddd
+
+www.aaa_aaa.bbb.ccc.ddd
+
+Visit www.commonmark.org.
+
+Visit www.commonmark.org/a.b.
+
+www.google.com/search?q=Markup+(business)
+
+www.google.com/search?q=Markup+(business)))
+
+(www.google.com/search?q=Markup+(business))
+
+(www.google.com/search?q=Markup+(business)
+
+www.google.com/search?q=(business))+ok
+
+www.google.com/search?q=commonmark&hl=en
+
+www.google.com/search?q=commonmark&hl;en
+
+www.google.com/search?q=commonmark&hl;
+
+www.commonmark.org/he<lp
+
+## HTTP autolinks
+
+hexample.com
+
+htexample.com
+
+httexample.com
+
+httpexample.com
+
+http:example.com
+
+http:/example.com
+
+https:/example.com
+
+http://example.com
+
+https://example.com
+
+https://example
+
+http://commonmark.org
+
+(Visit https://encrypted.google.com/search?q=Markup+(business))
+
+## Email autolinks
+
+No dot: foo@barbaz
+
+No dot: foo@barbaz.
+
+foo@bar.baz
+
+hello@mail+xyz.example isn’t valid, but hello+xyz@mail.example is.
+
+a.b-c_d@a.b
+
+a.b-c_d@a.b.
+
+a.b-c_d@a.b-
+
+a.b-c_d@a.b_
+
+a@a_b.c
+
+a@a-b.c
+
+Can’t end in an underscore followed by a period: aaa@a.b_.
+
+Can contain an underscore followed by a period: aaa@a.b_.c
+
+## Link text should not be expanded
+
+[Visit www.example.com](http://www.example.com) please.
+
+[Visit http://www.example.com](http://www.example.com) please.
+
+[Mail example@example.com](mailto:example@example.com) please.
+
+[link]() <http://autolink> should still be expanded.
+"###,
+ &gfm
+ ),
+ r###"<h1>Literal autolinks</h1>
+<h2>WWW autolinks</h2>
+<p>w.commonmark.org</p>
+<p>ww.commonmark.org</p>
+<p><a href="http://www.commonmark.org">www.commonmark.org</a></p>
+<p><a href="http://Www.commonmark.org">Www.commonmark.org</a></p>
+<p><a href="http://wWw.commonmark.org">wWw.commonmark.org</a></p>
+<p><a href="http://wwW.commonmark.org">wwW.commonmark.org</a></p>
+<p><a href="http://WWW.COMMONMARK.ORG">WWW.COMMONMARK.ORG</a></p>
+<p>Visit <a href="http://www.commonmark.org/help">www.commonmark.org/help</a> for more information.</p>
+<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>
+<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>
+<p>www.aaa.bbb.ccc_ccc</p>
+<p>www.aaa_bbb.ccc</p>
+<p>www.aaa.bbb.ccc.ddd_ddd</p>
+<p>www.aaa.bbb.ccc_ccc.ddd</p>
+<p><a href="http://www.aaa.bbb_bbb.ccc.ddd">www.aaa.bbb_bbb.ccc.ddd</a></p>
+<p><a href="http://www.aaa_aaa.bbb.ccc.ddd">www.aaa_aaa.bbb.ccc.ddd</a></p>
+<p>Visit <a href="http://www.commonmark.org">www.commonmark.org</a>.</p>
+<p>Visit <a href="http://www.commonmark.org/a.b">www.commonmark.org/a.b</a>.</p>
+<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>
+<p><a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>))</p>
+<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a>)</p>
+<p>(<a href="http://www.google.com/search?q=Markup+(business)">www.google.com/search?q=Markup+(business)</a></p>
+<p><a href="http://www.google.com/search?q=(business))+ok">www.google.com/search?q=(business))+ok</a></p>
+<p><a href="http://www.google.com/search?q=commonmark&amp;hl=en">www.google.com/search?q=commonmark&amp;hl=en</a></p>
+<p><a href="http://www.google.com/search?q=commonmark&amp;hl;en">www.google.com/search?q=commonmark&amp;hl;en</a></p>
+<p><a href="http://www.google.com/search?q=commonmark">www.google.com/search?q=commonmark</a>&amp;hl;</p>
+<p><a href="http://www.commonmark.org/he">www.commonmark.org/he</a>&lt;lp</p>
+<h2>HTTP autolinks</h2>
+<p>hexample.com</p>
+<p>htexample.com</p>
+<p>httexample.com</p>
+<p>httpexample.com</p>
+<p>http:example.com</p>
+<p>http:/example.com</p>
+<p>https:/example.com</p>
+<p><a href="http://example.com">http://example.com</a></p>
+<p><a href="https://example.com">https://example.com</a></p>
+<p><a href="https://example">https://example</a></p>
+<p><a href="http://commonmark.org">http://commonmark.org</a></p>
+<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
+<h2>Email autolinks</h2>
+<p>No dot: foo@barbaz</p>
+<p>No dot: foo@barbaz.</p>
+<p><a href="mailto:foo@bar.baz">foo@bar.baz</a></p>
+<p>hello@mail+xyz.example isn’t valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>
+<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>
+<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>
+<p>a.b-c_d@a.b-</p>
+<p>a.b-c_d@a.b_</p>
+<p><a href="mailto:a@a_b.c">a@a_b.c</a></p>
+<p><a href="mailto:a@a-b.c">a@a-b.c</a></p>
+<p>Can’t end in an underscore followed by a period: aaa@a.b_.</p>
+<p>Can contain an underscore followed by a period: <a href="mailto:aaa@a.b_.c">aaa@a.b_.c</a></p>
+<h2>Link text should not be expanded</h2>
+<p><a href="http://www.example.com">Visit www.example.com</a> please.</p>
+<p><a href="http://www.example.com">Visit http://www.example.com</a> please.</p>
+<p><a href="mailto:example@example.com">Mail example@example.com</a> please.</p>
+<p><a href="">link</a> <a href="http://autolink">http://autolink</a> should still be expanded.</p>
+"###,
+ "should match base like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"H0.
+
+[https://a.com&copy;b
+
+[www.a.com&copy;b
+
+H1.
+
+[]https://a.com&copy;b
+
+[]www.a.com&copy;b
+
+H2.
+
+[] https://a.com&copy;b
+
+[] www.a.com&copy;b
+
+H3.
+
+[[https://a.com&copy;b
+
+[[www.a.com&copy;b
+
+H4.
+
+[[]https://a.com&copy;b
+
+[[]www.a.com&copy;b
+
+H5.
+
+[[]]https://a.com&copy;b
+
+[[]]www.a.com&copy;b
+"###,
+ &gfm
+ ),
+ r###"<p>H0.</p>
+<p>[<a href="https://a.com&amp;copy;b">https://a.com&amp;copy;b</a></p>
+<p>[<a href="http://www.a.com&amp;copy;b">www.a.com&amp;copy;b</a></p>
+<p>H1.</p>
+<p>[]<a href="https://a.com&amp;copy;b">https://a.com&amp;copy;b</a></p>
+<p>[]<a href="http://www.a.com&amp;copy;b">www.a.com&amp;copy;b</a></p>
+<p>H2.</p>
+<p>[] <a href="https://a.com&amp;copy;b">https://a.com&amp;copy;b</a></p>
+<p>[] <a href="http://www.a.com&amp;copy;b">www.a.com&amp;copy;b</a></p>
+<p>H3.</p>
+<p>[[<a href="https://a.com&amp;copy;b">https://a.com&amp;copy;b</a></p>
+<p>[[<a href="http://www.a.com&amp;copy;b">www.a.com&amp;copy;b</a></p>
+<p>H4.</p>
+<p>[[]<a href="https://a.com&amp;copy;b">https://a.com&amp;copy;b</a></p>
+<p>[[]<a href="http://www.a.com&amp;copy;b">www.a.com&amp;copy;b</a></p>
+<p>H5.</p>
+<p>[[]]<a href="https://a.com&amp;copy;b">https://a.com&amp;copy;b</a></p>
+<p>[[]]<a href="http://www.a.com&amp;copy;b">www.a.com&amp;copy;b</a></p>
+"###,
+ "should match brackets like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(r###"Image start.
+
+![https://a.com
+
+![http://a.com
+
+![www.a.com
+
+![a@b.c
+
+Image start and label end.
+
+![https://a.com]
+
+![http://a.com]
+
+![www.a.com]
+
+![a@b.c]
+
+Image label with reference (note: GH cleans hashes here, but we keep them in).
+
+![https://a.com][x]
+
+![http://a.com][x]
+
+![www.a.com][x]
+
+![a@b.c][x]
+
+[x]: #
+
+Image label with resource.
+
+![https://a.com]()
+
+![http://a.com]()
+
+![www.a.com]()
+
+![a@b.c]()
+
+Autolink literal after image.
+
+![a]() https://a.com
+
+![a]() http://a.com
+
+![a]() www.a.com
+
+![a]() a@b.c
+"###, &gfm),
+ r###"<p>Image start.</p>
+<p>![<a href="https://a.com">https://a.com</a></p>
+<p>![<a href="http://a.com">http://a.com</a></p>
+<p>![<a href="http://www.a.com">www.a.com</a></p>
+<p>![<a href="mailto:a@b.c">a@b.c</a></p>
+<p>Image start and label end.</p>
+<p>![<a href="https://a.com">https://a.com</a>]</p>
+<p>![<a href="http://a.com">http://a.com</a>]</p>
+<p>![<a href="http://www.a.com">www.a.com</a>]</p>
+<p>![<a href="mailto:a@b.c">a@b.c</a>]</p>
+<p>Image label with reference (note: GH cleans hashes here, but we keep them in).</p>
+<p><img src="#" alt="https://a.com" /></p>
+<p><img src="#" alt="http://a.com" /></p>
+<p><img src="#" alt="www.a.com" /></p>
+<p><img src="#" alt="a@b.c" /></p>
+<p>Image label with resource.</p>
+<p><img src="" alt="https://a.com" /></p>
+<p><img src="" alt="http://a.com" /></p>
+<p><img src="" alt="www.a.com" /></p>
+<p><img src="" alt="a@b.c" /></p>
+<p>Autolink literal after image.</p>
+<p><img src="" alt="a" /> <a href="https://a.com">https://a.com</a></p>
+<p><img src="" alt="a" /> <a href="http://a.com">http://a.com</a></p>
+<p><img src="" alt="a" /> <a href="http://www.a.com">www.a.com</a></p>
+<p><img src="" alt="a" /> <a href="mailto:a@b.c">a@b.c</a></p>
+"###,
+ "should match autolink literals combined w/ images like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(r###"Link start.
+
+[https://a.com
+
+[http://a.com
+
+[www.a.com
+
+[a@b.c
+
+Label end.
+
+https://a.com]
+
+http://a.com]
+
+www.a.com]
+
+a@b.c]
+
+Link start and label end.
+
+[https://a.com]
+
+[http://a.com]
+
+[www.a.com]
+
+[a@b.c]
+
+What naĂŻvely seems like a label end (A).
+
+https://a.com`]`
+
+http://a.com`]`
+
+www.a.com`]`
+
+a@b.c`]`
+
+Link start and what naĂŻvely seems like a balanced brace (B).
+
+[https://a.com`]`
+
+[http://a.com`]`
+
+[www.a.com`]`
+
+[a@b.c`]`
+
+What naĂŻvely seems like a label end (C).
+
+https://a.com `]`
+
+http://a.com `]`
+
+www.a.com `]`
+
+a@b.c `]`
+
+Link start and what naĂŻvely seems like a balanced brace (D).
+
+[https://a.com `]`
+
+[http://a.com `]`
+
+[www.a.com `]`
+
+[a@b.c `]`
+
+Link label with reference.
+
+[https://a.com][x]
+
+[http://a.com][x]
+
+[www.a.com][x]
+
+[a@b.c][x]
+
+[x]: #
+
+Link label with resource.
+
+[https://a.com]()
+
+[http://a.com]()
+
+[www.a.com]()
+
+[a@b.c]()
+
+More in link.
+
+[a https://b.com c]()
+
+[a http://b.com c]()
+
+[a www.b.com c]()
+
+[a b@c.d e]()
+
+Autolink literal after link.
+
+[a]() https://a.com
+
+[a]() http://a.com
+
+[a]() www.a.com
+
+[a]() a@b.c
+"###, &gfm),
+ r###"<p>Link start.</p>
+<p>[<a href="https://a.com">https://a.com</a></p>
+<p>[<a href="http://a.com">http://a.com</a></p>
+<p>[<a href="http://www.a.com">www.a.com</a></p>
+<p>[<a href="mailto:a@b.c">a@b.c</a></p>
+<p>Label end.</p>
+<p><a href="https://a.com">https://a.com</a>]</p>
+<p><a href="http://a.com">http://a.com</a>]</p>
+<p><a href="http://www.a.com">www.a.com</a>]</p>
+<p><a href="mailto:a@b.c">a@b.c</a>]</p>
+<p>Link start and label end.</p>
+<p>[<a href="https://a.com">https://a.com</a>]</p>
+<p>[<a href="http://a.com">http://a.com</a>]</p>
+<p>[<a href="http://www.a.com">www.a.com</a>]</p>
+<p>[<a href="mailto:a@b.c">a@b.c</a>]</p>
+<p>What naĂŻvely seems like a label end (A).</p>
+<p><a href="https://a.com%60%5D%60">https://a.com`]`</a></p>
+<p><a href="http://a.com%60%5D%60">http://a.com`]`</a></p>
+<p><a href="http://www.a.com%60%5D%60">www.a.com`]`</a></p>
+<p><a href="mailto:a@b.c">a@b.c</a><code>]</code></p>
+<p>Link start and what naĂŻvely seems like a balanced brace (B).</p>
+<p>[<a href="https://a.com%60%5D%60">https://a.com`]`</a></p>
+<p>[<a href="http://a.com%60%5D%60">http://a.com`]`</a></p>
+<p>[<a href="http://www.a.com%60%5D%60">www.a.com`]`</a></p>
+<p>[<a href="mailto:a@b.c">a@b.c</a><code>]</code></p>
+<p>What naĂŻvely seems like a label end (C).</p>
+<p><a href="https://a.com">https://a.com</a> <code>]</code></p>
+<p><a href="http://a.com">http://a.com</a> <code>]</code></p>
+<p><a href="http://www.a.com">www.a.com</a> <code>]</code></p>
+<p><a href="mailto:a@b.c">a@b.c</a> <code>]</code></p>
+<p>Link start and what naĂŻvely seems like a balanced brace (D).</p>
+<p>[<a href="https://a.com">https://a.com</a> <code>]</code></p>
+<p>[<a href="http://a.com">http://a.com</a> <code>]</code></p>
+<p>[<a href="http://www.a.com">www.a.com</a> <code>]</code></p>
+<p>[<a href="mailto:a@b.c">a@b.c</a> <code>]</code></p>
+<p>Link label with reference.</p>
+<p><a href="#">https://a.com</a></p>
+<p><a href="#">http://a.com</a></p>
+<p><a href="#">www.a.com</a></p>
+<p><a href="#">a@b.c</a></p>
+<p>Link label with resource.</p>
+<p><a href="">https://a.com</a></p>
+<p><a href="">http://a.com</a></p>
+<p><a href="">www.a.com</a></p>
+<p><a href="">a@b.c</a></p>
+<p>More in link.</p>
+<p><a href="">a https://b.com c</a></p>
+<p><a href="">a http://b.com c</a></p>
+<p><a href="">a www.b.com c</a></p>
+<p><a href="">a b@c.d e</a></p>
+<p>Autolink literal after link.</p>
+<p><a href="">a</a> <a href="https://a.com">https://a.com</a></p>
+<p><a href="">a</a> <a href="http://a.com">http://a.com</a></p>
+<p><a href="">a</a> <a href="http://www.a.com">www.a.com</a></p>
+<p><a href="">a</a> <a href="mailto:a@b.c">a@b.c</a></p>
+"###,
+ "should match autolink literals combined w/ links like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# “character reference”
+
+www.a&b (space)
+
+www.a&b!
+
+www.a&b"
+
+www.a&b#
+
+www.a&b$
+
+www.a&b%
+
+www.a&b&
+
+www.a&b'
+
+www.a&b(
+
+www.a&b)
+
+www.a&b*
+
+www.a&b+
+
+www.a&b,
+
+www.a&b-
+
+www.a&b
+
+www.a&b.
+
+www.a&b/
+
+www.a&b:
+
+www.a&b;
+
+www.a&b<
+
+www.a&b=
+
+www.a&b>
+
+www.a&b?
+
+www.a&b@
+
+www.a&b[
+
+www.a&b\
+
+www.a&b]
+
+www.a&b^
+
+www.a&b_
+
+www.a&b`
+
+www.a&b{
+
+www.a&b|
+
+www.a&b}
+
+www.a&b~
+"###,
+ &gfm
+ ),
+ r###"<h1>“character reference”</h1>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a> (space)</p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>!</p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>&quot;</p>
+<p><a href="http://www.a&amp;b#">www.a&amp;b#</a></p>
+<p><a href="http://www.a&amp;b$">www.a&amp;b$</a></p>
+<p><a href="http://www.a&amp;b%25">www.a&amp;b%</a></p>
+<p><a href="http://www.a&amp;b&amp;">www.a&amp;b&amp;</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>'</p>
+<p><a href="http://www.a&amp;b(">www.a&amp;b(</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>)</p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>*</p>
+<p><a href="http://www.a&amp;b+">www.a&amp;b+</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>,</p>
+<p><a href="http://www.a&amp;b-">www.a&amp;b-</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>.</p>
+<p><a href="http://www.a&amp;b/">www.a&amp;b/</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>:</p>
+<p><a href="http://www.a">www.a</a>&amp;b;</p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>&lt;</p>
+<p><a href="http://www.a&amp;b=">www.a&amp;b=</a></p>
+<p><a href="http://www.a&amp;b%3E">www.a&amp;b&gt;</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>?</p>
+<p><a href="http://www.a&amp;b@">www.a&amp;b@</a></p>
+<p><a href="http://www.a&amp;b%5B">www.a&amp;b[</a></p>
+<p><a href="http://www.a&amp;b%5C">www.a&amp;b\</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>]</p>
+<p><a href="http://www.a&amp;b%5E">www.a&amp;b^</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>_</p>
+<p><a href="http://www.a&amp;b%60">www.a&amp;b`</a></p>
+<p><a href="http://www.a&amp;b%7B">www.a&amp;b{</a></p>
+<p><a href="http://www.a&amp;b%7C">www.a&amp;b|</a></p>
+<p><a href="http://www.a&amp;b%7D">www.a&amp;b}</a></p>
+<p><a href="http://www.a&amp;b">www.a&amp;b</a>~</p>
+"###,
+ "should match “character references (named)” like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(r###"# “character reference”
+
+www.a&#35 (space)
+
+www.a&#35!
+
+www.a&#35"
+
+www.a&#35#
+
+www.a&#35$
+
+www.a&#35%
+
+www.a&#35&
+
+www.a&#35'
+
+www.a&#35(
+
+www.a&#35)
+
+www.a&#35*
+
+www.a&#35+
+
+www.a&#35,
+
+www.a&#35-
+
+www.a&#35
+
+www.a&#35.
+
+www.a&#35/
+
+www.a&#35:
+
+www.a&#35;
+
+www.a&#35<
+
+www.a&#35=
+
+www.a&#35>
+
+www.a&#35?
+
+www.a&#35@
+
+www.a&#35[
+
+www.a&#35\
+
+www.a&#35]
+
+www.a&#35^
+
+www.a&#35_
+
+www.a&#35`
+
+www.a&#35{
+
+www.a&#35|
+
+www.a&#35}
+
+www.a&#35~
+"###, &gfm),
+ r###"<h1>“character reference”</h1>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a> (space)</p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>!</p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>&quot;</p>
+<p><a href="http://www.a&amp;#35#">www.a&amp;#35#</a></p>
+<p><a href="http://www.a&amp;#35$">www.a&amp;#35$</a></p>
+<p><a href="http://www.a&amp;#35%25">www.a&amp;#35%</a></p>
+<p><a href="http://www.a&amp;#35&amp;">www.a&amp;#35&amp;</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>'</p>
+<p><a href="http://www.a&amp;#35(">www.a&amp;#35(</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>)</p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>*</p>
+<p><a href="http://www.a&amp;#35+">www.a&amp;#35+</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>,</p>
+<p><a href="http://www.a&amp;#35-">www.a&amp;#35-</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>.</p>
+<p><a href="http://www.a&amp;#35/">www.a&amp;#35/</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>:</p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>;</p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>&lt;</p>
+<p><a href="http://www.a&amp;#35=">www.a&amp;#35=</a></p>
+<p><a href="http://www.a&amp;#35%3E">www.a&amp;#35&gt;</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>?</p>
+<p><a href="http://www.a&amp;#35@">www.a&amp;#35@</a></p>
+<p><a href="http://www.a&amp;#35%5B">www.a&amp;#35[</a></p>
+<p><a href="http://www.a&amp;#35%5C">www.a&amp;#35\</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>]</p>
+<p><a href="http://www.a&amp;#35%5E">www.a&amp;#35^</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>_</p>
+<p><a href="http://www.a&amp;#35%60">www.a&amp;#35`</a></p>
+<p><a href="http://www.a&amp;#35%7B">www.a&amp;#35{</a></p>
+<p><a href="http://www.a&amp;#35%7C">www.a&amp;#35|</a></p>
+<p><a href="http://www.a&amp;#35%7D">www.a&amp;#35}</a></p>
+<p><a href="http://www.a&amp;#35">www.a&amp;#35</a>~</p>
+"###,
+ "should match “character references (numeric)” like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"a@0.0
+
+a@0.b
+
+a@a.29
+
+a@a.b
+
+a@0.0.c
+
+react@0.11.1
+
+react@0.12.0-rc1
+
+react@0.14.0-alpha1
+
+react@16.7.0-alpha.2
+
+react@0.0.0-experimental-aae83a4b9
+
+[ react@0.11.1
+
+[ react@0.12.0-rc1
+
+[ react@0.14.0-alpha1
+
+[ react@16.7.0-alpha.2
+
+[ react@0.0.0-experimental-aae83a4b9
+"###,
+ &gfm
+ ),
+ r###"<p>a@0.0</p>
+<p><a href="mailto:a@0.b">a@0.b</a></p>
+<p>a@a.29</p>
+<p><a href="mailto:a@a.b">a@a.b</a></p>
+<p><a href="mailto:a@0.0.c">a@0.0.c</a></p>
+<p>react@0.11.1</p>
+<p>react@0.12.0-rc1</p>
+<p>react@0.14.0-alpha1</p>
+<p>react@16.7.0-alpha.2</p>
+<p>react@0.0.0-experimental-aae83a4b9</p>
+<p>[ react@0.11.1</p>
+<p>[ react@0.12.0-rc1</p>
+<p>[ react@0.14.0-alpha1</p>
+<p>[ react@16.7.0-alpha.2</p>
+<p>[ react@0.0.0-experimental-aae83a4b9</p>
+"###,
+ "should match email TLD digits like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# httpshhh? (2)
+
+http://a (space)
+
+http://a!
+
+http://a"
+
+http://a#
+
+http://a$
+
+http://a%
+
+http://a&
+
+http://a'
+
+http://a(
+
+http://a)
+
+http://a*
+
+http://a+
+
+http://a,
+
+http://a-
+
+http://a
+
+http://a.
+
+http://a/
+
+http://a:
+
+http://a;
+
+http://a<
+
+http://a=
+
+http://a>
+
+http://a?
+
+http://a@
+
+http://a[
+
+http://a\
+
+http://a]
+
+http://a^
+
+http://a_
+
+http://a`
+
+http://a{
+
+http://a|
+
+http://a}
+
+http://a~
+"###,
+ &gfm
+ ),
+ r###"<h1>httpshhh? (2)</h1>
+<p><a href="http://a">http://a</a> (space)</p>
+<p><a href="http://a">http://a</a>!</p>
+<p><a href="http://a">http://a</a>&quot;</p>
+<p><a href="http://a#">http://a#</a></p>
+<p><a href="http://a$">http://a$</a></p>
+<p><a href="http://a%25">http://a%</a></p>
+<p><a href="http://a&amp;">http://a&amp;</a></p>
+<p><a href="http://a">http://a</a>'</p>
+<p><a href="http://a(">http://a(</a></p>
+<p><a href="http://a">http://a</a>)</p>
+<p><a href="http://a">http://a</a>*</p>
+<p><a href="http://a+">http://a+</a></p>
+<p><a href="http://a">http://a</a>,</p>
+<p><a href="http://a-">http://a-</a></p>
+<p><a href="http://a">http://a</a></p>
+<p><a href="http://a">http://a</a>.</p>
+<p><a href="http://a/">http://a/</a></p>
+<p><a href="http://a">http://a</a>:</p>
+<p><a href="http://a">http://a</a>;</p>
+<p><a href="http://a">http://a</a>&lt;</p>
+<p><a href="http://a=">http://a=</a></p>
+<p><a href="http://a%3E">http://a&gt;</a></p>
+<p><a href="http://a">http://a</a>?</p>
+<p><a href="http://a@">http://a@</a></p>
+<p><a href="http://a%5B">http://a[</a></p>
+<p><a href="http://a%5C">http://a\</a></p>
+<p><a href="http://a">http://a</a>]</p>
+<p><a href="http://a%5E">http://a^</a></p>
+<p><a href="http://a">http://a</a>_</p>
+<p><a href="http://a%60">http://a`</a></p>
+<p><a href="http://a%7B">http://a{</a></p>
+<p><a href="http://a%7C">http://a|</a></p>
+<p><a href="http://a%7D">http://a}</a></p>
+<p><a href="http://a">http://a</a>~</p>
+"###,
+ "should match protocol domain continue like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# httpshhh? (1)
+
+http:// (space)
+
+http://!
+
+http://"
+
+http://#
+
+http://$
+
+http://%
+
+http://&
+
+http://'
+
+http://(
+
+http://)
+
+http://*
+
+http://+
+
+http://,
+
+http://-
+
+http://
+
+http://.
+
+http:///
+
+http://:
+
+http://;
+
+http://<
+
+http://=
+
+http://>
+
+http://?
+
+http://@
+
+http://[
+
+http://\
+
+http://]
+
+http://^
+
+http://_
+
+http://`
+
+http://{
+
+http://|
+
+http://}
+
+http://~
+"###,
+ &gfm
+ ),
+ r###"<h1>httpshhh? (1)</h1>
+<p>http:// (space)</p>
+<p>http://!</p>
+<p>http://&quot;</p>
+<p>http://#</p>
+<p>http://$</p>
+<p>http://%</p>
+<p>http://&amp;</p>
+<p>http://'</p>
+<p>http://(</p>
+<p>http://)</p>
+<p>http://*</p>
+<p>http://+</p>
+<p>http://,</p>
+<p>http://-</p>
+<p>http://</p>
+<p>http://.</p>
+<p>http:///</p>
+<p>http://:</p>
+<p>http://;</p>
+<p>http://&lt;</p>
+<p>http://=</p>
+<p>http://&gt;</p>
+<p>http://?</p>
+<p>http://@</p>
+<p>http://[</p>
+<p>http://\</p>
+<p>http://]</p>
+<p>http://^</p>
+<p>http://_</p>
+<p>http://`</p>
+<p>http://{</p>
+<p>http://|</p>
+<p>http://}</p>
+<p>http://~</p>
+"###,
+ "should match protocol domain start like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# httpshhh? (4)
+
+http://a/b (space)
+
+http://a/b!
+
+http://a/b"
+
+http://a/b#
+
+http://a/b$
+
+http://a/b%
+
+http://a/b&
+
+http://a/b'
+
+http://a/b(
+
+http://a/b)
+
+http://a/b*
+
+http://a/b+
+
+http://a/b,
+
+http://a/b-
+
+http://a/b
+
+http://a/b.
+
+http://a/b/
+
+http://a/b:
+
+http://a/b;
+
+http://a/b<
+
+http://a/b=
+
+http://a/b>
+
+http://a/b?
+
+http://a/b@
+
+http://a/b[
+
+http://a/b\
+
+http://a/b]
+
+http://a/b^
+
+http://a/b_
+
+http://a/b`
+
+http://a/b{
+
+http://a/b|
+
+http://a/b}
+
+http://a/b~
+"###,
+ &gfm
+ ),
+ r###"<h1>httpshhh? (4)</h1>
+<p><a href="http://a/b">http://a/b</a> (space)</p>
+<p><a href="http://a/b">http://a/b</a>!</p>
+<p><a href="http://a/b">http://a/b</a>&quot;</p>
+<p><a href="http://a/b#">http://a/b#</a></p>
+<p><a href="http://a/b$">http://a/b$</a></p>
+<p><a href="http://a/b%25">http://a/b%</a></p>
+<p><a href="http://a/b&amp;">http://a/b&amp;</a></p>
+<p><a href="http://a/b">http://a/b</a>'</p>
+<p><a href="http://a/b(">http://a/b(</a></p>
+<p><a href="http://a/b">http://a/b</a>)</p>
+<p><a href="http://a/b">http://a/b</a>*</p>
+<p><a href="http://a/b+">http://a/b+</a></p>
+<p><a href="http://a/b">http://a/b</a>,</p>
+<p><a href="http://a/b-">http://a/b-</a></p>
+<p><a href="http://a/b">http://a/b</a></p>
+<p><a href="http://a/b">http://a/b</a>.</p>
+<p><a href="http://a/b/">http://a/b/</a></p>
+<p><a href="http://a/b">http://a/b</a>:</p>
+<p><a href="http://a/b">http://a/b</a>;</p>
+<p><a href="http://a/b">http://a/b</a>&lt;</p>
+<p><a href="http://a/b=">http://a/b=</a></p>
+<p><a href="http://a/b%3E">http://a/b&gt;</a></p>
+<p><a href="http://a/b">http://a/b</a>?</p>
+<p><a href="http://a/b@">http://a/b@</a></p>
+<p><a href="http://a/b%5B">http://a/b[</a></p>
+<p><a href="http://a/b%5C">http://a/b\</a></p>
+<p><a href="http://a/b">http://a/b</a>]</p>
+<p><a href="http://a/b%5E">http://a/b^</a></p>
+<p><a href="http://a/b">http://a/b</a>_</p>
+<p><a href="http://a/b%60">http://a/b`</a></p>
+<p><a href="http://a/b%7B">http://a/b{</a></p>
+<p><a href="http://a/b%7C">http://a/b|</a></p>
+<p><a href="http://a/b%7D">http://a/b}</a></p>
+<p><a href="http://a/b">http://a/b</a>~</p>
+"###,
+ "should match protocol path continue like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# httpshhh? (3)
+
+http://a/ (space)
+
+http://a/!
+
+http://a/"
+
+http://a/#
+
+http://a/$
+
+http://a/%
+
+http://a/&
+
+http://a/'
+
+http://a/(
+
+http://a/)
+
+http://a/*
+
+http://a/+
+
+http://a/,
+
+http://a/-
+
+http://a/
+
+http://a/.
+
+http://a//
+
+http://a/:
+
+http://a/;
+
+http://a/<
+
+http://a/=
+
+http://a/>
+
+http://a/?
+
+http://a/@
+
+http://a/[
+
+http://a/\
+
+http://a/]
+
+http://a/^
+
+http://a/_
+
+http://a/`
+
+http://a/{
+
+http://a/|
+
+http://a/}
+
+http://a/~
+"###,
+ &gfm
+ ),
+ r###"<h1>httpshhh? (3)</h1>
+<p><a href="http://a/">http://a/</a> (space)</p>
+<p><a href="http://a/">http://a/</a>!</p>
+<p><a href="http://a/">http://a/</a>&quot;</p>
+<p><a href="http://a/#">http://a/#</a></p>
+<p><a href="http://a/$">http://a/$</a></p>
+<p><a href="http://a/%25">http://a/%</a></p>
+<p><a href="http://a/&amp;">http://a/&amp;</a></p>
+<p><a href="http://a/">http://a/</a>'</p>
+<p><a href="http://a/(">http://a/(</a></p>
+<p><a href="http://a/">http://a/</a>)</p>
+<p><a href="http://a/">http://a/</a>*</p>
+<p><a href="http://a/+">http://a/+</a></p>
+<p><a href="http://a/">http://a/</a>,</p>
+<p><a href="http://a/-">http://a/-</a></p>
+<p><a href="http://a/">http://a/</a></p>
+<p><a href="http://a/">http://a/</a>.</p>
+<p><a href="http://a//">http://a//</a></p>
+<p><a href="http://a/">http://a/</a>:</p>
+<p><a href="http://a/">http://a/</a>;</p>
+<p><a href="http://a/">http://a/</a>&lt;</p>
+<p><a href="http://a/=">http://a/=</a></p>
+<p><a href="http://a/%3E">http://a/&gt;</a></p>
+<p><a href="http://a/">http://a/</a>?</p>
+<p><a href="http://a/@">http://a/@</a></p>
+<p><a href="http://a/%5B">http://a/[</a></p>
+<p><a href="http://a/%5C">http://a/\</a></p>
+<p><a href="http://a/">http://a/</a>]</p>
+<p><a href="http://a/%5E">http://a/^</a></p>
+<p><a href="http://a/">http://a/</a>_</p>
+<p><a href="http://a/%60">http://a/`</a></p>
+<p><a href="http://a/%7B">http://a/{</a></p>
+<p><a href="http://a/%7C">http://a/|</a></p>
+<p><a href="http://a/%7D">http://a/}</a></p>
+<p><a href="http://a/">http://a/</a>~</p>
+"###,
+ "should match protocol path start like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"[www.example.com/a&copy;](#)
+
+www.example.com/a&copy;
+
+[www.example.com/a&bogus;](#)
+
+www.example.com/a&bogus;
+
+[www.example.com/a\.](#)
+
+www.example.com/a\.
+"###,
+ &gfm
+ ),
+ r###"<p><a href="#">www.example.com/a©</a></p>
+<p><a href="http://www.example.com/a">www.example.com/a</a>©</p>
+<p><a href="#">www.example.com/a&amp;bogus;</a></p>
+<p><a href="http://www.example.com/a">www.example.com/a</a>&amp;bogus;</p>
+<p><a href="#">www.example.com/a\.</a></p>
+<p><a href="http://www.example.com/a%5C">www.example.com/a\</a>.</p>
+"###,
+ "should match links, autolink literals, and characters like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# “character reference”
+
+www.a/b&c (space)
+
+www.a/b&c!
+
+www.a/b&c"
+
+www.a/b&c#
+
+www.a/b&c$
+
+www.a/b&c%
+
+www.a/b&c&
+
+www.a/b&c'
+
+www.a/b&c(
+
+www.a/b&c)
+
+www.a/b&c*
+
+www.a/b&c+
+
+www.a/b&c,
+
+www.a/b&c-
+
+www.a/b&c
+
+www.a/b&c.
+
+www.a/b&c/
+
+www.a/b&c:
+
+www.a/b&c;
+
+www.a/b&c<
+
+www.a/b&c=
+
+www.a/b&c>
+
+www.a/b&c?
+
+www.a/b&c@
+
+www.a/b&c[
+
+www.a/b&c\
+
+www.a/b&c]
+
+www.a/b&c^
+
+www.a/b&c_
+
+www.a/b&c`
+
+www.a/b&c{
+
+www.a/b&c|
+
+www.a/b&c}
+
+www.a/b&c~
+"###,
+ &gfm
+ ),
+ r###"<h1>“character reference”</h1>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a> (space)</p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>!</p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>&quot;</p>
+<p><a href="http://www.a/b&amp;c#">www.a/b&amp;c#</a></p>
+<p><a href="http://www.a/b&amp;c$">www.a/b&amp;c$</a></p>
+<p><a href="http://www.a/b&amp;c%25">www.a/b&amp;c%</a></p>
+<p><a href="http://www.a/b&amp;c&amp;">www.a/b&amp;c&amp;</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>'</p>
+<p><a href="http://www.a/b&amp;c(">www.a/b&amp;c(</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>)</p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>*</p>
+<p><a href="http://www.a/b&amp;c+">www.a/b&amp;c+</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>,</p>
+<p><a href="http://www.a/b&amp;c-">www.a/b&amp;c-</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>.</p>
+<p><a href="http://www.a/b&amp;c/">www.a/b&amp;c/</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>:</p>
+<p><a href="http://www.a/b">www.a/b</a>&amp;c;</p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>&lt;</p>
+<p><a href="http://www.a/b&amp;c=">www.a/b&amp;c=</a></p>
+<p><a href="http://www.a/b&amp;c%3E">www.a/b&amp;c&gt;</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>?</p>
+<p><a href="http://www.a/b&amp;c@">www.a/b&amp;c@</a></p>
+<p><a href="http://www.a/b&amp;c%5B">www.a/b&amp;c[</a></p>
+<p><a href="http://www.a/b&amp;c%5C">www.a/b&amp;c\</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>]</p>
+<p><a href="http://www.a/b&amp;c%5E">www.a/b&amp;c^</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>_</p>
+<p><a href="http://www.a/b&amp;c%60">www.a/b&amp;c`</a></p>
+<p><a href="http://www.a/b&amp;c%7B">www.a/b&amp;c{</a></p>
+<p><a href="http://www.a/b&amp;c%7C">www.a/b&amp;c|</a></p>
+<p><a href="http://www.a/b&amp;c%7D">www.a/b&amp;c}</a></p>
+<p><a href="http://www.a/b&amp;c">www.a/b&amp;c</a>~</p>
+"###,
+ "should match character reference-like (named) things in paths like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# “character reference”
+
+www.a/b&#35 (space)
+
+www.a/b&#35!
+
+www.a/b&#35"
+
+www.a/b&#35#
+
+www.a/b&#35$
+
+www.a/b&#35%
+
+www.a/b&#35&
+
+www.a/b&#35'
+
+www.a/b&#35(
+
+www.a/b&#35)
+
+www.a/b&#35*
+
+www.a/b&#35+
+
+www.a/b&#35,
+
+www.a/b&#35-
+
+www.a/b&#35
+
+www.a/b&#35.
+
+www.a/b&#35/
+
+www.a/b&#35:
+
+www.a/b&#35;
+
+www.a/b&#35<
+
+www.a/b&#35=
+
+www.a/b&#35>
+
+www.a/b&#35?
+
+www.a/b&#35@
+
+www.a/b&#35[
+
+www.a/b&#35\
+
+www.a/b&#35]
+
+www.a/b&#35^
+
+www.a/b&#35_
+
+www.a/b&#35`
+
+www.a/b&#35{
+
+www.a/b&#35|
+
+www.a/b&#35}
+
+www.a/b&#35~
+"###,
+ &gfm
+ ),
+ r###"<h1>“character reference”</h1>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a> (space)</p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>!</p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>&quot;</p>
+<p><a href="http://www.a/b&amp;#35#">www.a/b&amp;#35#</a></p>
+<p><a href="http://www.a/b&amp;#35$">www.a/b&amp;#35$</a></p>
+<p><a href="http://www.a/b&amp;#35%25">www.a/b&amp;#35%</a></p>
+<p><a href="http://www.a/b&amp;#35&amp;">www.a/b&amp;#35&amp;</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>'</p>
+<p><a href="http://www.a/b&amp;#35(">www.a/b&amp;#35(</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>)</p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>*</p>
+<p><a href="http://www.a/b&amp;#35+">www.a/b&amp;#35+</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>,</p>
+<p><a href="http://www.a/b&amp;#35-">www.a/b&amp;#35-</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>.</p>
+<p><a href="http://www.a/b&amp;#35/">www.a/b&amp;#35/</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>:</p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>;</p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>&lt;</p>
+<p><a href="http://www.a/b&amp;#35=">www.a/b&amp;#35=</a></p>
+<p><a href="http://www.a/b&amp;#35%3E">www.a/b&amp;#35&gt;</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>?</p>
+<p><a href="http://www.a/b&amp;#35@">www.a/b&amp;#35@</a></p>
+<p><a href="http://www.a/b&amp;#35%5B">www.a/b&amp;#35[</a></p>
+<p><a href="http://www.a/b&amp;#35%5C">www.a/b&amp;#35\</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>]</p>
+<p><a href="http://www.a/b&amp;#35%5E">www.a/b&amp;#35^</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>_</p>
+<p><a href="http://www.a/b&amp;#35%60">www.a/b&amp;#35`</a></p>
+<p><a href="http://www.a/b&amp;#35%7B">www.a/b&amp;#35{</a></p>
+<p><a href="http://www.a/b&amp;#35%7C">www.a/b&amp;#35|</a></p>
+<p><a href="http://www.a/b&amp;#35%7D">www.a/b&amp;#35}</a></p>
+<p><a href="http://www.a/b&amp;#35">www.a/b&amp;#35</a>~</p>
+"###,
+ "should match character reference-like (numeric) things in paths like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"In autolink literal path or link end?
+
+[https://a.com/d]()
+
+[http://a.com/d]()
+
+[www.a.com/d]()
+
+https://a.com/d]()
+
+http://a.com/d]()
+
+www.a.com/d]()
+
+In autolink literal search or link end?
+
+[https://a.com?d]()
+
+[http://a.com?d]()
+
+[www.a.com?d]()
+
+https://a.com?d]()
+
+http://a.com?d]()
+
+www.a.com?d]()
+
+In autolink literal hash or link end?
+
+[https://a.com#d]()
+
+[http://a.com#d]()
+
+[www.a.com#d]()
+
+https://a.com#d]()
+
+http://a.com#d]()
+
+www.a.com#d]()
+"###,
+ &gfm
+ ),
+ r###"<p>In autolink literal path or link end?</p>
+<p><a href="">https://a.com/d</a></p>
+<p><a href="">http://a.com/d</a></p>
+<p><a href="">www.a.com/d</a></p>
+<p><a href="https://a.com/d">https://a.com/d</a>]()</p>
+<p><a href="http://a.com/d">http://a.com/d</a>]()</p>
+<p><a href="http://www.a.com/d">www.a.com/d</a>]()</p>
+<p>In autolink literal search or link end?</p>
+<p><a href="">https://a.com?d</a></p>
+<p><a href="">http://a.com?d</a></p>
+<p><a href="">www.a.com?d</a></p>
+<p><a href="https://a.com?d">https://a.com?d</a>]()</p>
+<p><a href="http://a.com?d">http://a.com?d</a>]()</p>
+<p><a href="http://www.a.com?d">www.a.com?d</a>]()</p>
+<p>In autolink literal hash or link end?</p>
+<p><a href="">https://a.com#d</a></p>
+<p><a href="">http://a.com#d</a></p>
+<p><a href="">www.a.com#d</a></p>
+<p><a href="https://a.com#d">https://a.com#d</a>]()</p>
+<p><a href="http://a.com#d">http://a.com#d</a>]()</p>
+<p><a href="http://www.a.com#d">www.a.com#d</a>]()</p>
+"###,
+ "should match path or link end like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"Last non-markdown ASCII whitespace (FF): noreply@example.com, http://example.com, https://example.com, www.example.com
+
+Last non-whitespace ASCII control (US): noreply@example.com, http://example.com, https://example.com, www.example.com
+
+First punctuation after controls: !noreply@example.com, !http://example.com, !https://example.com, !www.example.com
+
+Last punctuation before digits: /noreply@example.com, /http://example.com, /https://example.com, /www.example.com
+
+First digit: 0noreply@example.com, 0http://example.com, 0https://example.com, 0www.example.com
+
+First punctuation after digits: :noreply@example.com, :http://example.com, :https://example.com, :www.example.com
+
+Last punctuation before caps: @noreply@example.com, @http://example.com, @https://example.com, @www.example.com
+
+First uppercase: Anoreply@example.com, Ahttp://example.com, Ahttps://example.com, Awww.example.com
+
+Punctuation after uppercase: \noreply@example.com, \http://example.com, \https://example.com, \www.example.com
+
+Last punctuation before lowercase (1): `noreply@example.com;
+
+(2) `http://example.com;
+
+(3) `https://example.com;
+
+(4) `www.example.com; (broken up to prevent code from forming)
+
+First lowercase: anoreply@example.com, ahttp://example.com, ahttps://example.com, awww.example.com
+
+First punctuation after lowercase: {noreply@example.com, {http://example.com, {https://example.com, {www.example.com
+
+Last punctuation: ~noreply@example.com, ~http://example.com, ~https://example.com, ~www.example.com
+
+First non-ASCII unicode whitespace (0x80): Â…noreply@example.com, Â…http://example.com, Â…https://example.com, Â…www.example.com
+
+Last non-ASCII unicode whitespace (0x3000):  noreply@example.com,  http://example.com,  https://example.com,  www.example.com
+
+First non-ASCII punctuation: ¡noreply@example.com, ¡http://example.com, ¡https://example.com, ¡www.example.com
+
+Last non-ASCII punctuation: ・noreply@example.com, ・http://example.com, ・https://example.com, ・www.example.com
+
+Some non-ascii: 中noreply@example.com, 中http://example.com, 中https://example.com, 中www.example.com
+
+Some more non-ascii: 🤷‍noreply@example.com, 🤷‍http://example.com, 🤷‍https://example.com, 🤷‍www.example.com
+"###,
+ &gfm
+ ),
+ r###"<p>Last non-markdown ASCII whitespace (FF): <a href="mailto:noreply@example.com">noreply@example.com</a>, <a href="http://example.com">http://example.com</a>, <a href="https://example.com">https://example.com</a>, www.example.com</p>
+<p>Last non-whitespace ASCII control (US): <a href="mailto:noreply@example.com">noreply@example.com</a>, <a href="http://example.com">http://example.com</a>, <a href="https://example.com">https://example.com</a>, www.example.com</p>
+<p>First punctuation after controls: !<a href="mailto:noreply@example.com">noreply@example.com</a>, !<a href="http://example.com">http://example.com</a>, !<a href="https://example.com">https://example.com</a>, !www.example.com</p>
+<p>Last punctuation before digits: /noreply@example.com, /<a href="http://example.com">http://example.com</a>, /<a href="https://example.com">https://example.com</a>, /www.example.com</p>
+<p>First digit: <a href="mailto:0noreply@example.com">0noreply@example.com</a>, 0<a href="http://example.com">http://example.com</a>, 0<a href="https://example.com">https://example.com</a>, 0www.example.com</p>
+<p>First punctuation after digits: :<a href="mailto:noreply@example.com">noreply@example.com</a>, :<a href="http://example.com">http://example.com</a>, :<a href="https://example.com">https://example.com</a>, :www.example.com</p>
+<p>Last punctuation before caps: @<a href="mailto:noreply@example.com">noreply@example.com</a>, @<a href="http://example.com">http://example.com</a>, @<a href="https://example.com">https://example.com</a>, @www.example.com</p>
+<p>First uppercase: <a href="mailto:Anoreply@example.com">Anoreply@example.com</a>, Ahttp://example.com, Ahttps://example.com, Awww.example.com</p>
+<p>Punctuation after uppercase: \<a href="mailto:noreply@example.com">noreply@example.com</a>, \<a href="http://example.com">http://example.com</a>, \<a href="https://example.com">https://example.com</a>, \www.example.com</p>
+<p>Last punctuation before lowercase (1): `<a href="mailto:noreply@example.com">noreply@example.com</a>;</p>
+<p>(2) `<a href="http://example.com">http://example.com</a>;</p>
+<p>(3) `<a href="https://example.com">https://example.com</a>;</p>
+<p>(4) `www.example.com; (broken up to prevent code from forming)</p>
+<p>First lowercase: <a href="mailto:anoreply@example.com">anoreply@example.com</a>, ahttp://example.com, ahttps://example.com, awww.example.com</p>
+<p>First punctuation after lowercase: {<a href="mailto:noreply@example.com">noreply@example.com</a>, {<a href="http://example.com">http://example.com</a>, {<a href="https://example.com">https://example.com</a>, {www.example.com</p>
+<p>Last punctuation: ~<a href="mailto:noreply@example.com">noreply@example.com</a>, ~<a href="http://example.com">http://example.com</a>, ~<a href="https://example.com">https://example.com</a>, ~<a href="http://www.example.com">www.example.com</a></p>
+<p>First non-ASCII unicode whitespace (0x80): Â…<a href="mailto:noreply@example.com">noreply@example.com</a>, Â…<a href="http://example.com">http://example.com</a>, Â…<a href="https://example.com">https://example.com</a>, Â…www.example.com</p>
+<p>Last non-ASCII unicode whitespace (0x3000):  <a href="mailto:noreply@example.com">noreply@example.com</a>,  <a href="http://example.com">http://example.com</a>,  <a href="https://example.com">https://example.com</a>,  www.example.com</p>
+<p>First non-ASCII punctuation: ¡<a href="mailto:noreply@example.com">noreply@example.com</a>, ¡<a href="http://example.com">http://example.com</a>, ¡<a href="https://example.com">https://example.com</a>, ¡www.example.com</p>
+<p>Last non-ASCII punctuation: ・<a href="mailto:noreply@example.com">noreply@example.com</a>, ・<a href="http://example.com">http://example.com</a>, ・<a href="https://example.com">https://example.com</a>, ・www.example.com</p>
+<p>Some non-ascii: 中<a href="mailto:noreply@example.com">noreply@example.com</a>, 中<a href="http://example.com">http://example.com</a>, 中<a href="https://example.com">https://example.com</a>, 中www.example.com</p>
+<p>Some more non-ascii: 🤷‍<a href="mailto:noreply@example.com">noreply@example.com</a>, 🤷‍<a href="http://example.com">http://example.com</a>, 🤷‍<a href="https://example.com">https://example.com</a>, 🤷‍www.example.com</p>
+"###,
+ "should match previous (complex) like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# HTTP
+
+https://a.b can start after EOF
+
+Can start after EOL:
+https://a.b
+
+Can start after tab: https://a.b.
+
+Can start after space: https://a.b.
+
+Can start after left paren (https://a.b.
+
+Can start after asterisk *https://a.b.
+
+Can start after underscore *_https://a.b.
+
+Can start after tilde ~https://a.b.
+
+# www
+
+www.a.b can start after EOF
+
+Can start after EOL:
+www.a.b
+
+Can start after tab: www.a.b.
+
+Can start after space: www.a.b.
+
+Can start after left paren (www.a.b.
+
+Can start after asterisk *www.a.b.
+
+Can start after underscore *_www.a.b.
+
+Can start after tilde ~www.a.b.
+
+# Email
+
+## Correct character before
+
+a@b.c can start after EOF
+
+Can start after EOL:
+a@b.c
+
+Can start after tab: a@b.c.
+
+Can start after space: a@b.c.
+
+Can start after left paren(a@b.c.
+
+Can start after asterisk*a@b.c.
+
+While theoretically it’s possible to start at an underscore, that underscore
+is part of the email, so it’s in fact part of the link: _a@b.c.
+
+Can start after tilde~a@b.c.
+
+## Others characters before
+
+While other characters before the email aren’t allowed by GFM, they work on
+github.com: !a@b.c, "a@b.c, #a@b.c, $a@b.c, &a@b.c, 'a@b.c, )a@b.c, +a@b.c,
+,a@b.c, -a@b.c, .a@b.c, /a@b.c, :a@b.c, ;a@b.c, <a@b.c, =a@b.c, >a@b.c, ?a@b.c,
+@a@b.c, \a@b.c, ]a@b.c, ^a@b.c, `a@b.c, {a@b.c, }a@b.c.
+
+## Commas
+
+See `https://github.com/remarkjs/remark/discussions/678`.
+
+,https://github.com
+
+[ ,https://github.com
+
+[asd] ,https://github.com
+"###,
+ &gfm
+ ),
+ r###"<h1>HTTP</h1>
+<p><a href="https://a.b">https://a.b</a> can start after EOF</p>
+<p>Can start after EOL:
+<a href="https://a.b">https://a.b</a></p>
+<p>Can start after tab: <a href="https://a.b">https://a.b</a>.</p>
+<p>Can start after space: <a href="https://a.b">https://a.b</a>.</p>
+<p>Can start after left paren (<a href="https://a.b">https://a.b</a>.</p>
+<p>Can start after asterisk *<a href="https://a.b">https://a.b</a>.</p>
+<p>Can start after underscore *_<a href="https://a.b">https://a.b</a>.</p>
+<p>Can start after tilde ~<a href="https://a.b">https://a.b</a>.</p>
+<h1>www</h1>
+<p><a href="http://www.a.b">www.a.b</a> can start after EOF</p>
+<p>Can start after EOL:
+<a href="http://www.a.b">www.a.b</a></p>
+<p>Can start after tab: <a href="http://www.a.b">www.a.b</a>.</p>
+<p>Can start after space: <a href="http://www.a.b">www.a.b</a>.</p>
+<p>Can start after left paren (<a href="http://www.a.b">www.a.b</a>.</p>
+<p>Can start after asterisk *<a href="http://www.a.b">www.a.b</a>.</p>
+<p>Can start after underscore *_<a href="http://www.a.b">www.a.b</a>.</p>
+<p>Can start after tilde ~<a href="http://www.a.b">www.a.b</a>.</p>
+<h1>Email</h1>
+<h2>Correct character before</h2>
+<p><a href="mailto:a@b.c">a@b.c</a> can start after EOF</p>
+<p>Can start after EOL:
+<a href="mailto:a@b.c">a@b.c</a></p>
+<p>Can start after tab: <a href="mailto:a@b.c">a@b.c</a>.</p>
+<p>Can start after space: <a href="mailto:a@b.c">a@b.c</a>.</p>
+<p>Can start after left paren(<a href="mailto:a@b.c">a@b.c</a>.</p>
+<p>Can start after asterisk*<a href="mailto:a@b.c">a@b.c</a>.</p>
+<p>While theoretically it’s possible to start at an underscore, that underscore
+is part of the email, so it’s in fact part of the link: <a href="mailto:_a@b.c">_a@b.c</a>.</p>
+<p>Can start after tilde~<a href="mailto:a@b.c">a@b.c</a>.</p>
+<h2>Others characters before</h2>
+<p>While other characters before the email aren’t allowed by GFM, they work on
+github.com: !<a href="mailto:a@b.c">a@b.c</a>, &quot;<a href="mailto:a@b.c">a@b.c</a>, #<a href="mailto:a@b.c">a@b.c</a>, $<a href="mailto:a@b.c">a@b.c</a>, &amp;<a href="mailto:a@b.c">a@b.c</a>, '<a href="mailto:a@b.c">a@b.c</a>, )<a href="mailto:a@b.c">a@b.c</a>, <a href="mailto:+a@b.c">+a@b.c</a>,
+,<a href="mailto:a@b.c">a@b.c</a>, <a href="mailto:-a@b.c">-a@b.c</a>, <a href="mailto:.a@b.c">.a@b.c</a>, /a@b.c, :<a href="mailto:a@b.c">a@b.c</a>, ;<a href="mailto:a@b.c">a@b.c</a>, &lt;<a href="mailto:a@b.c">a@b.c</a>, =<a href="mailto:a@b.c">a@b.c</a>, &gt;<a href="mailto:a@b.c">a@b.c</a>, ?<a href="mailto:a@b.c">a@b.c</a>,
+@<a href="mailto:a@b.c">a@b.c</a>, \<a href="mailto:a@b.c">a@b.c</a>, ]<a href="mailto:a@b.c">a@b.c</a>, ^<a href="mailto:a@b.c">a@b.c</a>, `<a href="mailto:a@b.c">a@b.c</a>, {<a href="mailto:a@b.c">a@b.c</a>, }<a href="mailto:a@b.c">a@b.c</a>.</p>
+<h2>Commas</h2>
+<p>See <code>https://github.com/remarkjs/remark/discussions/678</code>.</p>
+<p>,<a href="https://github.com">https://github.com</a></p>
+<p>[ ,<a href="https://github.com">https://github.com</a></p>
+<p>[asd] ,<a href="https://github.com">https://github.com</a></p>
+"###,
+ "should match previous like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# wwwtf 2?
+
+www.a (space)
+
+www.a!
+
+www.a"
+
+www.a#
+
+www.a$
+
+www.a%
+
+www.a&
+
+www.a'
+
+www.a(
+
+www.a)
+
+www.a*
+
+www.a+
+
+www.a,
+
+www.a-
+
+www.a
+
+www.a.
+
+www.a/
+
+www.a:
+
+www.a;
+
+www.a<
+
+www.a=
+
+www.a>
+
+www.a?
+
+www.a@
+
+www.a[
+
+www.a\
+
+www.a]
+
+www.a^
+
+www.a_
+
+www.a`
+
+www.a{
+
+www.a|
+
+www.a}
+
+www.a~
+"###,
+ &gfm
+ ),
+ r###"<h1>wwwtf 2?</h1>
+<p><a href="http://www.a">www.a</a> (space)</p>
+<p><a href="http://www.a">www.a</a>!</p>
+<p><a href="http://www.a">www.a</a>&quot;</p>
+<p><a href="http://www.a#">www.a#</a></p>
+<p><a href="http://www.a$">www.a$</a></p>
+<p><a href="http://www.a%25">www.a%</a></p>
+<p><a href="http://www.a&amp;">www.a&amp;</a></p>
+<p><a href="http://www.a">www.a</a>'</p>
+<p><a href="http://www.a(">www.a(</a></p>
+<p><a href="http://www.a">www.a</a>)</p>
+<p><a href="http://www.a">www.a</a>*</p>
+<p><a href="http://www.a+">www.a+</a></p>
+<p><a href="http://www.a">www.a</a>,</p>
+<p><a href="http://www.a-">www.a-</a></p>
+<p><a href="http://www.a">www.a</a></p>
+<p><a href="http://www.a">www.a</a>.</p>
+<p><a href="http://www.a/">www.a/</a></p>
+<p><a href="http://www.a">www.a</a>:</p>
+<p><a href="http://www.a">www.a</a>;</p>
+<p><a href="http://www.a">www.a</a>&lt;</p>
+<p><a href="http://www.a=">www.a=</a></p>
+<p><a href="http://www.a%3E">www.a&gt;</a></p>
+<p><a href="http://www.a">www.a</a>?</p>
+<p><a href="http://www.a@">www.a@</a></p>
+<p><a href="http://www.a%5B">www.a[</a></p>
+<p><a href="http://www.a%5C">www.a\</a></p>
+<p><a href="http://www.a">www.a</a>]</p>
+<p><a href="http://www.a%5E">www.a^</a></p>
+<p><a href="http://www.a">www.a</a>_</p>
+<p><a href="http://www.a%60">www.a`</a></p>
+<p><a href="http://www.a%7B">www.a{</a></p>
+<p><a href="http://www.a%7C">www.a|</a></p>
+<p><a href="http://www.a%7D">www.a}</a></p>
+<p><a href="http://www.a">www.a</a>~</p>
+"###,
+ "should match www (domain continue) like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# wwwtf 5?
+
+www.a. (space)
+
+www.a.!
+
+www.a."
+
+www.a.#
+
+www.a.$
+
+www.a.%
+
+www.a.&
+
+www.a.'
+
+www.a.(
+
+www.a.)
+
+www.a.*
+
+www.a.+
+
+www.a.,
+
+www.a.-
+
+www.a.
+
+www.a..
+
+www.a./
+
+www.a.:
+
+www.a.;
+
+www.a.<
+
+www.a.=
+
+www.a.>
+
+www.a.?
+
+www.a.@
+
+www.a.[
+
+www.a.\
+
+www.a.]
+
+www.a.^
+
+www.a._
+
+www.a.`
+
+www.a.{
+
+www.a.|
+
+www.a.}
+
+www.a.~
+"###,
+ &gfm
+ ),
+ r###"<h1>wwwtf 5?</h1>
+<p><a href="http://www.a">www.a</a>. (space)</p>
+<p><a href="http://www.a">www.a</a>.!</p>
+<p><a href="http://www.a">www.a</a>.&quot;</p>
+<p><a href="http://www.a.#">www.a.#</a></p>
+<p><a href="http://www.a.$">www.a.$</a></p>
+<p><a href="http://www.a.%25">www.a.%</a></p>
+<p><a href="http://www.a.&amp;">www.a.&amp;</a></p>
+<p><a href="http://www.a">www.a</a>.'</p>
+<p><a href="http://www.a.(">www.a.(</a></p>
+<p><a href="http://www.a">www.a</a>.)</p>
+<p><a href="http://www.a">www.a</a>.*</p>
+<p><a href="http://www.a.+">www.a.+</a></p>
+<p><a href="http://www.a">www.a</a>.,</p>
+<p><a href="http://www.a.-">www.a.-</a></p>
+<p><a href="http://www.a">www.a</a>.</p>
+<p><a href="http://www.a">www.a</a>..</p>
+<p><a href="http://www.a./">www.a./</a></p>
+<p><a href="http://www.a">www.a</a>.:</p>
+<p><a href="http://www.a">www.a</a>.;</p>
+<p><a href="http://www.a">www.a</a>.&lt;</p>
+<p><a href="http://www.a.=">www.a.=</a></p>
+<p><a href="http://www.a.%3E">www.a.&gt;</a></p>
+<p><a href="http://www.a">www.a</a>.?</p>
+<p><a href="http://www.a.@">www.a.@</a></p>
+<p><a href="http://www.a.%5B">www.a.[</a></p>
+<p><a href="http://www.a.%5C">www.a.\</a></p>
+<p><a href="http://www.a">www.a</a>.]</p>
+<p><a href="http://www.a.%5E">www.a.^</a></p>
+<p><a href="http://www.a">www.a</a>._</p>
+<p><a href="http://www.a.%60">www.a.`</a></p>
+<p><a href="http://www.a.%7B">www.a.{</a></p>
+<p><a href="http://www.a.%7C">www.a.|</a></p>
+<p><a href="http://www.a.%7D">www.a.}</a></p>
+<p><a href="http://www.a">www.a</a>.~</p>
+"###,
+ "should match www (domain dot) like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# wwwtf?
+
+www. (space)
+
+www.!
+
+www."
+
+www.#
+
+www.$
+
+www.%
+
+www.&
+
+www.'
+
+www.(
+
+www.)
+
+www.*
+
+www.+
+
+www.,
+
+www.-
+
+www.
+
+www..
+
+www./
+
+www.:
+
+www.;
+
+www.<
+
+www.=
+
+www.>
+
+www.?
+
+www.@
+
+www.[
+
+www.\
+
+www.]
+
+www.^
+
+www._
+
+www.`
+
+www.{
+
+www.|
+
+www.}
+
+www.~
+"###,
+ &gfm
+ ),
+ r###"<h1>wwwtf?</h1>
+<p><a href="http://www">www</a>. (space)</p>
+<p><a href="http://www">www</a>.!</p>
+<p><a href="http://www">www</a>.&quot;</p>
+<p><a href="http://www.#">www.#</a></p>
+<p><a href="http://www.$">www.$</a></p>
+<p><a href="http://www.%25">www.%</a></p>
+<p><a href="http://www.&amp;">www.&amp;</a></p>
+<p><a href="http://www">www</a>.'</p>
+<p><a href="http://www.(">www.(</a></p>
+<p><a href="http://www">www</a>.)</p>
+<p><a href="http://www">www</a>.*</p>
+<p><a href="http://www.+">www.+</a></p>
+<p><a href="http://www">www</a>.,</p>
+<p><a href="http://www.-">www.-</a></p>
+<p>www.</p>
+<p><a href="http://www">www</a>..</p>
+<p><a href="http://www./">www./</a></p>
+<p><a href="http://www">www</a>.:</p>
+<p><a href="http://www">www</a>.;</p>
+<p><a href="http://www">www</a>.&lt;</p>
+<p><a href="http://www.=">www.=</a></p>
+<p><a href="http://www.%3E">www.&gt;</a></p>
+<p><a href="http://www">www</a>.?</p>
+<p><a href="http://www.@">www.@</a></p>
+<p><a href="http://www.%5B">www.[</a></p>
+<p><a href="http://www.%5C">www.\</a></p>
+<p><a href="http://www">www</a>.]</p>
+<p><a href="http://www.%5E">www.^</a></p>
+<p><a href="http://www">www</a>._</p>
+<p><a href="http://www.%60">www.`</a></p>
+<p><a href="http://www.%7B">www.{</a></p>
+<p><a href="http://www.%7C">www.|</a></p>
+<p><a href="http://www.%7D">www.}</a></p>
+<p><a href="http://www">www</a>.~</p>
+"###,
+ "should match www (domain start) like GitHub does"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# wwwtf? (4)
+
+www.a/b (space)
+
+www.a/b!
+
+www.a/b"
+
+www.a/b#
+
+www.a/b$
+
+www.a/b%
+
+www.a/b&
+
+www.a/b'
+
+www.a/b(
+
+www.a/b)
+
+www.a/b*
+
+www.a/b+
+
+www.a/b,
+
+www.a/b-
+
+www.a/b
+
+www.a/b.
+
+www.a/b/
+
+www.a/b:
+
+www.a/b;
+
+www.a/b<
+
+www.a/b=
+
+www.a/b>
+
+www.a/b?
+
+www.a/b@
+
+www.a/b[
+
+www.a/b\
+
+www.a/b]
+
+www.a/b^
+
+www.a/b_
+
+www.a/b`
+
+www.a/b{
+
+www.a/b|
+
+www.a/b}
+
+www.a/b~
+"###,
+ &gfm
+ ),
+ r###"<h1>wwwtf? (4)</h1>
+<p><a href="http://www.a/b">www.a/b</a> (space)</p>
+<p><a href="http://www.a/b">www.a/b</a>!</p>
+<p><a href="http://www.a/b">www.a/b</a>&quot;</p>
+<p><a href="http://www.a/b#">www.a/b#</a></p>
+<p><a href="http://www.a/b$">www.a/b$</a></p>
+<p><a href="http://www.a/b%25">www.a/b%</a></p>
+<p><a href="http://www.a/b&amp;">www.a/b&amp;</a></p>
+<p><a href="http://www.a/b">www.a/b</a>'</p>
+<p><a href="http://www.a/b(">www.a/b(</a></p>
+<p><a href="http://www.a/b">www.a/b</a>)</p>
+<p><a href="http://www.a/b">www.a/b</a>*</p>
+<p><a href="http://www.a/b+">www.a/b+</a></p>
+<p><a href="http://www.a/b">www.a/b</a>,</p>
+<p><a href="http://www.a/b-">www.a/b-</a></p>
+<p><a href="http://www.a/b">www.a/b</a></p>
+<p><a href="http://www.a/b">www.a/b</a>.</p>
+<p><a href="http://www.a/b/">www.a/b/</a></p>
+<p><a href="http://www.a/b">www.a/b</a>:</p>
+<p><a href="http://www.a/b">www.a/b</a>;</p>
+<p><a href="http://www.a/b">www.a/b</a>&lt;</p>
+<p><a href="http://www.a/b=">www.a/b=</a></p>
+<p><a href="http://www.a/b%3E">www.a/b&gt;</a></p>
+<p><a href="http://www.a/b">www.a/b</a>?</p>
+<p><a href="http://www.a/b@">www.a/b@</a></p>
+<p><a href="http://www.a/b%5B">www.a/b[</a></p>
+<p><a href="http://www.a/b%5C">www.a/b\</a></p>
+<p><a href="http://www.a/b">www.a/b</a>]</p>
+<p><a href="http://www.a/b%5E">www.a/b^</a></p>
+<p><a href="http://www.a/b">www.a/b</a>_</p>
+<p><a href="http://www.a/b%60">www.a/b`</a></p>
+<p><a href="http://www.a/b%7B">www.a/b{</a></p>
+<p><a href="http://www.a/b%7C">www.a/b|</a></p>
+<p><a href="http://www.a/b%7D">www.a/b}</a></p>
+<p><a href="http://www.a/b">www.a/b</a>~</p>
+"###,
+ "should match www (path continue) like GitHub does (except for the bracket bug)"
+ );
+
+ assert_eq!(
+ micromark_with_options(
+ r###"# wwwtf? (3)
+
+www.a/ (space)
+
+www.a/!
+
+www.a/"
+
+www.a/#
+
+www.a/$
+
+www.a/%
+
+www.a/&
+
+www.a/'
+
+www.a/(
+
+www.a/)
+
+www.a/*
+
+www.a/+
+
+www.a/,
+
+www.a/-
+
+www.a/
+
+www.a/.
+
+www.a//
+
+www.a/:
+
+www.a/;
+
+www.a/<
+
+www.a/=
+
+www.a/>
+
+www.a/?
+
+www.a/@
+
+www.a/[
+
+www.a/\
+
+www.a/]
+
+www.a/^
+
+www.a/_
+
+www.a/`
+
+www.a/{
+
+www.a/|
+
+www.a/}
+
+www.a/~
+"###,
+ &gfm
+ ),
+ r###"<h1>wwwtf? (3)</h1>
+<p><a href="http://www.a/">www.a/</a> (space)</p>
+<p><a href="http://www.a/">www.a/</a>!</p>
+<p><a href="http://www.a/">www.a/</a>&quot;</p>
+<p><a href="http://www.a/#">www.a/#</a></p>
+<p><a href="http://www.a/$">www.a/$</a></p>
+<p><a href="http://www.a/%25">www.a/%</a></p>
+<p><a href="http://www.a/&amp;">www.a/&amp;</a></p>
+<p><a href="http://www.a/">www.a/</a>'</p>
+<p><a href="http://www.a/(">www.a/(</a></p>
+<p><a href="http://www.a/">www.a/</a>)</p>
+<p><a href="http://www.a/">www.a/</a>*</p>
+<p><a href="http://www.a/+">www.a/+</a></p>
+<p><a href="http://www.a/">www.a/</a>,</p>
+<p><a href="http://www.a/-">www.a/-</a></p>
+<p><a href="http://www.a/">www.a/</a></p>
+<p><a href="http://www.a/">www.a/</a>.</p>
+<p><a href="http://www.a//">www.a//</a></p>
+<p><a href="http://www.a/">www.a/</a>:</p>
+<p><a href="http://www.a/">www.a/</a>;</p>
+<p><a href="http://www.a/">www.a/</a>&lt;</p>
+<p><a href="http://www.a/=">www.a/=</a></p>
+<p><a href="http://www.a/%3E">www.a/&gt;</a></p>
+<p><a href="http://www.a/">www.a/</a>?</p>
+<p><a href="http://www.a/@">www.a/@</a></p>
+<p><a href="http://www.a/%5B">www.a/[</a></p>
+<p><a href="http://www.a/%5C">www.a/\</a></p>
+<p><a href="http://www.a/">www.a/</a>]</p>
+<p><a href="http://www.a/%5E">www.a/^</a></p>
+<p><a href="http://www.a/">www.a/</a>_</p>
+<p><a href="http://www.a/%60">www.a/`</a></p>
+<p><a href="http://www.a/%7B">www.a/{</a></p>
+<p><a href="http://www.a/%7C">www.a/|</a></p>
+<p><a href="http://www.a/%7D">www.a/}</a></p>
+<p><a href="http://www.a/">www.a/</a>~</p>
+"###,
+ "should match www (path start) like GitHub does (except for the bracket bug)"
);
}