extern crate markdown; use markdown::{ mdast::{Link, Node, Paragraph, Root, Text}, to_html, to_html_with_options, to_mdast, unist::Position, Constructs, Options, ParseOptions, }; use pretty_assertions::assert_eq;
/// Test for GFM autolink literals (bare `https://…`, `www.…`, and
/// email-like text).
///
/// `gfm` is `Options::default()` with only `parse.constructs` replaced by
/// `Constructs::gfm()`. The first three assertions call `to_html` (defaults)
/// and, per their messages, check that nothing is linked by default. Every
/// later HTML assertion goes through `to_html_with_options(…, &gfm)`. The
/// final assertion compares the tree returned by `to_mdast(…, &gfm.parse)`
/// against a `Root` > `Paragraph` in which each autolink literal is a
/// `Node::Link` wrapping a single `Node::Text` with the same position.
///
/// Returns the error as soon as a `?` on `to_html_with_options` or
/// `to_mdast` fails; otherwise `Ok(())`.
///
/// NOTE(review): in this copy the expected strings contain no HTML tags
/// (e.g. the `to_html` and `to_html_with_options` cases for
/// `https://example.com` expect the same text), and the assertion with the
/// message "should stop domains/paths at `<`" has no `&gfm` argument. This
/// looks like markup was stripped from the file; TODO confirm and restore
/// the fixtures from the original source before relying on these assertions.
#[test] fn gfm_autolink_literal() -> Result<(), String> { let gfm = Options { parse: ParseOptions { constructs: Constructs::gfm(), ..ParseOptions::default() }, ..Options::default() }; assert_eq!( to_html("https://example.com"), "

https://example.com

", "should ignore protocol urls by default" ); assert_eq!( to_html("www.example.com"), "

www.example.com

", "should ignore www urls by default" ); assert_eq!( to_html("user@example.com"), "

user@example.com

", "should ignore email urls by default" ); assert_eq!( to_html_with_options("https://example.com", &gfm)?, "

https://example.com

", "should support protocol urls if enabled" ); assert_eq!( to_html_with_options("www.example.com", &gfm)?, "

www.example.com

", "should support www urls if enabled" ); assert_eq!( to_html_with_options("user@example.com", &gfm)?, "

user@example.com

", "should support email urls if enabled" ); assert_eq!( to_html_with_options("[https://example.com](xxx)", &gfm)?, "

https://example.com

", "should not link protocol urls in links" ); assert_eq!( to_html_with_options("[www.example.com](xxx)", &gfm)?, "

www.example.com

", "should not link www urls in links" ); assert_eq!( to_html_with_options("[user@example.com](xxx)", &gfm)?, "

user@example.com

", "should not link email urls in links" ); assert_eq!( to_html_with_options("user@example.com", &gfm)?, "

user@example.com

", "should support a closing paren at TLD (email)" ); assert_eq!( to_html_with_options("www.a.)", &gfm)?, "

www.a.)

", "should support a closing paren at TLD (www)" ); assert_eq!( to_html_with_options("www.a b", &gfm)?, "

www.a b

", "should support no TLD" ); assert_eq!( to_html_with_options("www.a/b c", &gfm)?, "

www.a/b c

", "should support a path instead of TLD" ); assert_eq!( to_html_with_options("www.�a", &gfm)?, "

www.�a

", "should support a replacement character in a domain" ); assert_eq!( to_html_with_options("http://點看.com", &gfm)?, "

http://點看.com

", "should support non-ascii characters in a domain (http)" ); assert_eq!( to_html_with_options("www.點看.com", &gfm)?, "

www.點看.com

", "should support non-ascii characters in a domain (www)" ); assert_eq!( to_html_with_options("點看@example.com", &gfm)?, "

點看@example.com

", "should *not* support non-ascii characters in atext (email)" ); assert_eq!( to_html_with_options("example@點看.com", &gfm)?, "

example@點看.com

", "should *not* support non-ascii characters in a domain (email)" ); assert_eq!( to_html_with_options("www.a.com/點看", &gfm)?, "

www.a.com/點看

", "should support non-ascii characters in a path" ); assert_eq!( to_html_with_options("www.-a.b", &gfm)?, "

www.-a.b

", "should support a dash to start a domain" ); assert_eq!( to_html_with_options("www.$", &gfm)?, "

www.$

", "should support a dollar as a domain name" ); assert_eq!( to_html_with_options("www.a..b.c", &gfm)?, "

www.a..b.c

", "should support adjacent dots in a domain name" ); assert_eq!( to_html_with_options("www.a&a;", &gfm)?, "

www.a&a;

", "should support named character references in domains" ); assert_eq!( to_html_with_options("https://a.bc/d/e/).", &gfm)?, "

https://a.bc/d/e/).

", "should support a closing paren and period after a path" ); assert_eq!( to_html_with_options("https://a.bc/d/e/.)", &gfm)?, "

https://a.bc/d/e/.)

", "should support a period and closing paren after a path" ); assert_eq!( to_html_with_options("https://a.bc).", &gfm)?, "

https://a.bc).

", "should support a closing paren and period after a domain" ); assert_eq!( to_html_with_options("https://a.bc.)", &gfm)?, "

https://a.bc.)

", "should support a period and closing paren after a domain" ); assert_eq!( to_html_with_options("https://a.bc).d", &gfm)?, "

https://a.bc).d

", "should support a closing paren and period in a path" ); assert_eq!( to_html_with_options("https://a.bc.)d", &gfm)?, "

https://a.bc.)d

", "should support a period and closing paren in a path" ); assert_eq!( to_html_with_options("https://a.bc/))d", &gfm)?, "

https://a.bc/))d

", "should support two closing parens in a path" ); assert_eq!( to_html_with_options("ftp://a/b/c.txt", &gfm)?, "

ftp://a/b/c.txt

", "should not support ftp links" ); // Note: GH comments/issues/PRs do not link this, but Gists/readmes do. // Fixing it would mean deviating from `cmark-gfm`: // Source: . // assert_eq!( // to_html_with_options("，www.example.com", &gfm)?, // "

，www.example.com

", // "should support www links after Unicode punctuation", // ); assert_eq!( to_html_with_options("，https://example.com", &gfm)?, "

，https://example.com

", "should support http links after Unicode punctuation" ); assert_eq!( to_html_with_options("，example@example.com", &gfm)?, "

，example@example.com

", "should support email links after Unicode punctuation" ); assert_eq!( to_html_with_options( "http://user:password@host:port/path?key=value#fragment", &gfm )?, "

http://user:password@host:port/path?key=value#fragment

", "should not link character reference for `:`" ); assert_eq!( to_html_with_options("http://example.com/abhttp://example.com/ab<cd

", "should stop domains/paths at `<`" ); assert_eq!( to_html_with_options( r###" mailto:scyther@pokemon.com This is a mailto:scyther@pokemon.com mailto:scyther@pokemon.com. mmmmailto:scyther@pokemon.com mailto:scyther@pokemon.com/ mailto:scyther@pokemon.com/message mailto:scyther@pokemon.com/mailto:beedrill@pokemon.com xmpp:scyther@pokemon.com xmpp:scyther@pokemon.com. xmpp:scyther@pokemon.com/message xmpp:scyther@pokemon.com/message. Email me at:scyther@pokemon.com"###, &gfm )?, r###"

mailto:scyther@pokemon.com

This is a mailto:scyther@pokemon.com

mailto:scyther@pokemon.com.

mmmmailto:scyther@pokemon.com

mailto:scyther@pokemon.com/

mailto:scyther@pokemon.com/message

mailto:scyther@pokemon.com/mailto:beedrill@pokemon.com

xmpp:scyther@pokemon.com

xmpp:scyther@pokemon.com.

xmpp:scyther@pokemon.com/message

xmpp:scyther@pokemon.com/message.

Email me at:scyther@pokemon.com

"###, "should support `mailto:` and `xmpp:` protocols" ); assert_eq!( to_html_with_options( r###" a www.example.com&xxx;b c a www.example.com&xxx;. b a www.example.com&xxxxxxxxx;. b a www.example.com&xxxxxxxxxx;. b a www.example.com&xxxxxxxxxxx;. b a www.example.com&xxx. b a www.example.com{. b a www.example.com&123. b a www.example.com&x. b a www.example.com. b a www.example.com&1. b a www.example.com&. b a www.example.com& b "###, &gfm )?, r###"

a www.example.com&xxx;b c

a www.example.com&xxx;. b

a www.example.com&xxxxxxxxx;. b

a www.example.com&xxxxxxxxxx;. b

a www.example.com&xxxxxxxxxxx;. b

a www.example.com&xxx. b

a www.example.com&#123. b

a www.example.com&123. b

a www.example.com&x. b

a www.example.com&#1. b

a www.example.com&1. b

a www.example.com&. b

a www.example.com& b

"###, "should match “character references” like GitHub does" ); // Note: this deviates from GFM, as is fixed. assert_eq!( to_html_with_options( r###" [ www.example.com [ https://example.com [ contact@example.com [ www.example.com ] [ https://example.com ] [ contact@example.com ] [ www.example.com ](#) [ https://example.com ](#) [ contact@example.com ](#) ![ www.example.com ](#) ![ https://example.com ](#) ![ contact@example.com ](#) "###, &gfm )?, r###"

[ www.example.com

[ https://example.com

[ contact@example.com

[ www.example.com ]

[ https://example.com ]

[ contact@example.com ]

www.example.com

https://example.com

contact@example.com

www.example.com

contact@example.com

"###, "should match interplay with brackets, links, and images, like GitHub does (but without the bugs)" ); assert_eq!( to_html_with_options( r###" www.example.com/?=a(b)cccccc www.example.com/?=a(b(c)ccccc www.example.com/?=a(b(c)c)cccc www.example.com/?=a(b(c)c)c)ccc www.example.com/?q=a(business) www.example.com/?q=a(business))) (www.example.com/?q=a(business)) (www.example.com/?q=a(business) www.example.com/?q=a(business)". www.example.com/?q=a(business))) (www.example.com/?q=a(business))". (www.example.com/?q=a(business)".) (www.example.com/?q=a(business)". "###, &gfm )?, r###"

www.example.com/?=a(b)cccccc

www.example.com/?=a(b(c)ccccc

www.example.com/?=a(b(c)c)cccc

www.example.com/?=a(b(c)c)c)ccc

www.example.com/?q=a(business)

www.example.com/?q=a(business)))

(www.example.com/?q=a(business))

(www.example.com/?q=a(business)

www.example.com/?q=a(business)".

www.example.com/?q=a(business)))

(www.example.com/?q=a(business))".

(www.example.com/?q=a(business)".)

(www.example.com/?q=a(business)".

"###, "should match parens like GitHub does" ); // Note: this deviates from GFM. // Here, the following issues are fixed: // - assert_eq!( to_html_with_options( r###" # Literal autolinks ## WWW autolinks w.commonmark.org ww.commonmark.org www.commonmark.org Www.commonmark.org wWw.commonmark.org wwW.commonmark.org WWW.COMMONMARK.ORG Visit www.commonmark.org/help for more information. Visit www.commonmark.org. Visit www.commonmark.org/a.b. www.aaa.bbb.ccc_ccc www.aaa_bbb.ccc www.aaa.bbb.ccc.ddd_ddd www.aaa.bbb.ccc_ccc.ddd www.aaa.bbb_bbb.ccc.ddd www.aaa_aaa.bbb.ccc.ddd Visit www.commonmark.org. Visit www.commonmark.org/a.b. www.google.com/search?q=Markup+(business) www.google.com/search?q=Markup+(business))) (www.google.com/search?q=Markup+(business)) (www.google.com/search?q=Markup+(business) www.google.com/search?q=(business))+ok www.google.com/search?q=commonmark&hl=en www.google.com/search?q=commonmark&hl;en www.google.com/search?q=commonmark&hl; www.commonmark.org/he should still be expanded. "###, &gfm )?, r###"

Literal autolinks

WWW autolinks

w.commonmark.org

ww.commonmark.org

Visit www.commonmark.org/help for more information.

Visit www.commonmark.org.

Visit www.commonmark.org/a.b.

www.aaa.bbb.ccc_ccc

www.aaa_bbb.ccc

www.aaa.bbb.ccc.ddd_ddd

www.aaa.bbb.ccc_ccc.ddd

www.aaa.bbb_bbb.ccc.ddd

www.aaa_aaa.bbb.ccc.ddd

Visit www.commonmark.org.

Visit www.commonmark.org/a.b.

www.google.com/search?q=Markup+(business)

www.google.com/search?q=Markup+(business)))

(www.google.com/search?q=Markup+(business))

(www.google.com/search?q=Markup+(business)

www.google.com/search?q=(business))+ok

www.google.com/search?q=commonmark&hl=en

www.google.com/search?q=commonmark&hl;en

www.google.com/search?q=commonmark&hl;

www.commonmark.org/he<lp

HTTP autolinks

hexample.com

htexample.com

httexample.com

httpexample.com

http:example.com

http:/example.com

https:/example.com

http://example.com

https://example.com

https://example

http://commonmark.org

(Visit https://encrypted.google.com/search?q=Markup+(business))

Email autolinks

No dot: foo@barbaz

No dot: foo@barbaz.

foo@bar.baz

hello@mail+xyz.example isn’t valid, but hello+xyz@mail.example is.

a.b-c_d@a.b

a.b-c_d@a.b.

a.b-c_d@a.b-

a.b-c_d@a.b_

a@a_b.c

a@a-b.c

Can’t end in an underscore followed by a period: aaa@a.b_.

Can contain an underscore followed by a period: aaa@a.b_.c

Link text should not be expanded

Visit www.example.com please.

Visit http://www.example.com please.

Mail example@example.com please.

link http://autolink should still be expanded.

"###, "should match base like GitHub does" ); assert_eq!( to_html_with_options( r###"H0. [https://a.com©b [www.a.com©b H1. []https://a.com©b []www.a.com©b H2. [] https://a.com©b [] www.a.com©b H3. [[https://a.com©b [[www.a.com©b H4. [[]https://a.com©b [[]www.a.com©b H5. [[]]https://a.com©b [[]]www.a.com©b "###, &gfm )?, r###"

H0.

H1.

H2.

H3.

H4.

H5.

[[]]www.a.com©b

"###, "should match brackets like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options(r###"Image start. ![https://a.com ![http://a.com ![www.a.com ![a@b.c Image start and label end. ![https://a.com] ![http://a.com] ![www.a.com] ![a@b.c] Image label with reference (note: GH cleans hashes here, but we keep them in). ![https://a.com][x] ![http://a.com][x] ![www.a.com][x] ![a@b.c][x] [x]: # Image label with resource. ![https://a.com]() ![http://a.com]() ![www.a.com]() ![a@b.c]() Autolink literal after image. ![a]() https://a.com ![a]() http://a.com ![a]() www.a.com ![a]() a@b.c "###, &gfm)?, r###"

Image start.

![https://a.com

![http://a.com

![www.a.com

![a@b.c

Image start and label end.

![https://a.com]

![http://a.com]

![www.a.com]

![a@b.c]

Image label with reference (note: GH cleans hashes here, but we keep them in).

www.a.com

a@b.c

Image label with resource.

www.a.com

a@b.c

Autolink literal after image.

"###, "should match autolink literals combined w/ images like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options(r###"Link start. [https://a.com [http://a.com [www.a.com [a@b.c Label end. https://a.com] http://a.com] www.a.com] a@b.c] Link start and label end. [https://a.com] [http://a.com] [www.a.com] [a@b.c] What naïvely seems like a label end (A). https://a.com`]` http://a.com`]` www.a.com`]` a@b.c`]` Link start and what naïvely seems like a balanced brace (B). [https://a.com`]` [http://a.com`]` [www.a.com`]` [a@b.c`]` What naïvely seems like a label end (C). https://a.com `]` http://a.com `]` www.a.com `]` a@b.c `]` Link start and what naïvely seems like a balanced brace (D). [https://a.com `]` [http://a.com `]` [www.a.com `]` [a@b.c `]` Link label with reference. [https://a.com][x] [http://a.com][x] [www.a.com][x] [a@b.c][x] [x]: # Link label with resource. [https://a.com]() [http://a.com]() [www.a.com]() [a@b.c]() More in link. [a https://b.com c]() [a http://b.com c]() [a www.b.com c]() [a b@c.d e]() Autolink literal after link. [a]() https://a.com [a]() http://a.com [a]() www.a.com [a]() a@b.c "###, &gfm)?, r###"

Link start.

Label end.

Link start and label end.

[https://a.com]

[http://a.com]

[www.a.com]

[a@b.c]

What naïvely seems like a label end (A).

https://a.com`]`

http://a.com`]`

www.a.com`]`

a@b.c]

Link start and what naïvely seems like a balanced brace (B).

[https://a.com`]`

[http://a.com`]`

[www.a.com`]`

[a@b.c]

What naïvely seems like a label end (C).

https://a.com ]

http://a.com ]

www.a.com ]

a@b.c ]

Link start and what naïvely seems like a balanced brace (D).

[https://a.com ]

[http://a.com ]

[www.a.com ]

[a@b.c ]

Link label with reference.

Link label with resource.

“character reference”

www.a&b (space)

www.a&b;

"###, "should match “character references (named)” like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options(r###"# “character reference” www.a# (space) www.a#! www.a#" www.a## www.a#$ www.a#% www.a#& www.a#' www.a#( www.a#) www.a#* www.a#+ www.a#, www.a#- www.a# www.a#. www.a#/ www.a#: www.a# www.a#< www.a#= www.a#> www.a#? www.a#@ www.a#[ www.a#\ www.a#] www.a#^ www.a#_ www.a#` www.a#{ www.a#| www.a#} www.a#~ "###, &gfm)?, r###"

“character reference”

www.a&#35 (space)

"###, "should match “character references (numeric)” like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"a@0.0 a@0.b a@a.29 a@a.b a@0.0.c react@0.11.1 react@0.12.0-rc1 react@0.14.0-alpha1 react@16.7.0-alpha.2 react@0.0.0-experimental-aae83a4b9 [ react@0.11.1 [ react@0.12.0-rc1 [ react@0.14.0-alpha1 [ react@16.7.0-alpha.2 [ react@0.0.0-experimental-aae83a4b9 "###, &gfm )?, r###"

a@0.0

a@0.b

a@a.29

a@a.b

a@0.0.c

react@0.11.1

react@0.12.0-rc1

react@0.14.0-alpha1

react@16.7.0-alpha.2

react@0.0.0-experimental-aae83a4b9

[ react@0.11.1

[ react@0.12.0-rc1

[ react@0.14.0-alpha1

[ react@16.7.0-alpha.2

[ react@0.0.0-experimental-aae83a4b9

"###, "should match email TLD digits like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (2) http://a (space) http://a! http://a" http://a# http://a$ http://a% http://a& http://a' http://a( http://a) http://a* http://a+ http://a, http://a- http://a http://a. http://a/ http://a: http://a; http://a< http://a= http://a> http://a? http://a@ http://a[ http://a\ http://a] http://a^ http://a_ http://a` http://a{ http://a| http://a} http://a~ "###, &gfm )?, r###"

httpshhh? (2)

http://a (space)

"###, "should match protocol domain continue like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (1) http:// (space) http://! http://" http://# http://$ http://% http://& http://' http://( http://) http://* http://+ http://, http://- http:// http://. http:/// http://: http://; http://< http://= http://> http://? http://@ http://[ http://\ http://] http://^ http://_ http://` http://{ http://| http://} http://~ "###, &gfm )?, r###"

httpshhh? (1)

http:// (space)

http://!

http://"

http://#

http://$

http://%

http://&

http://'

http://(

http://)

http://*

http://+

http://,

http://-

http://

http://.

http:///

http://:

http://;

http://<

http://=

http://>

http://?

http://@

http://[

http://\

http://]

http://^

http://_

http://`

http://{

http://|

http://}

http://~

"###, "should match protocol domain start like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (4) http://a/b (space) http://a/b! http://a/b" http://a/b# http://a/b$ http://a/b% http://a/b& http://a/b' http://a/b( http://a/b) http://a/b* http://a/b+ http://a/b, http://a/b- http://a/b http://a/b. http://a/b/ http://a/b: http://a/b; http://a/b< http://a/b= http://a/b> http://a/b? http://a/b@ http://a/b[ http://a/b\ http://a/b] http://a/b^ http://a/b_ http://a/b` http://a/b{ http://a/b| http://a/b} http://a/b~ "###, &gfm )?, r###"

httpshhh? (4)

http://a/b (space)

"###, "should match protocol path continue like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (3) http://a/ (space) http://a/! http://a/" http://a/# http://a/$ http://a/% http://a/& http://a/' http://a/( http://a/) http://a/* http://a/+ http://a/, http://a/- http://a/ http://a/. http://a// http://a/: http://a/; http://a/< http://a/= http://a/> http://a/? http://a/@ http://a/[ http://a/\ http://a/] http://a/^ http://a/_ http://a/` http://a/{ http://a/| http://a/} http://a/~ "###, &gfm )?, r###"

httpshhh? (3)

http://a/ (space)

"###, "should match protocol path start like GitHub does" ); assert_eq!( to_html_with_options( r###"[www.example.com/a©](#) www.example.com/a© [www.example.com/a&bogus;](#) www.example.com/a&bogus; [www.example.com/a\.](#) www.example.com/a\. "###, &gfm )?, r###"

www.example.com/a©

www.example.com/a&bogus;

www.example.com/a\.

"###, "should match links, autolink literals, and characters like GitHub does" ); assert_eq!( to_html_with_options( r###"# “character reference” www.a/b&c (space) www.a/b&c! www.a/b&c" www.a/b&c# www.a/b&c$ www.a/b&c% www.a/b&c& www.a/b&c' www.a/b&c( www.a/b&c) www.a/b&c* www.a/b&c+ www.a/b&c, www.a/b&c- www.a/b&c www.a/b&c. www.a/b&c/ www.a/b&c: www.a/b&c; www.a/b&c< www.a/b&c= www.a/b&c> www.a/b&c? www.a/b&c@ www.a/b&c[ www.a/b&c\ www.a/b&c] www.a/b&c^ www.a/b&c_ www.a/b&c` www.a/b&c{ www.a/b&c| www.a/b&c} www.a/b&c~ "###, &gfm )?, r###"

“character reference”

www.a/b&c (space)

www.a/b&c;

"###, "should match character reference-like (named) things in paths like GitHub does" ); assert_eq!( to_html_with_options( r###"# “character reference” www.a/b# (space) www.a/b#! www.a/b#" www.a/b## www.a/b#$ www.a/b#% www.a/b#& www.a/b#' www.a/b#( www.a/b#) www.a/b#* www.a/b#+ www.a/b#, www.a/b#- www.a/b# www.a/b#. www.a/b#/ www.a/b#: www.a/b# www.a/b#< www.a/b#= www.a/b#> www.a/b#? www.a/b#@ www.a/b#[ www.a/b#\ www.a/b#] www.a/b#^ www.a/b#_ www.a/b#` www.a/b#{ www.a/b#| www.a/b#} www.a/b#~ "###, &gfm )?, r###"

“character reference”

www.a/b&#35 (space)

"###, "should match character reference-like (numeric) things in paths like GitHub does" ); assert_eq!( to_html_with_options( r###"In autolink literal path or link end? [https://a.com/d]() [http://a.com/d]() [www.a.com/d]() https://a.com/d]() http://a.com/d]() www.a.com/d]() In autolink literal search or link end? [https://a.com?d]() [http://a.com?d]() [www.a.com?d]() https://a.com?d]() http://a.com?d]() www.a.com?d]() In autolink literal hash or link end? [https://a.com#d]() [http://a.com#d]() [www.a.com#d]() https://a.com#d]() http://a.com#d]() www.a.com#d]() "###, &gfm )?, r###"

In autolink literal path or link end?

www.a.com/d]()

In autolink literal search or link end?

www.a.com?d]()

In autolink literal hash or link end?

www.a.com#d]()

"###, "should match path or link end like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"Last non-markdown ASCII whitespace (FF): noreply@example.com, http://example.com, https://example.com, www.example.com Last non-whitespace ASCII control (US): noreply@example.com, http://example.com, https://example.com, www.example.com First punctuation after controls: !noreply@example.com, !http://example.com, !https://example.com, !www.example.com Last punctuation before digits: /noreply@example.com, /http://example.com, /https://example.com, /www.example.com First digit: 0noreply@example.com, 0http://example.com, 0https://example.com, 0www.example.com First punctuation after digits: :noreply@example.com, :http://example.com, :https://example.com, :www.example.com Last punctuation before caps: @noreply@example.com, @http://example.com, @https://example.com, @www.example.com First uppercase: Anoreply@example.com, Ahttp://example.com, Ahttps://example.com, Awww.example.com Punctuation after uppercase: \noreply@example.com, \http://example.com, \https://example.com, \www.example.com Last punctuation before lowercase (1): `noreply@example.com; (2) `http://example.com; (3) `https://example.com; (4) `www.example.com; (broken up to prevent code from forming) First lowercase: anoreply@example.com, ahttp://example.com, ahttps://example.com, awww.example.com First punctuation after lowercase: {noreply@example.com, {http://example.com, {https://example.com, {www.example.com Last punctuation: ~noreply@example.com, ~http://example.com, ~https://example.com, ~www.example.com First non-ASCII unicode whitespace (0x80): noreply@example.com, http://example.com, https://example.com, www.example.com Last non-ASCII unicode whitespace (0x3000): 　noreply@example.com, 　http://example.com, 　https://example.com, 　www.example.com First non-ASCII punctuation: ¡noreply@example.com, ¡http://example.com, ¡https://example.com, ¡www.example.com Last non-ASCII 
punctuation: ･noreply@example.com, ･http://example.com, ･https://example.com, ･www.example.com Some non-ascii: 中noreply@example.com, 中http://example.com, 中https://example.com, 中www.example.com Some more non-ascii: 🤷‍noreply@example.com, 🤷‍http://example.com, 🤷‍https://example.com, 🤷‍www.example.com "###, &gfm )?, r###"

Last non-markdown ASCII whitespace (FF): noreply@example.com, http://example.com, https://example.com, www.example.com

Last non-whitespace ASCII control (US): noreply@example.com, http://example.com, https://example.com, www.example.com

First punctuation after controls: !noreply@example.com, !http://example.com, !https://example.com, !www.example.com

Last punctuation before digits: /noreply@example.com, /http://example.com, /https://example.com, /www.example.com

First digit: 0noreply@example.com, 0http://example.com, 0https://example.com, 0www.example.com

First punctuation after digits: :noreply@example.com, :http://example.com, :https://example.com, :www.example.com

Last punctuation before caps: @noreply@example.com, @http://example.com, @https://example.com, @www.example.com

First uppercase: Anoreply@example.com, Ahttp://example.com, Ahttps://example.com, Awww.example.com

Punctuation after uppercase: \noreply@example.com, \http://example.com, \https://example.com, \www.example.com

Last punctuation before lowercase (1): `noreply@example.com;

(2) `http://example.com;

(3) `https://example.com;

(4) `www.example.com; (broken up to prevent code from forming)

First lowercase: anoreply@example.com, ahttp://example.com, ahttps://example.com, awww.example.com

First punctuation after lowercase: {noreply@example.com, {http://example.com, {https://example.com, {www.example.com

Last punctuation: ~noreply@example.com, ~http://example.com, ~https://example.com, ~www.example.com

First non-ASCII unicode whitespace (0x80): noreply@example.com, http://example.com, https://example.com, www.example.com

Last non-ASCII unicode whitespace (0x3000): 　noreply@example.com, 　http://example.com, 　https://example.com, 　www.example.com

First non-ASCII punctuation: ¡noreply@example.com, ¡http://example.com, ¡https://example.com, ¡www.example.com

Last non-ASCII punctuation: ･noreply@example.com, ･http://example.com, ･https://example.com, ･www.example.com

Some non-ascii: 中noreply@example.com, 中http://example.com, 中https://example.com, 中www.example.com

Some more non-ascii: 🤷‍noreply@example.com, 🤷‍http://example.com, 🤷‍https://example.com, 🤷‍www.example.com

"###, "should match previous (complex) like GitHub does" ); assert_eq!( to_html_with_options( r###"# HTTP https://a.b can start after EOF Can start after EOL: https://a.b Can start after tab: https://a.b. Can start after space: https://a.b. Can start after left paren (https://a.b. Can start after asterisk *https://a.b. Can start after underscore *_https://a.b. Can start after tilde ~https://a.b. # www www.a.b can start after EOF Can start after EOL: www.a.b Can start after tab: www.a.b. Can start after space: www.a.b. Can start after left paren (www.a.b. Can start after asterisk *www.a.b. Can start after underscore *_www.a.b. Can start after tilde ~www.a.b. # Email ## Correct character before a@b.c can start after EOF Can start after EOL: a@b.c Can start after tab: a@b.c. Can start after space: a@b.c. Can start after left paren(a@b.c. Can start after asterisk*a@b.c. While theoretically it’s possible to start at an underscore, that underscore is part of the email, so it’s in fact part of the link: _a@b.c. Can start after tilde~a@b.c. ## Others characters before While other characters before the email aren’t allowed by GFM, they work on github.com: !a@b.c, "a@b.c, #a@b.c, $a@b.c, &a@b.c, 'a@b.c, )a@b.c, +a@b.c, ,a@b.c, -a@b.c, .a@b.c, /a@b.c, :a@b.c, ;a@b.c, a@b.c, ?a@b.c, @a@b.c, \a@b.c, ]a@b.c, ^a@b.c, `a@b.c, {a@b.c, }a@b.c. ## Commas See `https://github.com/remarkjs/remark/discussions/678`. ,https://github.com [ ,https://github.com [asd] ,https://github.com "###, &gfm )?, r###"

HTTP

https://a.b can start after EOF

Can start after EOL: https://a.b

Can start after tab: https://a.b.

Can start after space: https://a.b.

Can start after left paren (https://a.b.

Can start after asterisk *https://a.b.

Can start after underscore *_https://a.b.

Can start after tilde ~https://a.b.

www

www.a.b can start after EOF

Can start after EOL: www.a.b

Can start after tab: www.a.b.

Can start after space: www.a.b.

Can start after left paren (www.a.b.

Can start after asterisk *www.a.b.

Can start after underscore *_www.a.b.

Can start after tilde ~www.a.b.

Email

Correct character before

a@b.c can start after EOF

Can start after EOL: a@b.c

Can start after tab: a@b.c.

Can start after space: a@b.c.

Can start after left paren(a@b.c.

Can start after asterisk*a@b.c.

While theoretically it’s possible to start at an underscore, that underscore is part of the email, so it’s in fact part of the link: _a@b.c.

Can start after tilde~a@b.c.

Others characters before

While other characters before the email aren’t allowed by GFM, they work on github.com: !a@b.c, "a@b.c, #a@b.c, $a@b.c, &a@b.c, 'a@b.c, )a@b.c, +a@b.c, ,a@b.c, -a@b.c, .a@b.c, /a@b.c, :a@b.c, ;a@b.c, <a@b.c, =a@b.c, >a@b.c, ?a@b.c, @a@b.c, \a@b.c, ]a@b.c, ^a@b.c, `a@b.c, {a@b.c, }a@b.c.

Commas

See https://github.com/remarkjs/remark/discussions/678.

,https://github.com

[ ,https://github.com

[asd] ,https://github.com

"###, "should match previous like GitHub does" ); assert_eq!( to_html_with_options( r###"# wwwtf 2? www.a (space) www.a! www.a" www.a# www.a$ www.a% www.a& www.a' www.a( www.a) www.a* www.a+ www.a, www.a- www.a www.a. www.a/ www.a: www.a; www.a< www.a= www.a> www.a? www.a@ www.a[ www.a\ www.a] www.a^ www.a_ www.a` www.a{ www.a| www.a} www.a~ "###, &gfm )?, r###"

wwwtf 2?

www.a (space)

"###, "should match www (domain continue) like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"# wwwtf 5? www.a. (space) www.a.! www.a." www.a.# www.a.$ www.a.% www.a.& www.a.' www.a.( www.a.) www.a.* www.a.+ www.a., www.a.- www.a. www.a.. www.a./ www.a.: www.a.; www.a.< www.a.= www.a.> www.a.? www.a.@ www.a.[ www.a.\ www.a.] www.a.^ www.a._ www.a.` www.a.{ www.a.| www.a.} www.a.~ "###, &gfm )?, r###"

wwwtf 5?

www.a. (space)

www.a.!

www.a."

www.a.'

www.a.)

www.a.*

www.a.,

www.a..

www.a.:

www.a.;

www.a.<

www.a.?

www.a.]

www.a._

www.a.~

"###, "should match www (domain dot) like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"# wwwtf? www. (space) www.! www." www.# www.$ www.% www.& www.' www.( www.) www.* www.+ www., www.- www. www.. www./ www.: www.; www.< www.= www.> www.? www.@ www.[ www.\ www.] www.^ www._ www.` www.{ www.| www.} www.~ "###, &gfm )?, r###"

wwwtf?

www. (space)

www.!

www."

www.'

www.)

www.*

www.+

www.,

www.-

www.

www..

www./

www.:

www.;

www.<

www.?

www.]

www._

www.~

"###, "should match www (domain start) like GitHub does" ); assert_eq!( to_html_with_options( r###"# wwwtf? (4) www.a/b (space) www.a/b! www.a/b" www.a/b# www.a/b$ www.a/b% www.a/b& www.a/b' www.a/b( www.a/b) www.a/b* www.a/b+ www.a/b, www.a/b- www.a/b www.a/b. www.a/b/ www.a/b: www.a/b; www.a/b< www.a/b= www.a/b> www.a/b? www.a/b@ www.a/b[ www.a/b\ www.a/b] www.a/b^ www.a/b_ www.a/b` www.a/b{ www.a/b| www.a/b} www.a/b~ "###, &gfm )?, r###"

wwwtf? (4)

www.a/b (space)

"###, "should match www (path continue) like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"# wwwtf? (3) www.a/ (space) www.a/! www.a/" www.a/# www.a/$ www.a/% www.a/& www.a/' www.a/( www.a/) www.a/* www.a/+ www.a/, www.a/- www.a/ www.a/. www.a// www.a/: www.a/; www.a/< www.a/= www.a/> www.a/? www.a/@ www.a/[ www.a/\ www.a/] www.a/^ www.a/_ www.a/` www.a/{ www.a/| www.a/} www.a/~ "###, &gfm )?, r###"

wwwtf? (3)

www.a/ (space)

"###, "should match www (path start) like GitHub does (except for the bracket bug)" ); assert_eq!( to_mdast( "a https://alpha.com b bravo@charlie.com c www.delta.com d xmpp:echo@foxtrot.com e mailto:golf@hotel.com f.", &gfm.parse )?, Node::Root(Root { children: vec![Node::Paragraph(Paragraph { children: vec![ Node::Text(Text { value: "a ".into(), position: Some(Position::new(1, 1, 0, 1, 3, 2)) }), Node::Link(Link { url: "https://alpha.com".into(), title: None, children: vec![Node::Text(Text { value: "https://alpha.com".into(), position: Some(Position::new(1, 3, 2, 1, 20, 19)) }),], position: Some(Position::new(1, 3, 2, 1, 20, 19)) }), Node::Text(Text { value: " b ".into(), position: Some(Position::new(1, 20, 19, 1, 23, 22)) }), Node::Link(Link { url: "mailto:bravo@charlie.com".into(), title: None, children: vec![Node::Text(Text { value: "bravo@charlie.com".into(), position: Some(Position::new(1, 23, 22, 1, 40, 39)) }),], position: Some(Position::new(1, 23, 22, 1, 40, 39)) }), Node::Text(Text { value: " c ".into(), position: Some(Position::new(1, 40, 39, 1, 43, 42)) }), Node::Link(Link { url: "http://www.delta.com".into(), title: None, children: vec![Node::Text(Text { value: "www.delta.com".into(), position: Some(Position::new(1, 43, 42, 1, 56, 55)) }),], position: Some(Position::new(1, 43, 42, 1, 56, 55)) }), Node::Text(Text { value: " d ".into(), position: Some(Position::new(1, 56, 55, 1, 59, 58)) }), Node::Link(Link { url: "xmpp:echo@foxtrot.com".into(), title: None, children: vec![Node::Text(Text { value: "xmpp:echo@foxtrot.com".into(), position: Some(Position::new(1, 59, 58, 1, 80, 79)) }),], position: Some(Position::new(1, 59, 58, 1, 80, 79)) }), Node::Text(Text { value: " e ".into(), position: Some(Position::new(1, 80, 79, 1, 83, 82)) }), Node::Link(Link { url: "mailto:golf@hotel.com".into(), title: None, children: vec![Node::Text(Text { value: "mailto:golf@hotel.com".into(), position: Some(Position::new(1, 83, 82, 1, 104, 103)) }),], position: 
Some(Position::new(1, 83, 82, 1, 104, 103)) }), Node::Text(Text { value: " f.".into(), position: Some(Position::new(1, 104, 103, 1, 107, 106)) }) ], position: Some(Position::new(1, 1, 0, 1, 107, 106)) })], position: Some(Position::new(1, 1, 0, 1, 107, 106)) }), "should support GFM autolink literals as `Link`s in mdast" ); Ok(()) }