extern crate markdown; use markdown::{ mdast::{Link, Node, Paragraph, Root, Text}, to_html, to_html_with_options, to_mdast, unist::Position, Options, ParseOptions, }; use pretty_assertions::assert_eq; #[test] fn gfm_autolink_literal() -> Result<(), String> { assert_eq!( to_html("https://example.com"), "
https://example.com
", "should ignore protocol urls by default" ); assert_eq!( to_html("www.example.com"), "www.example.com
", "should ignore www urls by default" ); assert_eq!( to_html("user@example.com"), "user@example.com
", "should ignore email urls by default" ); assert_eq!( to_html_with_options("https://example.com", &Options::gfm())?, "", "should support protocol urls if enabled" ); assert_eq!( to_html_with_options("www.example.com", &Options::gfm())?, "", "should support www urls if enabled" ); assert_eq!( to_html_with_options("user@example.com", &Options::gfm())?, "", "should support email urls if enabled" ); assert_eq!( to_html_with_options("[https://example.com](xxx)", &Options::gfm())?, "", "should not link protocol urls in links" ); assert_eq!( to_html_with_options("[www.example.com](xxx)", &Options::gfm())?, "", "should not link www urls in links" ); assert_eq!( to_html_with_options("[user@example.com](xxx)", &Options::gfm())?, "", "should not link email urls in links" ); assert_eq!( to_html_with_options("user@example.com", &Options::gfm())?, "", "should support a closing paren at TLD (email)" ); assert_eq!( to_html_with_options("www.a.)", &Options::gfm())?, "www.a.)
", "should support a closing paren at TLD (www)" ); assert_eq!( to_html_with_options("www.a b", &Options::gfm())?, "www.a b
", "should support no TLD" ); assert_eq!( to_html_with_options("www.a/b c", &Options::gfm())?, "www.a/b c
", "should support a path instead of TLD" ); assert_eq!( to_html_with_options("www.�a", &Options::gfm())?, "", "should support a replacement character in a domain" ); assert_eq!( to_html_with_options("http://點看.com", &Options::gfm())?, "", "should support non-ascii characters in a domain (http)" ); assert_eq!( to_html_with_options("www.點看.com", &Options::gfm())?, "", "should support non-ascii characters in a domain (www)" ); assert_eq!( to_html_with_options("點看@example.com", &Options::gfm())?, "點看@example.com
", "should *not* support non-ascii characters in atext (email)" ); assert_eq!( to_html_with_options("example@點看.com", &Options::gfm())?, "example@點看.com
", "should *not* support non-ascii characters in a domain (email)" ); assert_eq!( to_html_with_options("www.a.com/點看", &Options::gfm())?, "", "should support non-ascii characters in a path" ); assert_eq!( to_html_with_options("www.-a.b", &Options::gfm())?, "", "should support a dash to start a domain" ); assert_eq!( to_html_with_options("www.$", &Options::gfm())?, "", "should support a dollar as a domain name" ); assert_eq!( to_html_with_options("www.a..b.c", &Options::gfm())?, "", "should support adjacent dots in a domain name" ); assert_eq!( to_html_with_options("www.a&a;", &Options::gfm())?, "www.a&a;
", "should support named character references in domains" ); assert_eq!( to_html_with_options("https://a.bc/d/e/).", &Options::gfm())?, "", "should support a closing paren and period after a path" ); assert_eq!( to_html_with_options("https://a.bc/d/e/.)", &Options::gfm())?, "", "should support a period and closing paren after a path" ); assert_eq!( to_html_with_options("https://a.bc).", &Options::gfm())?, "", "should support a closing paren and period after a domain" ); assert_eq!( to_html_with_options("https://a.bc.)", &Options::gfm())?, "", "should support a period and closing paren after a domain" ); assert_eq!( to_html_with_options("https://a.bc).d", &Options::gfm())?, "", "should support a closing paren and period in a path" ); assert_eq!( to_html_with_options("https://a.bc.)d", &Options::gfm())?, "", "should support a period and closing paren in a path" ); assert_eq!( to_html_with_options("https://a.bc/))d", &Options::gfm())?, "", "should support two closing parens in a path" ); assert_eq!( to_html_with_options("ftp://a/b/c.txt", &Options::gfm())?, "ftp://a/b/c.txt
", "should not support ftp links" ); // Note: GH comments/issues/PRs do not link this, but Gists/readmes do. // Fixing it would mean deviating from `cmark-gfm`: // Source:http://user:password@host:port/path?key=value#fragment
", "should not link character reference for `:`" ); assert_eq!( to_html_with_options("http://example.com/abThis is a mailto:scyther@pokemon.com
mmmmailto:scyther@pokemon.com
mailto:scyther@pokemon.com/message
mailto:scyther@pokemon.com/mailto:beedrill@pokemon.com
xmpp:scyther@pokemon.com/message
xmpp:scyther@pokemon.com/message.
Email me at:scyther@pokemon.com
"###, "should support `mailto:` and `xmpp:` protocols" ); assert_eq!( to_html_with_options( r###" a www.example.com&xxx;b c a www.example.com&xxx;. b a www.example.com&xxxxxxxxx;. b a www.example.com&xxxxxxxxxx;. b a www.example.com&xxxxxxxxxxx;. b a www.example.com&xxx. b a www.example.com{. b a www.example.com&123. b a www.example.com&x. b a www.example.com. b a www.example.com&1. b a www.example.com&. b a www.example.com& b "###, &Options::gfm() )?, r###"a www.example.com&xxx;. b
a www.example.com&xxxxxxxxx;. b
a www.example.com&xxxxxxxxxx;. b
a www.example.com&xxxxxxxxxxx;. b
a www.example.com&xxx. b
a www.example.com{. b
a www.example.com&123. b
a www.example.com&x. b
a www.example.com. b
a www.example.com&1. b
a www.example.com&. b
a www.example.com& b
"###, "should match “character references” like GitHub does" ); // Note: this deviates from GFM, as[ www.example.com ]
www.example.com/?=a(b(c)c)cccc
www.example.com/?=a(b(c)c)c)ccc
www.example.com/?q=a(business)
www.example.com/?q=a(business)))
(www.example.com/?q=a(business))
(www.example.com/?q=a(business)
www.example.com/?q=a(business)".
www.example.com/?q=a(business)))
(www.example.com/?q=a(business))".
(www.example.com/?q=a(business)".)
(www.example.com/?q=a(business)".
"###, "should match parens like GitHub does" ); // Note: this deviates from GFM. // Here, the following issues are fixed: // -w.commonmark.org
ww.commonmark.org
Visit www.commonmark.org/help for more information.
Visit www.commonmark.org.
Visit www.commonmark.org/a.b.
www.aaa.bbb.ccc_ccc
www.aaa_bbb.ccc
www.aaa.bbb.ccc.ddd_ddd
www.aaa.bbb.ccc_ccc.ddd
Visit www.commonmark.org.
Visit www.commonmark.org/a.b.
www.google.com/search?q=Markup+(business)
www.google.com/search?q=Markup+(business)))
(www.google.com/search?q=Markup+(business))
(www.google.com/search?q=Markup+(business)
www.google.com/search?q=(business))+ok
www.google.com/search?q=commonmark&hl=en
www.google.com/search?q=commonmark&hl;en
www.google.com/search?q=commonmark&hl;
hexample.com
htexample.com
httexample.com
httpexample.com
http:example.com
http:/example.com
https:/example.com
(Visit https://encrypted.google.com/search?q=Markup+(business))
No dot: foo@barbaz
No dot: foo@barbaz.
hello@mail+xyz.example isn’t valid, but hello+xyz@mail.example is.
a.b-c_d@a.b-
a.b-c_d@a.b_
Can’t end in an underscore followed by a period: aaa@a.b_.
Can contain an underscore followed by a period: aaa@a.b_.c
Visit www.example.com please.
Visit http://www.example.com please.
Mail example@example.com please.
link http://autolink should still be expanded.
"###, "should match base like GitHub does" ); assert_eq!( to_html_with_options( r###"H0. [https://a.com©b [www.a.com©b H1. []https://a.com©b []www.a.com©b H2. [] https://a.com©b [] www.a.com©b H3. [[https://a.com©b [[www.a.com©b H4. [[]https://a.com©b [[]www.a.com©b H5. [[]]https://a.com©b [[]]www.a.com©b "###, &Options::gfm() )?, r###"H0.
H1.
H2.
H3.
H4.
H5.
[[]]www.a.com©b
"###, "should match brackets like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options(r###"Image start. ![https://a.com ![http://a.com ![www.a.com ![a@b.c Image start and label end. ![https://a.com] ![http://a.com] ![www.a.com] ![a@b.c] Image label with reference (note: GH cleans hashes here, but we keep them in). ![https://a.com][x] ![http://a.com][x] ![www.a.com][x] ![a@b.c][x] [x]: # Image label with resource. ![https://a.com]() ![http://a.com]() ![www.a.com]() ![a@b.c]() Autolink literal after image. ![a]() https://a.com ![a]() http://a.com ![a]() www.a.com ![a]() a@b.c "###, &Options::gfm())?, r###"Image start.
![a@b.c
Image start and label end.
![http://a.com]
![www.a.com]
![a@b.c]
Image label with reference (note: GH cleans hashes here, but we keep them in).
Image label with resource.
Autolink literal after image.
"###, "should match autolink literals combined w/ images like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options(r###"Link start. [https://a.com [http://a.com [www.a.com [a@b.c Label end. https://a.com] http://a.com] www.a.com] a@b.c] Link start and label end. [https://a.com] [http://a.com] [www.a.com] [a@b.c] What naïvely seems like a label end (A). https://a.com`]` http://a.com`]` www.a.com`]` a@b.c`]` Link start and what naïvely seems like a balanced brace (B). [https://a.com`]` [http://a.com`]` [www.a.com`]` [a@b.c`]` What naïvely seems like a label end (C). https://a.com `]` http://a.com `]` www.a.com `]` a@b.c `]` Link start and what naïvely seems like a balanced brace (D). [https://a.com `]` [http://a.com `]` [www.a.com `]` [a@b.c `]` Link label with reference. [https://a.com][x] [http://a.com][x] [www.a.com][x] [a@b.c][x] [x]: # Link label with resource. [https://a.com]() [http://a.com]() [www.a.com]() [a@b.c]() More in link. [a https://b.com c]() [a http://b.com c]() [a www.b.com c]() [a b@c.d e]() Autolink literal after link. [a]() https://a.com [a]() http://a.com [a]() www.a.com [a]() a@b.c "###, &Options::gfm())?, r###"Link start.
Label end.
Link start and label end.
[a@b.c]
What naïvely seems like a label end (A).
Link start and what naïvely seems like a balanced brace (B).
[a@b.c]
What naïvely seems like a label end (C).
a@b.c ]
Link start and what naïvely seems like a balanced brace (D).
[http://a.com ]
[www.a.com ]
[a@b.c ]
Link label with reference.
Link label with resource.
More in link.
Autolink literal after link.
"###, "should match autolink literals combined w/ links like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"# “character reference” www.a&b (space) www.a&b! www.a&b" www.a&b# www.a&b$ www.a&b% www.a&b& www.a&b' www.a&b( www.a&b) www.a&b* www.a&b+ www.a&b, www.a&b- www.a&b www.a&b. www.a&b/ www.a&b: www.a&b; www.a&b< www.a&b= www.a&b> www.a&b? www.a&b@ www.a&b[ www.a&b\ www.a&b] www.a&b^ www.a&b_ www.a&b` www.a&b{ www.a&b| www.a&b} www.a&b~ "###, &Options::gfm() )?, r###"www.a&b (space)
www.a&b;
"###, "should match “character references (named)” like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options(r###"# “character reference” www.a# (space) www.a#! www.a#" www.a## www.a#$ www.a#% www.a#& www.a#' www.a#( www.a#) www.a#* www.a#+ www.a#, www.a#- www.a# www.a#. www.a#/ www.a#: www.a# www.a#< www.a#= www.a#> www.a#? www.a#@ www.a#[ www.a#\ www.a#] www.a#^ www.a#_ www.a#` www.a#{ www.a#| www.a#} www.a#~ "###, &Options::gfm())?, r###"www.a# (space)
"###, "should match “character references (numeric)” like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"a@0.0 a@0.b a@a.29 a@a.b a@0.0.c react@0.11.1 react@0.12.0-rc1 react@0.14.0-alpha1 react@16.7.0-alpha.2 react@0.0.0-experimental-aae83a4b9 [ react@0.11.1 [ react@0.12.0-rc1 [ react@0.14.0-alpha1 [ react@16.7.0-alpha.2 [ react@0.0.0-experimental-aae83a4b9 "###, &Options::gfm() )?, r###"a@0.0
a@a.29
react@0.11.1
react@0.12.0-rc1
react@0.14.0-alpha1
react@16.7.0-alpha.2
react@0.0.0-experimental-aae83a4b9
[ react@0.11.1
[ react@0.12.0-rc1
[ react@0.14.0-alpha1
[ react@16.7.0-alpha.2
[ react@0.0.0-experimental-aae83a4b9
"###, "should match email TLD digits like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (2) http://a (space) http://a! http://a" http://a# http://a$ http://a% http://a& http://a' http://a( http://a) http://a* http://a+ http://a, http://a- http://a http://a. http://a/ http://a: http://a; http://a< http://a= http://a> http://a? http://a@ http://a[ http://a\ http://a] http://a^ http://a_ http://a` http://a{ http://a| http://a} http://a~ "###, &Options::gfm() )?, r###"http://a (space)
"###, "should match protocol domain continue like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (1) http:// (space) http://! http://" http://# http://$ http://% http://& http://' http://( http://) http://* http://+ http://, http://- http:// http://. http:/// http://: http://; http://< http://= http://> http://? http://@ http://[ http://\ http://] http://^ http://_ http://` http://{ http://| http://} http://~ "###, &Options::gfm() )?, r###"http:// (space)
http://!
http://"
http://#
http://$
http://%
http://&
http://'
http://(
http://)
http://*
http://+
http://,
http://-
http://
http://.
http:///
http://:
http://;
http://<
http://=
http://>
http://?
http://@
http://[
http://\
http://]
http://^
http://_
http://`
http://{
http://|
http://}
http://~
"###, "should match protocol domain start like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (4) http://a/b (space) http://a/b! http://a/b" http://a/b# http://a/b$ http://a/b% http://a/b& http://a/b' http://a/b( http://a/b) http://a/b* http://a/b+ http://a/b, http://a/b- http://a/b http://a/b. http://a/b/ http://a/b: http://a/b; http://a/b< http://a/b= http://a/b> http://a/b? http://a/b@ http://a/b[ http://a/b\ http://a/b] http://a/b^ http://a/b_ http://a/b` http://a/b{ http://a/b| http://a/b} http://a/b~ "###, &Options::gfm() )?, r###"http://a/b (space)
"###, "should match protocol path continue like GitHub does" ); assert_eq!( to_html_with_options( r###"# httpshhh? (3) http://a/ (space) http://a/! http://a/" http://a/# http://a/$ http://a/% http://a/& http://a/' http://a/( http://a/) http://a/* http://a/+ http://a/, http://a/- http://a/ http://a/. http://a// http://a/: http://a/; http://a/< http://a/= http://a/> http://a/? http://a/@ http://a/[ http://a/\ http://a/] http://a/^ http://a/_ http://a/` http://a/{ http://a/| http://a/} http://a/~ "###, &Options::gfm() )?, r###"http://a/ (space)
"###, "should match protocol path start like GitHub does" ); assert_eq!( to_html_with_options( r###"[www.example.com/a©](#) www.example.com/a© [www.example.com/a&bogus;](#) www.example.com/a&bogus; [www.example.com/a\.](#) www.example.com/a\. "###, &Options::gfm() )?, r###"www.example.com/a&bogus;
"###, "should match links, autolink literals, and characters like GitHub does" ); assert_eq!( to_html_with_options( r###"# “character reference” www.a/b&c (space) www.a/b&c! www.a/b&c" www.a/b&c# www.a/b&c$ www.a/b&c% www.a/b&c& www.a/b&c' www.a/b&c( www.a/b&c) www.a/b&c* www.a/b&c+ www.a/b&c, www.a/b&c- www.a/b&c www.a/b&c. www.a/b&c/ www.a/b&c: www.a/b&c; www.a/b&c< www.a/b&c= www.a/b&c> www.a/b&c? www.a/b&c@ www.a/b&c[ www.a/b&c\ www.a/b&c] www.a/b&c^ www.a/b&c_ www.a/b&c` www.a/b&c{ www.a/b&c| www.a/b&c} www.a/b&c~ "###, &Options::gfm() )?, r###"www.a/b&c (space)
www.a/b&c;
"###, "should match character reference-like (named) things in paths like GitHub does" ); assert_eq!( to_html_with_options( r###"# “character reference” www.a/b# (space) www.a/b#! www.a/b#" www.a/b## www.a/b#$ www.a/b#% www.a/b#& www.a/b#' www.a/b#( www.a/b#) www.a/b#* www.a/b#+ www.a/b#, www.a/b#- www.a/b# www.a/b#. www.a/b#/ www.a/b#: www.a/b# www.a/b#< www.a/b#= www.a/b#> www.a/b#? www.a/b#@ www.a/b#[ www.a/b#\ www.a/b#] www.a/b#^ www.a/b#_ www.a/b#` www.a/b#{ www.a/b#| www.a/b#} www.a/b#~ "###, &Options::gfm() )?, r###"www.a/b# (space)
"###, "should match character reference-like (numeric) things in paths like GitHub does" ); assert_eq!( to_html_with_options( r###"In autolink literal path or link end? [https://a.com/d]() [http://a.com/d]() [www.a.com/d]() https://a.com/d]() http://a.com/d]() www.a.com/d]() In autolink literal search or link end? [https://a.com?d]() [http://a.com?d]() [www.a.com?d]() https://a.com?d]() http://a.com?d]() www.a.com?d]() In autolink literal hash or link end? [https://a.com#d]() [http://a.com#d]() [www.a.com#d]() https://a.com#d]() http://a.com#d]() www.a.com#d]() "###, &Options::gfm() )?, r###"In autolink literal path or link end?
www.a.com/d]()
In autolink literal search or link end?
www.a.com?d]()
In autolink literal hash or link end?
www.a.com#d]()
"###, "should match path or link end like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"Last non-markdown ASCII whitespace (FF): noreply@example.com, http://example.com, https://example.com, www.example.com Last non-whitespace ASCII control (US): noreply@example.com, http://example.com, https://example.com, www.example.com First punctuation after controls: !noreply@example.com, !http://example.com, !https://example.com, !www.example.com Last punctuation before digits: /noreply@example.com, /http://example.com, /https://example.com, /www.example.com First digit: 0noreply@example.com, 0http://example.com, 0https://example.com, 0www.example.com First punctuation after digits: :noreply@example.com, :http://example.com, :https://example.com, :www.example.com Last punctuation before caps: @noreply@example.com, @http://example.com, @https://example.com, @www.example.com First uppercase: Anoreply@example.com, Ahttp://example.com, Ahttps://example.com, Awww.example.com Punctuation after uppercase: \noreply@example.com, \http://example.com, \https://example.com, \www.example.com Last punctuation before lowercase (1): `noreply@example.com; (2) `http://example.com; (3) `https://example.com; (4) `www.example.com; (broken up to prevent code from forming) First lowercase: anoreply@example.com, ahttp://example.com, ahttps://example.com, awww.example.com First punctuation after lowercase: {noreply@example.com, {http://example.com, {https://example.com, {www.example.com Last punctuation: ~noreply@example.com, ~http://example.com, ~https://example.com, ~www.example.com First non-ASCII unicode whitespace (0x80): noreply@example.com, http://example.com, https://example.com, www.example.com Last non-ASCII unicode whitespace (0x3000): noreply@example.com, http://example.com, https://example.com, www.example.com First non-ASCII punctuation: ¡noreply@example.com, ¡http://example.com, ¡https://example.com, ¡www.example.com Last non-ASCII punctuation: ・noreply@example.com, ・http://example.com, ・https://example.com, ・www.example.com Some non-ascii: 中noreply@example.com, 中http://example.com, 中https://example.com, 中www.example.com Some more non-ascii: 🤷noreply@example.com, 🤷http://example.com, 🤷https://example.com, 🤷www.example.com "###, &Options::gfm() )?, r###"Last non-markdown ASCII whitespace (FF): noreply@example.com, http://example.com, https://example.com, www.example.com
Last non-whitespace ASCII control (US): noreply@example.com, http://example.com, https://example.com, www.example.com
First punctuation after controls: !noreply@example.com, !http://example.com, !https://example.com, !www.example.com
Last punctuation before digits: /noreply@example.com, /http://example.com, /https://example.com, /www.example.com
First digit: 0noreply@example.com, 0http://example.com, 0https://example.com, 0www.example.com
First punctuation after digits: :noreply@example.com, :http://example.com, :https://example.com, :www.example.com
Last punctuation before caps: @noreply@example.com, @http://example.com, @https://example.com, @www.example.com
First uppercase: Anoreply@example.com, Ahttp://example.com, Ahttps://example.com, Awww.example.com
Punctuation after uppercase: \noreply@example.com, \http://example.com, \https://example.com, \www.example.com
Last punctuation before lowercase (1): `noreply@example.com;
(2) `http://example.com;
(3) `https://example.com;
(4) `www.example.com; (broken up to prevent code from forming)
First lowercase: anoreply@example.com, ahttp://example.com, ahttps://example.com, awww.example.com
First punctuation after lowercase: {noreply@example.com, {http://example.com, {https://example.com, {www.example.com
Last punctuation: ~noreply@example.com, ~http://example.com, ~https://example.com, ~www.example.com
First non-ASCII unicode whitespace (0x80): noreply@example.com, http://example.com, https://example.com, www.example.com
Last non-ASCII unicode whitespace (0x3000): noreply@example.com, http://example.com, https://example.com, www.example.com
First non-ASCII punctuation: ¡noreply@example.com, ¡http://example.com, ¡https://example.com, ¡www.example.com
Last non-ASCII punctuation: ・noreply@example.com, ・http://example.com, ・https://example.com, ・www.example.com
Some non-ascii: 中noreply@example.com, 中http://example.com, 中https://example.com, 中www.example.com
Some more non-ascii: 🤷noreply@example.com, 🤷http://example.com, 🤷https://example.com, 🤷www.example.com
"###, "should match previous (complex) like GitHub does" ); assert_eq!( to_html_with_options( r###"# HTTP https://a.b can start after EOF Can start after EOL: https://a.b Can start after tab: https://a.b. Can start after space: https://a.b. Can start after left paren (https://a.b. Can start after asterisk *https://a.b. Can start after underscore *_https://a.b. Can start after tilde ~https://a.b. # www www.a.b can start after EOF Can start after EOL: www.a.b Can start after tab: www.a.b. Can start after space: www.a.b. Can start after left paren (www.a.b. Can start after asterisk *www.a.b. Can start after underscore *_www.a.b. Can start after tilde ~www.a.b. # Email ## Correct character before a@b.c can start after EOF Can start after EOL: a@b.c Can start after tab: a@b.c. Can start after space: a@b.c. Can start after left paren(a@b.c. Can start after asterisk*a@b.c. While theoretically it’s possible to start at an underscore, that underscore is part of the email, so it’s in fact part of the link: _a@b.c. Can start after tilde~a@b.c. ## Others characters before While other characters before the email aren’t allowed by GFM, they work on github.com: !a@b.c, "a@b.c, #a@b.c, $a@b.c, &a@b.c, 'a@b.c, )a@b.c, +a@b.c, ,a@b.c, -a@b.c, .a@b.c, /a@b.c, :a@b.c, ;a@b.c, a@b.c, ?a@b.c, @a@b.c, \a@b.c, ]a@b.c, ^a@b.c, `a@b.c, {a@b.c, }a@b.c. ## Commas See `https://github.com/remarkjs/remark/discussions/678`. ,https://github.com [ ,https://github.com [asd] ,https://github.com "###, &Options::gfm() )?, r###"https://a.b can start after EOF
Can start after EOL: https://a.b
Can start after tab: https://a.b.
Can start after space: https://a.b.
Can start after left paren (https://a.b.
Can start after asterisk *https://a.b.
Can start after underscore *_https://a.b.
Can start after tilde ~https://a.b.
www.a.b can start after EOF
Can start after EOL: www.a.b
Can start after tab: www.a.b.
Can start after space: www.a.b.
Can start after left paren (www.a.b.
Can start after asterisk *www.a.b.
Can start after underscore *_www.a.b.
Can start after tilde ~www.a.b.
a@b.c can start after EOF
Can start after EOL: a@b.c
Can start after tab: a@b.c.
Can start after space: a@b.c.
Can start after left paren(a@b.c.
Can start after asterisk*a@b.c.
While theoretically it’s possible to start at an underscore, that underscore is part of the email, so it’s in fact part of the link: _a@b.c.
Can start after tilde~a@b.c.
While other characters before the email aren’t allowed by GFM, they work on github.com: !a@b.c, "a@b.c, #a@b.c, $a@b.c, &a@b.c, 'a@b.c, )a@b.c, +a@b.c, ,a@b.c, -a@b.c, .a@b.c, /a@b.c, :a@b.c, ;a@b.c, <a@b.c, =a@b.c, >a@b.c, ?a@b.c, @a@b.c, \a@b.c, ]a@b.c, ^a@b.c, `a@b.c, {a@b.c, }a@b.c.
See https://github.com/remarkjs/remark/discussions/678.
[asd] ,https://github.com
"###, "should match previous like GitHub does" ); assert_eq!( to_html_with_options( r###"# wwwtf 2? www.a (space) www.a! www.a" www.a# www.a$ www.a% www.a& www.a' www.a( www.a) www.a* www.a+ www.a, www.a- www.a www.a. www.a/ www.a: www.a; www.a< www.a= www.a> www.a? www.a@ www.a[ www.a\ www.a] www.a^ www.a_ www.a` www.a{ www.a| www.a} www.a~ "###, &Options::gfm() )?, r###"www.a (space)
"###, "should match www (domain continue) like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"# wwwtf 5? www.a. (space) www.a.! www.a." www.a.# www.a.$ www.a.% www.a.& www.a.' www.a.( www.a.) www.a.* www.a.+ www.a., www.a.- www.a. www.a.. www.a./ www.a.: www.a.; www.a.< www.a.= www.a.> www.a.? www.a.@ www.a.[ www.a.\ www.a.] www.a.^ www.a._ www.a.` www.a.{ www.a.| www.a.} www.a.~ "###, &Options::gfm() )?, r###"www.a. (space)
www.a.!
www.a."
www.a.'
www.a.)
www.a.*
www.a.,
www.a..
www.a.:
www.a.;
www.a.<
www.a.?
www.a.]
www.a._
www.a.~
"###, "should match www (domain dot) like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"# wwwtf? www. (space) www.! www." www.# www.$ www.% www.& www.' www.( www.) www.* www.+ www., www.- www. www.. www./ www.: www.; www.< www.= www.> www.? www.@ www.[ www.\ www.] www.^ www._ www.` www.{ www.| www.} www.~ "###, &Options::gfm() )?, r###"www. (space)
www.!
www."
www.'
www.)
www.*
www.,
www.
www..
www.:
www.;
www.<
www.?
www.]
www._
www.~
"###, "should match www (domain start) like GitHub does" ); assert_eq!( to_html_with_options( r###"# wwwtf? (4) www.a/b (space) www.a/b! www.a/b" www.a/b# www.a/b$ www.a/b% www.a/b& www.a/b' www.a/b( www.a/b) www.a/b* www.a/b+ www.a/b, www.a/b- www.a/b www.a/b. www.a/b/ www.a/b: www.a/b; www.a/b< www.a/b= www.a/b> www.a/b? www.a/b@ www.a/b[ www.a/b\ www.a/b] www.a/b^ www.a/b_ www.a/b` www.a/b{ www.a/b| www.a/b} www.a/b~ "###, &Options::gfm() )?, r###"www.a/b (space)
"###, "should match www (path continue) like GitHub does (except for the bracket bug)" ); assert_eq!( to_html_with_options( r###"# wwwtf? (3) www.a/ (space) www.a/! www.a/" www.a/# www.a/$ www.a/% www.a/& www.a/' www.a/( www.a/) www.a/* www.a/+ www.a/, www.a/- www.a/ www.a/. www.a// www.a/: www.a/; www.a/< www.a/= www.a/> www.a/? www.a/@ www.a/[ www.a/\ www.a/] www.a/^ www.a/_ www.a/` www.a/{ www.a/| www.a/} www.a/~ "###, &Options::gfm() )?, r###"www.a/ (space)
"###, "should match www (path start) like GitHub does (except for the bracket bug)" ); assert_eq!( to_mdast( "a https://alpha.com b bravo@charlie.com c www.delta.com d xmpp:echo@foxtrot.com e mailto:golf@hotel.com f.", &ParseOptions::gfm() )?, Node::Root(Root { children: vec![Node::Paragraph(Paragraph { children: vec![ Node::Text(Text { value: "a ".into(), position: Some(Position::new(1, 1, 0, 1, 3, 2)) }), Node::Link(Link { url: "https://alpha.com".into(), title: None, children: vec![Node::Text(Text { value: "https://alpha.com".into(), position: Some(Position::new(1, 3, 2, 1, 20, 19)) }),], position: Some(Position::new(1, 3, 2, 1, 20, 19)) }), Node::Text(Text { value: " b ".into(), position: Some(Position::new(1, 20, 19, 1, 23, 22)) }), Node::Link(Link { url: "mailto:bravo@charlie.com".into(), title: None, children: vec![Node::Text(Text { value: "bravo@charlie.com".into(), position: Some(Position::new(1, 23, 22, 1, 40, 39)) }),], position: Some(Position::new(1, 23, 22, 1, 40, 39)) }), Node::Text(Text { value: " c ".into(), position: Some(Position::new(1, 40, 39, 1, 43, 42)) }), Node::Link(Link { url: "http://www.delta.com".into(), title: None, children: vec![Node::Text(Text { value: "www.delta.com".into(), position: Some(Position::new(1, 43, 42, 1, 56, 55)) }),], position: Some(Position::new(1, 43, 42, 1, 56, 55)) }), Node::Text(Text { value: " d ".into(), position: Some(Position::new(1, 56, 55, 1, 59, 58)) }), Node::Link(Link { url: "xmpp:echo@foxtrot.com".into(), title: None, children: vec![Node::Text(Text { value: "xmpp:echo@foxtrot.com".into(), position: Some(Position::new(1, 59, 58, 1, 80, 79)) }),], position: Some(Position::new(1, 59, 58, 1, 80, 79)) }), Node::Text(Text { value: " e ".into(), position: Some(Position::new(1, 80, 79, 1, 83, 82)) }), Node::Link(Link { url: "mailto:golf@hotel.com".into(), title: None, children: vec![Node::Text(Text { value: "mailto:golf@hotel.com".into(), position: Some(Position::new(1, 83, 82, 1, 104, 103)) }),], position: Some(Position::new(1, 83, 82, 1, 104, 103)) }), Node::Text(Text { value: " f.".into(), position: Some(Position::new(1, 104, 103, 1, 107, 106)) }) ], position: Some(Position::new(1, 1, 0, 1, 107, 106)) })], position: Some(Position::new(1, 1, 0, 1, 107, 106)) }), "should support GFM autolink literals as `Link`s in mdast" ); Ok(()) }