From 3d00bf57a225369120fd98bee36f65a541260da1 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 5 Sep 2022 15:03:24 +0200 Subject: Fix to implement GFM autolink literals exactly --- tests/gfm_autolink_literal.rs | 2442 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 2440 insertions(+), 2 deletions(-) (limited to 'tests') diff --git a/tests/gfm_autolink_literal.rs b/tests/gfm_autolink_literal.rs index 9551751..2e84e6d 100644 --- a/tests/gfm_autolink_literal.rs +++ b/tests/gfm_autolink_literal.rs @@ -41,6 +41,22 @@ fn gfm_autolink_literal() { "should support email urls if enabled" ); + assert_eq!( + micromark_with_options("[https://example.com](xxx)", &gfm), + "

https://example.com

", + "should not link protocol urls in links" + ); + assert_eq!( + micromark_with_options("[www.example.com](xxx)", &gfm), + "

www.example.com

", + "should not link www urls in links" + ); + assert_eq!( + micromark_with_options("[user@example.com](xxx)", &gfm), + "

user@example.com

", + "should not link email urls in links" + ); + assert_eq!( micromark_with_options("user@example.com", &gfm), "

user@example.com

", @@ -174,7 +190,7 @@ fn gfm_autolink_literal() { ); // Note: GH comments/issues/PRs do not link this, but Gists/readmes do. - // Fixing it would mean defiating from `cmark-gfm`: + // Fixing it would mean deviating from `cmark-gfm`: // Source: . // assert_eq!( // micromark_with_options(",www.example.com", &gfm), @@ -209,6 +225,55 @@ fn gfm_autolink_literal() { "should stop domains/paths at `<`" ); + assert_eq!( + micromark_with_options( + r###" +a www.example.com&xxx;b c + +a www.example.com&xxx;. b + +a www.example.com&xxxxxxxxx;. b + +a www.example.com&xxxxxxxxxx;. b + +a www.example.com&xxxxxxxxxxx;. b + +a www.example.com&xxx. b + +a www.example.com{. b + +a www.example.com&123. b + +a www.example.com&x. b + +a www.example.com. b + +a www.example.com&1. b + +a www.example.com&. b + +a www.example.com& b +"###, + &gfm + ), + r###"

a www.example.com&xxx;b c

+

a www.example.com&xxx;. b

+

a www.example.com&xxxxxxxxx;. b

+

a www.example.com&xxxxxxxxxx;. b

+

a www.example.com&xxxxxxxxxxx;. b

+

a www.example.com&xxx. b

+

a www.example.com&#123. b

+

a www.example.com&123. b

+

a www.example.com&x. b

+

a www.example.com&#1. b

+

a www.example.com&1. b

+

a www.example.com&. b

+

a www.example.com& b

+"###, + "should match “character references” like GitHub does" + ); + + // Note: this deviates from GFM, as is fixed. assert_eq!( micromark_with_options( r###" @@ -251,6 +316,2379 @@ fn gfm_autolink_literal() {

 https://example.com

 contact@example.com

"###, - "should interplay with brackets, links, and images" + "should match interplay with brackets, links, and images, like GitHub does (but without the bugs)" + ); + + assert_eq!( + micromark_with_options( + r###" +www.example.com/?=a(b)cccccc + +www.example.com/?=a(b(c)ccccc + +www.example.com/?=a(b(c)c)cccc + +www.example.com/?=a(b(c)c)c)ccc + +www.example.com/?q=a(business) + +www.example.com/?q=a(business))) + +(www.example.com/?q=a(business)) + +(www.example.com/?q=a(business) + +www.example.com/?q=a(business)". + +www.example.com/?q=a(business))) + +(www.example.com/?q=a(business))". + +(www.example.com/?q=a(business)".) + +(www.example.com/?q=a(business)". +"###, + &gfm + ), + r###"

www.example.com/?=a(b)cccccc

+

www.example.com/?=a(b(c)ccccc

+

www.example.com/?=a(b(c)c)cccc

+

www.example.com/?=a(b(c)c)c)ccc

+

www.example.com/?q=a(business)

+

www.example.com/?q=a(business)))

+

(www.example.com/?q=a(business))

+

(www.example.com/?q=a(business)

+

www.example.com/?q=a(business)".

+

www.example.com/?q=a(business)))

+

(www.example.com/?q=a(business))".

+

(www.example.com/?q=a(business)".)

+

(www.example.com/?q=a(business)".

+"###, + "should match parens like GitHub does" + ); + + // Note: this deviates from GFM. + // Here, the following issues are fixed: + // - + assert_eq!( + micromark_with_options( + r###" +# Literal autolinks + +## WWW autolinks + +w.commonmark.org + +ww.commonmark.org + +www.commonmark.org + +Www.commonmark.org + +wWw.commonmark.org + +wwW.commonmark.org + +WWW.COMMONMARK.ORG + +Visit www.commonmark.org/help for more information. + +Visit www.commonmark.org. + +Visit www.commonmark.org/a.b. + +www.aaa.bbb.ccc_ccc + +www.aaa_bbb.ccc + +www.aaa.bbb.ccc.ddd_ddd + +www.aaa.bbb.ccc_ccc.ddd + +www.aaa.bbb_bbb.ccc.ddd + +www.aaa_aaa.bbb.ccc.ddd + +Visit www.commonmark.org. + +Visit www.commonmark.org/a.b. + +www.google.com/search?q=Markup+(business) + +www.google.com/search?q=Markup+(business))) + +(www.google.com/search?q=Markup+(business)) + +(www.google.com/search?q=Markup+(business) + +www.google.com/search?q=(business))+ok + +www.google.com/search?q=commonmark&hl=en + +www.google.com/search?q=commonmark&hl;en + +www.google.com/search?q=commonmark&hl; + +www.commonmark.org/he should still be expanded. +"###, + &gfm + ), + r###"

Literal autolinks

+

WWW autolinks

+

w.commonmark.org

+

ww.commonmark.org

+

www.commonmark.org

+

Www.commonmark.org

+

wWw.commonmark.org

+

wwW.commonmark.org

+

WWW.COMMONMARK.ORG

+

Visit www.commonmark.org/help for more information.

+

Visit www.commonmark.org.

+

Visit www.commonmark.org/a.b.

+

www.aaa.bbb.ccc_ccc

+

www.aaa_bbb.ccc

+

www.aaa.bbb.ccc.ddd_ddd

+

www.aaa.bbb.ccc_ccc.ddd

+

www.aaa.bbb_bbb.ccc.ddd

+

www.aaa_aaa.bbb.ccc.ddd

+

Visit www.commonmark.org.

+

Visit www.commonmark.org/a.b.

+

www.google.com/search?q=Markup+(business)

+

www.google.com/search?q=Markup+(business)))

+

(www.google.com/search?q=Markup+(business))

+

(www.google.com/search?q=Markup+(business)

+

www.google.com/search?q=(business))+ok

+

www.google.com/search?q=commonmark&hl=en

+

www.google.com/search?q=commonmark&hl;en

+

www.google.com/search?q=commonmark&hl;

+

www.commonmark.org/he<lp

+

HTTP autolinks

+

hexample.com

+

htexample.com

+

httexample.com

+

httpexample.com

+

http:example.com

+

http:/example.com

+

https:/example.com

+

http://example.com

+

https://example.com

+

https://example

+

http://commonmark.org

+

(Visit https://encrypted.google.com/search?q=Markup+(business))

+

Email autolinks

+

No dot: foo@barbaz

+

No dot: foo@barbaz.

+

foo@bar.baz

+

hello@mail+xyz.example isn’t valid, but hello+xyz@mail.example is.

+

a.b-c_d@a.b

+

a.b-c_d@a.b.

+

a.b-c_d@a.b-

+

a.b-c_d@a.b_

+

a@a_b.c

+

a@a-b.c

+

Can’t end in an underscore followed by a period: aaa@a.b_.

+

Can contain an underscore followed by a period: aaa@a.b_.c

+

Link text should not be expanded

+

Visit www.example.com please.

+

Visit http://www.example.com please.

+

Mail example@example.com please.

+

link http://autolink should still be expanded.

+"###, + "should match base like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"H0. + +[https://a.com©b + +[www.a.com©b + +H1. + +[]https://a.com©b + +[]www.a.com©b + +H2. + +[] https://a.com©b + +[] www.a.com©b + +H3. + +[[https://a.com©b + +[[www.a.com©b + +H4. + +[[]https://a.com©b + +[[]www.a.com©b + +H5. + +[[]]https://a.com©b + +[[]]www.a.com©b +"###, + &gfm + ), + r###"

H0.

+

[https://a.com&copy;b

+

[www.a.com&copy;b

+

H1.

+

[]https://a.com&copy;b

+

[]www.a.com&copy;b

+

H2.

+

[] https://a.com&copy;b

+

[] www.a.com&copy;b

+

H3.

+

[[https://a.com&copy;b

+

[[www.a.com&copy;b

+

H4.

+

[[]https://a.com&copy;b

+

[[]www.a.com&copy;b

+

H5.

+

[[]]https://a.com&copy;b

+

[[]]www.a.com&copy;b

+"###, + "should match brackets like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options(r###"Image start. + +![https://a.com + +![http://a.com + +![www.a.com + +![a@b.c + +Image start and label end. + +![https://a.com] + +![http://a.com] + +![www.a.com] + +![a@b.c] + +Image label with reference (note: GH cleans hashes here, but we keep them in). + +![https://a.com][x] + +![http://a.com][x] + +![www.a.com][x] + +![a@b.c][x] + +[x]: # + +Image label with resource. + +![https://a.com]() + +![http://a.com]() + +![www.a.com]() + +![a@b.c]() + +Autolink literal after image. + +![a]() https://a.com + +![a]() http://a.com + +![a]() www.a.com + +![a]() a@b.c +"###, &gfm), + r###"

Image start.

+

![https://a.com

+

![http://a.com

+

![www.a.com

+

![a@b.c

+

Image start and label end.

+

![https://a.com]

+

![http://a.com]

+

![www.a.com]

+

![a@b.c]

+

Image label with reference (note: GH cleans hashes here, but we keep them in).

+

https://a.com

+

http://a.com

+

www.a.com

+

a@b.c

+

Image label with resource.

+

https://a.com

+

http://a.com

+

www.a.com

+

a@b.c

+

Autolink literal after image.

+

a https://a.com

+

a http://a.com

+

a www.a.com

+

a a@b.c

+"###, + "should match autolink literals combined w/ images like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options(r###"Link start. + +[https://a.com + +[http://a.com + +[www.a.com + +[a@b.c + +Label end. + +https://a.com] + +http://a.com] + +www.a.com] + +a@b.c] + +Link start and label end. + +[https://a.com] + +[http://a.com] + +[www.a.com] + +[a@b.c] + +What naïvely seems like a label end (A). + +https://a.com`]` + +http://a.com`]` + +www.a.com`]` + +a@b.c`]` + +Link start and what naïvely seems like a balanced brace (B). + +[https://a.com`]` + +[http://a.com`]` + +[www.a.com`]` + +[a@b.c`]` + +What naïvely seems like a label end (C). + +https://a.com `]` + +http://a.com `]` + +www.a.com `]` + +a@b.c `]` + +Link start and what naïvely seems like a balanced brace (D). + +[https://a.com `]` + +[http://a.com `]` + +[www.a.com `]` + +[a@b.c `]` + +Link label with reference. + +[https://a.com][x] + +[http://a.com][x] + +[www.a.com][x] + +[a@b.c][x] + +[x]: # + +Link label with resource. + +[https://a.com]() + +[http://a.com]() + +[www.a.com]() + +[a@b.c]() + +More in link. + +[a https://b.com c]() + +[a http://b.com c]() + +[a www.b.com c]() + +[a b@c.d e]() + +Autolink literal after link. + +[a]() https://a.com + +[a]() http://a.com + +[a]() www.a.com + +[a]() a@b.c +"###, &gfm), + r###"

Link start.

+

[https://a.com

+

[http://a.com

+

[www.a.com

+

[a@b.c

+

Label end.

+

https://a.com]

+

http://a.com]

+

www.a.com]

+

a@b.c]

+

Link start and label end.

+

[https://a.com]

+

[http://a.com]

+

[www.a.com]

+

[a@b.c]

+

What naïvely seems like a label end (A).

+

https://a.com`]`

+

http://a.com`]`

+

www.a.com`]`

+

a@b.c]

+

Link start and what naïvely seems like a balanced brace (B).

+

[https://a.com`]`

+

[http://a.com`]`

+

[www.a.com`]`

+

[a@b.c]

+

What naïvely seems like a label end (C).

+

https://a.com ]

+

http://a.com ]

+

www.a.com ]

+

a@b.c ]

+

Link start and what naïvely seems like a balanced brace (D).

+

[https://a.com ]

+

[http://a.com ]

+

[www.a.com ]

+

[a@b.c ]

+

Link label with reference.

+

https://a.com

+

http://a.com

+

www.a.com

+

a@b.c

+

Link label with resource.

+

https://a.com

+

http://a.com

+

www.a.com

+

a@b.c

+

More in link.

+

a https://b.com c

+

a http://b.com c

+

a www.b.com c

+

a b@c.d e

+

Autolink literal after link.

+

a https://a.com

+

a http://a.com

+

a www.a.com

+

a a@b.c

+"###, + "should match autolink literals combined w/ links like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options( + r###"# “character reference” + +www.a&b (space) + +www.a&b! + +www.a&b" + +www.a&b# + +www.a&b$ + +www.a&b% + +www.a&b& + +www.a&b' + +www.a&b( + +www.a&b) + +www.a&b* + +www.a&b+ + +www.a&b, + +www.a&b- + +www.a&b + +www.a&b. + +www.a&b/ + +www.a&b: + +www.a&b; + +www.a&b< + +www.a&b= + +www.a&b> + +www.a&b? + +www.a&b@ + +www.a&b[ + +www.a&b\ + +www.a&b] + +www.a&b^ + +www.a&b_ + +www.a&b` + +www.a&b{ + +www.a&b| + +www.a&b} + +www.a&b~ +"###, + &gfm + ), + r###"

“character reference”

+

www.a&b (space)

+

www.a&b!

+

www.a&b"

+

www.a&b#

+

www.a&b$

+

www.a&b%

+

www.a&b&

+

www.a&b'

+

www.a&b(

+

www.a&b)

+

www.a&b*

+

www.a&b+

+

www.a&b,

+

www.a&b-

+

www.a&b

+

www.a&b.

+

www.a&b/

+

www.a&b:

+

www.a&b;

+

www.a&b<

+

www.a&b=

+

www.a&b>

+

www.a&b?

+

www.a&b@

+

www.a&b[

+

www.a&b\

+

www.a&b]

+

www.a&b^

+

www.a&b_

+

www.a&b`

+

www.a&b{

+

www.a&b|

+

www.a&b}

+

www.a&b~

+"###, + "should match “character references (named)” like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options(r###"# “character reference” + +www.a# (space) + +www.a#! + +www.a#" + +www.a## + +www.a#$ + +www.a#% + +www.a#& + +www.a#' + +www.a#( + +www.a#) + +www.a#* + +www.a#+ + +www.a#, + +www.a#- + +www.a# + +www.a#. + +www.a#/ + +www.a#: + +www.a# + +www.a#< + +www.a#= + +www.a#> + +www.a#? + +www.a#@ + +www.a#[ + +www.a#\ + +www.a#] + +www.a#^ + +www.a#_ + +www.a#` + +www.a#{ + +www.a#| + +www.a#} + +www.a#~ +"###, &gfm), + r###"

“character reference”

+

www.a&#35 (space)

+

www.a&#35!

+

www.a&#35"

+

www.a&#35#

+

www.a&#35$

+

www.a&#35%

+

www.a&#35&

+

www.a&#35'

+

www.a&#35(

+

www.a&#35)

+

www.a&#35*

+

www.a&#35+

+

www.a&#35,

+

www.a&#35-

+

www.a&#35

+

www.a&#35.

+

www.a&#35/

+

www.a&#35:

+

www.a&#35;

+

www.a&#35<

+

www.a&#35=

+

www.a&#35>

+

www.a&#35?

+

www.a&#35@

+

www.a&#35[

+

www.a&#35\

+

www.a&#35]

+

www.a&#35^

+

www.a&#35_

+

www.a&#35`

+

www.a&#35{

+

www.a&#35|

+

www.a&#35}

+

www.a&#35~

+"###, + "should match “character references (numeric)” like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options( + r###"a@0.0 + +a@0.b + +a@a.29 + +a@a.b + +a@0.0.c + +react@0.11.1 + +react@0.12.0-rc1 + +react@0.14.0-alpha1 + +react@16.7.0-alpha.2 + +react@0.0.0-experimental-aae83a4b9 + +[ react@0.11.1 + +[ react@0.12.0-rc1 + +[ react@0.14.0-alpha1 + +[ react@16.7.0-alpha.2 + +[ react@0.0.0-experimental-aae83a4b9 +"###, + &gfm + ), + r###"

a@0.0

+

a@0.b

+

a@a.29

+

a@a.b

+

a@0.0.c

+

react@0.11.1

+

react@0.12.0-rc1

+

react@0.14.0-alpha1

+

react@16.7.0-alpha.2

+

react@0.0.0-experimental-aae83a4b9

+

[ react@0.11.1

+

[ react@0.12.0-rc1

+

[ react@0.14.0-alpha1

+

[ react@16.7.0-alpha.2

+

[ react@0.0.0-experimental-aae83a4b9

+"###, + "should match email TLD digits like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# httpshhh? (2) + +http://a (space) + +http://a! + +http://a" + +http://a# + +http://a$ + +http://a% + +http://a& + +http://a' + +http://a( + +http://a) + +http://a* + +http://a+ + +http://a, + +http://a- + +http://a + +http://a. + +http://a/ + +http://a: + +http://a; + +http://a< + +http://a= + +http://a> + +http://a? + +http://a@ + +http://a[ + +http://a\ + +http://a] + +http://a^ + +http://a_ + +http://a` + +http://a{ + +http://a| + +http://a} + +http://a~ +"###, + &gfm + ), + r###"

httpshhh? (2)

+

http://a (space)

+

http://a!

+

http://a"

+

http://a#

+

http://a$

+

http://a%

+

http://a&

+

http://a'

+

http://a(

+

http://a)

+

http://a*

+

http://a+

+

http://a,

+

http://a-

+

http://a

+

http://a.

+

http://a/

+

http://a:

+

http://a;

+

http://a<

+

http://a=

+

http://a>

+

http://a?

+

http://a@

+

http://a[

+

http://a\

+

http://a]

+

http://a^

+

http://a_

+

http://a`

+

http://a{

+

http://a|

+

http://a}

+

http://a~

+"###, + "should match protocol domain continue like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# httpshhh? (1) + +http:// (space) + +http://! + +http://" + +http://# + +http://$ + +http://% + +http://& + +http://' + +http://( + +http://) + +http://* + +http://+ + +http://, + +http://- + +http:// + +http://. + +http:/// + +http://: + +http://; + +http://< + +http://= + +http://> + +http://? + +http://@ + +http://[ + +http://\ + +http://] + +http://^ + +http://_ + +http://` + +http://{ + +http://| + +http://} + +http://~ +"###, + &gfm + ), + r###"

httpshhh? (1)

+

http:// (space)

+

http://!

+

http://"

+

http://#

+

http://$

+

http://%

+

http://&

+

http://'

+

http://(

+

http://)

+

http://*

+

http://+

+

http://,

+

http://-

+

http://

+

http://.

+

http:///

+

http://:

+

http://;

+

http://<

+

http://=

+

http://>

+

http://?

+

http://@

+

http://[

+

http://\

+

http://]

+

http://^

+

http://_

+

http://`

+

http://{

+

http://|

+

http://}

+

http://~

+"###, + "should match protocol domain start like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# httpshhh? (4) + +http://a/b (space) + +http://a/b! + +http://a/b" + +http://a/b# + +http://a/b$ + +http://a/b% + +http://a/b& + +http://a/b' + +http://a/b( + +http://a/b) + +http://a/b* + +http://a/b+ + +http://a/b, + +http://a/b- + +http://a/b + +http://a/b. + +http://a/b/ + +http://a/b: + +http://a/b; + +http://a/b< + +http://a/b= + +http://a/b> + +http://a/b? + +http://a/b@ + +http://a/b[ + +http://a/b\ + +http://a/b] + +http://a/b^ + +http://a/b_ + +http://a/b` + +http://a/b{ + +http://a/b| + +http://a/b} + +http://a/b~ +"###, + &gfm + ), + r###"

httpshhh? (4)

+

http://a/b (space)

+

http://a/b!

+

http://a/b"

+

http://a/b#

+

http://a/b$

+

http://a/b%

+

http://a/b&

+

http://a/b'

+

http://a/b(

+

http://a/b)

+

http://a/b*

+

http://a/b+

+

http://a/b,

+

http://a/b-

+

http://a/b

+

http://a/b.

+

http://a/b/

+

http://a/b:

+

http://a/b;

+

http://a/b<

+

http://a/b=

+

http://a/b>

+

http://a/b?

+

http://a/b@

+

http://a/b[

+

http://a/b\

+

http://a/b]

+

http://a/b^

+

http://a/b_

+

http://a/b`

+

http://a/b{

+

http://a/b|

+

http://a/b}

+

http://a/b~

+"###, + "should match protocol path continue like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# httpshhh? (3) + +http://a/ (space) + +http://a/! + +http://a/" + +http://a/# + +http://a/$ + +http://a/% + +http://a/& + +http://a/' + +http://a/( + +http://a/) + +http://a/* + +http://a/+ + +http://a/, + +http://a/- + +http://a/ + +http://a/. + +http://a// + +http://a/: + +http://a/; + +http://a/< + +http://a/= + +http://a/> + +http://a/? + +http://a/@ + +http://a/[ + +http://a/\ + +http://a/] + +http://a/^ + +http://a/_ + +http://a/` + +http://a/{ + +http://a/| + +http://a/} + +http://a/~ +"###, + &gfm + ), + r###"

httpshhh? (3)

+

http://a/ (space)

+

http://a/!

+

http://a/"

+

http://a/#

+

http://a/$

+

http://a/%

+

http://a/&

+

http://a/'

+

http://a/(

+

http://a/)

+

http://a/*

+

http://a/+

+

http://a/,

+

http://a/-

+

http://a/

+

http://a/.

+

http://a//

+

http://a/:

+

http://a/;

+

http://a/<

+

http://a/=

+

http://a/>

+

http://a/?

+

http://a/@

+

http://a/[

+

http://a/\

+

http://a/]

+

http://a/^

+

http://a/_

+

http://a/`

+

http://a/{

+

http://a/|

+

http://a/}

+

http://a/~

+"###, + "should match protocol path start like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"[www.example.com/a©](#) + +www.example.com/a© + +[www.example.com/a&bogus;](#) + +www.example.com/a&bogus; + +[www.example.com/a\.](#) + +www.example.com/a\. +"###, + &gfm + ), + r###"

www.example.com/a©

+

www.example.com/a©

+

www.example.com/a&bogus;

+

www.example.com/a&bogus;

+

www.example.com/a\.

+

www.example.com/a\.

+"###, + "should match links, autolink literals, and characters like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# “character reference” + +www.a/b&c (space) + +www.a/b&c! + +www.a/b&c" + +www.a/b&c# + +www.a/b&c$ + +www.a/b&c% + +www.a/b&c& + +www.a/b&c' + +www.a/b&c( + +www.a/b&c) + +www.a/b&c* + +www.a/b&c+ + +www.a/b&c, + +www.a/b&c- + +www.a/b&c + +www.a/b&c. + +www.a/b&c/ + +www.a/b&c: + +www.a/b&c; + +www.a/b&c< + +www.a/b&c= + +www.a/b&c> + +www.a/b&c? + +www.a/b&c@ + +www.a/b&c[ + +www.a/b&c\ + +www.a/b&c] + +www.a/b&c^ + +www.a/b&c_ + +www.a/b&c` + +www.a/b&c{ + +www.a/b&c| + +www.a/b&c} + +www.a/b&c~ +"###, + &gfm + ), + r###"

“character reference”

+

www.a/b&c (space)

+

www.a/b&c!

+

www.a/b&c"

+

www.a/b&c#

+

www.a/b&c$

+

www.a/b&c%

+

www.a/b&c&

+

www.a/b&c'

+

www.a/b&c(

+

www.a/b&c)

+

www.a/b&c*

+

www.a/b&c+

+

www.a/b&c,

+

www.a/b&c-

+

www.a/b&c

+

www.a/b&c.

+

www.a/b&c/

+

www.a/b&c:

+

www.a/b&c;

+

www.a/b&c<

+

www.a/b&c=

+

www.a/b&c>

+

www.a/b&c?

+

www.a/b&c@

+

www.a/b&c[

+

www.a/b&c\

+

www.a/b&c]

+

www.a/b&c^

+

www.a/b&c_

+

www.a/b&c`

+

www.a/b&c{

+

www.a/b&c|

+

www.a/b&c}

+

www.a/b&c~

+"###, + "should match character reference-like (named) things in paths like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# “character reference” + +www.a/b# (space) + +www.a/b#! + +www.a/b#" + +www.a/b## + +www.a/b#$ + +www.a/b#% + +www.a/b#& + +www.a/b#' + +www.a/b#( + +www.a/b#) + +www.a/b#* + +www.a/b#+ + +www.a/b#, + +www.a/b#- + +www.a/b# + +www.a/b#. + +www.a/b#/ + +www.a/b#: + +www.a/b# + +www.a/b#< + +www.a/b#= + +www.a/b#> + +www.a/b#? + +www.a/b#@ + +www.a/b#[ + +www.a/b#\ + +www.a/b#] + +www.a/b#^ + +www.a/b#_ + +www.a/b#` + +www.a/b#{ + +www.a/b#| + +www.a/b#} + +www.a/b#~ +"###, + &gfm + ), + r###"

“character reference”

+

www.a/b&#35 (space)

+

www.a/b&#35!

+

www.a/b&#35"

+

www.a/b&#35#

+

www.a/b&#35$

+

www.a/b&#35%

+

www.a/b&#35&

+

www.a/b&#35'

+

www.a/b&#35(

+

www.a/b&#35)

+

www.a/b&#35*

+

www.a/b&#35+

+

www.a/b&#35,

+

www.a/b&#35-

+

www.a/b&#35

+

www.a/b&#35.

+

www.a/b&#35/

+

www.a/b&#35:

+

www.a/b&#35;

+

www.a/b&#35<

+

www.a/b&#35=

+

www.a/b&#35>

+

www.a/b&#35?

+

www.a/b&#35@

+

www.a/b&#35[

+

www.a/b&#35\

+

www.a/b&#35]

+

www.a/b&#35^

+

www.a/b&#35_

+

www.a/b&#35`

+

www.a/b&#35{

+

www.a/b&#35|

+

www.a/b&#35}

+

www.a/b&#35~

+"###, + "should match character reference-like (numeric) things in paths like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"In autolink literal path or link end? + +[https://a.com/d]() + +[http://a.com/d]() + +[www.a.com/d]() + +https://a.com/d]() + +http://a.com/d]() + +www.a.com/d]() + +In autolink literal search or link end? + +[https://a.com?d]() + +[http://a.com?d]() + +[www.a.com?d]() + +https://a.com?d]() + +http://a.com?d]() + +www.a.com?d]() + +In autolink literal hash or link end? + +[https://a.com#d]() + +[http://a.com#d]() + +[www.a.com#d]() + +https://a.com#d]() + +http://a.com#d]() + +www.a.com#d]() +"###, + &gfm + ), + r###"

In autolink literal path or link end?

+

https://a.com/d

+

http://a.com/d

+

www.a.com/d

+

https://a.com/d]()

+

http://a.com/d]()

+

www.a.com/d]()

+

In autolink literal search or link end?

+

https://a.com?d

+

http://a.com?d

+

www.a.com?d

+

https://a.com?d]()

+

http://a.com?d]()

+

www.a.com?d]()

+

In autolink literal hash or link end?

+

https://a.com#d

+

http://a.com#d

+

www.a.com#d

+

https://a.com#d]()

+

http://a.com#d]()

+

www.a.com#d]()

+"###, + "should match path or link end like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options( + r###"Last non-markdown ASCII whitespace (FF): noreply@example.com, http://example.com, https://example.com, www.example.com + +Last non-whitespace ASCII control (US): noreply@example.com, http://example.com, https://example.com, www.example.com + +First punctuation after controls: !noreply@example.com, !http://example.com, !https://example.com, !www.example.com + +Last punctuation before digits: /noreply@example.com, /http://example.com, /https://example.com, /www.example.com + +First digit: 0noreply@example.com, 0http://example.com, 0https://example.com, 0www.example.com + +First punctuation after digits: :noreply@example.com, :http://example.com, :https://example.com, :www.example.com + +Last punctuation before caps: @noreply@example.com, @http://example.com, @https://example.com, @www.example.com + +First uppercase: Anoreply@example.com, Ahttp://example.com, Ahttps://example.com, Awww.example.com + +Punctuation after uppercase: \noreply@example.com, \http://example.com, \https://example.com, \www.example.com + +Last punctuation before lowercase (1): `noreply@example.com; + +(2) `http://example.com; + +(3) `https://example.com; + +(4) `www.example.com; (broken up to prevent code from forming) + +First lowercase: anoreply@example.com, ahttp://example.com, ahttps://example.com, awww.example.com + +First punctuation after lowercase: {noreply@example.com, {http://example.com, {https://example.com, {www.example.com + +Last punctuation: ~noreply@example.com, ~http://example.com, ~https://example.com, ~www.example.com + +First non-ASCII unicode whitespace (0x80): …noreply@example.com, …http://example.com, …https://example.com, …www.example.com + +Last non-ASCII unicode whitespace (0x3000):  noreply@example.com,  http://example.com,  https://example.com,  www.example.com + +First non-ASCII punctuation: ¡noreply@example.com, ¡http://example.com, ¡https://example.com, ¡www.example.com + +Last non-ASCII punctuation: ・noreply@example.com, ・http://example.com, ・https://example.com, ・www.example.com + +Some non-ascii: 中noreply@example.com, 中http://example.com, 中https://example.com, 中www.example.com + +Some more non-ascii: 🤷‍noreply@example.com, 🤷‍http://example.com, 🤷‍https://example.com, 🤷‍www.example.com +"###, + &gfm + ), + r###"

Last non-markdown ASCII whitespace (FF): noreply@example.com, http://example.com, https://example.com, www.example.com

+

Last non-whitespace ASCII control (US): noreply@example.com, http://example.com, https://example.com, www.example.com

+

First punctuation after controls: !noreply@example.com, !http://example.com, !https://example.com, !www.example.com

+

Last punctuation before digits: /noreply@example.com, /http://example.com, /https://example.com, /www.example.com

+

First digit: 0noreply@example.com, 0http://example.com, 0https://example.com, 0www.example.com

+

First punctuation after digits: :noreply@example.com, :http://example.com, :https://example.com, :www.example.com

+

Last punctuation before caps: @noreply@example.com, @http://example.com, @https://example.com, @www.example.com

+

First uppercase: Anoreply@example.com, Ahttp://example.com, Ahttps://example.com, Awww.example.com

+

Punctuation after uppercase: \noreply@example.com, \http://example.com, \https://example.com, \www.example.com

+

Last punctuation before lowercase (1): `noreply@example.com;

+

(2) `http://example.com;

+

(3) `https://example.com;

+

(4) `www.example.com; (broken up to prevent code from forming)

+

First lowercase: anoreply@example.com, ahttp://example.com, ahttps://example.com, awww.example.com

+

First punctuation after lowercase: {noreply@example.com, {http://example.com, {https://example.com, {www.example.com

+

Last punctuation: ~noreply@example.com, ~http://example.com, ~https://example.com, ~www.example.com

+

First non-ASCII unicode whitespace (0x80): …noreply@example.com, …http://example.com, …https://example.com, …www.example.com

+

Last non-ASCII unicode whitespace (0x3000):  noreply@example.com,  http://example.com,  https://example.com,  www.example.com

+

First non-ASCII punctuation: ¡noreply@example.com, ¡http://example.com, ¡https://example.com, ¡www.example.com

+

Last non-ASCII punctuation: ・noreply@example.com, ・http://example.com, ・https://example.com, ・www.example.com

+

Some non-ascii: 中noreply@example.com, 中http://example.com, 中https://example.com, 中www.example.com

+

Some more non-ascii: 🤷‍noreply@example.com, 🤷‍http://example.com, 🤷‍https://example.com, 🤷‍www.example.com

+"###, + "should match previous (complex) like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# HTTP + +https://a.b can start after EOF + +Can start after EOL: +https://a.b + +Can start after tab: https://a.b. + +Can start after space: https://a.b. + +Can start after left paren (https://a.b. + +Can start after asterisk *https://a.b. + +Can start after underscore *_https://a.b. + +Can start after tilde ~https://a.b. + +# www + +www.a.b can start after EOF + +Can start after EOL: +www.a.b + +Can start after tab: www.a.b. + +Can start after space: www.a.b. + +Can start after left paren (www.a.b. + +Can start after asterisk *www.a.b. + +Can start after underscore *_www.a.b. + +Can start after tilde ~www.a.b. + +# Email + +## Correct character before + +a@b.c can start after EOF + +Can start after EOL: +a@b.c + +Can start after tab: a@b.c. + +Can start after space: a@b.c. + +Can start after left paren(a@b.c. + +Can start after asterisk*a@b.c. + +While theoretically it’s possible to start at an underscore, that underscore +is part of the email, so it’s in fact part of the link: _a@b.c. + +Can start after tilde~a@b.c. + +## Others characters before + +While other characters before the email aren’t allowed by GFM, they work on +github.com: !a@b.c, "a@b.c, #a@b.c, $a@b.c, &a@b.c, 'a@b.c, )a@b.c, +a@b.c, +,a@b.c, -a@b.c, .a@b.c, /a@b.c, :a@b.c, ;a@b.c, a@b.c, ?a@b.c, +@a@b.c, \a@b.c, ]a@b.c, ^a@b.c, `a@b.c, {a@b.c, }a@b.c. + +## Commas + +See `https://github.com/remarkjs/remark/discussions/678`. + +,https://github.com + +[ ,https://github.com + +[asd] ,https://github.com +"###, + &gfm + ), + r###"

HTTP

+

https://a.b can start after EOF

+

Can start after EOL: +https://a.b

+

Can start after tab: https://a.b.

+

Can start after space: https://a.b.

+

Can start after left paren (https://a.b.

+

Can start after asterisk *https://a.b.

+

Can start after underscore *_https://a.b.

+

Can start after tilde ~https://a.b.

+

www

+

www.a.b can start after EOF

+

Can start after EOL: +www.a.b

+

Can start after tab: www.a.b.

+

Can start after space: www.a.b.

+

Can start after left paren (www.a.b.

+

Can start after asterisk *www.a.b.

+

Can start after underscore *_www.a.b.

+

Can start after tilde ~www.a.b.

+

Email

+

Correct character before

+

a@b.c can start after EOF

+

Can start after EOL: +a@b.c

+

Can start after tab: a@b.c.

+

Can start after space: a@b.c.

+

Can start after left paren(a@b.c.

+

Can start after asterisk*a@b.c.

+

While theoretically it’s possible to start at an underscore, that underscore +is part of the email, so it’s in fact part of the link: _a@b.c.

+

Can start after tilde~a@b.c.

+

Others characters before

+

While other characters before the email aren’t allowed by GFM, they work on +github.com: !a@b.c, "a@b.c, #a@b.c, $a@b.c, &a@b.c, 'a@b.c, )a@b.c, +a@b.c, +,a@b.c, -a@b.c, .a@b.c, /a@b.c, :a@b.c, ;a@b.c, <a@b.c, =a@b.c, >a@b.c, ?a@b.c, +@a@b.c, \a@b.c, ]a@b.c, ^a@b.c, `a@b.c, {a@b.c, }a@b.c.

+

Commas

+

See https://github.com/remarkjs/remark/discussions/678.

+

,https://github.com

+

[ ,https://github.com

+

[asd] ,https://github.com

+"###, + "should match previous like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# wwwtf 2? + +www.a (space) + +www.a! + +www.a" + +www.a# + +www.a$ + +www.a% + +www.a& + +www.a' + +www.a( + +www.a) + +www.a* + +www.a+ + +www.a, + +www.a- + +www.a + +www.a. + +www.a/ + +www.a: + +www.a; + +www.a< + +www.a= + +www.a> + +www.a? + +www.a@ + +www.a[ + +www.a\ + +www.a] + +www.a^ + +www.a_ + +www.a` + +www.a{ + +www.a| + +www.a} + +www.a~ +"###, + &gfm + ), + r###"

wwwtf 2?

+

www.a (space)

+

www.a!

+

www.a"

+

www.a#

+

www.a$

+

www.a%

+

www.a&

+

www.a'

+

www.a(

+

www.a)

+

www.a*

+

www.a+

+

www.a,

+

www.a-

+

www.a

+

www.a.

+

www.a/

+

www.a:

+

www.a;

+

www.a<

+

www.a=

+

www.a>

+

www.a?

+

www.a@

+

www.a[

+

www.a\

+

www.a]

+

www.a^

+

www.a_

+

www.a`

+

www.a{

+

www.a|

+

www.a}

+

www.a~

+"###, + "should match www (domain continue) like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options( + r###"# wwwtf 5? + +www.a. (space) + +www.a.! + +www.a." + +www.a.# + +www.a.$ + +www.a.% + +www.a.& + +www.a.' + +www.a.( + +www.a.) + +www.a.* + +www.a.+ + +www.a., + +www.a.- + +www.a. + +www.a.. + +www.a./ + +www.a.: + +www.a.; + +www.a.< + +www.a.= + +www.a.> + +www.a.? + +www.a.@ + +www.a.[ + +www.a.\ + +www.a.] + +www.a.^ + +www.a._ + +www.a.` + +www.a.{ + +www.a.| + +www.a.} + +www.a.~ +"###, + &gfm + ), + r###"

wwwtf 5?

+

www.a. (space)

+

www.a.!

+

www.a."

+

www.a.#

+

www.a.$

+

www.a.%

+

www.a.&

+

www.a.'

+

www.a.(

+

www.a.)

+

www.a.*

+

www.a.+

+

www.a.,

+

www.a.-

+

www.a.

+

www.a..

+

www.a./

+

www.a.:

+

www.a.;

+

www.a.<

+

www.a.=

+

www.a.>

+

www.a.?

+

www.a.@

+

www.a.[

+

www.a.\

+

www.a.]

+

www.a.^

+

www.a._

+

www.a.`

+

www.a.{

+

www.a.|

+

www.a.}

+

www.a.~

+"###, + "should match www (domain dot) like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options( + r###"# wwwtf? + +www. (space) + +www.! + +www." + +www.# + +www.$ + +www.% + +www.& + +www.' + +www.( + +www.) + +www.* + +www.+ + +www., + +www.- + +www. + +www.. + +www./ + +www.: + +www.; + +www.< + +www.= + +www.> + +www.? + +www.@ + +www.[ + +www.\ + +www.] + +www.^ + +www._ + +www.` + +www.{ + +www.| + +www.} + +www.~ +"###, + &gfm + ), + r###"

wwwtf?

+

www. (space)

+

www.!

+

www."

+

www.#

+

www.$

+

www.%

+

www.&

+

www.'

+

www.(

+

www.)

+

www.*

+

www.+

+

www.,

+

www.-

+

www.

+

www..

+

www./

+

www.:

+

www.;

+

www.<

+

www.=

+

www.>

+

www.?

+

www.@

+

www.[

+

www.\

+

www.]

+

www.^

+

www._

+

www.`

+

www.{

+

www.|

+

www.}

+

www.~

+"###, + "should match www (domain start) like GitHub does" + ); + + assert_eq!( + micromark_with_options( + r###"# wwwtf? (4) + +www.a/b (space) + +www.a/b! + +www.a/b" + +www.a/b# + +www.a/b$ + +www.a/b% + +www.a/b& + +www.a/b' + +www.a/b( + +www.a/b) + +www.a/b* + +www.a/b+ + +www.a/b, + +www.a/b- + +www.a/b + +www.a/b. + +www.a/b/ + +www.a/b: + +www.a/b; + +www.a/b< + +www.a/b= + +www.a/b> + +www.a/b? + +www.a/b@ + +www.a/b[ + +www.a/b\ + +www.a/b] + +www.a/b^ + +www.a/b_ + +www.a/b` + +www.a/b{ + +www.a/b| + +www.a/b} + +www.a/b~ +"###, + &gfm + ), + r###"

wwwtf? (4)

+

www.a/b (space)

+

www.a/b!

+

www.a/b"

+

www.a/b#

+

www.a/b$

+

www.a/b%

+

www.a/b&

+

www.a/b'

+

www.a/b(

+

www.a/b)

+

www.a/b*

+

www.a/b+

+

www.a/b,

+

www.a/b-

+

www.a/b

+

www.a/b.

+

www.a/b/

+

www.a/b:

+

www.a/b;

+

www.a/b<

+

www.a/b=

+

www.a/b>

+

www.a/b?

+

www.a/b@

+

www.a/b[

+

www.a/b\

+

www.a/b]

+

www.a/b^

+

www.a/b_

+

www.a/b`

+

www.a/b{

+

www.a/b|

+

www.a/b}

+

www.a/b~

+"###, + "should match www (path continue) like GitHub does (except for the bracket bug)" + ); + + assert_eq!( + micromark_with_options( + r###"# wwwtf? (3) + +www.a/ (space) + +www.a/! + +www.a/" + +www.a/# + +www.a/$ + +www.a/% + +www.a/& + +www.a/' + +www.a/( + +www.a/) + +www.a/* + +www.a/+ + +www.a/, + +www.a/- + +www.a/ + +www.a/. + +www.a// + +www.a/: + +www.a/; + +www.a/< + +www.a/= + +www.a/> + +www.a/? + +www.a/@ + +www.a/[ + +www.a/\ + +www.a/] + +www.a/^ + +www.a/_ + +www.a/` + +www.a/{ + +www.a/| + +www.a/} + +www.a/~ +"###, + &gfm + ), + r###"

wwwtf? (3)

+

www.a/ (space)

+

www.a/!

+

www.a/"

+

www.a/#

+

www.a/$

+

www.a/%

+

www.a/&

+

www.a/'

+

www.a/(

+

www.a/)

+

www.a/*

+

www.a/+

+

www.a/,

+

www.a/-

+

www.a/

+

www.a/.

+

www.a//

+

www.a/:

+

www.a/;

+

www.a/<

+

www.a/=

+

www.a/>

+

www.a/?

+

www.a/@

+

www.a/[

+

www.a/\

+

www.a/]

+

www.a/^

+

www.a/_

+

www.a/`

+

www.a/{

+

www.a/|

+

www.a/}

+

www.a/~

+"###, + "should match www (path start) like GitHub does (except for the bracket bug)" ); } -- cgit