From df5b39f8c1cb6b3f3a8a19c5a4722433e2ef1dff Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Wed, 15 Jun 2022 12:13:02 +0200 Subject: Add tests for character reference * Fix encoding of character references * Fix unneeded remainder in `nok` state --- tests/character_reference.rs | 204 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 204 insertions(+) create mode 100644 tests/character_reference.rs (limited to 'tests/character_reference.rs') diff --git a/tests/character_reference.rs b/tests/character_reference.rs new file mode 100644 index 0000000..5e71792 --- /dev/null +++ b/tests/character_reference.rs @@ -0,0 +1,204 @@ +extern crate micromark; +use micromark::{micromark, micromark_with_options, CompileOptions}; + +const DANGER: &CompileOptions = &CompileOptions { + allow_dangerous_html: true, + allow_dangerous_protocol: true, +}; + +#[test] +fn character_reference() { + assert_eq!( + micromark( + "  & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸" + ), + "

& © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸

", + "should support named character references" + ); + + assert_eq!( + micromark("# Ӓ Ϡ �"), + "

# Ӓ Ϡ �

", + "should support decimal character references" + ); + + assert_eq!( + micromark("" ആ ಫ"), + "

" ആ ಫ

", + "should support hexadecimal character references" + ); + + assert_eq!( + micromark( + "  &x; &#; &#x;\n�\n&#abcdef0;\n&ThisIsNotDefined; &hi?;" + ), + "

&nbsp &x; &#; &#x;\n�\n&#abcdef0;\n&ThisIsNotDefined; &hi?;

", + "should not support other things that look like character references" + ); + + assert_eq!( + micromark("©"), + "

&copy

", + "should not support character references w/o semicolon" + ); + + assert_eq!( + micromark("&MadeUpEntity;"), + "

&MadeUpEntity;

", + "should not support unknown named character references" + ); + + assert_eq!( + micromark_with_options("", DANGER), + "", + "should not care about character references in html" + ); + + // To do: link (resource). + // assert_eq!( + // micromark("[foo](/föö \"föö\")"), + // "

foo

", + // "should support character references in resource URLs and titles" + // ); + + // To do: definition. + // assert_eq!( + // micromark("[foo]: /föö \"föö\"\n\n[foo]"), + // "

foo

", + // "should support character references in definition URLs and titles" + // ); + + assert_eq!( + micromark("``` föö\nfoo\n```"), + "
foo\n
", + "should support character references in code language" + ); + + // To do: code (text). + // assert_eq!( + // micromark("`föö`"), + // "

föö

", + // "should not support character references in text code" + // ); + + assert_eq!( + micromark(" föfö"), + "
föfö\n
", + "should not support character references in indented code" + ); + + // To do: attention. + // assert_eq!( + // micromark("*foo*\n*foo*"), + // "

*foo*\nfoo

", + // "should not support character references as construct markers (1)" + // ); + + // To do: list. + // assert_eq!( + // micromark("* foo\n\n* foo"), + // "

* foo

\n", + // "should not support character references as construct markers (2)" + // ); + + // To do: link. + // assert_eq!( + // micromark("[a](url "tit")"), + // "

[a](url "tit")

", + // "should not support character references as construct markers (3)" + // ); + + assert_eq!( + micromark("foo bar"), + "

foo\n\nbar

", + "should not support character references as whitespace (1)" + ); + + assert_eq!( + micromark(" foo"), + "

\tfoo

", + "should not support character references as whitespace (2)" + ); + + // Extra: + assert_eq!( + micromark("∳"), + "

", + "should support the longest possible named character reference" + ); + + assert_eq!( + micromark("�"), + "

", + "should “support” a longest possible hexadecimal character reference" + ); + + assert_eq!( + micromark("�"), + "

", + "should “support” a longest possible decimal character reference" + ); + + assert_eq!( + micromark("&CounterClockwiseContourIntegrali;"), + "

&CounterClockwiseContourIntegrali;

", + "should not support the longest possible named character reference" + ); + + assert_eq!( + micromark("�"), + "

�

", + "should not support a longest possible hexadecimal character reference" + ); + + assert_eq!( + micromark("�"), + "

�

", + "should not support a longest possible decimal character reference" + ); + + assert_eq!( + micromark("&-;"), + "

&-;

", + "should not support the other characters after `&`" + ); + + assert_eq!( + micromark("&#-;"), + "

&#-;

", + "should not support the other characters after `#`" + ); + + assert_eq!( + micromark("&#x-;"), + "

&#x-;

", + "should not support the other characters after `#x`" + ); + + assert_eq!( + micromark("<-;"), + "

&lt-;

", + "should not support the other characters inside a name" + ); + + assert_eq!( + micromark(" -;"), + "

&#9-;

", + "should not support the other characters inside a demical" + ); + + assert_eq!( + micromark(" -;"), + "

&#x9-;

", + "should not support the other characters inside a hexademical" + ); + + // To do: extensions. + // assert_eq!( + // micromark("&", { + // extensions: [{disable: {null: ["characterReferences"]}}] + // }), + // "

&

", + // "should support turning off character references" + // ); +} -- cgit