diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-15 12:13:02 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-15 12:13:02 +0200 |
commit | df5b39f8c1cb6b3f3a8a19c5a4722433e2ef1dff (patch) | |
tree | 5e32dc2969c2bfb1e2fac2d0d83d10351d86483f | |
parent | 7d8cce920e74dfe5a24f52fb738035ff7a0390cc (diff) | |
download | markdown-rs-df5b39f8c1cb6b3f3a8a19c5a4722433e2ef1dff.tar.gz markdown-rs-df5b39f8c1cb6b3f3a8a19c5a4722433e2ef1dff.tar.bz2 markdown-rs-df5b39f8c1cb6b3f3a8a19c5a4722433e2ef1dff.zip |
Add tests for character reference
* Fix encoding of character references
* Fix unneeded remainder in `nok` state
-rw-r--r-- | src/compiler.rs | 2 | ||||
-rw-r--r-- | src/construct/character_reference.rs | 2 | ||||
-rw-r--r-- | tests/character_reference.rs | 204 |
3 files changed, 206 insertions, 2 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 2a3f101..6f0215c 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -374,7 +374,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St CharacterReferenceKind::Named => decode_named(ref_string), }; - buf_tail_mut(buffers).push(value); + buf_tail_mut(buffers).push(encode(&value)); character_reference_kind = None; } diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index ba2ad61..af9c02e 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -189,7 +189,7 @@ fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { if let Kind::Named = info.kind { if !CHARACTER_REFERENCE_NAMES.contains(&value.as_str()) { - return (State::Nok, Some(vec![code])); + return (State::Nok, None); } } diff --git a/tests/character_reference.rs b/tests/character_reference.rs new file mode 100644 index 0000000..5e71792 --- /dev/null +++ b/tests/character_reference.rs @@ -0,0 +1,204 @@ +extern crate micromark; +use micromark::{micromark, micromark_with_options, CompileOptions}; + +const DANGER: &CompileOptions = &CompileOptions { + allow_dangerous_html: true, + allow_dangerous_protocol: true, +}; + +#[test] +fn character_reference() { + assert_eq!( + micromark( + " & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸" + ), + "<p> & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸</p>", + "should support named character references" + ); + + assert_eq!( + micromark("# Ӓ Ϡ �"), + "<p># Ӓ Ϡ �</p>", + "should support decimal character references" + ); + + assert_eq!( + micromark("" ആ ಫ"), + "<p>" ആ ಫ</p>", + "should support hexadecimal character references" + ); + + assert_eq!( + micromark( + "  &x; &#; &#x;\n�\n&#abcdef0;\n&ThisIsNotDefined; &hi?;" + ), + "<p>&nbsp &x; &#; &#x;\n&#987654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;</p>", + "should not support other things that look like character references" + ); + + assert_eq!( + micromark("©"), + "<p>&copy</p>", + "should not support character references w/o semicolon" + ); + + assert_eq!( + micromark("&MadeUpEntity;"), + "<p>&MadeUpEntity;</p>", + "should not support unknown named character references" + ); + + assert_eq!( + micromark_with_options("<a href=\"öö.html\">", DANGER), + "<a href=\"öö.html\">", + "should not care about character references in html" + ); + + // To do: link (resource). + // assert_eq!( + // micromark("[foo](/föö \"föö\")"), + // "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>", + // "should support character references in resource URLs and titles" + // ); + + // To do: definition. + // assert_eq!( + // micromark("[foo]: /föö \"föö\"\n\n[foo]"), + // "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>", + // "should support character references in definition URLs and titles" + // ); + + assert_eq!( + micromark("``` föö\nfoo\n```"), + "<pre><code class=\"language-föö\">foo\n</code></pre>", + "should support character references in code language" + ); + + // To do: code (text). + // assert_eq!( + // micromark("`föö`"), + // "<p><code>f&ouml;&ouml;</code></p>", + // "should not support character references in text code" + // ); + + assert_eq!( + micromark(" föfö"), + "<pre><code>f&ouml;f&ouml;\n</code></pre>", + "should not support character references in indented code" + ); + + // To do: attention. + // assert_eq!( + // micromark("*foo*\n*foo*"), + // "<p>*foo*\n<em>foo</em></p>", + // "should not support character references as construct markers (1)" + // ); + + // To do: list. + // assert_eq!( + // micromark("* foo\n\n* foo"), + // "<p>* foo</p>\n<ul>\n<li>foo</li>\n</ul>", + // "should not support character references as construct markers (2)" + // ); + + // To do: link. + // assert_eq!( + // micromark("[a](url "tit")"), + // "<p>[a](url "tit")</p>", + // "should not support character references as construct markers (3)" + // ); + + assert_eq!( + micromark("foo bar"), + "<p>foo\n\nbar</p>", + "should not support character references as whitespace (1)" + ); + + assert_eq!( + micromark("	foo"), + "<p>\tfoo</p>", + "should not support character references as whitespace (2)" + ); + + // Extra: + assert_eq!( + micromark("∳"), + "<p>∳</p>", + "should support the longest possible named character reference" + ); + + assert_eq!( + micromark("�"), + "<p>�</p>", + "should “support” a longest possible hexadecimal character reference" + ); + + assert_eq!( + micromark("�"), + "<p>�</p>", + "should “support” a longest possible decimal character reference" + ); + + assert_eq!( + micromark("&CounterClockwiseContourIntegrali;"), + "<p>&CounterClockwiseContourIntegrali;</p>", + "should not support the longest possible named character reference" + ); + + assert_eq!( + micromark("�"), + "<p>&#xff99999;</p>", + "should not support a longest possible hexadecimal character reference" + ); + + assert_eq!( + micromark("�"), + "<p>&#99999999;</p>", + "should not support a longest possible decimal character reference" + ); + + assert_eq!( + micromark("&-;"), + "<p>&-;</p>", + "should not support the other characters after `&`" + ); + + assert_eq!( + micromark("&#-;"), + "<p>&#-;</p>", + "should not support the other characters after `#`" + ); + + assert_eq!( + micromark("&#x-;"), + "<p>&#x-;</p>", + "should not support the other characters after `#x`" + ); + + assert_eq!( + micromark("<-;"), + "<p>&lt-;</p>", + "should not support the other characters inside a name" + ); + + assert_eq!( + micromark("	-;"), + "<p>&#9-;</p>", + "should not support the other characters inside a demical" + ); + + assert_eq!( + micromark("	-;"), + "<p>&#x9-;</p>", + "should not support the other characters inside a hexademical" + ); + + // To do: extensions. + // assert_eq!( + // micromark("&", { + // extensions: [{disable: {null: ["characterReferences"]}}] + // }), + // "<p>&</p>", + // "should support turning off character references" + // ); +} |