about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/compiler.rs | 2
-rw-r--r-- src/construct/character_reference.rs | 2
-rw-r--r-- tests/character_reference.rs | 204
3 files changed, 206 insertions, 2 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 2a3f101..6f0215c 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -374,7 +374,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
CharacterReferenceKind::Named => decode_named(ref_string),
};
- buf_tail_mut(buffers).push(value);
+ buf_tail_mut(buffers).push(encode(&value));
character_reference_kind = None;
}
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index ba2ad61..af9c02e 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -189,7 +189,7 @@ fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
if let Kind::Named = info.kind {
if !CHARACTER_REFERENCE_NAMES.contains(&value.as_str()) {
- return (State::Nok, Some(vec![code]));
+ return (State::Nok, None);
}
}
diff --git a/tests/character_reference.rs b/tests/character_reference.rs
new file mode 100644
index 0000000..5e71792
--- /dev/null
+++ b/tests/character_reference.rs
@@ -0,0 +1,204 @@
+extern crate micromark;
+use micromark::{micromark, micromark_with_options, CompileOptions};
+
+const DANGER: &CompileOptions = &CompileOptions {
+ allow_dangerous_html: true,
+ allow_dangerous_protocol: true,
+};
+
+#[test]
+fn character_reference() {
+ assert_eq!(
+ micromark(
+ "&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;"
+ ),
+ "<p>\u{a0} &amp; © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸</p>",
+ "should support named character references"
+ );
+
+ assert_eq!(
+ micromark("&#35; &#1234; &#992; &#0;"),
+ "<p># Ӓ Ϡ �</p>",
+ "should support decimal character references"
+ );
+
+ assert_eq!(
+ micromark("&#X22; &#XD06; &#xcab;"),
+ "<p>&quot; ആ ಫ</p>",
+ "should support hexadecimal character references"
+ );
+
+ assert_eq!(
+ micromark(
+ "&nbsp &x; &#; &#x;\n&#987654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;"
+ ),
+ "<p>&amp;nbsp &amp;x; &amp;#; &amp;#x;\n&amp;#987654321;\n&amp;#abcdef0;\n&amp;ThisIsNotDefined; &amp;hi?;</p>",
+ "should not support other things that look like character references"
+ );
+
+ assert_eq!(
+ micromark("&copy"),
+ "<p>&amp;copy</p>",
+ "should not support character references w/o semicolon"
+ );
+
+ assert_eq!(
+ micromark("&MadeUpEntity;"),
+ "<p>&amp;MadeUpEntity;</p>",
+ "should not support unknown named character references"
+ );
+
+ assert_eq!(
+ micromark_with_options("<a href=\"&ouml;&ouml;.html\">", DANGER),
+ "<a href=\"&ouml;&ouml;.html\">",
+ "should not care about character references in html"
+ );
+
+ // To do: link (resource).
+ // assert_eq!(
+ // micromark("[foo](/f&ouml;&ouml; \"f&ouml;&ouml;\")"),
+ // "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
+ // "should support character references in resource URLs and titles"
+ // );
+
+ // To do: definition.
+ // assert_eq!(
+ // micromark("[foo]: /f&ouml;&ouml; \"f&ouml;&ouml;\"\n\n[foo]"),
+ // "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
+ // "should support character references in definition URLs and titles"
+ // );
+
+ assert_eq!(
+ micromark("``` f&ouml;&ouml;\nfoo\n```"),
+ "<pre><code class=\"language-föö\">foo\n</code></pre>",
+ "should support character references in code language"
+ );
+
+ // To do: code (text).
+ // assert_eq!(
+ // micromark("`f&ouml;&ouml;`"),
+ // "<p><code>f&amp;ouml;&amp;ouml;</code></p>",
+ // "should not support character references in text code"
+ // );
+
+ assert_eq!(
+ micromark("    f&ouml;f&ouml;"),
+ "<pre><code>f&amp;ouml;f&amp;ouml;\n</code></pre>",
+ "should not support character references in indented code"
+ );
+
+ // To do: attention.
+ // assert_eq!(
+ // micromark("&#42;foo&#42;\n*foo*"),
+ // "<p>*foo*\n<em>foo</em></p>",
+ // "should not support character references as construct markers (1)"
+ // );
+
+ // To do: list.
+ // assert_eq!(
+ // micromark("&#42; foo\n\n* foo"),
+ // "<p>* foo</p>\n<ul>\n<li>foo</li>\n</ul>",
+ // "should not support character references as construct markers (2)"
+ // );
+
+ // To do: link.
+ // assert_eq!(
+ // micromark("[a](url &quot;tit&quot;)"),
+ // "<p>[a](url &quot;tit&quot;)</p>",
+ // "should not support character references as construct markers (3)"
+ // );
+
+ assert_eq!(
+ micromark("foo&#10;&#10;bar"),
+ "<p>foo\n\nbar</p>",
+ "should not support character references as whitespace (1)"
+ );
+
+ assert_eq!(
+ micromark("&#9;foo"),
+ "<p>\tfoo</p>",
+ "should not support character references as whitespace (2)"
+ );
+
+ // Extra:
+ assert_eq!(
+ micromark("&CounterClockwiseContourIntegral;"),
+ "<p>∳</p>",
+ "should support the longest possible named character reference"
+ );
+
+ assert_eq!(
+ micromark("&#xff9999;"),
+ "<p>�</p>",
+ "should “support” a longest possible hexadecimal character reference"
+ );
+
+ assert_eq!(
+ micromark("&#9999999;"),
+ "<p>�</p>",
+ "should “support” a longest possible decimal character reference"
+ );
+
+ assert_eq!(
+ micromark("&CounterClockwiseContourIntegrali;"),
+ "<p>&amp;CounterClockwiseContourIntegrali;</p>",
+ "should not support the longest possible named character reference"
+ );
+
+ assert_eq!(
+ micromark("&#xff99999;"),
+ "<p>&amp;#xff99999;</p>",
+ "should not support a longest possible hexadecimal character reference"
+ );
+
+ assert_eq!(
+ micromark("&#99999999;"),
+ "<p>&amp;#99999999;</p>",
+ "should not support a longest possible decimal character reference"
+ );
+
+ assert_eq!(
+ micromark("&-;"),
+ "<p>&amp;-;</p>",
+ "should not support the other characters after `&`"
+ );
+
+ assert_eq!(
+ micromark("&#-;"),
+ "<p>&amp;#-;</p>",
+ "should not support the other characters after `#`"
+ );
+
+ assert_eq!(
+ micromark("&#x-;"),
+ "<p>&amp;#x-;</p>",
+ "should not support the other characters after `#x`"
+ );
+
+ assert_eq!(
+ micromark("&lt-;"),
+ "<p>&amp;lt-;</p>",
+ "should not support the other characters inside a name"
+ );
+
+ assert_eq!(
+ micromark("&#9-;"),
+ "<p>&amp;#9-;</p>",
+ "should not support the other characters inside a decimal"
+ );
+
+ assert_eq!(
+ micromark("&#x9-;"),
+ "<p>&amp;#x9-;</p>",
+ "should not support the other characters inside a hexadecimal"
+ );
+
+ // To do: extensions.
+ // assert_eq!(
+ // micromark("&amp;", {
+ // extensions: [{disable: {null: ["characterReferences"]}}]
+ // }),
+ // "<p>&amp;</p>",
+ // "should support turning off character references"
+ // );
+}