diff options
| -rw-r--r-- | src/compiler.rs | 2 | ||||
| -rw-r--r-- | src/construct/character_reference.rs | 2 | ||||
| -rw-r--r-- | tests/character_reference.rs | 204 | 
3 files changed, 206 insertions, 2 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 2a3f101..6f0215c 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -374,7 +374,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                         CharacterReferenceKind::Named => decode_named(ref_string),
                     };
 
-                    buf_tail_mut(buffers).push(value);
+                    buf_tail_mut(buffers).push(encode(&value));
                     character_reference_kind = None;
                 }
 
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index ba2ad61..af9c02e 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -189,7 +189,7 @@ fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
 
             if let Kind::Named = info.kind {
                 if !CHARACTER_REFERENCE_NAMES.contains(&value.as_str()) {
-                    return (State::Nok, Some(vec![code]));
+                    return (State::Nok, None);
                 }
             }
 
diff --git a/tests/character_reference.rs b/tests/character_reference.rs
new file mode 100644
index 0000000..5e71792
--- /dev/null
+++ b/tests/character_reference.rs
@@ -0,0 +1,204 @@
+extern crate micromark;
+use micromark::{micromark, micromark_with_options, CompileOptions};
+
+const DANGER: &CompileOptions = &CompileOptions {
+    allow_dangerous_html: true,
+    allow_dangerous_protocol: true,
+};
+
+#[test]
+fn character_reference() {
+    assert_eq!(
+      micromark(
+        "&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;"
+      ),
+      "<p>\u{a0} &amp; © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸</p>",
+      "should support named character references"
+    );
+
+    assert_eq!(
+        micromark("&#35; &#1234; &#992; &#0;"),
+        "<p># Ӓ Ϡ �</p>",
+        "should support decimal character references"
+    );
+
+    assert_eq!(
+        micromark("&#X22; &#XD06; &#xcab;"),
+        "<p>&quot; ആ ಫ</p>",
+        "should support hexadecimal character references"
+    );
+
+    assert_eq!(
+      micromark(
+        "&nbsp &x; &#; &#x;\n&#987654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;"
+      ),
+      "<p>&amp;nbsp &amp;x; &amp;#; &amp;#x;\n&amp;#987654321;\n&amp;#abcdef0;\n&amp;ThisIsNotDefined; &amp;hi?;</p>",
+      "should not support other things that look like character references"
+    );
+
+    assert_eq!(
+        micromark("&copy"),
+        "<p>&amp;copy</p>",
+        "should not support character references w/o semicolon"
+    );
+
+    assert_eq!(
+        micromark("&MadeUpEntity;"),
+        "<p>&amp;MadeUpEntity;</p>",
+        "should not support unknown named character references"
+    );
+
+    assert_eq!(
+        micromark_with_options("<a href=\"&ouml;&ouml;.html\">", DANGER),
+        "<a href=\"&ouml;&ouml;.html\">",
+        "should not care about character references in html"
+    );
+
+    // To do: link (resource).
+    // assert_eq!(
+    //     micromark("[foo](/f&ouml;&ouml; \"f&ouml;&ouml;\")"),
+    //     "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
+    //     "should support character references in resource URLs and titles"
+    // );
+
+    // To do: definition.
+    // assert_eq!(
+    //     micromark("[foo]: /f&ouml;&ouml; \"f&ouml;&ouml;\"\n\n[foo]"),
+    //     "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
+    //     "should support character references in definition URLs and titles"
+    // );
+
+    assert_eq!(
+        micromark("``` f&ouml;&ouml;\nfoo\n```"),
+        "<pre><code class=\"language-föö\">foo\n</code></pre>",
+        "should support character references in code language"
+    );
+
+    // To do: code (text).
+    // assert_eq!(
+    //     micromark("`f&ouml;&ouml;`"),
+    //     "<p><code>f&amp;ouml;&amp;ouml;</code></p>",
+    //     "should not support character references in text code"
+    // );
+
+    assert_eq!(
+        micromark("    f&ouml;f&ouml;"),
+        "<pre><code>f&amp;ouml;f&amp;ouml;\n</code></pre>",
+        "should not support character references in indented code"
+    );
+
+    // To do: attention.
+    // assert_eq!(
+    //     micromark("&#42;foo&#42;\n*foo*"),
+    //     "<p>*foo*\n<em>foo</em></p>",
+    //     "should not support character references as construct markers (1)"
+    // );
+
+    // To do: list.
+    // assert_eq!(
+    //     micromark("&#42; foo\n\n* foo"),
+    //     "<p>* foo</p>\n<ul>\n<li>foo</li>\n</ul>",
+    //     "should not support character references as construct markers (2)"
+    // );
+
+    // To do: link.
+    // assert_eq!(
+    //     micromark("[a](url &quot;tit&quot;)"),
+    //     "<p>[a](url &quot;tit&quot;)</p>",
+    //     "should not support character references as construct markers (3)"
+    // );
+
+    assert_eq!(
+        micromark("foo&#10;&#10;bar"),
+        "<p>foo\n\nbar</p>",
+        "should not support character references as whitespace (1)"
+    );
+
+    assert_eq!(
+        micromark("&#9;foo"),
+        "<p>\tfoo</p>",
+        "should not support character references as whitespace (2)"
+    );
+
+    // Extra:
+    assert_eq!(
+        micromark("&CounterClockwiseContourIntegral;"),
+        "<p>∳</p>",
+        "should support the longest possible named character reference"
+    );
+
+    assert_eq!(
+        micromark("&#xff9999;"),
+        "<p>�</p>",
+        "should “support” a longest possible hexadecimal character reference"
+    );
+
+    assert_eq!(
+        micromark("&#9999999;"),
+        "<p>�</p>",
+        "should “support” a longest possible decimal character reference"
+    );
+
+    assert_eq!(
+        micromark("&CounterClockwiseContourIntegrali;"),
+        "<p>&amp;CounterClockwiseContourIntegrali;</p>",
+        "should not support the longest possible named character reference"
+    );
+
+    assert_eq!(
+        micromark("&#xff99999;"),
+        "<p>&amp;#xff99999;</p>",
+        "should not support a longest possible hexadecimal character reference"
+    );
+
+    assert_eq!(
+        micromark("&#99999999;"),
+        "<p>&amp;#99999999;</p>",
+        "should not support a longest possible decimal character reference"
+    );
+
+    assert_eq!(
+        micromark("&-;"),
+        "<p>&amp;-;</p>",
+        "should not support the other characters after `&`"
+    );
+
+    assert_eq!(
+        micromark("&#-;"),
+        "<p>&amp;#-;</p>",
+        "should not support the other characters after `#`"
+    );
+
+    assert_eq!(
+        micromark("&#x-;"),
+        "<p>&amp;#x-;</p>",
+        "should not support the other characters after `#x`"
+    );
+
+    assert_eq!(
+        micromark("&lt-;"),
+        "<p>&amp;lt-;</p>",
+        "should not support the other characters inside a name"
+    );
+
+    assert_eq!(
+        micromark("&#9-;"),
+        "<p>&amp;#9-;</p>",
+        "should not support the other characters inside a demical"
+    );
+
+    assert_eq!(
+        micromark("&#x9-;"),
+        "<p>&amp;#x9-;</p>",
+        "should not support the other characters inside a hexademical"
+    );
+
+    // To do: extensions.
+    // assert_eq!(
+    //   micromark("&amp;", {
+    //     extensions: [{disable: {null: ["characterReferences"]}}]
+    //   }),
+    //   "<p>&amp;</p>",
+    //   "should support turning off character references"
+    // );
+}
