use markdown::{
mdast::{Node, Paragraph, Root, Text},
to_html, to_html_with_options, to_mdast,
unist::Position,
CompileOptions, Constructs, Options, ParseOptions,
};
use pretty_assertions::assert_eq;
/// Exercises character references (`&name;`, `&#123;`, `&#x7b;`) when
/// compiling markdown to HTML and when building an mdast tree.
///
/// Inputs deliberately contain the raw reference syntax. In the expected
/// HTML, an `&amp;` marks an ampersand that was *not* treated as the start of
/// a character reference (and was therefore escaped on output), whereas a
/// literal character such as `©` marks a reference that was decoded.
///
/// # Errors
///
/// Propagates the error returned by `to_html_with_options` or `to_mdast`.
#[test]
fn character_reference() -> Result<(), String> {
    // Well-formed named, decimal and hexadecimal references are decoded.
    assert_eq!(
        to_html(
            "&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;"
        ),
        "<p>\u{a0} &amp; © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸</p>",
        "should support named character references"
    );

    assert_eq!(
        to_html("&#35; &#1234; &#992; &#0;"),
        "<p># Ӓ Ϡ �</p>",
        "should support decimal character references"
    );

    assert_eq!(
        to_html("&#X22; &#XD06; &#xcab;"),
        "<p>&quot; ആ ಫ</p>",
        "should support hexadecimal character references"
    );

    // Malformed or unknown references stay as text; their `&` is escaped.
    assert_eq!(
        to_html(
            "&nbsp &x; &#; &#x;\n&#987654321;\n&#abcdef0;\n&ThisIsNotDefined; &hi?;"
        ),
        "<p>&amp;nbsp &amp;x; &amp;#; &amp;#x;\n&amp;#987654321;\n&amp;#abcdef0;\n&amp;ThisIsNotDefined; &amp;hi?;</p>",
        "should not support other things that look like character references"
    );

    assert_eq!(
        to_html("&copy"),
        "<p>&amp;copy</p>",
        "should not support character references w/o semicolon"
    );

    assert_eq!(
        to_html("&MadeUpEntity;"),
        "<p>&amp;MadeUpEntity;</p>",
        "should not support unknown named character references"
    );

    // Raw HTML is passed through untouched, references included.
    assert_eq!(
        to_html_with_options(
            "<a href=\"&ouml;&ouml;.html\">",
            &Options {
                compile: CompileOptions {
                    allow_dangerous_html: true,
                    allow_dangerous_protocol: true,
                    ..Default::default()
                },
                ..Default::default()
            }
        )?,
        "<a href=\"&ouml;&ouml;.html\">",
        "should not care about character references in html"
    );

    // References are decoded in URLs, titles and the code fence info string.
    assert_eq!(
        to_html("[foo](/f&ouml;&ouml; \"f&ouml;&ouml;\")"),
        "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
        "should support character references in resource URLs and titles"
    );

    assert_eq!(
        to_html("[foo]: /f&ouml;&ouml; \"f&ouml;&ouml;\"\n\n[foo]"),
        "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
        "should support character references in definition URLs and titles"
    );

    assert_eq!(
        to_html("``` f&ouml;&ouml;\nfoo\n```"),
        "<pre><code class=\"language-föö\">foo\n</code></pre>",
        "should support character references in code language"
    );

    // Code content is literal: references are not decoded there.
    assert_eq!(
        to_html("`f&ouml;&ouml;`"),
        "<p><code>f&amp;ouml;&amp;ouml;</code></p>",
        "should not support character references in text code"
    );

    // The four leading spaces are what make this indented code.
    assert_eq!(
        to_html("    f&ouml;f&ouml;"),
        "<pre><code>f&amp;ouml;f&amp;ouml;\n</code></pre>",
        "should not support character references in indented code"
    );

    // A decoded character never acts as markdown syntax or whitespace.
    assert_eq!(
        to_html("&#42;foo&#42;\n*foo*"),
        "<p>*foo*\n<em>foo</em></p>",
        "should not support character references as construct markers (1)"
    );

    assert_eq!(
        to_html("&#42; foo\n\n* foo"),
        "<p>* foo</p>\n<ul>\n<li>foo</li>\n</ul>",
        "should not support character references as construct markers (2)"
    );

    assert_eq!(
        to_html("[a](url &quot;tit&quot;)"),
        "<p>[a](url &quot;tit&quot;)</p>",
        "should not support character references as construct markers (3)"
    );

    assert_eq!(
        to_html("foo&#10;&#10;bar"),
        "<p>foo\n\nbar</p>",
        "should not support character references as whitespace (1)"
    );

    assert_eq!(
        to_html("&#9;foo"),
        "<p>\tfoo</p>",
        "should not support character references as whitespace (2)"
    );

    // Extra:
    // Length limits: the longest accepted reference of each kind, then the
    // same reference made one character longer.
    assert_eq!(
        to_html("&CounterClockwiseContourIntegral;"),
        "<p>∳</p>",
        "should support the longest possible named character reference"
    );

    assert_eq!(
        to_html("&#xff9999;"),
        "<p>�</p>",
        "should “support” a longest possible hexadecimal character reference"
    );

    assert_eq!(
        to_html("&#9999999;"),
        "<p>�</p>",
        "should “support” a longest possible decimal character reference"
    );

    assert_eq!(
        to_html("&CounterClockwiseContourIntegrali;"),
        "<p>&amp;CounterClockwiseContourIntegrali;</p>",
        "should not support the longest possible named character reference"
    );

    assert_eq!(
        to_html("&#xff99999;"),
        "<p>&amp;#xff99999;</p>",
        "should not support a longest possible hexadecimal character reference"
    );

    assert_eq!(
        to_html("&#99999999;"),
        "<p>&amp;#99999999;</p>",
        "should not support a longest possible decimal character reference"
    );

    // An unexpected character at any point aborts the reference.
    assert_eq!(
        to_html("&-;"),
        "<p>&amp;-;</p>",
        "should not support the other characters after `&`"
    );

    assert_eq!(
        to_html("&#-;"),
        "<p>&amp;#-;</p>",
        "should not support the other characters after `#`"
    );

    assert_eq!(
        to_html("&#x-;"),
        "<p>&amp;#x-;</p>",
        "should not support the other characters after `#x`"
    );

    assert_eq!(
        to_html("&lt-;"),
        "<p>&amp;lt-;</p>",
        "should not support the other characters inside a name"
    );

    assert_eq!(
        to_html("&#9-;"),
        "<p>&amp;#9-;</p>",
        "should not support the other characters inside a demical"
    );

    assert_eq!(
        to_html("&#x9-;"),
        "<p>&amp;#x9-;</p>",
        "should not support the other characters inside a hexademical"
    );

    // With the construct disabled even `&amp;` is plain text, so its `&` is
    // escaped on output.
    assert_eq!(
        to_html_with_options(
            "&amp;",
            &Options {
                parse: ParseOptions {
                    constructs: Constructs {
                        character_reference: false,
                        ..Default::default()
                    },
                    ..Default::default()
                },
                ..Default::default()
            }
        )?,
        "<p>&amp;amp;</p>",
        "should support turning off character references"
    );

    // In mdast the decoded characters land in a single `Text` node. The input
    // is 5 lines and 158 bytes long with a 22-byte last line, which is what
    // the end point (line 5, column 23, offset 158) reflects.
    assert_eq!(
        to_mdast(
            "&nbsp; &amp; &copy; &AElig; &Dcaron;\n&frac34; &HilbertSpace; &DifferentialD;\n&ClockwiseContourIntegral; &ngE;\n&#35; &#1234; &#992; &#0;\n&#X22; &#XD06; &#xcab;",
            &Default::default()
        )?,
        Node::Root(Root {
            children: vec![Node::Paragraph(Paragraph {
                children: vec![Node::Text(Text {
                    value: "\u{a0} & © Æ Ď\n¾ ℋ ⅆ\n∲ ≧̸\n# Ӓ Ϡ �\n\" ആ ಫ".into(),
                    position: Some(Position::new(1, 1, 0, 5, 23, 158))
                }),],
                position: Some(Position::new(1, 1, 0, 5, 23, 158))
            })],
            position: Some(Position::new(1, 1, 0, 5, 23, 158))
        }),
        "should support character references as `Text`s in mdast"
    );

    Ok(())
}