From 6fd5d61ed9b8cb66c13f44893d50025c9a87b217 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 1 Sep 2022 12:18:43 +0200 Subject: Add support for GFM tagfilter --- src/util/constant.rs | 31 +++++++++++++++++++ src/util/gfm_tagfilter.rs | 77 +++++++++++++++++++++++++++++++++++++++++++++++ src/util/mod.rs | 1 + src/util/sanitize_uri.rs | 74 +++++++++++++++++++++++++++------------------ 4 files changed, 154 insertions(+), 29 deletions(-) create mode 100644 src/util/gfm_tagfilter.rs (limited to 'src/util') diff --git a/src/util/constant.rs b/src/util/constant.rs index f397f38..d6a6651 100644 --- a/src/util/constant.rs +++ b/src/util/constant.rs @@ -74,6 +74,31 @@ pub const CODE_FENCED_SEQUENCE_SIZE_MIN: usize = 3; /// [frontmatter]: crate::construct::frontmatter pub const FRONTMATTER_SEQUENCE_SIZE: usize = 3; +/// The number of the longest tag name in [`GFM_HTML_TAGFILTER_NAMES`][]. +/// +/// This is currently the size of `plaintext`. +pub const GFM_HTML_TAGFILTER_SIZE_MAX: usize = 9; + +/// List of HTML tag names that are escaped by GFMs tag filter. +/// +/// Tag name matching must be performed insensitive to case, and thus this list +/// includes lowercase tag names. +/// +/// ## References +/// +/// * [*§ 6.1 Disallowed Raw HTML (extension)* in GFM](https://github.github.com/gfm/#disallowed-raw-html-extension-) +pub const GFM_HTML_TAGFILTER_NAMES: [&str; 9] = [ + "iframe", + "noembed", + "noframes", + "plaintext", + "script", + "style", + "textarea", + "title", + "xmp", +]; + /// The number of preceding spaces needed for a [hard break /// (trailing)][whitespace] to form. /// @@ -2426,6 +2451,12 @@ mod tests { "`CHARACTER_REFERENCE_NAMED_SIZE_MAX`" ); + assert_eq!( + GFM_HTML_TAGFILTER_SIZE_MAX, + longest(&GFM_HTML_TAGFILTER_NAMES).unwrap().len(), + "`GFM_HTML_TAGFILTER_SIZE_MAX`" + ); + assert_eq!( HTML_RAW_SIZE_MAX, longest(&HTML_RAW_NAMES).unwrap().len(), diff --git a/src/util/gfm_tagfilter.rs b/src/util/gfm_tagfilter.rs new file mode 100644 index 0000000..8023c66 --- /dev/null +++ b/src/util/gfm_tagfilter.rs @@ -0,0 +1,77 @@ +//! Make dangerous HTML a tiny bit safer. + +use crate::util::constant::{GFM_HTML_TAGFILTER_NAMES, GFM_HTML_TAGFILTER_SIZE_MAX}; +use alloc::string::String; +use core::str; + +/// Make dangerous HTML a tiny bit safer. +/// +/// The tagfilter is kinda weird and kinda useless. +/// The tag filter is a naïve attempt at XSS protection. +/// You should use a proper HTML sanitizing algorithm. +/// +/// ## Examples +/// +/// ```rust ignore +/// use micromark::util::gfm_tagfilter::gfm_tagfilter; +/// +/// assert_eq!(gfm_tagfilter("