aboutsummaryrefslogtreecommitdiffstats
path: root/src/util/sanitize_uri.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-01 12:18:43 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-01 12:18:43 +0200
commit6fd5d61ed9b8cb66c13f44893d50025c9a87b217 (patch)
treedc471f61a8b4cec968a98ad61b3d4f14745d6c3b /src/util/sanitize_uri.rs
parentfa363dbba79f50001a22d1c90b8fb2009101d48c (diff)
downloadmarkdown-rs-6fd5d61ed9b8cb66c13f44893d50025c9a87b217.tar.gz
markdown-rs-6fd5d61ed9b8cb66c13f44893d50025c9a87b217.tar.bz2
markdown-rs-6fd5d61ed9b8cb66c13f44893d50025c9a87b217.zip
Add support for GFM tagfilter
Diffstat (limited to 'src/util/sanitize_uri.rs')
-rw-r--r--src/util/sanitize_uri.rs74
1 files changed, 45 insertions, 29 deletions
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index 969a4d8..0099347 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -10,55 +10,71 @@ use alloc::{
/// Make a value safe for injection as a URL.
///
/// This encodes unsafe characters with percent-encoding and skips already
-/// encoded sequences (see [`normalize_uri`][] below).
+/// encoded sequences (see [`normalize`][] below).
/// Further unsafe characters are encoded as character references (see
/// [`encode`][]).
///
+/// ## Examples
+///
+/// ```rust ignore
+/// use micromark::util::sanitize_uri::sanitize;
+///
+/// assert_eq!(sanitize("javascript:alert(1)"), "javascript:alert(1)");
+/// assert_eq!(sanitize("https://a👍b.c/%20/%"), "https://a%F0%9F%91%8Db.c/%20/%25");
+/// ```
+///
+/// ## References
+///
+/// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
+pub fn sanitize(value: &str) -> String {
+ encode(&*normalize(value), true)
+}
+
+/// Make a value safe for injection as a URL, and check protocols.
+///
+/// This first uses [`sanitize`][sanitize].
/// Then, a vec of (lowercase) allowed protocols can be given, in which case
-/// the URL is sanitized.
+/// the URL is ignored or kept.
///
-/// For example, `Some(vec!["http", "https", "irc", "ircs", "mailto", "xmpp"])`
-/// can be used for `a[href]`, or `Some(vec!["http", "https"])` for `img[src]`.
+/// For example, `&["http", "https", "irc", "ircs", "mailto", "xmpp"]`
+/// can be used for `a[href]`, or `&["http", "https"]` for `img[src]`.
/// If the URL includes an unknown protocol (one not matched by `protocol`, such
/// as a dangerous example, `javascript:`), the value is ignored.
///
/// ## Examples
///
/// ```rust ignore
-/// use micromark::util::sanitize_url::sanitize_url;
+/// use micromark::util::sanitize_uri::sanitize_with_protocols;
///
-/// assert_eq!(sanitize_uri("javascript:alert(1)", &None), "javascript:alert(1)");
-/// assert_eq!(sanitize_uri("javascript:alert(1)", &Some(vec!["http", "https"])), "");
-/// assert_eq!(sanitize_uri("https://example.com", &Some(vec!["http", "https"])), "https://example.com");
-/// assert_eq!(sanitize_uri("https://a👍b.c/%20/%", &Some(vec!["http", "https"])), "https://a%F0%9F%91%8Db.c/%20/%25");
+/// assert_eq!(sanitize_with_protocols("javascript:alert(1)", &["http", "https"]), "");
+/// assert_eq!(sanitize_with_protocols("https://example.com", &["http", "https"]), "https://example.com");
+/// assert_eq!(sanitize_with_protocols("https://a👍b.c/%20/%", &["http", "https"]), "https://a%F0%9F%91%8Db.c/%20/%25");
/// ```
///
/// ## References
///
/// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
-pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
- let value = encode(&*normalize_uri(value), true);
+pub fn sanitize_with_protocols(value: &str, protocols: &[&str]) -> String {
+ let value = sanitize(value);
- if let Some(protocols) = protocols {
- let end = value.find(|c| matches!(c, '?' | '#' | '/'));
- let mut colon = value.find(|c| matches!(c, ':'));
+ let end = value.find(|c| matches!(c, '?' | '#' | '/'));
+ let mut colon = value.find(|c| matches!(c, ':'));
- // If the first colon is after `?`, `#`, or `/`, it’s not a protocol.
- if let Some(end) = end {
- if let Some(index) = colon {
- if index > end {
- colon = None;
- }
+ // If the first colon is after `?`, `#`, or `/`, it’s not a protocol.
+ if let Some(end) = end {
+ if let Some(index) = colon {
+ if index > end {
+ colon = None;
}
}
+ }
- // If there is no protocol, it’s relative, and fine.
- if let Some(colon) = colon {
- // If it is a protocol, it should be allowed.
- let protocol = value[0..colon].to_lowercase();
- if !protocols.contains(&protocol.as_str()) {
- return "".to_string();
- }
+ // If there is no protocol, it’s relative, and fine.
+ if let Some(colon) = colon {
+ // If it is a protocol, it should be allowed.
+ let protocol = value[0..colon].to_lowercase();
+ if !protocols.contains(&protocol.as_str()) {
+ return "".to_string();
}
}
@@ -74,7 +90,7 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
/// ## Examples
///
/// ```rust ignore
-/// use micromark::util::sanitize_url::normalize_uri;
+/// use micromark::util::sanitize_uri::normalize;
///
/// assert_eq!(sanitize_uri("https://example.com"), "https://example.com");
/// assert_eq!(sanitize_uri("https://a👍b.c/%20/%"), "https://a%F0%9F%91%8Db.c/%20/%25");
@@ -86,7 +102,7 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
///
/// [definition]: crate::construct::definition
/// [label_end]: crate::construct::label_end
-fn normalize_uri(value: &str) -> String {
+fn normalize(value: &str) -> String {
let chars = value.chars().collect::<Vec<_>>();
// Note: it’ll grow bigger for each non-ascii or non-safe character.
let mut result = String::with_capacity(value.len());