/// Encode dangerous HTML characters.
///
/// Replaces the four characters that have special meaning in HTML with their
/// named character references, so the result can be placed in HTML text or
/// in a double-quoted attribute value:
///
/// | Input | Output   |
/// | ----- | -------- |
/// | `&`   | `&amp;`  |
/// | `"`   | `&quot;` |
/// | `<`   | `&lt;`   |
/// | `>`   | `&gt;`   |
///
/// Every other character (including `'`) is copied through unchanged.
/// Technically, `>` and `"` could be skipped in many cases, but CM includes
/// them.
///
/// This behavior is not explained in prose in `CommonMark` but can be inferred
/// from the input/output test cases.
///
/// The function does not recognise existing character references: input that
/// is already encoded is encoded again (`&lt;` becomes `&amp;lt;`).
///
/// ## Examples
///
/// ```rust ignore
/// use micromark::util::encode;
///
/// assert_eq!(encode("I <3 🦀"), "I &lt;3 🦀");
/// ```
///
/// ## References
///
/// * [`micromark-util-encode` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-encode)
pub fn encode(value: &str) -> String {
    // The output is at least as long as the input; reserving that much up
    // front avoids most reallocations for text with few special characters.
    let mut result = String::with_capacity(value.len());

    // A single pass looks at each input character exactly once, so an `&`
    // that is emitted as part of a replacement can never be encoded again.
    for character in value.chars() {
        match character {
            '&' => result.push_str("&amp;"),
            '"' => result.push_str("&quot;"),
            '<' => result.push_str("&lt;"),
            '>' => result.push_str("&gt;"),
            other => result.push(other),
        }
    }

    result
}