aboutsummaryrefslogtreecommitdiffstats
path: root/src/util
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/util/char.rs2
-rw-r--r--src/util/character_reference.rs37
2 files changed, 25 insertions, 14 deletions
diff --git a/src/util/char.rs b/src/util/char.rs
index cfaacd5..b902fbe 100644
--- a/src/util/char.rs
+++ b/src/util/char.rs
@@ -1,4 +1,4 @@
-//! Deal with byte and chars and kinds.
+//! Deal with bytes, chars, and kinds.
use crate::util::unicode::PUNCTUATION;
use alloc::{
diff --git a/src/util/character_reference.rs b/src/util/character_reference.rs
index 75db98b..ee2a65c 100644
--- a/src/util/character_reference.rs
+++ b/src/util/character_reference.rs
@@ -15,6 +15,10 @@ use core::str;
/// [`CHARACTER_REFERENCES`][] (or [`CHARACTER_REFERENCES_HTML_4`][]) and then
/// takes the corresponding value from `1`.
///
+/// The `html5` boolean us used for named character references, and specifier
+/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
+/// supported.
+///
/// The result is `String` instead of `char` because named character references
/// can expand into multiple characters.
///
@@ -28,14 +32,6 @@ use core::str;
/// assert_eq!(decode_named("aelig", true), "æ");
/// ```
///
-/// ## Panics
-///
-/// This function panics if a name not in [`CHARACTER_REFERENCES`][] is
-/// given.
-/// It is expected that figuring out whether a name is allowed is handled in
-/// the parser.
-/// When `micromark` is used, this function never panics.
-///
/// ## References
///
/// * [`wooorm/decode-named-character-reference`](https://github.com/wooorm/decode-named-character-reference)
@@ -100,6 +96,21 @@ pub fn decode_numeric(value: &str, radix: u32) -> String {
char::REPLACEMENT_CHARACTER.to_string()
}
+/// Decode a character reference.
+///
+/// This turns the number (in string form as either hexadecimal or decimal) or
+/// name from a character reference into a string.
+///
+/// The marker specifies the format: `#` for hexadecimal, `x` for decimal, and
+/// `&` for named.
+///
+/// The `html5` boolean us used for named character references, and specifier
+/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
+/// supported.
+///
+/// ## Panics
+///
+/// Panics if `marker` is not `b'&'`, `b'x'`, or `b'#'`.
pub fn decode(value: &str, marker: u8, html5: bool) -> Option<String> {
match marker {
b'#' => Some(decode_numeric(value, 10)),
@@ -144,11 +155,11 @@ pub fn value_test(marker: u8) -> fn(&u8) -> bool {
/// Decode character references in a string.
///
-/// Note: this currently only supports HTML 4 references, as it’s only used for
-/// them.
-///
-/// If it’s ever needed to support HTML 5 (which is what normal markdown uses),
-/// a boolean parameter can be added here.
+/// > 👉 **Note**: this currently only supports the 252 named character
+/// > references from HTML 4, as it’s only used for JSX.
+/// >
+/// > If it’s ever needed to support HTML 5 (which is what normal markdown
+/// > uses), a boolean parameter can be added here.
pub fn parse(value: &str) -> String {
let bytes = value.as_bytes();
let mut index = 0;