about summary refs log tree commit diff stats
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/construct/attention.rs 2
-rw-r--r-- src/construct/mod.rs 10
-rw-r--r-- src/to_mdast.rs 37
-rw-r--r-- src/util/char.rs 2
-rw-r--r-- src/util/character_reference.rs 37
5 files changed, 66 insertions, 22 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index d99a52c..7ac41de 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -37,7 +37,7 @@
//! There are some small differences in whether sequences can open and/or close
//! based on whether they are formed with asterisks or underscores.
//! Because underscores also frequently occur in natural language inside words,
-//! while asterisks typically never do, `CommonMark` prohobits underscore
+//! while asterisks typically never do, `CommonMark` prohibits underscore
//! sequences from opening or closing when *inside* a word.
//!
//! Because asterisks can be used to form the most markdown constructs, using
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 88f3050..c78a002 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -1,7 +1,7 @@
//! Constructs found in markdown.
//!
//! Constructs are grouped by content type.
-//! Which content type is allowed somewhere, defines which constructs are
+//! Which content type is allowed somewhere, prescribes which constructs are
//! allowed there.
//!
//! ## Content type
@@ -24,11 +24,15 @@
//! There are several *things* found when parsing markdown, such as, say, a
//! thematic break.
//! These things are called constructs here.
+//!
//! Sometimes, there are several constructs that result in an equivalent thing.
//! For example, [code (fenced)][raw_flow] and
//! [code (indented)][code_indented] are considered different constructs.
+//! Sometimes, constructs on their own don’t result in anything.
+//! For example, a `*` is parsed as an attention sequence, but if no matching
+//! sequence is found later, it’s turned back into plain data.
//!
-//! The following constructs are found in markdown (CommonMark):
+//! The following constructs are found in markdown (`CommonMark`):
//!
//! * [attention][attention] (strong, emphasis, extension: GFM strikethrough)
//! * [autolink][]
@@ -64,9 +68,9 @@
//! * [gfm label start footnote][gfm_label_start_footnote]
//! * [gfm table][gfm_table]
//! * [gfm task list item check][gfm_task_list_item_check]
+//! * [mdx esm][mdx_esm]
//! * [mdx expression (flow)][mdx_expression_flow]
//! * [mdx expression (text)][mdx_expression_text]
-//! * [mdx esm][mdx_esm]
//! * [mdx jsx (flow)][mdx_jsx_flow]
//! * [mdx jsx (text)][mdx_jsx_text]
//!
diff --git a/src/to_mdast.rs b/src/to_mdast.rs
index f2b3c30..c4650da 100644
--- a/src/to_mdast.rs
+++ b/src/to_mdast.rs
@@ -26,6 +26,7 @@ use alloc::{
};
use core::str;
+/// A reference to something.
#[derive(Debug)]
struct Reference {
reference_kind: Option<ReferenceKind>,
@@ -33,13 +34,35 @@ struct Reference {
label: String,
}
+/// Info on a tag.
+///
+/// JSX tags are parsed on their own.
+/// They’re matched together here.
#[derive(Debug, Clone)]
struct JsxTag {
+ /// Optional tag name.
+ ///
+ /// `None` means that it’s a fragment.
name: Option<String>,
+ /// List of attributes.
attributes: Vec<AttributeContent>,
+ /// Whether this is a closing tag.
+ ///
+ /// ```markdown
+ /// > | </a>
+ /// ^
+ /// ```
close: bool,
+ /// Whether this is a self-closing tag.
+ ///
+ /// ```markdown
+ /// > | <a/>
+ /// ^
+ /// ```
self_closing: bool,
+ /// Starting point.
start: Point,
+ /// Ending point.
end: Point,
}
@@ -1422,7 +1445,7 @@ fn on_exit_list_item_value(context: &mut CompileContext) {
}
}
-/// Handle [`Enter`][Kind::Enter]:{[`MdxJsxFlowTag`][Name::MdxJsxFlowTag],[`MdxJsxTextTag`][Name::MdxJsxTextTag]}.
+/// Handle [`Exit`][Kind::Exit]:{[`MdxJsxFlowTag`][Name::MdxJsxFlowTag],[`MdxJsxTextTag`][Name::MdxJsxTextTag]}.
fn on_exit_mdx_jsx_tag(context: &mut CompileContext) -> Result<(), String> {
let mut tag = context.jsx_tag.as_ref().expect("expected tag").clone();
@@ -1648,17 +1671,17 @@ fn on_exit_resource_title_string(context: &mut CompileContext) {
}
}
-// Create a point from an event.
+/// Create a point from an event point.
fn point_from_event_point(point: &EventPoint) -> Point {
Point::new(point.line, point.column, point.index)
}
-// Create a point from an event.
+/// Create a point from an event.
fn point_from_event(event: &Event) -> Point {
point_from_event_point(&event.point)
}
-// Create a position from an event.
+/// Create a position from an event.
fn position_from_event(event: &Event) -> Position {
let end = Point::new(event.point.line, event.point.column, event.point.index);
Position {
@@ -1667,6 +1690,7 @@ fn position_from_event(event: &Event) -> Position {
}
}
+/// Resolve the current stack on the tree.
fn delve_mut<'tree>(mut node: &'tree mut Node, stack: &'tree [usize]) -> &'tree mut Node {
let mut stack_index = 0;
while stack_index < stack.len() {
@@ -1677,6 +1701,7 @@ fn delve_mut<'tree>(mut node: &'tree mut Node, stack: &'tree [usize]) -> &'tree
node
}
+/// Remove initial/final EOLs.
fn trim_eol(value: String, at_start: bool, at_end: bool) -> String {
let bytes = value.as_bytes();
let mut start = 0;
@@ -1711,6 +1736,9 @@ fn trim_eol(value: String, at_start: bool, at_end: bool) -> String {
}
}
+/// Handle a mismatch.
+///
+/// Mismatches can occur with MDX JSX tags.
fn on_mismatch_error(
context: &mut CompileContext,
left: Option<&Event>,
@@ -1759,6 +1787,7 @@ fn on_mismatch_error(
}
}
+/// Format a JSX tag, ignoring its attributes.
fn serialize_abbreviated_tag(tag: &JsxTag) -> String {
format!(
"<{}{}>",
diff --git a/src/util/char.rs b/src/util/char.rs
index cfaacd5..b902fbe 100644
--- a/src/util/char.rs
+++ b/src/util/char.rs
@@ -1,4 +1,4 @@
-//! Deal with byte and chars and kinds.
+//! Deal with bytes, chars, and kinds.
use crate::util::unicode::PUNCTUATION;
use alloc::{
diff --git a/src/util/character_reference.rs b/src/util/character_reference.rs
index 75db98b..ee2a65c 100644
--- a/src/util/character_reference.rs
+++ b/src/util/character_reference.rs
@@ -15,6 +15,10 @@ use core::str;
/// [`CHARACTER_REFERENCES`][] (or [`CHARACTER_REFERENCES_HTML_4`][]) and then
/// takes the corresponding value from `1`.
///
+/// The `html5` boolean is used for named character references, and specifies
+/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
+/// supported.
+///
/// The result is `String` instead of `char` because named character references
/// can expand into multiple characters.
///
@@ -28,14 +32,6 @@ use core::str;
/// assert_eq!(decode_named("aelig", true), "æ");
/// ```
///
-/// ## Panics
-///
-/// This function panics if a name not in [`CHARACTER_REFERENCES`][] is
-/// given.
-/// It is expected that figuring out whether a name is allowed is handled in
-/// the parser.
-/// When `micromark` is used, this function never panics.
-///
/// ## References
///
/// * [`wooorm/decode-named-character-reference`](https://github.com/wooorm/decode-named-character-reference)
@@ -100,6 +96,21 @@ pub fn decode_numeric(value: &str, radix: u32) -> String {
char::REPLACEMENT_CHARACTER.to_string()
}
+/// Decode a character reference.
+///
+/// This turns the number (in string form as either hexadecimal or decimal) or
+/// name from a character reference into a string.
+///
+/// The marker specifies the format: `#` for decimal, `x` for hexadecimal, and
+/// `&` for named.
+///
+/// The `html5` boolean is used for named character references, and specifies
+/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
+/// supported.
+///
+/// ## Panics
+///
+/// Panics if `marker` is not `b'&'`, `b'x'`, or `b'#'`.
pub fn decode(value: &str, marker: u8, html5: bool) -> Option<String> {
match marker {
b'#' => Some(decode_numeric(value, 10)),
@@ -144,11 +155,11 @@ pub fn value_test(marker: u8) -> fn(&u8) -> bool {
/// Decode character references in a string.
///
-/// Note: this currently only supports HTML 4 references, as it’s only used for
-/// them.
-///
-/// If it’s ever needed to support HTML 5 (which is what normal markdown uses),
-/// a boolean parameter can be added here.
+/// > 👉 **Note**: this currently only supports the 252 named character
+/// > references from HTML 4, as it’s only used for JSX.
+/// >
+/// > If it’s ever needed to support HTML 5 (which is what normal markdown
+/// > uses), a boolean parameter can be added here.
pub fn parse(value: &str) -> String {
let bytes = value.as_bytes();
let mut index = 0;