From 43edc8fc9d204da962c92b9f9fef45ac8b6b03da Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 11 Oct 2022 14:01:01 +0200 Subject: Add some missing internal docs --- src/construct/attention.rs | 2 +- src/construct/mod.rs | 10 +++++++--- src/to_mdast.rs | 37 +++++++++++++++++++++++++++++++++---- src/util/char.rs | 2 +- src/util/character_reference.rs | 37 ++++++++++++++++++++++++------------- 5 files changed, 66 insertions(+), 22 deletions(-) diff --git a/src/construct/attention.rs b/src/construct/attention.rs index d99a52c..7ac41de 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -37,7 +37,7 @@ //! There are some small differences in whether sequences can open and/or close //! based on whether they are formed with asterisks or underscores. //! Because underscores also frequently occur in natural language inside words, -//! while asterisks typically never do, `CommonMark` prohobits underscore +//! while asterisks typically never do, `CommonMark` prohibits underscore //! sequences from opening or closing when *inside* a word. //! //! Because asterisks can be used to form the most markdown constructs, using diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 88f3050..c78a002 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -1,7 +1,7 @@ //! Constructs found in markdown. //! //! Constructs are grouped by content type. -//! Which content type is allowed somewhere, defines which constructs are +//! Which content type is allowed somewhere, prescribes which constructs are //! allowed there. //! //! ## Content type @@ -24,11 +24,15 @@ //! There are several *things* found when parsing markdown, such as, say, a //! thematic break. //! These things are called constructs here. +//! //! Sometimes, there are several constructs that result in an equivalent thing. //! For example, [code (fenced)][raw_flow] and //! [code (indented)][code_indented] are considered different constructs. +//! 
Sometimes, constructs on their own don’t result in anything. +//! For example, a `*` is parsed as an attention sequence, but later when we +//! didn’t find another sequence, it’s turned back into plain data. //! -//! The following constructs are found in markdown (CommonMark): +//! The following constructs are found in markdown (`CommonMark`): //! //! * [attention][attention] (strong, emphasis, extension: GFM strikethrough) //! * [autolink][] @@ -64,9 +68,9 @@ //! * [gfm label start footnote][gfm_label_start_footnote] //! * [gfm table][gfm_table] //! * [gfm task list item check][gfm_task_list_item_check] +//! * [mdx esm][mdx_esm] //! * [mdx expression (flow)][mdx_expression_flow] //! * [mdx expression (text)][mdx_expression_text] -//! * [mdx esm][mdx_esm] //! * [mdx jsx (flow)][mdx_jsx_flow] //! * [mdx jsx (text)][mdx_jsx_text] //! diff --git a/src/to_mdast.rs b/src/to_mdast.rs index f2b3c30..c4650da 100644 --- a/src/to_mdast.rs +++ b/src/to_mdast.rs @@ -26,6 +26,7 @@ use alloc::{ }; use core::str; +/// A reference to something. #[derive(Debug)] struct Reference { reference_kind: Option, @@ -33,13 +34,35 @@ struct Reference { label: String, } +/// Info on a tag. +/// +/// JSX tags are parsed on their own. +/// They’re matched together here. #[derive(Debug, Clone)] struct JsxTag { + /// Optional tag name. + /// + /// `None` means that it’s a fragment. name: Option, + /// List of attributes. attributes: Vec, + /// Whether this is a closing tag. + /// + /// ```markdown + /// > | + /// ^ + /// ``` close: bool, + /// Whether this is a self-closing tag. + /// + /// ```markdown + /// > | + /// ^ + /// ``` self_closing: bool, + /// Starting point. start: Point, + /// Ending point. end: Point, } @@ -1422,7 +1445,7 @@ fn on_exit_list_item_value(context: &mut CompileContext) { } } -/// Handle [`Enter`][Kind::Enter]:{[`MdxJsxFlowTag`][Name::MdxJsxFlowTag],[`MdxJsxTextTag`][Name::MdxJsxTextTag]}. 
+/// Handle [`Exit`][Kind::Exit]:{[`MdxJsxFlowTag`][Name::MdxJsxFlowTag],[`MdxJsxTextTag`][Name::MdxJsxTextTag]}. fn on_exit_mdx_jsx_tag(context: &mut CompileContext) -> Result<(), String> { let mut tag = context.jsx_tag.as_ref().expect("expected tag").clone(); @@ -1648,17 +1671,17 @@ fn on_exit_resource_title_string(context: &mut CompileContext) { } } -// Create a point from an event. +/// Create a point from an event. fn point_from_event_point(point: &EventPoint) -> Point { Point::new(point.line, point.column, point.index) } -// Create a point from an event. +/// Create a point from an event. fn point_from_event(event: &Event) -> Point { point_from_event_point(&event.point) } -// Create a position from an event. +/// Create a position from an event. fn position_from_event(event: &Event) -> Position { let end = Point::new(event.point.line, event.point.column, event.point.index); Position { @@ -1667,6 +1690,7 @@ fn position_from_event(event: &Event) -> Position { } } +/// Resolve the current stack on the tree. fn delve_mut<'tree>(mut node: &'tree mut Node, stack: &'tree [usize]) -> &'tree mut Node { let mut stack_index = 0; while stack_index < stack.len() { @@ -1677,6 +1701,7 @@ fn delve_mut<'tree>(mut node: &'tree mut Node, stack: &'tree [usize]) -> &'tree node } +/// Remove initial/final EOLs. fn trim_eol(value: String, at_start: bool, at_end: bool) -> String { let bytes = value.as_bytes(); let mut start = 0; @@ -1711,6 +1736,9 @@ fn trim_eol(value: String, at_start: bool, at_end: bool) -> String { } } +/// Handle a mismatch. +/// +/// Mismatches can occur with MDX JSX tags. fn on_mismatch_error( context: &mut CompileContext, left: Option<&Event>, @@ -1759,6 +1787,7 @@ fn on_mismatch_error( } } +/// Format a JSX tag, ignoring its attributes. fn serialize_abbreviated_tag(tag: &JsxTag) -> String { format!( "<{}{}>", diff --git a/src/util/char.rs b/src/util/char.rs index cfaacd5..b902fbe 100644 --- a/src/util/char.rs +++ b/src/util/char.rs @@ -1,4 +1,4 @@ -//! 
Deal with byte and chars and kinds. +//! Deal with bytes, chars, and kinds. use crate::util::unicode::PUNCTUATION; use alloc::{ diff --git a/src/util/character_reference.rs b/src/util/character_reference.rs index 75db98b..ee2a65c 100644 --- a/src/util/character_reference.rs +++ b/src/util/character_reference.rs @@ -15,6 +15,10 @@ use core::str; /// [`CHARACTER_REFERENCES`][] (or [`CHARACTER_REFERENCES_HTML_4`][]) and then /// takes the corresponding value from `1`. /// +/// The `html5` boolean is used for named character references, and specifies +/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are +/// supported. +/// /// The result is `String` instead of `char` because named character references /// can expand into multiple characters. /// @@ -28,14 +32,6 @@ use core::str; /// assert_eq!(decode_named("aelig", true), "æ"); /// ``` /// -/// ## Panics -/// -/// This function panics if a name not in [`CHARACTER_REFERENCES`][] is -/// given. -/// It is expected that figuring out whether a name is allowed is handled in -/// the parser. -/// When `micromark` is used, this function never panics. -/// /// ## References /// /// * [`wooorm/decode-named-character-reference`](https://github.com/wooorm/decode-named-character-reference) @@ -100,6 +96,21 @@ pub fn decode_numeric(value: &str, radix: u32) -> String { char::REPLACEMENT_CHARACTER.to_string() } +/// Decode a character reference. +/// +/// This turns the number (in string form as either hexadecimal or decimal) or +/// name from a character reference into a string. +/// +/// The marker specifies the format: `#` for decimal, `x` for hexadecimal, and +/// `&` for named. +/// +/// The `html5` boolean is used for named character references, and specifies +/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are +/// supported. +/// +/// ## Panics +/// +/// Panics if `marker` is not `b'&'`, `b'x'`, or `b'#'`. 
pub fn decode(value: &str, marker: u8, html5: bool) -> Option { match marker { b'#' => Some(decode_numeric(value, 10)), @@ -144,11 +155,11 @@ pub fn value_test(marker: u8) -> fn(&u8) -> bool { /// Decode character references in a string. /// -/// Note: this currently only supports HTML 4 references, as it’s only used for -/// them. -/// -/// If it’s ever needed to support HTML 5 (which is what normal markdown uses), -/// a boolean parameter can be added here. +/// > 👉 **Note**: this currently only supports the 252 named character +/// > references from HTML 4, as it’s only used for JSX. +/// > +/// > If it’s ever needed to support HTML 5 (which is what normal markdown +/// > uses), a boolean parameter can be added here. pub fn parse(value: &str) -> String { let bytes = value.as_bytes(); let mut index = 0; -- cgit