diff options
Diffstat (limited to '')
| -rw-r--r-- | src/construct/attention.rs | 2 | ||||
| -rw-r--r-- | src/construct/mod.rs | 10 | ||||
| -rw-r--r-- | src/to_mdast.rs | 37 | ||||
| -rw-r--r-- | src/util/char.rs | 2 | ||||
| -rw-r--r-- | src/util/character_reference.rs | 37 | 
5 files changed, 66 insertions, 22 deletions
| diff --git a/src/construct/attention.rs b/src/construct/attention.rs index d99a52c..7ac41de 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -37,7 +37,7 @@  //! There are some small differences in whether sequences can open and/or close  //! based on whether they are formed with asterisks or underscores.  //! Because underscores also frequently occur in natural language inside words, -//! while asterisks typically never do, `CommonMark` prohobits underscore +//! while asterisks typically never do, `CommonMark` prohibits underscore  //! sequences from opening or closing when *inside* a word.  //!  //! Because asterisks can be used to form the most markdown constructs, using diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 88f3050..c78a002 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -1,7 +1,7 @@  //! Constructs found in markdown.  //!  //! Constructs are grouped by content type. -//! Which content type is allowed somewhere, defines which constructs are +//! Which content type is allowed somewhere, prescribes which constructs are  //! allowed there.  //!  //! ## Content type @@ -24,11 +24,15 @@  //! There are several *things* found when parsing markdown, such as, say, a  //! thematic break.  //! These things are called constructs here. +//!  //! Sometimes, there are several constructs that result in an equivalent thing.  //! For example, [code (fenced)][raw_flow] and  //! [code (indented)][code_indented] are considered different constructs. +//! Sometimes, constructs on their own don’t result in anything. +//! For example, a `*` is parsed as an attention sequence, but later when we +//! didn’t find another sequence, it’s turned back into plain data.  //! -//! The following constructs are found in markdown (CommonMark): +//! The following constructs are found in markdown (`CommonMark`):  //!  //! *   [attention][attention] (strong, emphasis, extension: GFM strikethrough)  //! 
*   [autolink][] @@ -64,9 +68,9 @@  //! *   [gfm label start footnote][gfm_label_start_footnote]  //! *   [gfm table][gfm_table]  //! *   [gfm task list item check][gfm_task_list_item_check] +//! *   [mdx esm][mdx_esm]  //! *   [mdx expression (flow)][mdx_expression_flow]  //! *   [mdx expression (text)][mdx_expression_text] -//! *   [mdx esm][mdx_esm]  //! *   [mdx jsx (flow)][mdx_jsx_flow]  //! *   [mdx jsx (text)][mdx_jsx_text]  //! diff --git a/src/to_mdast.rs b/src/to_mdast.rs index f2b3c30..c4650da 100644 --- a/src/to_mdast.rs +++ b/src/to_mdast.rs @@ -26,6 +26,7 @@ use alloc::{  };  use core::str; +/// A reference to something.  #[derive(Debug)]  struct Reference {      reference_kind: Option<ReferenceKind>, @@ -33,13 +34,35 @@ struct Reference {      label: String,  } +/// Info on a tag. +/// +/// JSX tags are parsed on their own. +/// They’re matched together here.  #[derive(Debug, Clone)]  struct JsxTag { +    /// Optional tag name. +    /// +    /// `None` means that it’s a fragment.      name: Option<String>, +    /// List of attributes.      attributes: Vec<AttributeContent>, +    /// Whether this is a closing tag. +    /// +    /// ```markdown +    /// > | </a> +    ///      ^ +    /// ```      close: bool, +    /// Whether this is a self-closing tag. +    /// +    /// ```markdown +    /// > | <a/> +    ///       ^ +    /// ```      self_closing: bool, +    /// Starting point.      start: Point, +    /// Ending point.      end: Point,  } @@ -1422,7 +1445,7 @@ fn on_exit_list_item_value(context: &mut CompileContext) {      }  } -/// Handle [`Enter`][Kind::Enter]:{[`MdxJsxFlowTag`][Name::MdxJsxFlowTag],[`MdxJsxTextTag`][Name::MdxJsxTextTag]}. +/// Handle [`Exit`][Kind::Exit]:{[`MdxJsxFlowTag`][Name::MdxJsxFlowTag],[`MdxJsxTextTag`][Name::MdxJsxTextTag]}.  
fn on_exit_mdx_jsx_tag(context: &mut CompileContext) -> Result<(), String> {      let mut tag = context.jsx_tag.as_ref().expect("expected tag").clone(); @@ -1648,17 +1671,17 @@ fn on_exit_resource_title_string(context: &mut CompileContext) {      }  } -// Create a point from an event. +/// Create a point from an event.  fn point_from_event_point(point: &EventPoint) -> Point {      Point::new(point.line, point.column, point.index)  } -// Create a point from an event. +/// Create a point from an event.  fn point_from_event(event: &Event) -> Point {      point_from_event_point(&event.point)  } -// Create a position from an event. +/// Create a position from an event.  fn position_from_event(event: &Event) -> Position {      let end = Point::new(event.point.line, event.point.column, event.point.index);      Position { @@ -1667,6 +1690,7 @@ fn position_from_event(event: &Event) -> Position {      }  } +/// Resolve the current stack on the tree.  fn delve_mut<'tree>(mut node: &'tree mut Node, stack: &'tree [usize]) -> &'tree mut Node {      let mut stack_index = 0;      while stack_index < stack.len() { @@ -1677,6 +1701,7 @@ fn delve_mut<'tree>(mut node: &'tree mut Node, stack: &'tree [usize]) -> &'tree      node  } +/// Remove initial/final EOLs.  fn trim_eol(value: String, at_start: bool, at_end: bool) -> String {      let bytes = value.as_bytes();      let mut start = 0; @@ -1711,6 +1736,9 @@ fn trim_eol(value: String, at_start: bool, at_end: bool) -> String {      }  } +/// Handle a mismatch. +/// +/// Mismatches can occur with MDX JSX tags.  fn on_mismatch_error(      context: &mut CompileContext,      left: Option<&Event>, @@ -1759,6 +1787,7 @@ fn on_mismatch_error(      }  } +/// Format a JSX tag, ignoring its attributes.  fn serialize_abbreviated_tag(tag: &JsxTag) -> String {      format!(          "<{}{}>", diff --git a/src/util/char.rs b/src/util/char.rs index cfaacd5..b902fbe 100644 --- a/src/util/char.rs +++ b/src/util/char.rs @@ -1,4 +1,4 @@ -//! 
Deal with byte and chars and kinds. +//! Deal with bytes, chars, and kinds.  use crate::util::unicode::PUNCTUATION;  use alloc::{ diff --git a/src/util/character_reference.rs b/src/util/character_reference.rs index 75db98b..ee2a65c 100644 --- a/src/util/character_reference.rs +++ b/src/util/character_reference.rs @@ -15,6 +15,10 @@ use core::str;  /// [`CHARACTER_REFERENCES`][] (or [`CHARACTER_REFERENCES_HTML_4`][]) and then  /// takes the corresponding value from `1`.  /// +/// The `html5` boolean is used for named character references, and specifies +/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are +/// supported. +///  /// The result is `String` instead of `char` because named character references  /// can expand into multiple characters.  /// @@ -28,14 +32,6 @@ use core::str;  /// assert_eq!(decode_named("aelig", true), "æ");  /// ```  /// -/// ## Panics -/// -/// This function panics if a name not in [`CHARACTER_REFERENCES`][] is -/// given. -/// It is expected that figuring out whether a name is allowed is handled in -/// the parser. -/// When `micromark` is used, this function never panics. -///  /// ## References  ///  /// *   [`wooorm/decode-named-character-reference`](https://github.com/wooorm/decode-named-character-reference) @@ -100,6 +96,21 @@ pub fn decode_numeric(value: &str, radix: u32) -> String {      char::REPLACEMENT_CHARACTER.to_string()  } +/// Decode a character reference. +/// +/// This turns the number (in string form as either hexadecimal or decimal) or +/// name from a character reference into a string. +/// +/// The marker specifies the format: `#` for decimal, `x` for hexadecimal, and +/// `&` for named. +/// +/// The `html5` boolean is used for named character references, and specifies +/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are +/// supported. +/// +/// ## Panics +/// +/// Panics if `marker` is not `b'&'`, `b'x'`, or `b'#'`.  
pub fn decode(value: &str, marker: u8, html5: bool) -> Option<String> {      match marker {          b'#' => Some(decode_numeric(value, 10)), @@ -144,11 +155,11 @@ pub fn value_test(marker: u8) -> fn(&u8) -> bool {  /// Decode character references in a string.  /// -/// Note: this currently only supports HTML 4 references, as it’s only used for -/// them. -/// -/// If it’s ever needed to support HTML 5 (which is what normal markdown uses), -/// a boolean parameter can be added here. +/// > 👉 **Note**: this currently only supports the 252 named character +/// > references from HTML 4, as it’s only used for JSX. +/// > +/// > If it’s ever needed to support HTML 5 (which is what normal markdown +/// > uses), a boolean parameter can be added here.  pub fn parse(value: &str) -> String {      let bytes = value.as_bytes();      let mut index = 0; | 
