Refactor to share identifier methods

author: Titus Wormer <tituswormer@gmail.com> 2022-10-06 12:12:36 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-10-06 12:12:36 +0200
commit: 6e80e03bb6d6af47aba2b339f160e4895ab5afba (patch)
tree: 98ecdfd1d73065ae45846320ccf43e5eb8c8cced
parent: b75d7976cfe8db43783b930c1f4774f2ad4936f5 (diff)
download: markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.gz
markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.bz2
markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.zip
5 files changed, 63 insertions, 40 deletions
diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs
index e49a8e0..1bf6fc2 100644
--- a/src/construct/partial_mdx_jsx.rs
+++ b/src/construct/partial_mdx_jsx.rs
@@ -164,13 +164,15 @@
 use crate::event::Name;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
-use crate::util::char::{
-    after_index as char_after_index, format_byte, format_opt as format_char_opt, kind_after_index,
-    Kind as CharacterKind,
+use crate::util::{
+    char::{
+        after_index as char_after_index, format_byte, format_opt as format_char_opt,
+        kind_after_index, Kind as CharacterKind,
+    },
+    identifier::{id_cont, id_start},
 };
 use alloc::format;
 use core::str;
-use unicode_id::UnicodeID;
 
 /// Start of MDX: JSX.
 ///
@@ -230,7 +232,7 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {
         // Fragment opening tag.
         Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
         _ => {
-            if id_start(char_after_index(
+            if id_start_opt(char_after_index(
                 tokenizer.parse_state.bytes,
                 tokenizer.point.index,
             )) {
@@ -270,7 +272,7 @@ pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State {
         State::Retry(StateName::MdxJsxTagEnd)
     }
     // Start of a closing tag name.
-    else if id_start(char_after_index(
+    else if id_start_opt(char_after_index(
         tokenizer.parse_state.bytes,
         tokenizer.point.index,
     )) {
@@ -313,7 +315,7 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State {
     // Continuation of name: remain.
     // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
-        || id_cont(char_after_index(
+        || id_cont_opt(char_after_index(
             tokenizer.parse_state.bytes,
             tokenizer.point.index,
         ))
@@ -365,7 +367,7 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State {
         // End of name.
         _ => {
             if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
-                || id_start(char_after_index(
+                || id_start_opt(char_after_index(
                     tokenizer.parse_state.bytes,
                     tokenizer.point.index,
                 ))
@@ -391,7 +393,7 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn member_name_before(tokenizer: &mut Tokenizer) -> State {
     // Start of a member name.
-    if id_start(char_after_index(
+    if id_start_opt(char_after_index(
         tokenizer.parse_state.bytes,
         tokenizer.point.index,
     )) {
@@ -427,7 +429,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State {
     // Continuation of name: remain.
     // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
-        || id_cont(char_after_index(
+        || id_cont_opt(char_after_index(
             tokenizer.parse_state.bytes,
             tokenizer.point.index,
         ))
@@ -471,7 +473,7 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State {
         // End of name.
         _ => {
             if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
-                || id_start(char_after_index(
+                || id_start_opt(char_after_index(
                     tokenizer.parse_state.bytes,
                     tokenizer.point.index,
                 ))
@@ -497,7 +499,7 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn local_name_before(tokenizer: &mut Tokenizer) -> State {
     // Start of a local name.
-    if id_start(char_after_index(
+    if id_start_opt(char_after_index(
         tokenizer.parse_state.bytes,
         tokenizer.point.index,
     )) {
@@ -539,7 +541,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State {
     // Continuation of name: remain.
     // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
-        || id_cont(char_after_index(
+        || id_cont_opt(char_after_index(
             tokenizer.parse_state.bytes,
             tokenizer.point.index,
         ))
@@ -569,7 +571,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State {
 pub fn local_name_after(tokenizer: &mut Tokenizer) -> State {
     // End of name.
     if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
-        || id_start(char_after_index(
+        || id_start_opt(char_after_index(
             tokenizer.parse_state.bytes,
             tokenizer.point.index,
         ))
@@ -621,7 +623,7 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             // Start of an attribute name.
-            if id_start(char_after_index(
+            if id_start_opt(char_after_index(
                 tokenizer.parse_state.bytes,
                 tokenizer.point.index,
             )) {
@@ -680,7 +682,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
     // Continuation of name: remain.
     // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
-        || id_cont(char_after_index(
+        || id_cont_opt(char_after_index(
             tokenizer.parse_state.bytes,
             tokenizer.point.index,
         ))
@@ -736,7 +738,7 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State {
             if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Whitespace
                 || matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
-                || id_start(char_after_index(
+                || id_start_opt(char_after_index(
                     tokenizer.parse_state.bytes,
                     tokenizer.point.index,
                 ))
@@ -764,7 +766,7 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn attribute_local_name_before(tokenizer: &mut Tokenizer) -> State {
     // Start of a local name.
-    if id_start(char_after_index(
+    if id_start_opt(char_after_index(
         tokenizer.parse_state.bytes,
         tokenizer.point.index,
     )) {
@@ -805,7 +807,7 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {
     // Continuation of name: remain.
     // Allow continuation bytes.
     else if matches!(tokenizer.current, Some(0x80..=0xBF))
-        || id_cont(char_after_index(
+        || id_cont_opt(char_after_index(
             tokenizer.parse_state.bytes,
             tokenizer.point.index,
         ))
@@ -845,7 +847,7 @@ pub fn attribute_local_name_after(tokenizer: &mut Tokenizer) -> State {
         _ => {
             // End of name.
             if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
-                || id_start(char_after_index(
+                || id_start_opt(char_after_index(
                     tokenizer.parse_state.bytes,
                     tokenizer.point.index,
                 ))
@@ -1101,18 +1103,18 @@ pub fn es_whitespace_eol_after(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Check if a character can start a JSX identifier.
-fn id_start(code: Option<char>) -> bool {
+fn id_start_opt(code: Option<char>) -> bool {
     if let Some(char) = code {
-        UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
+        id_start(char) // Shared check from `util::identifier`.
     } else {
         false
     }
 }
 
 /// Check if a character can continue a JSX identifier.
-fn id_cont(code: Option<char>) -> bool {
+fn id_cont_opt(code: Option<char>) -> bool {
     if let Some(char) = code {
-        UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}')
+        id_cont(char, true)
     } else {
         false
     }
diff --git a/src/lib.rs b/src/lib.rs
index f423b68..8eaa2bb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -36,7 +36,10 @@ use mdast::Node;
 use parser::parse;
 use to_html::compile as to_html;
 use to_mdast::compile as to_mdast;
-use util::sanitize_uri::sanitize;
+use util::{
+    identifier::{id_cont, id_start},
+    sanitize_uri::sanitize,
+};
 
 /// Type of line endings in markdown.
 #[derive(Clone, Debug, Default, Eq, PartialEq)]
@@ -1198,3 +1201,15 @@ pub fn micromark_to_mdast(value: &str, options: &Options) -> Result<Node, String
 pub fn sanitize_(value: &str) -> String {
     sanitize(value)
 }
+
+/// Do not use: wraps the internal `id_start` for quick prototyping, will be removed.
+#[must_use]
+pub fn id_start_(char: char) -> bool {
+    id_start(char)
+}
+
+/// Do not use: wraps the internal `id_cont` for quick prototyping, will be removed.
+#[must_use]
+pub fn id_cont_(char: char, jsx: bool) -> bool {
+    id_cont(char, jsx)
+}
diff --git a/src/util/identifier.rs b/src/util/identifier.rs
new file mode 100644
index 0000000..4887e02
--- /dev/null
+++ b/src/util/identifier.rs
@@ -0,0 +1,15 @@
+//! Info on JavaScript identifiers.
+
+use unicode_id::UnicodeID;
+
+/// Check if a character can start a JS identifier (`is_id_start`, `$`, or `_`).
+pub fn id_start(char: char) -> bool {
+    UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
+}
+
+/// Check if a character can continue a JS (or JSX) identifier; `jsx` also allows `-`.
+pub fn id_cont(char: char, jsx: bool) -> bool {
+    UnicodeID::is_id_continue(char)
+        || matches!(char, '\u{200c}' | '\u{200d}') // ZWNJ and ZWJ.
+        || (jsx && char == '-')
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index edc7e14..ac93be0 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -6,6 +6,7 @@ pub mod constant;
 pub mod edit_map;
 pub mod encode;
 pub mod gfm_tagfilter;
+pub mod identifier;
 pub mod infer;
 pub mod mdx_collect;
 pub mod normalize_identifier;
diff --git a/tests/test_utils/jsx_rewrite.rs b/tests/test_utils/jsx_rewrite.rs
index fbce344..b6ffad6 100644
--- a/tests/test_utils/jsx_rewrite.rs
+++ b/tests/test_utils/jsx_rewrite.rs
@@ -1,8 +1,10 @@
 extern crate swc_common;
 extern crate swc_ecma_ast;
-use crate::test_utils::to_swc::Program;
+use crate::{
+    micromark::{id_cont_ as id_cont, id_start_ as id_start},
+    test_utils::to_swc::Program,
+};
 use swc_ecma_visit::{noop_visit_mut_type, VisitMut, VisitMutWith};
-use unicode_id::UnicodeID;
 
 /// Configuration.
 #[derive(Debug, Default, Clone)]
@@ -1159,7 +1161,7 @@ fn is_identifier_name(name: &str) -> bool {
         if if index == 0 {
             !id_start(char)
         } else {
-            !id_cont(char)
+            !id_cont(char, false)
         } {
             return false;
         }
@@ -1167,15 +1169,3 @@ fn is_identifier_name(name: &str) -> bool {
 
     true
 }
-
-// To do: share with `partial_mdx_jsx`.
-/// Check if a character can start a JS identifier.
-fn id_start(char: char) -> bool {
-    UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
-}
-
-// To do: share with `partial_mdx_jsx`.
-/// Check if a character can continue a JS identifier.
-fn id_cont(char: char) -> bool {
-    UnicodeID::is_id_continue(char) || matches!(char, '\u{200c}' | '\u{200d}')
-}
author	Titus Wormer <tituswormer@gmail.com>	2022-10-06 12:12:36 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-10-06 12:12:36 +0200
commit	6e80e03bb6d6af47aba2b339f160e4895ab5afba (patch)
tree	98ecdfd1d73065ae45846320ccf43e5eb8c8cced
parent	b75d7976cfe8db43783b930c1f4774f2ad4936f5 (diff)
download	markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.gz markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.bz2 markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.zip