diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-10-06 12:12:36 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-10-06 12:12:36 +0200 |
commit | 6e80e03bb6d6af47aba2b339f160e4895ab5afba (patch) | |
tree | 98ecdfd1d73065ae45846320ccf43e5eb8c8cced | |
parent | b75d7976cfe8db43783b930c1f4774f2ad4936f5 (diff) | |
download | markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.gz markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.bz2 markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.zip |
Refactor to share identifier methods
-rw-r--r-- | src/construct/partial_mdx_jsx.rs | 50 | ||||
-rw-r--r-- | src/lib.rs | 17 | ||||
-rw-r--r-- | src/util/identifier.rs | 15 | ||||
-rw-r--r-- | src/util/mod.rs | 1 | ||||
-rw-r--r-- | tests/test_utils/jsx_rewrite.rs | 20 |
5 files changed, 63 insertions, 40 deletions
diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs index e49a8e0..1bf6fc2 100644 --- a/src/construct/partial_mdx_jsx.rs +++ b/src/construct/partial_mdx_jsx.rs @@ -164,13 +164,15 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -use crate::util::char::{ - after_index as char_after_index, format_byte, format_opt as format_char_opt, kind_after_index, - Kind as CharacterKind, +use crate::util::{ + char::{ + after_index as char_after_index, format_byte, format_opt as format_char_opt, + kind_after_index, Kind as CharacterKind, + }, + identifier::{id_cont, id_start}, }; use alloc::format; use core::str; -use unicode_id::UnicodeID; /// Start of MDX: JSX. /// @@ -230,7 +232,7 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State { // Fragment opening tag. Some(b'>') => State::Retry(StateName::MdxJsxTagEnd), _ => { - if id_start(char_after_index( + if id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) { @@ -270,7 +272,7 @@ pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::MdxJsxTagEnd) } // Start of a closing tag name. - else if id_start(char_after_index( + else if id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) { @@ -313,7 +315,7 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State { // Continuation of name: remain. // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) - || id_cont(char_after_index( + || id_cont_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -365,7 +367,7 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State { // End of name. _ => { if matches!(tokenizer.current, Some(b'/' | b'>' | b'{')) - || id_start(char_after_index( + || id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -391,7 +393,7 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn member_name_before(tokenizer: &mut Tokenizer) -> State { // Start of a member name. - if id_start(char_after_index( + if id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) { @@ -427,7 +429,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State { // Continuation of name: remain. // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) - || id_cont(char_after_index( + || id_cont_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -471,7 +473,7 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State { // End of name. _ => { if matches!(tokenizer.current, Some(b'/' | b'>' | b'{')) - || id_start(char_after_index( + || id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -497,7 +499,7 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn local_name_before(tokenizer: &mut Tokenizer) -> State { // Start of a local name. - if id_start(char_after_index( + if id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) { @@ -539,7 +541,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State { // Continuation of name: remain. // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) - || id_cont(char_after_index( + || id_cont_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -569,7 +571,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State { pub fn local_name_after(tokenizer: &mut Tokenizer) -> State { // End of name. if matches!(tokenizer.current, Some(b'/' | b'>' | b'{')) - || id_start(char_after_index( + || id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -621,7 +623,7 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State { } _ => { // Start of an attribute name. - if id_start(char_after_index( + if id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) { @@ -680,7 +682,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State { // Continuation of name: remain. // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) - || id_cont(char_after_index( + || id_cont_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -736,7 +738,7 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State { if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace || matches!(tokenizer.current, Some(b'/' | b'>' | b'{')) - || id_start(char_after_index( + || id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -764,7 +766,7 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn attribute_local_name_before(tokenizer: &mut Tokenizer) -> State { // Start of a local name. - if id_start(char_after_index( + if id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) { @@ -805,7 +807,7 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State { // Continuation of name: remain. // Allow continuation bytes. else if matches!(tokenizer.current, Some(0x80..=0xBF)) - || id_cont(char_after_index( + || id_cont_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -845,7 +847,7 @@ pub fn attribute_local_name_after(tokenizer: &mut Tokenizer) -> State { _ => { // End of name. if matches!(tokenizer.current, Some(b'/' | b'>' | b'{')) - || id_start(char_after_index( + || id_start_opt(char_after_index( tokenizer.parse_state.bytes, tokenizer.point.index, )) @@ -1101,18 +1103,18 @@ pub fn es_whitespace_eol_after(tokenizer: &mut Tokenizer) -> State { } /// Check if a character can start a JSX identifier. -fn id_start(code: Option<char>) -> bool { +fn id_start_opt(code: Option<char>) -> bool { if let Some(char) = code { - UnicodeID::is_id_start(char) || matches!(char, '$' | '_') + id_start(char) } else { false } } /// Check if a character can continue a JSX identifier. -fn id_cont(code: Option<char>) -> bool { +fn id_cont_opt(code: Option<char>) -> bool { if let Some(char) = code { - UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}') + id_cont(char, true) } else { false } @@ -36,7 +36,10 @@ use mdast::Node; use parser::parse; use to_html::compile as to_html; use to_mdast::compile as to_mdast; -use util::sanitize_uri::sanitize; +use util::{ + identifier::{id_cont, id_start}, + sanitize_uri::sanitize, +}; /// Type of line endings in markdown. #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -1198,3 +1201,15 @@ pub fn micromark_to_mdast(value: &str, options: &Options) -> Result<Node, String pub fn sanitize_(value: &str) -> String { sanitize(value) } + +/// Do not use: exported for quick prototyping, will be removed. +#[must_use] +pub fn id_start_(char: char) -> bool { + id_start(char) +} + +/// Do not use: exported for quick prototyping, will be removed. +#[must_use] +pub fn id_cont_(char: char, jsx: bool) -> bool { + id_cont(char, jsx) +} diff --git a/src/util/identifier.rs b/src/util/identifier.rs new file mode 100644 index 0000000..4887e02 --- /dev/null +++ b/src/util/identifier.rs @@ -0,0 +1,15 @@ +//! Info on JavaScript identifiers. + +use unicode_id::UnicodeID; + +/// Check if a character can start a JS identifier. +pub fn id_start(char: char) -> bool { + UnicodeID::is_id_start(char) || matches!(char, '$' | '_') +} + +/// Check if a character can continue a JS (or JSX) identifier. +pub fn id_cont(char: char, jsx: bool) -> bool { + UnicodeID::is_id_continue(char) + || matches!(char, '\u{200c}' | '\u{200d}') + || (jsx && char == '-') +} diff --git a/src/util/mod.rs b/src/util/mod.rs index edc7e14..ac93be0 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -6,6 +6,7 @@ pub mod constant; pub mod edit_map; pub mod encode; pub mod gfm_tagfilter; +pub mod identifier; pub mod infer; pub mod mdx_collect; pub mod normalize_identifier; diff --git a/tests/test_utils/jsx_rewrite.rs b/tests/test_utils/jsx_rewrite.rs index fbce344..b6ffad6 100644 --- a/tests/test_utils/jsx_rewrite.rs +++ b/tests/test_utils/jsx_rewrite.rs @@ -1,8 +1,10 @@ extern crate swc_common; extern crate swc_ecma_ast; -use crate::test_utils::to_swc::Program; +use crate::{ + micromark::{id_cont_ as id_cont, id_start_ as id_start}, + test_utils::to_swc::Program, +}; use swc_ecma_visit::{noop_visit_mut_type, VisitMut, VisitMutWith}; -use unicode_id::UnicodeID; /// Configuration. #[derive(Debug, Default, Clone)] @@ -1159,7 +1161,7 @@ fn is_identifier_name(name: &str) -> bool { if if index == 0 { !id_start(char) } else { - !id_cont(char) + !id_cont(char, false) } { return false; } @@ -1167,15 +1169,3 @@ fn is_identifier_name(name: &str) -> bool { true } - -// To do: share with `partial_mdx_jsx`. -/// Check if a character can start a JS identifier. -fn id_start(char: char) -> bool { - UnicodeID::is_id_start(char) || matches!(char, '$' | '_') -} - -// To do: share with `partial_mdx_jsx`. -/// Check if a character can continue a JS identifier. -fn id_cont(char: char) -> bool { - UnicodeID::is_id_continue(char) || matches!(char, '\u{200c}' | '\u{200d}') -} |