about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-10-06 12:12:36 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-10-06 12:12:36 +0200
commit: 6e80e03bb6d6af47aba2b339f160e4895ab5afba (patch)
tree: 98ecdfd1d73065ae45846320ccf43e5eb8c8cced /src
parent: b75d7976cfe8db43783b930c1f4774f2ad4936f5 (diff)
download: markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.gz
markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.tar.bz2
markdown-rs-6e80e03bb6d6af47aba2b339f160e4895ab5afba.zip
Refactor to share identifier methods
Diffstat (limited to 'src')
-rw-r--r-- src/construct/partial_mdx_jsx.rs 50
-rw-r--r-- src/lib.rs 17
-rw-r--r-- src/util/identifier.rs 15
-rw-r--r-- src/util/mod.rs 1
4 files changed, 58 insertions, 25 deletions
diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs
index e49a8e0..1bf6fc2 100644
--- a/src/construct/partial_mdx_jsx.rs
+++ b/src/construct/partial_mdx_jsx.rs
@@ -164,13 +164,15 @@
use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-use crate::util::char::{
- after_index as char_after_index, format_byte, format_opt as format_char_opt, kind_after_index,
- Kind as CharacterKind,
+use crate::util::{
+ char::{
+ after_index as char_after_index, format_byte, format_opt as format_char_opt,
+ kind_after_index, Kind as CharacterKind,
+ },
+ identifier::{id_cont, id_start},
};
use alloc::format;
use core::str;
-use unicode_id::UnicodeID;
/// Start of MDX: JSX.
///
@@ -230,7 +232,7 @@ pub fn name_before(tokenizer: &mut Tokenizer) -> State {
// Fragment opening tag.
Some(b'>') => State::Retry(StateName::MdxJsxTagEnd),
_ => {
- if id_start(char_after_index(
+ if id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
)) {
@@ -270,7 +272,7 @@ pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::MdxJsxTagEnd)
}
// Start of a closing tag name.
- else if id_start(char_after_index(
+ else if id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
)) {
@@ -313,7 +315,7 @@ pub fn primary_name(tokenizer: &mut Tokenizer) -> State {
// Continuation of name: remain.
// Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
- || id_cont(char_after_index(
+ || id_cont_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -365,7 +367,7 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State {
// End of name.
_ => {
if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
- || id_start(char_after_index(
+ || id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -391,7 +393,7 @@ pub fn primary_name_after(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn member_name_before(tokenizer: &mut Tokenizer) -> State {
// Start of a member name.
- if id_start(char_after_index(
+ if id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
)) {
@@ -427,7 +429,7 @@ pub fn member_name(tokenizer: &mut Tokenizer) -> State {
// Continuation of name: remain.
// Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
- || id_cont(char_after_index(
+ || id_cont_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -471,7 +473,7 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State {
// End of name.
_ => {
if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
- || id_start(char_after_index(
+ || id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -497,7 +499,7 @@ pub fn member_name_after(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn local_name_before(tokenizer: &mut Tokenizer) -> State {
// Start of a local name.
- if id_start(char_after_index(
+ if id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
)) {
@@ -539,7 +541,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State {
// Continuation of name: remain.
// Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
- || id_cont(char_after_index(
+ || id_cont_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -569,7 +571,7 @@ pub fn local_name(tokenizer: &mut Tokenizer) -> State {
pub fn local_name_after(tokenizer: &mut Tokenizer) -> State {
// End of name.
if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
- || id_start(char_after_index(
+ || id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -621,7 +623,7 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State {
}
_ => {
// Start of an attribute name.
- if id_start(char_after_index(
+ if id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
)) {
@@ -680,7 +682,7 @@ pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
// Continuation of name: remain.
// Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
- || id_cont(char_after_index(
+ || id_cont_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -736,7 +738,7 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State {
if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
== CharacterKind::Whitespace
|| matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
- || id_start(char_after_index(
+ || id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -764,7 +766,7 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn attribute_local_name_before(tokenizer: &mut Tokenizer) -> State {
// Start of a local name.
- if id_start(char_after_index(
+ if id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
)) {
@@ -805,7 +807,7 @@ pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {
// Continuation of name: remain.
// Allow continuation bytes.
else if matches!(tokenizer.current, Some(0x80..=0xBF))
- || id_cont(char_after_index(
+ || id_cont_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -845,7 +847,7 @@ pub fn attribute_local_name_after(tokenizer: &mut Tokenizer) -> State {
_ => {
// End of name.
if matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
- || id_start(char_after_index(
+ || id_start_opt(char_after_index(
tokenizer.parse_state.bytes,
tokenizer.point.index,
))
@@ -1101,18 +1103,18 @@ pub fn es_whitespace_eol_after(tokenizer: &mut Tokenizer) -> State {
}
/// Check if a character can start a JSX identifier.
-fn id_start(code: Option<char>) -> bool {
+fn id_start_opt(code: Option<char>) -> bool {
if let Some(char) = code {
- UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
+ id_start(char)
} else {
false
}
}
/// Check if a character can continue a JSX identifier.
-fn id_cont(code: Option<char>) -> bool {
+fn id_cont_opt(code: Option<char>) -> bool {
if let Some(char) = code {
- UnicodeID::is_id_continue(char) || matches!(char, '-' | '\u{200c}' | '\u{200d}')
+ id_cont(char, true)
} else {
false
}
diff --git a/src/lib.rs b/src/lib.rs
index f423b68..8eaa2bb 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -36,7 +36,10 @@ use mdast::Node;
use parser::parse;
use to_html::compile as to_html;
use to_mdast::compile as to_mdast;
-use util::sanitize_uri::sanitize;
+use util::{
+ identifier::{id_cont, id_start},
+ sanitize_uri::sanitize,
+};
/// Type of line endings in markdown.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
@@ -1198,3 +1201,15 @@ pub fn micromark_to_mdast(value: &str, options: &Options) -> Result<Node, String
pub fn sanitize_(value: &str) -> String {
sanitize(value)
}
+
+/// Do not use: exported for quick prototyping, will be removed.
+#[must_use]
+pub fn id_start_(char: char) -> bool {
+ id_start(char)
+}
+
+/// Do not use: exported for quick prototyping, will be removed.
+#[must_use]
+pub fn id_cont_(char: char, jsx: bool) -> bool {
+ id_cont(char, jsx)
+}
diff --git a/src/util/identifier.rs b/src/util/identifier.rs
new file mode 100644
index 0000000..4887e02
--- /dev/null
+++ b/src/util/identifier.rs
@@ -0,0 +1,15 @@
+//! Info on JavaScript identifiers.
+
+use unicode_id::UnicodeID;
+
+/// Check if a character can start a JS identifier.
+pub fn id_start(char: char) -> bool {
+ UnicodeID::is_id_start(char) || matches!(char, '$' | '_')
+}
+
+/// Check if a character can continue a JS (or JSX) identifier.
+pub fn id_cont(char: char, jsx: bool) -> bool {
+ UnicodeID::is_id_continue(char)
+ || matches!(char, '\u{200c}' | '\u{200d}')
+ || (jsx && char == '-')
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index edc7e14..ac93be0 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -6,6 +6,7 @@ pub mod constant;
pub mod edit_map;
pub mod encode;
pub mod gfm_tagfilter;
+pub mod identifier;
pub mod infer;
pub mod mdx_collect;
pub mod normalize_identifier;