Refactor to move byte, char info to own file

author: Titus Wormer <tituswormer@gmail.com> 2022-09-09 10:54:13 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-09-09 10:54:13 +0200
commit: 13337d77954b4c92d1cf4592f43f01d94fce3c77 (patch)
tree: d5feef9a971c1af52e58b5c857d1dd9c9e7fedca
parent: 71dbc8c0189d6b2032f3d8f21cbfffa3f8fe0f12 (diff)
download: markdown-rs-13337d77954b4c92d1cf4592f43f01d94fce3c77.tar.gz
markdown-rs-13337d77954b4c92d1cf4592f43f01d94fce3c77.tar.bz2
markdown-rs-13337d77954b4c92d1cf4592f43f01d94fce3c77.zip
9 files changed, 225 insertions, 215 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 947a79b..4a208df 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -80,8 +80,13 @@ use crate::event::{Event, Kind, Name, Point};
 use crate::resolve::Name as ResolveName;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
-use crate::util::classify_character::{classify_opt, Kind as CharacterKind};
-use crate::util::slice::{char_after_index, char_before_index, Slice};
+use crate::util::{
+    char::{
+        after_index as char_after_index, before_index as char_before_index, classify_opt,
+        Kind as CharacterKind,
+    },
+    slice::Slice,
+};
 use alloc::{vec, vec::Vec};
 
 /// Attentention sequence that we can take markers from.
diff --git a/src/construct/gfm_autolink_literal.rs b/src/construct/gfm_autolink_literal.rs
index ae483a7..c25f04c 100644
--- a/src/construct/gfm_autolink_literal.rs
+++ b/src/construct/gfm_autolink_literal.rs
@@ -148,8 +148,8 @@ use crate::event::{Event, Kind, Name};
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::{
-    classify_character::Kind as CharacterKind,
-    slice::{byte_to_kind, Position, Slice},
+    char::{kind_after_index, Kind as CharacterKind},
+    slice::{Position, Slice},
 };
 use alloc::vec::Vec;
 
@@ -366,7 +366,7 @@ pub fn domain_inside(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
-            if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Other
             {
                 tokenizer.tokenize_state.seen = true;
@@ -470,7 +470,7 @@ pub fn path_inside(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             // Source: <https://github.com/github/cmark-gfm/blob/ef1cfcb/extensions/autolink.c#L12>.
-            if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Whitespace
             {
                 State::Retry(StateName::GfmAutolinkLiteralPathAfter)
@@ -543,7 +543,7 @@ pub fn trail(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             // Whitespace is the end of the URL, anything else is continuation.
-            if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Whitespace
             {
                 State::Ok
diff --git a/src/construct/partial_mdx_jsx.rs b/src/construct/partial_mdx_jsx.rs
index 1a51608..2daa448 100644
--- a/src/construct/partial_mdx_jsx.rs
+++ b/src/construct/partial_mdx_jsx.rs
@@ -164,14 +164,11 @@
 use crate::event::Name;
 use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
-use crate::util::{
-    classify_character::Kind as CharacterKind,
-    slice::{byte_to_kind, char_after_index},
-};
-use alloc::{
-    format,
-    string::{String, ToString},
+use crate::util::char::{
+    after_index as char_after_index, format_byte, format_opt as format_char_opt, kind_after_index,
+    Kind as CharacterKind,
 };
+use alloc::format;
 use core::str;
 use unicode_id::UnicodeID;
 
@@ -305,7 +302,8 @@ pub fn closing_tag_name_before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn primary_name(tokenizer: &mut Tokenizer) -> State {
     // End of name.
-    if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace
+    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
+        == CharacterKind::Whitespace
         || matches!(tokenizer.current, Some(b'.' | b'/' | b':' | b'>' | b'{'))
     {
         tokenizer.exit(Name::MdxJsxTagNamePrimary);
@@ -418,7 +416,8 @@ pub fn member_name_before(tokenizer: &mut Tokenizer) -> State {
 pub fn member_name(tokenizer: &mut Tokenizer) -> State {
     // End of name.
     // Note: no `:` allowed here.
-    if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace
+    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
+        == CharacterKind::Whitespace
         || matches!(tokenizer.current, Some(b'.' | b'/' | b'>' | b'{'))
     {
         tokenizer.exit(Name::MdxJsxTagNameMember);
@@ -529,7 +528,8 @@ pub fn local_name_before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn local_name(tokenizer: &mut Tokenizer) -> State {
     // End of local name (note that we don’t expect another colon, or a member).
-    if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace
+    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
+        == CharacterKind::Whitespace
         || matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
     {
         tokenizer.exit(Name::MdxJsxTagNameLocal);
@@ -645,7 +645,8 @@ pub fn attribute_before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn attribute_primary_name(tokenizer: &mut Tokenizer) -> State {
     // End of attribute name or tag.
-    if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace
+    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
+        == CharacterKind::Whitespace
         || matches!(tokenizer.current, Some(b'/' | b':' | b'=' | b'>' | b'{'))
     {
         tokenizer.exit(Name::MdxJsxTagAttributePrimaryName);
@@ -711,7 +712,7 @@ pub fn attribute_primary_name_after(tokenizer: &mut Tokenizer) -> State {
         }
         _ => {
             // End of tag / new attribute.
-            if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Whitespace
                 || matches!(tokenizer.current, Some(b'/' | b'>' | b'{'))
                 || id_start(char_after_index(
@@ -768,7 +769,8 @@ pub fn attribute_local_name_before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn attribute_local_name(tokenizer: &mut Tokenizer) -> State {
     // End of local name (note that we don’t expect another colon).
-    if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index) == CharacterKind::Whitespace
+    if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
+        == CharacterKind::Whitespace
         || matches!(tokenizer.current, Some(b'/' | b'=' | b'>' | b'{'))
     {
         tokenizer.exit(Name::MdxJsxTagAttributeNameLocal);
@@ -986,7 +988,7 @@ pub fn es_whitespace_start(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'\n') => State::Retry(StateName::MdxJsxEsWhitespaceEol),
         _ => {
-            if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Whitespace
             {
                 tokenizer.enter(Name::MdxJsxEsWhitespace);
@@ -1016,7 +1018,7 @@ pub fn es_whitespace_inside(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::MdxJsxEsWhitespaceInside)
         }
         _ => {
-            if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Whitespace
             {
                 tokenizer.consume();
@@ -1044,7 +1046,7 @@ pub fn es_whitespace_eol(tokenizer: &mut Tokenizer) -> State {
 pub fn es_whitespace_eol_after(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.tokenize_state.token_1 == Name::MdxJsxFlowTag && tokenizer.lazy {
         crash_lazy(tokenizer)
-    } else if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+    } else if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
         == CharacterKind::Whitespace
     {
         tokenizer.enter(Name::MdxJsxEsWhitespace);
@@ -1064,7 +1066,7 @@ pub fn es_whitespace_eol_after_inside(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::MdxJsxEsWhitespaceEolAfterInside)
         }
         _ => {
-            if byte_to_kind(tokenizer.parse_state.bytes, tokenizer.point.index)
+            if kind_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
                 == CharacterKind::Whitespace
             {
                 tokenizer.consume();
@@ -1107,45 +1109,12 @@ fn crash(tokenizer: &Tokenizer, at: &str, expect: &str) -> State {
         char_after_index(tokenizer.parse_state.bytes, tokenizer.point.index)
     };
 
-    // To do: externalize this, and the print mechanism in the tokenizer,
-    // to one proper formatter.
-    let actual = match char {
-        None => "end of file".to_string(),
-        Some(char) => format!("character {}", format_char(char)),
-    };
-
     State::Error(format!(
         "{}:{}: Unexpected {} {}, expected {}",
-        tokenizer.point.line, tokenizer.point.column, actual, at, expect
+        tokenizer.point.line,
+        tokenizer.point.column,
+        format_char_opt(char),
+        at,
+        expect
     ))
 }
-
-fn format_char(char: char) -> String {
-    let unicode = format!("U+{:>04X}", char as u32);
-    let printable = match char {
-        '`' => Some("`` ` ``".to_string()),
-        ' '..='~' => Some(format!("`{}`", char)),
-        _ => None,
-    };
-
-    if let Some(char) = printable {
-        format!("{} ({})", char, unicode)
-    } else {
-        unicode
-    }
-}
-
-fn format_byte(byte: u8) -> String {
-    let unicode = format!("U+{:>04X}", byte);
-    let printable = match byte {
-        b'`' => Some("`` ` ``".to_string()),
-        b' '..=b'~' => Some(format!("`{}`", str::from_utf8(&[byte]).unwrap())),
-        _ => None,
-    };
-
-    if let Some(char) = printable {
-        format!("{} ({})", char, unicode)
-    } else {
-        unicode
-    }
-}
diff --git a/src/construct/partial_space_or_tab_eol.rs b/src/construct/partial_space_or_tab_eol.rs
index 01f440e..1247639 100644
--- a/src/construct/partial_space_or_tab_eol.rs
+++ b/src/construct/partial_space_or_tab_eol.rs
@@ -64,24 +64,26 @@ pub fn space_or_tab_eol_with_options(tokenizer: &mut Tokenizer, options: Options
 ///   | ␠␠b
 /// ```
 pub fn start(tokenizer: &mut Tokenizer) -> State {
-    if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
-        tokenizer.attempt(
-            State::Next(StateName::SpaceOrTabEolAfterFirst),
-            State::Next(StateName::SpaceOrTabEolAtEol),
-        );
+    match tokenizer.current {
+        Some(b'\t' | b' ') => {
+            tokenizer.attempt(
+                State::Next(StateName::SpaceOrTabEolAfterFirst),
+                State::Next(StateName::SpaceOrTabEolAtEol),
+            );
 
-        State::Retry(space_or_tab_with_options(
-            tokenizer,
-            SpaceOrTabOptions {
-                kind: Name::SpaceOrTab,
-                min: 1,
-                max: usize::MAX,
-                content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
-                connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
-            },
-        ))
-    } else {
-        State::Nok
+            State::Retry(space_or_tab_with_options(
+                tokenizer,
+                SpaceOrTabOptions {
+                    kind: Name::SpaceOrTab,
+                    min: 1,
+                    max: usize::MAX,
+                    content: tokenizer.tokenize_state.space_or_tab_eol_content.clone(),
+                    connect: tokenizer.tokenize_state.space_or_tab_eol_connect,
+                },
+            ))
+        }
+        Some(b'\n') => State::Retry(StateName::SpaceOrTabEolAtEol),
+        _ => State::Nok,
     }
 }
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 04523b3..aca8ec2 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -12,15 +12,8 @@ use crate::event::{Content, Event, Kind, Link, Name, Point, VOID_EVENTS};
 use crate::parser::ParseState;
 use crate::resolve::{call as call_resolve, Name as ResolveName};
 use crate::state::{call, State};
-use crate::util::{constant::TAB_SIZE, edit_map::EditMap};
-use alloc::{
-    boxed::Box,
-    format,
-    string::{String, ToString},
-    vec,
-    vec::Vec,
-};
-use core::str;
+use crate::util::{char::format_byte_opt, constant::TAB_SIZE, edit_map::EditMap};
+use alloc::{boxed::Box, string::String, vec, vec::Vec};
 
 /// Containers.
 ///
@@ -725,14 +718,7 @@ fn push_impl(
                             None
                         };
 
-                    let visible = byte.map(|d| {
-                        if (b' '..=b'~').contains(&d) {
-                            str::from_utf8(&[d]).unwrap().to_string()
-                        } else {
-                            format!("0x{:x}", d)
-                        }
-                    });
-                    log::debug!("feed:    `{:?}` to {:?}", visible, name);
+                    log::debug!("feed:    {} to {:?}", format_byte_opt(byte), name);
                     tokenizer.expect(byte);
                     state = call(tokenizer, name);
                 };
diff --git a/src/util/char.rs b/src/util/char.rs
new file mode 100644
index 0000000..cfaacd5
--- /dev/null
+++ b/src/util/char.rs
@@ -0,0 +1,165 @@
+//! Deal with bytes, chars, and character kinds.
+
+use crate::util::unicode::PUNCTUATION;
+use alloc::{
+    format,
+    string::{String, ToString},
+};
+use core::str;
+
+/// Character kinds.
+#[derive(Debug, PartialEq, Eq)]
+pub enum Kind {
+    /// Whitespace.
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | **a_b_ c**.
+    ///    ^      ^    ^
+    /// ```
+    Whitespace,
+    /// Punctuation.
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | **a_b_ c**.
+    ///     ^^ ^ ^    ^
+    /// ```
+    Punctuation,
+    /// Everything else.
+    ///
+    /// ## Example
+    ///
+    /// ```markdown
+    /// > | **a_b_ c**.
+    ///       ^ ^  ^
+    /// ```
+    Other,
+}
+
+/// Get a [`char`][] right before `index` in bytes (`&[u8]`).
+///
+/// In most cases, markdown operates on ASCII bytes.
+/// In a few cases, it is unicode aware, so we need to find an actual char.
+pub fn before_index(bytes: &[u8], index: usize) -> Option<char> {
+    let start = if index < 4 { 0 } else { index - 4 };
+    String::from_utf8_lossy(&bytes[start..index]).chars().last()
+}
+
+/// Get a [`char`][] right at `index` in bytes (`&[u8]`).
+///
+/// In most cases, markdown operates on ASCII bytes.
+/// In a few cases, it is unicode aware, so we need to find an actual char.
+pub fn after_index(bytes: &[u8], index: usize) -> Option<char> {
+    let end = if index + 4 > bytes.len() {
+        bytes.len()
+    } else {
+        index + 4
+    };
+    String::from_utf8_lossy(&bytes[index..end]).chars().next()
+}
+
+/// Classify a char at `index` in bytes (`&[u8]`).
+pub fn kind_after_index(bytes: &[u8], index: usize) -> Kind {
+    if index == bytes.len() {
+        Kind::Whitespace
+    } else {
+        let byte = bytes[index];
+        if byte.is_ascii_whitespace() {
+            Kind::Whitespace
+        } else if byte.is_ascii_punctuation() {
+            Kind::Punctuation
+        } else if byte.is_ascii_alphanumeric() {
+            Kind::Other
+        } else {
+            // Otherwise: an ASCII control or a byte of a non-ASCII `char`,
+            // so decode the actual `char` and classify that.
+            classify_opt(after_index(bytes, index))
+        }
+    }
+}
+
+/// Classify whether a `char` represents whitespace, punctuation, or something
+/// else.
+///
+/// Used for attention (emphasis, strong), whose sequences can open or close
+/// based on the class of surrounding characters.
+///
+/// ## References
+///
+/// *   [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
+pub fn classify(char: char) -> Kind {
+    // Unicode whitespace.
+    if char.is_whitespace() {
+        Kind::Whitespace
+    }
+    // Unicode punctuation.
+    else if PUNCTUATION.contains(&char) {
+        Kind::Punctuation
+    }
+    // Everything else.
+    else {
+        Kind::Other
+    }
+}
+
+/// Like [`classify`], but treats eof (`None`) as whitespace.
+pub fn classify_opt(char_opt: Option<char>) -> Kind {
+    if let Some(char) = char_opt {
+        classify(char)
+    }
+    // EOF.
+    else {
+        Kind::Whitespace
+    }
+}
+
+/// Format an optional `char` (`None` means eof).
+pub fn format_opt(char: Option<char>) -> String {
+    match char {
+        None => "end of file".to_string(),
+        Some(char) => format!("character {}", format(char)),
+    }
+}
+
+/// Format an optional byte (`u8`; `None` means eof).
+pub fn format_byte_opt(byte: Option<u8>) -> String {
+    match byte {
+        None => "end of file".to_string(),
+        Some(byte) => format!("byte {}", format_byte(byte)),
+    }
+}
+
+/// Format a `char`.
+pub fn format(char: char) -> String {
+    let representation = format!("U+{:>04X}", char as u32);
+    let printable = match char {
+        '`' => Some("`` ` ``".to_string()),
+        '!'..='~' => Some(format!("`{}`", char)),
+        _ => None,
+    };
+
+    if let Some(char) = printable {
+        format!("{} ({})", char, representation)
+    } else {
+        representation
+    }
+}
+
+/// Format a byte (`u8`).
+pub fn format_byte(byte: u8) -> String {
+    let representation = format!("U+{:>04X}", byte);
+    let printable = match byte {
+        b'`' => Some("`` ` ``".to_string()),
+        b'!'..=b'~' => Some(format!("`{}`", str::from_utf8(&[byte]).unwrap())),
+        _ => None,
+    };
+
+    if let Some(char) = printable {
+        format!("{} ({})", char, representation)
+    } else {
+        representation
+    }
+}
diff --git a/src/util/classify_character.rs b/src/util/classify_character.rs
deleted file mode 100644
index 79ed46a..0000000
--- a/src/util/classify_character.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-//! Utilities to classify characters as whitespace, punctuation, or rest.
-
-use crate::util::unicode::PUNCTUATION;
-
-/// Character kinds.
-#[derive(Debug, PartialEq, Eq)]
-pub enum Kind {
-    /// Whitespace.
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | **a_b_ c**.
-    ///    ^      ^    ^
-    /// ```
-    Whitespace,
-    /// Punctuation.
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | **a_b_ c**.
-    ///     ^^ ^ ^    ^
-    /// ```
-    Punctuation,
-    /// Everything else.
-    ///
-    /// ## Example
-    ///
-    /// ```markdown
-    /// > | **a_b_ c**.
-    ///       ^ ^  ^
-    /// ```
-    Other,
-}
-
-/// Classify whether a character code represents whitespace, punctuation, or
-/// something else.
-///
-/// Used for attention (emphasis, strong), whose sequences can open or close
-/// based on the class of surrounding characters.
-///
-/// > 👉 **Note** that eof (`None`) is seen as whitespace.
-///
-/// ## References
-///
-/// *   [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
-pub fn classify(char: char) -> Kind {
-    // Unicode whitespace.
-    if char.is_whitespace() {
-        Kind::Whitespace
-    }
-    // Unicode punctuation.
-    else if PUNCTUATION.contains(&char) {
-        Kind::Punctuation
-    }
-    // Everything else.
-    else {
-        Kind::Other
-    }
-}
-
-/// Like [`classify`], but supports eof as whitespace.
-pub fn classify_opt(char_opt: Option<char>) -> Kind {
-    if let Some(char) = char_opt {
-        classify(char)
-    }
-    // EOF.
-    else {
-        Kind::Whitespace
-    }
-}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index e5823cf..2ea372c 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,6 +1,6 @@
 //! Utilities used when processing markdown.
 
-pub mod classify_character;
+pub mod char;
 pub mod constant;
 pub mod decode_character_reference;
 pub mod edit_map;
diff --git a/src/util/slice.rs b/src/util/slice.rs
index 54524c3..0734d78 100644
--- a/src/util/slice.rs
+++ b/src/util/slice.rs
@@ -1,55 +1,10 @@
 //! Deal with bytes.
 
 use crate::event::{Event, Kind, Point};
-use crate::util::{
-    classify_character::{classify_opt, Kind as CharacterKind},
-    constant::TAB_SIZE,
-};
+use crate::util::constant::TAB_SIZE;
 use alloc::string::String;
 use core::str;
 
-/// Get a [`char`][] right before `index` in bytes (`&[u8]`).
-///
-/// In most cases, markdown operates on ASCII bytes.
-/// In a few cases, it is unicode aware, so we need to find an actual char.
-pub fn char_before_index(bytes: &[u8], index: usize) -> Option<char> {
-    let start = if index < 4 { 0 } else { index - 4 };
-    String::from_utf8_lossy(&bytes[start..index]).chars().last()
-}
-
-/// Get a [`char`][] right at `index` in bytes (`&[u8]`).
-///
-/// In most cases, markdown operates on ASCII bytes.
-/// In a few cases, it is unicode aware, so we need to find an actual char.
-pub fn char_after_index(bytes: &[u8], index: usize) -> Option<char> {
-    let end = if index + 4 > bytes.len() {
-        bytes.len()
-    } else {
-        index + 4
-    };
-    String::from_utf8_lossy(&bytes[index..end]).chars().next()
-}
-
-/// Classify a byte (or `char`).
-pub fn byte_to_kind(bytes: &[u8], index: usize) -> CharacterKind {
-    if index == bytes.len() {
-        CharacterKind::Whitespace
-    } else {
-        let byte = bytes[index];
-        if byte.is_ascii_whitespace() {
-            CharacterKind::Whitespace
-        } else if byte.is_ascii_punctuation() {
-            CharacterKind::Punctuation
-        } else if byte.is_ascii_alphanumeric() {
-            CharacterKind::Other
-        } else {
-            // Otherwise: seems to be an ASCII control, so it seems to be a
-            // non-ASCII `char`.
-            classify_opt(char_after_index(bytes, index))
-        }
-    }
-}
-
 /// A range between two points.
 #[derive(Debug)]
 pub struct Position<'a> {
author	Titus Wormer <tituswormer@gmail.com>	2022-09-09 10:54:13 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-09-09 10:54:13 +0200
commit	13337d77954b4c92d1cf4592f43f01d94fce3c77 (patch)
tree	d5feef9a971c1af52e58b5c857d1dd9c9e7fedca
parent	71dbc8c0189d6b2032f3d8f21cbfffa3f8fe0f12 (diff)
download	markdown-rs-13337d77954b4c92d1cf4592f43f01d94fce3c77.tar.gz markdown-rs-13337d77954b4c92d1cf4592f43f01d94fce3c77.tar.bz2 markdown-rs-13337d77954b4c92d1cf4592f43f01d94fce3c77.zip