Refactor to work on bytes (`u8`)

author: Titus Wormer <tituswormer@gmail.com> 2022-07-29 10:49:07 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-29 10:49:07 +0200
commit: 148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (patch)
tree: 7655ffebe0c6a917c3c391edacde03d754f2de4f /src/content
parent: 6f61649ac8d08fff85a99172afbf4cd852dda2e6 (diff)
download: markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.gz
markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.bz2
markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.zip
4 files changed, 18 insertions, 25 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index 935c4ef..828431d 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -11,6 +11,7 @@
 use crate::construct::{
     block_quote::{cont as block_quote_cont, start as block_quote},
     list::{cont as list_item_const, start as list_item},
+    partial_bom::start as bom,
 };
 use crate::content::flow::start as flow;
 use crate::parser::ParseState;
@@ -78,7 +79,7 @@ struct DocumentInfo {
 pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
     let mut tokenizer = Tokenizer::new(point, parse_state);
 
-    let state = tokenizer.push(0, parse_state.chars.len(), Box::new(before));
+    let state = tokenizer.push(0, parse_state.bytes.len(), Box::new(before));
     tokenizer.flush(state, true);
 
     let mut index = 0;
@@ -92,7 +93,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
             // don‘t need virtual spaces.
             let id = normalize_identifier(
                 &Slice::from_position(
-                    &tokenizer.parse_state.chars,
+                    tokenizer.parse_state.bytes,
                     &Position::from_exit_event(&tokenizer.events, index),
                 )
                 .serialize(),
@@ -124,15 +125,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {
 ///     ^
 /// ```
 fn before(tokenizer: &mut Tokenizer) -> State {
-    match tokenizer.current {
-        Some('\u{FEFF}') => {
-            tokenizer.enter(Token::ByteOrderMark);
-            tokenizer.consume();
-            tokenizer.exit(Token::ByteOrderMark);
-            State::Fn(Box::new(start))
-        }
-        _ => start(tokenizer),
-    }
+    tokenizer.attempt_opt(bom, start)(tokenizer)
 }
 
 /// Before document.
@@ -358,7 +351,7 @@ fn containers_after(tokenizer: &mut Tokenizer, mut info: DocumentInfo) -> State
     // Parse flow, pausing after eols.
     tokenizer.go_until(
         state,
-        |code| matches!(code, Some('\n')),
+        |code| matches!(code, Some(b'\n')),
         move |state| Box::new(move |t| flow_end(t, info, state)),
     )(tokenizer)
 }
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 09c4e2c..bf4104c 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -88,7 +88,7 @@ fn initial_before(tokenizer: &mut Tokenizer) -> State {
 fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => State::Ok,
-        Some('\n') => {
+        Some(b'\n') => {
             tokenizer.enter(Token::BlankLineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::BlankLineEnding);
@@ -112,7 +112,7 @@ fn blank_line_after(tokenizer: &mut Tokenizer) -> State {
 fn after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         None => State::Ok,
-        Some('\n') => {
+        Some(b'\n') => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume();
             tokenizer.exit(Token::LineEnding);
diff --git a/src/content/string.rs b/src/content/string.rs
index 8bc2b91..d2aec3f 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -18,7 +18,7 @@ use crate::construct::{
 };
 use crate::tokenizer::{State, Tokenizer};
 
-const MARKERS: [char; 2] = ['&', '\\'];
+const MARKERS: [u8; 2] = [b'&', b'\\'];
 
 /// Start of string.
 pub fn start(tokenizer: &mut Tokenizer) -> State {
diff --git a/src/content/text.rs b/src/content/text.rs
index ebdf888..30c98a3 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -30,16 +30,16 @@ use crate::construct::{
 };
 use crate::tokenizer::{State, Tokenizer};
 
-const MARKERS: [char; 9] = [
-    '!',  // `label_start_image`
-    '&',  // `character_reference`
-    '*',  // `attention`
-    '<',  // `autolink`, `html_text`
-    '[',  // `label_start_link`
-    '\\', // `character_escape`, `hard_break_escape`
-    ']',  // `label_end`
-    '_',  // `attention`
-    '`',  // `code_text`
+const MARKERS: [u8; 9] = [
+    b'!',  // `label_start_image`
+    b'&',  // `character_reference`
+    b'*',  // `attention`
+    b'<',  // `autolink`, `html_text`
+    b'[',  // `label_start_link`
+    b'\\', // `character_escape`, `hard_break_escape`
+    b']',  // `label_end`
+    b'_',  // `attention`
+    b'`',  // `code_text`
 ];
 
 /// Start of text.
author: Titus Wormer <tituswormer@gmail.com> 2022-07-29 10:49:07 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-07-29 10:49:07 +0200
commit: 148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (patch)
tree: 7655ffebe0c6a917c3c391edacde03d754f2de4f /src/content
parent: 6f61649ac8d08fff85a99172afbf4cd852dda2e6 (diff)
download: markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.gz markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.bz2 markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.zip