aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/html_flow.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 10:49:07 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 10:49:07 +0200
commit148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (patch)
tree7655ffebe0c6a917c3c391edacde03d754f2de4f /src/construct/html_flow.rs
parent6f61649ac8d08fff85a99172afbf4cd852dda2e6 (diff)
downloadmarkdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.gz
markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.bz2
markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.zip
Refactor to work on bytes (`u8`)
Diffstat (limited to '')
-rw-r--r--src/construct/html_flow.rs131
1 files changed, 65 insertions, 66 deletions
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index e2b66e5..5860c5d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -108,7 +108,7 @@ use crate::token::Token;
use crate::tokenizer::{Point, State, Tokenizer};
use crate::util::slice::{Position, Slice};
-const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
+const CDATA_SEARCH: [u8; 6] = [b'C', b'D', b'A', b'T', b'A', b'['];
/// Kind of HTML (flow).
#[derive(Debug, PartialEq)]
@@ -151,23 +151,23 @@ enum QuoteKind {
}
impl QuoteKind {
- /// Turn the kind into a [char].
- fn as_char(&self) -> char {
+ /// Turn the kind into a byte ([u8]).
+ fn as_byte(&self) -> u8 {
match self {
- QuoteKind::Double => '"',
- QuoteKind::Single => '\'',
+ QuoteKind::Double => b'"',
+ QuoteKind::Single => b'\'',
}
}
- /// Turn a [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// ## Panics
///
- /// Panics if `char` is not `"` or `'`.
- fn from_char(char: char) -> QuoteKind {
- match char {
- '"' => QuoteKind::Double,
- '\'' => QuoteKind::Single,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `"` or `'`.
+ fn from_byte(byte: u8) -> QuoteKind {
+ match byte {
+ b'"' => QuoteKind::Double,
+ b'\'' => QuoteKind::Single,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -179,8 +179,7 @@ struct Info {
kind: Kind,
/// Whether this is a start tag (`<` not followed by `/`).
start_tag: bool,
- /// Used depending on `kind` to either collect all parsed characters, or to
- /// store expected characters.
+ /// Used depending on `kind` to collect all parsed bytes.
start: Option<Point>,
/// Collected index, for various reasons.
size: usize,
@@ -225,7 +224,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
- if Some('<') == tokenizer.current {
+ if Some(b'<') == tokenizer.current {
tokenizer.enter(Token::HtmlFlowData);
tokenizer.consume();
State::Fn(Box::new(open))
@@ -256,16 +255,16 @@ fn open(tokenizer: &mut Tokenizer) -> State {
};
match tokenizer.current {
- Some('!') => {
+ Some(b'!') => {
tokenizer.consume();
State::Fn(Box::new(|t| declaration_open(t, info)))
}
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
info.start = Some(tokenizer.point.clone());
State::Fn(Box::new(|t| tag_close_start(t, info)))
}
- Some('?') => {
+ Some(b'?') => {
info.kind = Kind::Instruction;
tokenizer.consume();
// Do not form containers.
@@ -274,7 +273,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
// right now, so we do need to search for `>`, similar to declarations.
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
info.start_tag = true;
info.start = Some(tokenizer.point.clone());
tag_name(tokenizer, info)
@@ -295,18 +294,18 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
info.kind = Kind::Comment;
State::Fn(Box::new(|t| comment_open_inside(t, info)))
}
- Some('[') => {
+ Some(b'[') => {
tokenizer.consume();
info.kind = Kind::Cdata;
info.size = 0;
State::Fn(Box::new(|t| cdata_open_inside(t, info)))
}
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
info.kind = Kind::Declaration;
// Do not form containers.
@@ -325,7 +324,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
@@ -343,7 +342,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some(char) if char == CDATA_SEARCH[info.size] => {
+ Some(byte) if byte == CDATA_SEARCH[info.size] => {
info.size += 1;
tokenizer.consume();
@@ -368,7 +367,7 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| tag_name(t, info)))
}
@@ -386,11 +385,11 @@ fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- None | Some('\t' | '\n' | ' ' | '/' | '>') => {
- let slash = matches!(tokenizer.current, Some('/'));
+ None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => {
+ let slash = matches!(tokenizer.current, Some(b'/'));
let start = info.start.take().unwrap();
let name = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position {
start: &start,
end: &tokenizer.point,
@@ -428,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
}
}
}
- Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| tag_name(t, info)))
}
@@ -444,7 +443,7 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
@@ -462,7 +461,7 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_closing_tag_after(t, info)))
}
@@ -491,15 +490,15 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_end(t, info)))
}
- Some('0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name(t, info)))
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name_before(t, info)))
}
@@ -519,7 +518,7 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> Stat
/// ```
fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name(t, info)))
}
@@ -538,11 +537,11 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('=') => {
+ Some(b'=') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name_after(t, info)))
}
@@ -561,13 +560,13 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State
/// ```
fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- None | Some('<' | '=' | '>' | '`') => State::Nok,
- Some(char) if matches!(char, '"' | '\'') => {
- info.quote = Some(QuoteKind::from_char(char));
+ None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
+ Some(byte) if matches!(byte, b'"' | b'\'') => {
+ info.quote = Some(QuoteKind::from_byte(byte));
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_quoted(t, info)))
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
}
@@ -585,8 +584,8 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) ->
/// ```
fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => State::Nok,
- Some(char) if char == info.quote.as_ref().unwrap().as_char() => {
+ None | Some(b'\n') => State::Nok,
+ Some(byte) if byte == info.quote.as_ref().unwrap().as_byte() => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_quoted_after(t, info)))
}
@@ -605,7 +604,7 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// ```
fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\t' | '\n' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => {
+ None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => {
complete_attribute_name_after(tokenizer, info)
}
Some(_) => {
@@ -624,7 +623,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> S
/// ```
fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('\t' | ' ' | '/' | '>') => complete_attribute_name_before(tokenizer, info),
+ Some(b'\t' | b' ' | b'/' | b'>') => complete_attribute_name_before(tokenizer, info),
_ => State::Nok,
}
}
@@ -637,7 +636,7 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info)
/// ```
fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_after(t, info)))
}
@@ -653,12 +652,12 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
// Do not form containers.
tokenizer.concrete = true;
continuation(tokenizer, info)
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_after(t, info)))
}
@@ -674,27 +673,27 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-') if info.kind == Kind::Comment => {
+ Some(b'-') if info.kind == Kind::Comment => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_comment_inside(t, info)))
}
- Some('<') if info.kind == Kind::Raw => {
+ Some(b'<') if info.kind == Kind::Raw => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_raw_tag_open(t, info)))
}
- Some('>') if info.kind == Kind::Declaration => {
+ Some(b'>') if info.kind == Kind::Declaration => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_close(t, info)))
}
- Some('?') if info.kind == Kind::Instruction => {
+ Some(b'?') if info.kind == Kind::Instruction => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
- Some(']') if info.kind == Kind::Cdata => {
+ Some(b']') if info.kind == Kind::Cdata => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_character_data_inside(t, info)))
}
- Some('\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
+ Some(b'\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
tokenizer.exit(Token::HtmlFlowData);
tokenizer.check(blank_line_before, |ok| {
if ok {
@@ -704,7 +703,7 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
}
})(tokenizer)
}
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
continuation_start(tokenizer, info)
}
@@ -741,7 +740,7 @@ fn continuation_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
@@ -760,7 +759,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => continuation_start(tokenizer, info),
+ None | Some(b'\n') => continuation_start(tokenizer, info),
_ => {
tokenizer.enter(Token::HtmlFlowData);
continuation(tokenizer, info)
@@ -776,7 +775,7 @@ fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -792,7 +791,7 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
info.start = Some(tokenizer.point.clone());
State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
@@ -809,12 +808,12 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State
/// ```
fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
info.size = 0;
let start = info.start.take().unwrap();
let name = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position {
start: &start,
end: &tokenizer.point,
@@ -830,7 +829,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
continuation(tokenizer, info)
}
}
- Some('A'..='Z' | 'a'..='z') if info.size < HTML_RAW_SIZE_MAX => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') if info.size < HTML_RAW_SIZE_MAX => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
@@ -850,7 +849,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
/// ```
fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some(']') => {
+ Some(b']') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -874,11 +873,11 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) ->
/// ```
fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_close(t, info)))
}
- Some('-') if info.kind == Kind::Comment => {
+ Some(b'-') if info.kind == Kind::Comment => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -894,7 +893,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// ```
fn continuation_close(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
continuation_after(tokenizer)
}