aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 10:49:07 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 10:49:07 +0200
commit148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (patch)
tree7655ffebe0c6a917c3c391edacde03d754f2de4f /src/construct
parent6f61649ac8d08fff85a99172afbf4cd852dda2e6 (diff)
downloadmarkdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.gz
markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.tar.bz2
markdown-rs-148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f.zip
Refactor to work on bytes (`u8`)
Diffstat (limited to '')
-rw-r--r--src/construct/attention.rs66
-rw-r--r--src/construct/autolink.rs40
-rw-r--r--src/construct/blank_line.rs2
-rw-r--r--src/construct/block_quote.rs6
-rw-r--r--src/construct/character_escape.rs4
-rw-r--r--src/construct/character_reference.rs27
-rw-r--r--src/construct/code_fenced.rs56
-rw-r--r--src/construct/code_indented.rs8
-rw-r--r--src/construct/code_text.rs14
-rw-r--r--src/construct/definition.rs8
-rw-r--r--src/construct/hard_break_escape.rs4
-rw-r--r--src/construct/heading_atx.rs16
-rw-r--r--src/construct/heading_setext.rs30
-rw-r--r--src/construct/html_flow.rs131
-rw-r--r--src/construct/html_text.rs108
-rw-r--r--src/construct/label_end.rs24
-rw-r--r--src/construct/label_start_image.rs4
-rw-r--r--src/construct/label_start_link.rs2
-rw-r--r--src/construct/list.rs45
-rw-r--r--src/construct/mod.rs1
-rw-r--r--src/construct/paragraph.rs4
-rw-r--r--src/construct/partial_bom.rs54
-rw-r--r--src/construct/partial_data.rs16
-rw-r--r--src/construct/partial_destination.rs28
-rw-r--r--src/construct/partial_label.rs22
-rw-r--r--src/construct/partial_non_lazy_continuation.rs2
-rw-r--r--src/construct/partial_space_or_tab.rs12
-rw-r--r--src/construct/partial_title.rs44
-rw-r--r--src/construct/partial_whitespace.rs22
-rw-r--r--src/construct/thematic_break.rs36
30 files changed, 450 insertions, 386 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 65c2f6f..b042645 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -110,23 +110,23 @@ enum MarkerKind {
}
impl MarkerKind {
- /// Turn the kind into a [char].
- fn as_char(&self) -> char {
+ /// Turn the kind into a byte ([u8]).
+ fn as_byte(&self) -> u8 {
match self {
- MarkerKind::Asterisk => '*',
- MarkerKind::Underscore => '_',
+ MarkerKind::Asterisk => b'*',
+ MarkerKind::Underscore => b'_',
}
}
- /// Turn [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// ## Panics
///
- /// Panics if `char` is not `*` or `_`.
- fn from_char(char: char) -> MarkerKind {
- match char {
- '*' => MarkerKind::Asterisk,
- '_' => MarkerKind::Underscore,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `*` or `_`.
+ fn from_byte(byte: u8) -> MarkerKind {
+ match byte {
+ b'*' => MarkerKind::Asterisk,
+ b'_' => MarkerKind::Underscore,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -160,9 +160,9 @@ struct Sequence {
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(char) if tokenizer.parse_state.constructs.attention && matches!(char, '*' | '_') => {
+ Some(byte) if tokenizer.parse_state.constructs.attention && matches!(byte, b'*' | b'_') => {
tokenizer.enter(Token::AttentionSequence);
- inside(tokenizer, MarkerKind::from_char(char))
+ inside(tokenizer, MarkerKind::from_byte(byte))
}
_ => State::Nok,
}
@@ -175,7 +175,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^^
/// ```
fn inside(tokenizer: &mut Tokenizer, marker: MarkerKind) -> State {
- if tokenizer.current == Some(marker.as_char()) {
+ if tokenizer.current == Some(marker.as_byte()) {
tokenizer.consume();
State::Fn(Box::new(move |t| inside(t, marker)))
} else {
@@ -188,7 +188,6 @@ fn inside(tokenizer: &mut Tokenizer, marker: MarkerKind) -> State {
/// Resolve attention sequences.
#[allow(clippy::too_many_lines)]
fn resolve_attention(tokenizer: &mut Tokenizer) {
- let chars = &tokenizer.parse_state.chars;
let mut start = 0;
let mut balance = 0;
let mut sequences = vec![];
@@ -203,21 +202,34 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
if enter.token_type == Token::AttentionSequence {
let end = start + 1;
let exit = &tokenizer.events[end];
- let marker =
- MarkerKind::from_char(Slice::from_point(chars, &enter.point).head().unwrap());
+
+ let before_end = enter.point.index;
+ let before_start = if before_end < 4 { 0 } else { before_end - 4 };
+ let string_before =
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]);
+ let char_before = string_before.chars().last();
+
+ let after_start = exit.point.index;
+ let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
+ tokenizer.parse_state.bytes.len()
+ } else {
+ after_start + 4
+ };
+ let string_after =
+ String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]);
+ let char_after = string_after.chars().next();
+
+ let marker = MarkerKind::from_byte(
+ Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
+ .head()
+ .unwrap(),
+ );
let before = classify_character(if enter.point.index > 0 {
- Slice::from_point(
- chars,
- &Point {
- index: enter.point.index - 1,
- ..enter.point
- },
- )
- .tail()
+ char_before
} else {
None
});
- let after = classify_character(Slice::from_point(chars, &exit.point).tail());
+ let after = classify_character(char_after);
let open = after == GroupKind::Other
|| (after == GroupKind::Punctuation && before != GroupKind::Other);
// To do: GFM strikethrough?
@@ -490,7 +502,7 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
/// * [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
fn classify_character(char: Option<char>) -> GroupKind {
match char {
- // Custom characters.
+ // EOF.
None => GroupKind::Whitespace,
// Unicode whitespace.
Some(char) if char.is_whitespace() => GroupKind::Whitespace,
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 399570b..b843af8 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -115,7 +115,7 @@ use crate::tokenizer::{State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('<') if tokenizer.parse_state.constructs.autolink => {
+ Some(b'<') if tokenizer.parse_state.constructs.autolink => {
tokenizer.enter(Token::Autolink);
tokenizer.enter(Token::AutolinkMarker);
tokenizer.consume();
@@ -137,16 +137,16 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(char) if char.is_ascii_alphabetic() => {
+ Some(byte) if byte.is_ascii_alphabetic() => {
tokenizer.consume();
State::Fn(Box::new(scheme_or_email_atext))
}
- Some(char) if is_ascii_atext(char) => email_atext(tokenizer),
+ Some(byte) if is_ascii_atext(byte) => email_atext(tokenizer),
_ => State::Nok,
}
}
-/// After the first character of the protocol or email name.
+/// After the first byte of the protocol or email name.
///
/// ```markdown
/// > | a<https://example.com>b
@@ -156,7 +156,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
scheme_inside_or_email_atext(tokenizer, 1)
}
_ => email_atext(tokenizer),
@@ -173,11 +173,11 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
/// ```
fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some(':') => {
+ Some(b':') => {
tokenizer.consume();
State::Fn(Box::new(url_inside))
}
- Some('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z')
+ Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
if size < AUTOLINK_SCHEME_SIZE_MAX =>
{
tokenizer.consume();
@@ -195,12 +195,12 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State
/// ```
fn url_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.exit(Token::AutolinkProtocol);
end(tokenizer)
}
- Some(char) if char.is_ascii_control() => State::Nok,
- None | Some(' ') => State::Nok,
+ Some(byte) if byte.is_ascii_control() => State::Nok,
+ None | Some(b' ') => State::Nok,
Some(_) => {
tokenizer.consume();
State::Fn(Box::new(url_inside))
@@ -216,11 +216,11 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
/// ```
fn email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('@') => {
+ Some(b'@') => {
tokenizer.consume();
State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
}
- Some(char) if is_ascii_atext(char) => {
+ Some(byte) if is_ascii_atext(byte) => {
tokenizer.consume();
State::Fn(Box::new(email_atext))
}
@@ -236,7 +236,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
/// ```
fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some(char) if char.is_ascii_alphanumeric() => email_value(tokenizer, size),
+ Some(byte) if byte.is_ascii_alphanumeric() => email_value(tokenizer, size),
_ => State::Nok,
}
}
@@ -249,11 +249,11 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some('.') => {
+ Some(b'.') => {
tokenizer.consume();
State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
}
- Some('>') => {
+ Some(b'>') => {
let index = tokenizer.events.len();
tokenizer.exit(Token::AutolinkProtocol);
// Change the token type.
@@ -275,11 +275,11 @@ fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some('-') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ Some(b'-') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |t| email_value(t, size + 1)))
}
- Some(char) if char.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ Some(byte) if byte.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |t| email_label(t, size + 1)))
}
@@ -297,7 +297,7 @@ fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.enter(Token::AutolinkMarker);
tokenizer.consume();
tokenizer.exit(Token::AutolinkMarker);
@@ -324,6 +324,6 @@ fn end(tokenizer: &mut Tokenizer) -> State {
/// IETF.
///
/// [`is_ascii_alphanumeric`]: char::is_ascii_alphanumeric
-fn is_ascii_atext(x: char) -> bool {
- matches!(x, '#'..='\'' | '*' | '+' | '-'..='9' | '=' | '?' | 'A'..='Z' | '^'..='~')
+fn is_ascii_atext(byte: u8) -> bool {
+ matches!(byte, b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~')
}
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 6780f40..f397a48 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => State::Ok,
+ None | Some(b'\n') => State::Ok,
_ => State::Nok,
}
}
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 49a0ea0..7e4753d 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -65,7 +65,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.enter(Token::BlockQuote);
cont_before(tokenizer)
}
@@ -98,7 +98,7 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
/// ```
fn cont_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.enter(Token::BlockQuotePrefix);
tokenizer.enter(Token::BlockQuoteMarker);
tokenizer.consume();
@@ -118,7 +118,7 @@ fn cont_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn cont_after(tokenizer: &mut Tokenizer) -> State {
- if let Some('\t' | ' ') = tokenizer.current {
+ if let Some(b'\t' | b' ') = tokenizer.current {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index e9263af..02e8b62 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -44,7 +44,7 @@ use crate::tokenizer::{State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\\') if tokenizer.parse_state.constructs.character_escape => {
+ Some(b'\\') if tokenizer.parse_state.constructs.character_escape => {
tokenizer.enter(Token::CharacterEscape);
tokenizer.enter(Token::CharacterEscapeMarker);
tokenizer.consume();
@@ -63,7 +63,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(char) if char.is_ascii_punctuation() => {
+ Some(byte) if byte.is_ascii_punctuation() => {
tokenizer.enter(Token::CharacterEscapeValue);
tokenizer.consume();
tokenizer.exit(Token::CharacterEscapeValue);
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 59043d1..90763c1 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -106,15 +106,15 @@ impl Kind {
}
}
- /// Check if a char is allowed.
- fn allowed(&self, char: char) -> bool {
+ /// Check if a byte ([`u8`]) is allowed.
+ fn allowed(&self, byte: u8) -> bool {
let check = match self {
- Kind::Hexadecimal => char::is_ascii_hexdigit,
- Kind::Decimal => char::is_ascii_digit,
- Kind::Named => char::is_ascii_alphanumeric,
+ Kind::Hexadecimal => u8::is_ascii_hexdigit,
+ Kind::Decimal => u8::is_ascii_digit,
+ Kind::Named => u8::is_ascii_alphanumeric,
};
- check(&char)
+ check(&byte)
}
}
@@ -141,7 +141,7 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('&') if tokenizer.parse_state.constructs.character_reference => {
+ Some(b'&') if tokenizer.parse_state.constructs.character_reference => {
tokenizer.enter(Token::CharacterReference);
tokenizer.enter(Token::CharacterReferenceMarker);
tokenizer.consume();
@@ -164,7 +164,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
- if let Some('#') = tokenizer.current {
+ if let Some(b'#') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerNumeric);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerNumeric);
@@ -192,7 +192,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn numeric(tokenizer: &mut Tokenizer) -> State {
- if let Some('x' | 'X') = tokenizer.current {
+ if let Some(b'x' | b'X') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal);
@@ -229,10 +229,11 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
/// ```
fn value(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some(';') if info.size > 0 => {
+ Some(b';') if info.size > 0 => {
if Kind::Named == info.kind {
+ // To do: fix slice.
let value = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position {
start: &info.start,
end: &tokenizer.point,
@@ -252,8 +253,8 @@ fn value(tokenizer: &mut Tokenizer, mut info: Info) -> State {
tokenizer.exit(Token::CharacterReference);
State::Ok
}
- Some(char) => {
- if info.size < info.kind.max() && info.kind.allowed(char) {
+ Some(byte) => {
+ if info.size < info.kind.max() && info.kind.allowed(byte) {
info.size += 1;
tokenizer.consume();
State::Fn(Box::new(|t| value(t, info)))
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 98fa54f..21e9259 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -136,23 +136,23 @@ pub enum Kind {
}
impl Kind {
- /// Turn the kind into a [char].
- fn as_char(&self) -> char {
+ /// Turn the kind into a byte ([u8]).
+ fn as_byte(&self) -> u8 {
match self {
- Kind::GraveAccent => '`',
- Kind::Tilde => '~',
+ Kind::GraveAccent => b'`',
+ Kind::Tilde => b'~',
}
}
- /// Turn a [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// ## Panics
///
- /// Panics if `char` is not `~` or `` ` ``.
- fn from_char(char: char) -> Kind {
- match char {
- '`' => Kind::GraveAccent,
- '~' => Kind::Tilde,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `~` or `` ` ``.
+ fn from_byte(byte: u8) -> Kind {
+ match byte {
+ b'`' => Kind::GraveAccent,
+ b'~' => Kind::Tilde,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -207,7 +207,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
if let Some(event) = tail {
if event.token_type == Token::SpaceOrTab {
prefix = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position::from_exit_event(&tokenizer.events, tokenizer.events.len() - 1),
)
.size();
@@ -215,14 +215,14 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
}
match tokenizer.current {
- Some(char) if matches!(char, '`' | '~') => {
+ Some(byte) if matches!(byte, b'`' | b'~') => {
tokenizer.enter(Token::CodeFencedFenceSequence);
sequence_open(
tokenizer,
Info {
prefix,
size: 0,
- kind: Kind::from_char(char),
+ kind: Kind::from_byte(byte),
},
)
}
@@ -240,7 +240,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.consume();
State::Fn(Box::new(|t| {
info.size += 1;
@@ -265,7 +265,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
@@ -289,7 +289,7 @@ fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
tokenizer.exit(Token::CodeFencedFence);
@@ -297,12 +297,12 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
tokenizer.concrete = true;
at_break(tokenizer, info)
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
tokenizer.attempt_opt(space_or_tab(), |t| meta_before(t, info))(tokenizer)
}
- Some('`') if info.kind == Kind::GraveAccent => State::Nok,
+ Some(b'`') if info.kind == Kind::GraveAccent => State::Nok,
Some(_) => {
tokenizer.consume();
State::Fn(Box::new(|t| info_inside(t, info)))
@@ -320,7 +320,7 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
@@ -344,7 +344,7 @@ fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceMeta);
tokenizer.exit(Token::CodeFencedFence);
@@ -352,7 +352,7 @@ fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
tokenizer.concrete = true;
at_break(tokenizer, info)
}
- Some('`') if info.kind == Kind::GraveAccent => State::Nok,
+ Some(b'`') if info.kind == Kind::GraveAccent => State::Nok,
_ => {
tokenizer.consume();
State::Fn(Box::new(|t| meta(t, info)))
@@ -413,7 +413,7 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn close_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
@@ -452,7 +452,7 @@ fn close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.enter(Token::CodeFencedFenceSequence);
close_sequence(tokenizer, info, 0)
}
@@ -470,7 +470,7 @@ fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn close_sequence(tokenizer: &mut Tokenizer, info: Info, size: usize) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.consume();
State::Fn(Box::new(move |t| close_sequence(t, info, size + 1)))
}
@@ -492,7 +492,7 @@ fn close_sequence(tokenizer: &mut Tokenizer, info: Info, size: usize) -> State {
/// ```
fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::CodeFencedFence);
State::Ok
}
@@ -538,7 +538,7 @@ fn content_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => at_break(tokenizer, info),
+ None | Some(b'\n') => at_break(tokenizer, info),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
content_continue(tokenizer, info)
@@ -556,7 +556,7 @@ fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn content_continue(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::CodeFlowChunk);
at_break(tokenizer, info)
}
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index bb1615c..4a3a9f6 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -79,7 +79,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => after(tokenizer),
- Some('\n') => tokenizer.attempt(further_start, |ok| {
+ Some(b'\n') => tokenizer.attempt(further_start, |ok| {
Box::new(if ok { at_break } else { after })
})(tokenizer),
_ => {
@@ -97,7 +97,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ```
fn content(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::CodeFlowChunk);
at_break(tokenizer)
}
@@ -133,7 +133,7 @@ fn further_start(tokenizer: &mut Tokenizer) -> State {
State::Nok
} else {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
@@ -177,7 +177,7 @@ fn further_begin(tokenizer: &mut Tokenizer) -> State {
/// ```
fn further_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\n') => further_start(tokenizer),
+ Some(b'\n') => further_start(tokenizer),
_ => State::Nok,
}
}
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 150f63b..b36a208 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -98,9 +98,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
let len = tokenizer.events.len();
match tokenizer.current {
- Some('`')
+ Some(b'`')
if tokenizer.parse_state.constructs.code_text
- && (tokenizer.previous != Some('`')
+ && (tokenizer.previous != Some(b'`')
|| (len > 0
&& tokenizer.events[len - 1].token_type == Token::CharacterEscape)) =>
{
@@ -119,7 +119,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
- if let Some('`') = tokenizer.current {
+ if let Some(b'`') = tokenizer.current {
tokenizer.consume();
State::Fn(Box::new(move |t| sequence_open(t, size + 1)))
} else {
@@ -137,13 +137,13 @@ fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
match tokenizer.current {
None => State::Nok,
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
State::Fn(Box::new(move |t| between(t, size_open)))
}
- Some('`') => {
+ Some(b'`') => {
tokenizer.enter(Token::CodeTextSequence);
sequence_close(tokenizer, size_open, 0)
}
@@ -162,7 +162,7 @@ fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
/// ```
fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
match tokenizer.current {
- None | Some('\n' | '`') => {
+ None | Some(b'\n' | b'`') => {
tokenizer.exit(Token::CodeTextData);
between(tokenizer, size_open)
}
@@ -181,7 +181,7 @@ fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
/// ```
fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> State {
match tokenizer.current {
- Some('`') => {
+ Some(b'`') => {
tokenizer.consume();
State::Fn(Box::new(move |t| sequence_close(t, size_open, size + 1)))
}
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index f2b5ae0..14755c9 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -137,7 +137,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('[') => tokenizer.go(
+ Some(b'[') => tokenizer.go(
|t| {
label(
t,
@@ -162,7 +162,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn label_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(':') => {
+ Some(b':') => {
tokenizer.enter(Token::DefinitionMarker);
tokenizer.consume();
tokenizer.exit(Token::DefinitionMarker);
@@ -231,7 +231,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::Definition);
// You’d be interrupting.
tokenizer.interrupt = true;
@@ -294,7 +294,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn title_after_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => State::Ok,
+ None | Some(b'\n') => State::Ok,
_ => State::Nok,
}
}
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 0585c4c..cdbc192 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -51,7 +51,7 @@ use crate::tokenizer::{State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\\') if tokenizer.parse_state.constructs.hard_break_escape => {
+ Some(b'\\') if tokenizer.parse_state.constructs.hard_break_escape => {
tokenizer.enter(Token::HardBreakEscape);
tokenizer.consume();
State::Fn(Box::new(inside))
@@ -69,7 +69,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.exit(Token::HardBreakEscape);
State::Ok
}
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 7a7cf2e..9a73b77 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -87,7 +87,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
- if Some('#') == tokenizer.current {
+ if Some(b'#') == tokenizer.current {
tokenizer.enter(Token::HeadingAtxSequence);
sequence_open(tokenizer, 0)
} else {
@@ -103,11 +103,11 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn sequence_open(tokenizer: &mut Tokenizer, rank: usize) -> State {
match tokenizer.current {
- None | Some('\n') if rank > 0 => {
+ None | Some(b'\n') if rank > 0 => {
tokenizer.exit(Token::HeadingAtxSequence);
at_break(tokenizer)
}
- Some('#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
+ Some(b'#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |tokenizer| {
sequence_open(tokenizer, rank + 1)
@@ -129,15 +129,15 @@ fn sequence_open(tokenizer: &mut Tokenizer, rank: usize) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::HeadingAtx);
tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
// Feel free to interrupt.
tokenizer.interrupt = false;
State::Ok
}
- Some('\t' | ' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
- Some('#') => {
+ Some(b'\t' | b' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
+ Some(b'#') => {
tokenizer.enter(Token::HeadingAtxSequence);
further_sequence(tokenizer)
}
@@ -157,7 +157,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn further_sequence(tokenizer: &mut Tokenizer) -> State {
- if let Some('#') = tokenizer.current {
+ if let Some(b'#') = tokenizer.current {
tokenizer.consume();
State::Fn(Box::new(further_sequence))
} else {
@@ -175,7 +175,7 @@ fn further_sequence(tokenizer: &mut Tokenizer) -> State {
fn data(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
- None | Some('\t' | '\n' | ' ') => {
+ None | Some(b'\t' | b'\n' | b' ') => {
tokenizer.exit(Token::Data);
at_break(tokenizer)
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index f9dd3f7..2a4adbf 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -88,23 +88,23 @@ pub enum Kind {
}
impl Kind {
- /// Turn the kind into a [char].
- fn as_char(&self) -> char {
+ /// Turn the kind into a byte ([u8]).
+ fn as_byte(&self) -> u8 {
match self {
- Kind::Dash => '-',
- Kind::EqualsTo => '=',
+ Kind::Dash => b'-',
+ Kind::EqualsTo => b'=',
}
}
- /// Turn a [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// ## Panics
///
- /// Panics if `char` is not `-` or `=`.
- fn from_char(char: char) -> Kind {
- match char {
- '-' => Kind::Dash,
- '=' => Kind::EqualsTo,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `-` or `=`.
+ fn from_byte(byte: u8) -> Kind {
+ match byte {
+ b'-' => Kind::Dash,
+ b'=' => Kind::EqualsTo,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -148,9 +148,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(char) if matches!(char, '-' | '=') => {
+ Some(byte) if matches!(byte, b'-' | b'=') => {
tokenizer.enter(Token::HeadingSetextUnderline);
- inside(tokenizer, Kind::from_char(char))
+ inside(tokenizer, Kind::from_byte(byte))
}
_ => State::Nok,
}
@@ -165,7 +165,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer, kind: Kind) -> State {
match tokenizer.current {
- Some(char) if char == kind.as_char() => {
+ Some(byte) if byte == kind.as_byte() => {
tokenizer.consume();
State::Fn(Box::new(move |t| inside(t, kind)))
}
@@ -185,7 +185,7 @@ fn inside(tokenizer: &mut Tokenizer, kind: Kind) -> State {
/// ```
fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
// Feel free to interrupt.
tokenizer.interrupt = false;
tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index e2b66e5..5860c5d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -108,7 +108,7 @@ use crate::token::Token;
use crate::tokenizer::{Point, State, Tokenizer};
use crate::util::slice::{Position, Slice};
-const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
+const CDATA_SEARCH: [u8; 6] = [b'C', b'D', b'A', b'T', b'A', b'['];
/// Kind of HTML (flow).
#[derive(Debug, PartialEq)]
@@ -151,23 +151,23 @@ enum QuoteKind {
}
impl QuoteKind {
- /// Turn the kind into a [char].
- fn as_char(&self) -> char {
+ /// Turn the kind into a byte ([u8]).
+ fn as_byte(&self) -> u8 {
match self {
- QuoteKind::Double => '"',
- QuoteKind::Single => '\'',
+ QuoteKind::Double => b'"',
+ QuoteKind::Single => b'\'',
}
}
- /// Turn a [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// ## Panics
///
- /// Panics if `char` is not `"` or `'`.
- fn from_char(char: char) -> QuoteKind {
- match char {
- '"' => QuoteKind::Double,
- '\'' => QuoteKind::Single,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `"` or `'`.
+ fn from_byte(byte: u8) -> QuoteKind {
+ match byte {
+ b'"' => QuoteKind::Double,
+ b'\'' => QuoteKind::Single,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -179,8 +179,7 @@ struct Info {
kind: Kind,
/// Whether this is a start tag (`<` not followed by `/`).
start_tag: bool,
- /// Used depending on `kind` to either collect all parsed characters, or to
- /// store expected characters.
+ /// Used depending on `kind` to collect all parsed bytes.
start: Option<Point>,
/// Collected index, for various reasons.
size: usize,
@@ -225,7 +224,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
- if Some('<') == tokenizer.current {
+ if Some(b'<') == tokenizer.current {
tokenizer.enter(Token::HtmlFlowData);
tokenizer.consume();
State::Fn(Box::new(open))
@@ -256,16 +255,16 @@ fn open(tokenizer: &mut Tokenizer) -> State {
};
match tokenizer.current {
- Some('!') => {
+ Some(b'!') => {
tokenizer.consume();
State::Fn(Box::new(|t| declaration_open(t, info)))
}
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
info.start = Some(tokenizer.point.clone());
State::Fn(Box::new(|t| tag_close_start(t, info)))
}
- Some('?') => {
+ Some(b'?') => {
info.kind = Kind::Instruction;
tokenizer.consume();
// Do not form containers.
@@ -274,7 +273,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
// right now, so we do need to search for `>`, similar to declarations.
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
info.start_tag = true;
info.start = Some(tokenizer.point.clone());
tag_name(tokenizer, info)
@@ -295,18 +294,18 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
info.kind = Kind::Comment;
State::Fn(Box::new(|t| comment_open_inside(t, info)))
}
- Some('[') => {
+ Some(b'[') => {
tokenizer.consume();
info.kind = Kind::Cdata;
info.size = 0;
State::Fn(Box::new(|t| cdata_open_inside(t, info)))
}
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
info.kind = Kind::Declaration;
// Do not form containers.
@@ -325,7 +324,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
@@ -343,7 +342,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some(char) if char == CDATA_SEARCH[info.size] => {
+ Some(byte) if byte == CDATA_SEARCH[info.size] => {
info.size += 1;
tokenizer.consume();
@@ -368,7 +367,7 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| tag_name(t, info)))
}
@@ -386,11 +385,11 @@ fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- None | Some('\t' | '\n' | ' ' | '/' | '>') => {
- let slash = matches!(tokenizer.current, Some('/'));
+ None | Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => {
+ let slash = matches!(tokenizer.current, Some(b'/'));
let start = info.start.take().unwrap();
let name = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position {
start: &start,
end: &tokenizer.point,
@@ -428,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
}
}
}
- Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| tag_name(t, info)))
}
@@ -444,7 +443,7 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
@@ -462,7 +461,7 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_closing_tag_after(t, info)))
}
@@ -491,15 +490,15 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_end(t, info)))
}
- Some('0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some(b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name(t, info)))
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name_before(t, info)))
}
@@ -519,7 +518,7 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> Stat
/// ```
fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name(t, info)))
}
@@ -538,11 +537,11 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('=') => {
+ Some(b'=') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name_after(t, info)))
}
@@ -561,13 +560,13 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State
/// ```
fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- None | Some('<' | '=' | '>' | '`') => State::Nok,
- Some(char) if matches!(char, '"' | '\'') => {
- info.quote = Some(QuoteKind::from_char(char));
+ None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
+ Some(byte) if matches!(byte, b'"' | b'\'') => {
+ info.quote = Some(QuoteKind::from_byte(byte));
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_quoted(t, info)))
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
}
@@ -585,8 +584,8 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) ->
/// ```
fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => State::Nok,
- Some(char) if char == info.quote.as_ref().unwrap().as_char() => {
+ None | Some(b'\n') => State::Nok,
+ Some(byte) if byte == info.quote.as_ref().unwrap().as_byte() => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_quoted_after(t, info)))
}
@@ -605,7 +604,7 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// ```
fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\t' | '\n' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => {
+ None | Some(b'\t' | b'\n' | b' ' | b'"' | b'\'' | b'/' | b'<' | b'=' | b'>' | b'`') => {
complete_attribute_name_after(tokenizer, info)
}
Some(_) => {
@@ -624,7 +623,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> S
/// ```
fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('\t' | ' ' | '/' | '>') => complete_attribute_name_before(tokenizer, info),
+ Some(b'\t' | b' ' | b'/' | b'>') => complete_attribute_name_before(tokenizer, info),
_ => State::Nok,
}
}
@@ -637,7 +636,7 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info)
/// ```
fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_after(t, info)))
}
@@ -653,12 +652,12 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
// Do not form containers.
tokenizer.concrete = true;
continuation(tokenizer, info)
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_after(t, info)))
}
@@ -674,27 +673,27 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-') if info.kind == Kind::Comment => {
+ Some(b'-') if info.kind == Kind::Comment => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_comment_inside(t, info)))
}
- Some('<') if info.kind == Kind::Raw => {
+ Some(b'<') if info.kind == Kind::Raw => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_raw_tag_open(t, info)))
}
- Some('>') if info.kind == Kind::Declaration => {
+ Some(b'>') if info.kind == Kind::Declaration => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_close(t, info)))
}
- Some('?') if info.kind == Kind::Instruction => {
+ Some(b'?') if info.kind == Kind::Instruction => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
- Some(']') if info.kind == Kind::Cdata => {
+ Some(b']') if info.kind == Kind::Cdata => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_character_data_inside(t, info)))
}
- Some('\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
+ Some(b'\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
tokenizer.exit(Token::HtmlFlowData);
tokenizer.check(blank_line_before, |ok| {
if ok {
@@ -704,7 +703,7 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
}
})(tokenizer)
}
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
continuation_start(tokenizer, info)
}
@@ -741,7 +740,7 @@ fn continuation_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
@@ -760,7 +759,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => continuation_start(tokenizer, info),
+ None | Some(b'\n') => continuation_start(tokenizer, info),
_ => {
tokenizer.enter(Token::HtmlFlowData);
continuation(tokenizer, info)
@@ -776,7 +775,7 @@ fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -792,7 +791,7 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
info.start = Some(tokenizer.point.clone());
State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
@@ -809,12 +808,12 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State
/// ```
fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
info.size = 0;
let start = info.start.take().unwrap();
let name = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position {
start: &start,
end: &tokenizer.point,
@@ -830,7 +829,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
continuation(tokenizer, info)
}
}
- Some('A'..='Z' | 'a'..='z') if info.size < HTML_RAW_SIZE_MAX => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') if info.size < HTML_RAW_SIZE_MAX => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
@@ -850,7 +849,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
/// ```
fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some(']') => {
+ Some(b']') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -874,11 +873,11 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) ->
/// ```
fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_close(t, info)))
}
- Some('-') if info.kind == Kind::Comment => {
+ Some(b'-') if info.kind == Kind::Comment => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -894,7 +893,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// ```
fn continuation_close(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::HtmlFlowData);
continuation_after(tokenizer)
}
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index b1ad113..f10a476 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -58,7 +58,7 @@ use crate::construct::partial_space_or_tab::space_or_tab;
use crate::token::Token;
use crate::tokenizer::{State, StateFn, Tokenizer};
-const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
+const CDATA_SEARCH: [u8; 6] = [b'C', b'D', b'A', b'T', b'A', b'['];
/// Start of HTML (text)
///
@@ -67,7 +67,7 @@ const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Some('<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
+ if Some(b'<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
tokenizer.enter(Token::HtmlText);
tokenizer.enter(Token::HtmlTextData);
tokenizer.consume();
@@ -89,19 +89,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('!') => {
+ Some(b'!') => {
tokenizer.consume();
State::Fn(Box::new(declaration_open))
}
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
State::Fn(Box::new(tag_close_start))
}
- Some('?') => {
+ Some(b'?') => {
tokenizer.consume();
State::Fn(Box::new(instruction))
}
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open))
}
@@ -121,15 +121,15 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn declaration_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(comment_open_inside))
}
- Some('[') => {
+ Some(b'[') => {
tokenizer.consume();
State::Fn(Box::new(|t| cdata_open_inside(t, 0)))
}
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(declaration))
}
@@ -145,7 +145,7 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(comment_start))
}
@@ -168,8 +168,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// [html_flow]: crate::construct::html_flow
fn comment_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('>') => State::Nok,
- Some('-') => {
+ None | Some(b'>') => State::Nok,
+ Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(comment_start_dash))
}
@@ -192,7 +192,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State {
/// [html_flow]: crate::construct::html_flow
fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('>') => State::Nok,
+ None | Some(b'>') => State::Nok,
_ => comment(tokenizer),
}
}
@@ -206,8 +206,8 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
fn comment(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
- Some('\n') => at_line_ending(tokenizer, Box::new(comment)),
- Some('-') => {
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(comment)),
+ Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(comment_close))
}
@@ -226,7 +226,7 @@ fn comment(tokenizer: &mut Tokenizer) -> State {
/// ```
fn comment_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('-') => {
+ Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(end))
}
@@ -242,7 +242,7 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn cdata_open_inside(tokenizer: &mut Tokenizer, index: usize) -> State {
match tokenizer.current {
- Some(char) if char == CDATA_SEARCH[index] => {
+ Some(byte) if byte == CDATA_SEARCH[index] => {
tokenizer.consume();
if index + 1 == CDATA_SEARCH.len() {
@@ -264,8 +264,8 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, index: usize) -> State {
fn cdata(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
- Some('\n') => at_line_ending(tokenizer, Box::new(cdata)),
- Some(']') => {
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(cdata)),
+ Some(b']') => {
tokenizer.consume();
State::Fn(Box::new(cdata_close))
}
@@ -284,7 +284,7 @@ fn cdata(tokenizer: &mut Tokenizer) -> State {
/// ```
fn cdata_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(']') => {
+ Some(b']') => {
tokenizer.consume();
State::Fn(Box::new(cdata_end))
}
@@ -300,8 +300,8 @@ fn cdata_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn cdata_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('>') => end(tokenizer),
- Some(']') => cdata_close(tokenizer),
+ Some(b'>') => end(tokenizer),
+ Some(b']') => cdata_close(tokenizer),
_ => cdata(tokenizer),
}
}
@@ -314,8 +314,8 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn declaration(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('>') => end(tokenizer),
- Some('\n') => at_line_ending(tokenizer, Box::new(declaration)),
+ None | Some(b'>') => end(tokenizer),
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(declaration)),
_ => {
tokenizer.consume();
State::Fn(Box::new(declaration))
@@ -332,8 +332,8 @@ fn declaration(tokenizer: &mut Tokenizer) -> State {
fn instruction(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
None => State::Nok,
- Some('\n') => at_line_ending(tokenizer, Box::new(instruction)),
- Some('?') => {
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(instruction)),
+ Some(b'?') => {
tokenizer.consume();
State::Fn(Box::new(instruction_close))
}
@@ -352,7 +352,7 @@ fn instruction(tokenizer: &mut Tokenizer) -> State {
/// ```
fn instruction_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('>') => end(tokenizer),
+ Some(b'>') => end(tokenizer),
_ => instruction(tokenizer),
}
}
@@ -365,7 +365,7 @@ fn instruction_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('A'..='Z' | 'a'..='z') => {
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(tag_close))
}
@@ -381,7 +381,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(tag_close))
}
@@ -397,8 +397,8 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\n') => at_line_ending(tokenizer, Box::new(tag_close_between)),
- Some('\t' | ' ') => {
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_close_between)),
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_close_between))
}
@@ -414,11 +414,11 @@ fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open))
}
- Some('\t' | '\n' | ' ' | '/' | '>') => tag_open_between(tokenizer),
+ Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
_ => State::Nok,
}
}
@@ -431,16 +431,16 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_between)),
- Some('\t' | ' ') => {
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_between)),
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_between))
}
- Some('/') => {
+ Some(b'/') => {
tokenizer.consume();
State::Fn(Box::new(end))
}
- Some(':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some(b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_name))
}
@@ -456,7 +456,7 @@ fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some(b'-' | b'.' | b'0'..=b'9' | b':' | b'A'..=b'Z' | b'_' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_name))
}
@@ -473,12 +473,12 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_name_after)),
- Some('\t' | ' ') => {
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_name_after)),
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_name_after))
}
- Some('=') => {
+ Some(b'=') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_before))
}
@@ -495,15 +495,15 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('<' | '=' | '>' | '`') => State::Nok,
- Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_value_before)),
- Some('\t' | ' ') => {
+ None | Some(b'<' | b'=' | b'>' | b'`') => State::Nok,
+ Some(b'\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_value_before)),
+ Some(b'\t' | b' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_before))
}
- Some(char) if char == '"' || char == '\'' => {
+ Some(byte) if byte == b'"' || byte == b'\'' => {
tokenizer.consume();
- State::Fn(Box::new(move |t| tag_open_attribute_value_quoted(t, char)))
+ State::Fn(Box::new(move |t| tag_open_attribute_value_quoted(t, byte)))
}
Some(_) => {
tokenizer.consume();
@@ -518,14 +518,14 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// > | a <b c="d"> e
/// ^
/// ```
-fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: char) -> State {
+fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: u8) -> State {
match tokenizer.current {
None => State::Nok,
- Some('\n') => at_line_ending(
+ Some(b'\n') => at_line_ending(
tokenizer,
Box::new(move |t| tag_open_attribute_value_quoted(t, marker)),
),
- Some(char) if char == marker => {
+ Some(byte) if byte == marker => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_quoted_after))
}
@@ -546,8 +546,8 @@ fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: char) -> S
/// ```
fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('"' | '\'' | '<' | '=' | '`') => State::Nok,
- Some('\t' | '\n' | ' ' | '/' | '>') => tag_open_between(tokenizer),
+ None | Some(b'"' | b'\'' | b'<' | b'=' | b'`') => State::Nok,
+ Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => tag_open_between(tokenizer),
Some(_) => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_unquoted))
@@ -564,7 +564,7 @@ fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\t' | '\n' | ' ' | '>' | '/') => tag_open_between(tokenizer),
+ Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => tag_open_between(tokenizer),
_ => State::Nok,
}
}
@@ -577,7 +577,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.consume();
tokenizer.exit(Token::HtmlTextData);
tokenizer.exit(Token::HtmlText);
@@ -599,7 +599,7 @@ fn end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn at_line_ending(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> State {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.exit(Token::HtmlTextData);
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 5ea788f..6399f81 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -182,7 +182,7 @@ struct Info {
/// > | [a] b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Some(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
+ if Some(b']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
let mut label_start_index = None;
let mut index = tokenizer.label_start_stack.len();
@@ -217,7 +217,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
// To do: virtual spaces not needed, create a `to_str`?
id: normalize_identifier(
&Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position {
start: &tokenizer.events[label_start.start.1].point,
end: &tokenizer.events[label_end_start - 1].point,
@@ -258,7 +258,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
// Resource (`[asd](fgh)`)?
- Some('(') => tokenizer.attempt(resource, move |is_ok| {
+ Some(b'(') => tokenizer.attempt(resource, move |is_ok| {
Box::new(move |t| {
// Also fine if `defined`, as then it’s a valid shortcut.
if is_ok || defined {
@@ -269,7 +269,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
})
})(tokenizer),
// Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
- Some('[') => tokenizer.attempt(full_reference, move |is_ok| {
+ Some(b'[') => tokenizer.attempt(full_reference, move |is_ok| {
Box::new(move |t| {
if is_ok {
ok(t, info)
@@ -382,7 +382,7 @@ fn nok(tokenizer: &mut Tokenizer, label_start_index: usize) -> State {
/// ```
fn resource(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('(') => {
+ Some(b'(') => {
tokenizer.enter(Token::Resource);
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
@@ -411,7 +411,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(')') => resource_end(tokenizer),
+ Some(b')') => resource_end(tokenizer),
_ => tokenizer.go(
|t| {
destination(
@@ -451,7 +451,7 @@ fn destination_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('"' | '\'' | '(') => tokenizer.go(
+ Some(b'"' | b'\'' | b'(') => tokenizer.go(
|t| {
title(
t,
@@ -486,7 +486,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(')') => {
+ Some(b')') => {
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
tokenizer.exit(Token::ResourceMarker);
@@ -505,7 +505,7 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn full_reference(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('[') => tokenizer.go(
+ Some(b'[') => tokenizer.go(
|t| {
label(
t,
@@ -537,7 +537,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
// To do: virtual spaces not needed, create a `to_str`?
let id = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position::from_exit_event(&tokenizer.events, end),
)
.serialize();
@@ -563,7 +563,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('[') => {
+ Some(b'[') => {
tokenizer.enter(Token::Reference);
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
@@ -584,7 +584,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
/// ```
fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(']') => {
+ Some(b']') => {
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
tokenizer.exit(Token::ReferenceMarker);
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 078026d..d30b8dd 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -40,7 +40,7 @@ use crate::tokenizer::{LabelStart, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('!') if tokenizer.parse_state.constructs.label_start_image => {
+ Some(b'!') if tokenizer.parse_state.constructs.label_start_image => {
tokenizer.enter(Token::LabelImage);
tokenizer.enter(Token::LabelImageMarker);
tokenizer.consume();
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('[') => {
+ Some(b'[') => {
tokenizer.enter(Token::LabelMarker);
tokenizer.consume();
tokenizer.exit(Token::LabelMarker);
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index d7ae1d6..c47941c 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -39,7 +39,7 @@ use crate::tokenizer::{LabelStart, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('[') if tokenizer.parse_state.constructs.label_start_link => {
+ Some(b'[') if tokenizer.parse_state.constructs.label_start_link => {
let start = tokenizer.events.len();
tokenizer.enter(Token::LabelLink);
tokenizer.enter(Token::LabelMarker);
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 355eeee..9b59130 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -102,19 +102,19 @@ enum Kind {
}
impl Kind {
- /// Turn a [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// ## Panics
///
- /// Panics if `char` is not `.`, `)`, `*`, `+`, or `-`.
- fn from_char(char: char) -> Kind {
- match char {
- '.' => Kind::Dot,
- ')' => Kind::Paren,
- '*' => Kind::Asterisk,
- '+' => Kind::Plus,
- '-' => Kind::Dash,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `.`, `)`, `*`, `+`, or `-`.
+ fn from_byte(byte: u8) -> Kind {
+ match byte {
+ b'.' => Kind::Dot,
+ b')' => Kind::Paren,
+ b'*' => Kind::Asterisk,
+ b'+' => Kind::Plus,
+ b'-' => Kind::Dash,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -149,11 +149,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Unordered.
- Some('*' | '+' | '-') => tokenizer.check(thematic_break, |ok| {
+ Some(b'*' | b'+' | b'-') => tokenizer.check(thematic_break, |ok| {
Box::new(if ok { nok } else { before_unordered })
})(tokenizer),
// Ordered.
- Some(char) if char.is_ascii_digit() && (!tokenizer.interrupt || char == '1') => {
+ Some(byte) if byte.is_ascii_digit() && (!tokenizer.interrupt || byte == b'1') => {
tokenizer.enter(Token::ListItemPrefix);
tokenizer.enter(Token::ListItemValue);
inside(tokenizer, 0)
@@ -183,11 +183,11 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some(char) if char.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
+ Some(byte) if byte.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |t| inside(t, size + 1)))
}
- Some('.' | ')') if !tokenizer.interrupt || size < 2 => {
+ Some(b'.' | b')') if !tokenizer.interrupt || size < 2 => {
tokenizer.exit(Token::ListItemValue);
marker(tokenizer)
}
@@ -262,7 +262,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
- if matches!(tokenizer.current, Some('\t' | ' ')) {
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
State::Nok
} else {
State::Ok
@@ -277,7 +277,7 @@ fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn prefix_other(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
@@ -303,7 +303,7 @@ fn after(tokenizer: &mut Tokenizer, blank: bool) -> State {
&[Token::ListItem],
);
let mut prefix = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position {
start: &tokenizer.events[start].point,
end: &tokenizer.point,
@@ -400,13 +400,10 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
if event.event_type == EventType::Enter {
let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1;
let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]);
- let kind = Kind::from_char(
- Slice::from_point(
- &tokenizer.parse_state.chars,
- &tokenizer.events[marker].point,
- )
- .head()
- .unwrap(),
+ let kind = Kind::from_byte(
+ Slice::from_point(tokenizer.parse_state.bytes, &tokenizer.events[marker].point)
+ .head()
+ .unwrap(),
);
let current = (kind, balance, index, end);
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 569c609..7b50957 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -84,6 +84,7 @@ pub mod label_start_image;
pub mod label_start_link;
pub mod list;
pub mod paragraph;
+pub mod partial_bom;
pub mod partial_data;
pub mod partial_destination;
pub mod partial_label;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 5d230d3..146dc40 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -44,7 +44,7 @@ use crate::util::skip::opt as skip_opt;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
unreachable!("unexpected eol/eof")
}
_ => {
@@ -63,7 +63,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::Paragraph);
tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve));
diff --git a/src/construct/partial_bom.rs b/src/construct/partial_bom.rs
new file mode 100644
index 0000000..be8d6c8
--- /dev/null
+++ b/src/construct/partial_bom.rs
@@ -0,0 +1,54 @@
+//! To do.
+
+use crate::token::Token;
+use crate::tokenizer::{State, Tokenizer};
+
+/// Before a BOM.
+///
+/// ```text
+/// > | 0xEF 0xBB 0xBF
+/// ^^^^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(0xEF) => {
+ tokenizer.enter(Token::ByteOrderMark);
+ tokenizer.consume();
+ State::Fn(Box::new(cont))
+ }
+ _ => State::Nok,
+ }
+}
+
+/// Second byte in BOM.
+///
+/// ```text
+/// > | 0xEF 0xBB 0xBF
+/// ^^^^
+/// ```
+fn cont(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(0xBB) => {
+ tokenizer.consume();
+ State::Fn(Box::new(end))
+ }
+ _ => State::Nok,
+ }
+}
+
+/// Last byte in BOM.
+///
+/// ```text
+/// > | 0xEF 0xBB 0xBF
+/// ^^^^
+/// ```
+fn end(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(0xBF) => {
+ tokenizer.consume();
+ tokenizer.exit(Token::ByteOrderMark);
+ State::Ok
+ }
+ _ => State::Nok,
+ }
+}
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 0b66b09..335d7ab 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -15,9 +15,9 @@ use crate::tokenizer::{EventType, State, Tokenizer};
/// > | abc
/// ^
/// ```
-pub fn start(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
+pub fn start(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
match tokenizer.current {
- Some(char) if stop.contains(&char) => {
+ Some(byte) if stop.contains(&byte) => {
tokenizer.enter(Token::Data);
tokenizer.consume();
State::Fn(Box::new(move |t| data(t, stop)))
@@ -32,16 +32,16 @@ pub fn start(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
/// > | abc
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
+fn at_break(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
match tokenizer.current {
None => State::Ok,
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
State::Fn(Box::new(move |t| at_break(t, stop)))
}
- Some(char) if stop.contains(&char) => {
+ Some(byte) if stop.contains(&byte) => {
tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
State::Ok
}
@@ -58,10 +58,10 @@ fn at_break(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
/// > | abc
/// ^^^
/// ```
-fn data(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
+fn data(tokenizer: &mut Tokenizer, stop: &'static [u8]) -> State {
let done = match tokenizer.current {
- None | Some('\n') => true,
- Some(char) if stop.contains(&char) => true,
+ None | Some(b'\n') => true,
+ Some(byte) if stop.contains(&byte) => true,
_ => false,
};
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 6447228..0a3721c 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -117,7 +117,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
};
match tokenizer.current {
- Some('<') => {
+ Some(b'<') => {
tokenizer.enter(info.options.destination.clone());
tokenizer.enter(info.options.literal.clone());
tokenizer.enter(info.options.marker.clone());
@@ -125,8 +125,8 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
tokenizer.exit(info.options.marker.clone());
State::Fn(Box::new(|t| enclosed_before(t, info)))
}
- None | Some(' ' | ')') => State::Nok,
- Some(char) if char.is_ascii_control() => State::Nok,
+ None | Some(b' ' | b')') => State::Nok,
+ Some(byte) if byte != b'\0' && byte.is_ascii_control() => State::Nok,
Some(_) => {
tokenizer.enter(info.options.destination.clone());
tokenizer.enter(info.options.raw.clone());
@@ -144,7 +144,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// ^
/// ```
fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State {
- if let Some('>') = tokenizer.current {
+ if let Some(b'>') = tokenizer.current {
tokenizer.enter(info.options.marker.clone());
tokenizer.consume();
tokenizer.exit(info.options.marker.clone());
@@ -166,13 +166,13 @@ fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('>') => {
+ Some(b'>') => {
tokenizer.exit(Token::Data);
tokenizer.exit(info.options.string.clone());
enclosed_before(tokenizer, info)
}
- None | Some('\n' | '<') => State::Nok,
- Some('\\') => {
+ None | Some(b'\n' | b'<') => State::Nok,
+ Some(b'\\') => {
tokenizer.consume();
State::Fn(Box::new(|t| enclosed_escape(t, info)))
}
@@ -191,7 +191,7 @@ fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('<' | '>' | '\\') => {
+ Some(b'<' | b'>' | b'\\') => {
tokenizer.consume();
State::Fn(Box::new(|t| enclosed(t, info)))
}
@@ -207,7 +207,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('(') => {
+ Some(b'(') => {
if info.balance >= info.options.limit {
State::Nok
} else {
@@ -216,7 +216,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
State::Fn(Box::new(move |t| raw(t, info)))
}
}
- Some(')') => {
+ Some(b')') => {
if info.balance == 0 {
tokenizer.exit(Token::Data);
tokenizer.exit(info.options.string.clone());
@@ -229,7 +229,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
State::Fn(Box::new(move |t| raw(t, info)))
}
}
- None | Some('\t' | '\n' | ' ') => {
+ None | Some(b'\t' | b'\n' | b' ') => {
if info.balance > 0 {
State::Nok
} else {
@@ -240,8 +240,8 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
State::Ok
}
}
- Some(char) if char.is_ascii_control() => State::Nok,
- Some('\\') => {
+ Some(byte) if byte != b'\0' && byte.is_ascii_control() => State::Nok,
+ Some(b'\\') => {
tokenizer.consume();
State::Fn(Box::new(move |t| raw_escape(t, info)))
}
@@ -260,7 +260,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn raw_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some('(' | ')' | '\\') => {
+ Some(b'(' | b')' | b'\\') => {
tokenizer.consume();
State::Fn(Box::new(move |t| raw(t, info)))
}
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index ee31533..7e40a2d 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -82,9 +82,9 @@ pub struct Options {
struct Info {
/// Whether we’ve seen our first `ChunkString`.
connect: bool,
- /// Whether there are non-blank characters in the label.
+ /// Whether there are non-blank bytes in the label.
data: bool,
- /// Number of characters in the label.
+ /// Number of bytes in the label.
size: usize,
/// Configuration.
options: Options,
@@ -98,7 +98,7 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
match tokenizer.current {
- Some('[') => {
+ Some(b'[') => {
let info = Info {
connect: false,
data: false,
@@ -124,10 +124,10 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- None | Some('[') => State::Nok,
- Some(']') if !info.data => State::Nok,
+ None | Some(b'[') => State::Nok,
+ Some(b']') if !info.data => State::Nok,
_ if info.size > LINK_REFERENCE_SIZE_MAX => State::Nok,
- Some(']') => {
+ Some(b']') => {
tokenizer.exit(info.options.string.clone());
tokenizer.enter(info.options.marker.clone());
tokenizer.consume();
@@ -135,7 +135,7 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
tokenizer.exit(info.options.label);
State::Ok
}
- Some('\n') => tokenizer.go(
+ Some(b'\n') => tokenizer.go(
space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
connect: info.connect,
@@ -168,7 +168,7 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- None | Some('\n' | '[' | ']') => {
+ None | Some(b'\n' | b'[' | b']') => {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
@@ -176,12 +176,12 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
- Some('\t' | ' ') => {
+ Some(b'\t' | b' ') => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| label(t, info)))
}
- Some('\\') => {
+ Some(b'\\') => {
tokenizer.consume();
info.size += 1;
if !info.data {
@@ -208,7 +208,7 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn escape(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('[' | '\\' | ']') => {
+ Some(b'[' | b'\\' | b']') => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| label(t, info)))
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index 068e30f..6005a6c 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -22,7 +22,7 @@ use crate::tokenizer::{State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 6070ffe..f31cbc6 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -11,9 +11,9 @@ use crate::tokenizer::{ContentType, State, StateFn, Tokenizer};
/// Options to parse `space_or_tab`.
#[derive(Debug)]
pub struct Options {
- /// Minimum allowed characters (inclusive).
+ /// Minimum allowed bytes (inclusive).
pub min: usize,
- /// Maximum allowed characters (inclusive).
+ /// Maximum allowed bytes (inclusive).
pub max: usize,
/// Token type to use for whitespace events.
pub kind: Token,
@@ -134,7 +134,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
/// ```
fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('\t' | ' ') if info.options.max > 0 => {
+ Some(b'\t' | b' ') if info.options.max > 0 => {
tokenizer
.enter_with_content(info.options.kind.clone(), info.options.content_type.clone());
@@ -165,7 +165,7 @@ fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some('\t' | ' ') if info.size < info.options.max => {
+ Some(b'\t' | b' ') if info.size < info.options.max => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| inside(t, info)))
@@ -190,7 +190,7 @@ fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn after_space_or_tab(tokenizer: &mut Tokenizer, mut info: EolInfo) -> State {
match tokenizer.current {
- Some('\n') => {
+ Some(b'\n') => {
tokenizer.enter_with_content(Token::LineEnding, info.options.content_type.clone());
if info.connect {
@@ -239,7 +239,7 @@ fn after_eol(tokenizer: &mut Tokenizer, info: EolInfo) -> State {
/// ```
fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State {
// Blank line not allowed.
- if matches!(tokenizer.current, None | Some('\n')) {
+ if matches!(tokenizer.current, None | Some(b'\n')) {
State::Nok
} else {
State::Ok
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 15fc25e..80861af 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -78,29 +78,29 @@ enum Kind {
}
impl Kind {
- /// Turn the kind into a [char].
+ /// Turn the kind into a byte ([u8]).
///
/// > 👉 **Note**: a closing paren is used for `Kind::Paren`.
- fn as_char(&self) -> char {
+ fn as_byte(&self) -> u8 {
match self {
- Kind::Paren => ')',
- Kind::Double => '"',
- Kind::Single => '\'',
+ Kind::Paren => b')',
+ Kind::Double => b'"',
+ Kind::Single => b'\'',
}
}
- /// Turn a [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// > 👉 **Note**: an opening paren must be used for `Kind::Paren`.
///
/// ## Panics
///
- /// Panics if `char` is not `(`, `"`, or `'`.
- fn from_char(char: char) -> Kind {
- match char {
- '(' => Kind::Paren,
- '"' => Kind::Double,
- '\'' => Kind::Single,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `(`, `"`, or `'`.
+ fn from_byte(byte: u8) -> Kind {
+ match byte {
+ b'(' => Kind::Paren,
+ b'"' => Kind::Double,
+ b'\'' => Kind::Single,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -124,10 +124,10 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
match tokenizer.current {
- Some(char) if matches!(char, '"' | '\'' | '(') => {
+ Some(byte) if matches!(byte, b'"' | b'\'' | b'(') => {
let info = Info {
connect: false,
- kind: Kind::from_char(char),
+ kind: Kind::from_byte(byte),
options,
};
tokenizer.enter(info.options.title.clone());
@@ -150,7 +150,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// ```
fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.enter(info.options.marker.clone());
tokenizer.consume();
tokenizer.exit(info.options.marker.clone());
@@ -172,12 +172,12 @@ fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.exit(info.options.string.clone());
begin(tokenizer, info)
}
None => State::Nok,
- Some('\n') => tokenizer.go(
+ Some(b'\n') => tokenizer.go(
space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
connect: info.connect,
@@ -210,15 +210,15 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn title(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
- None | Some('\n') => {
+ None | Some(b'\n') => {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
- Some('\\') => {
+ Some(b'\\') => {
tokenizer.consume();
State::Fn(Box::new(|t| escape(t, info)))
}
@@ -237,7 +237,7 @@ fn title(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn escape(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.consume();
State::Fn(Box::new(|t| title(t, info)))
}
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index 152824b..13815cb 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -86,25 +86,25 @@ fn trim_data(
hard_break: bool,
) {
let mut slice = Slice::from_position(
- &tokenizer.parse_state.chars,
+ tokenizer.parse_state.bytes,
&Position::from_exit_event(&tokenizer.events, exit_index),
);
if trim_end {
- let mut index = slice.chars.len();
+ let mut index = slice.bytes.len();
let vs = slice.after;
let mut spaces_only = vs == 0;
while index > 0 {
- match slice.chars[index - 1] {
- ' ' => {}
- '\t' => spaces_only = false,
+ match slice.bytes[index - 1] {
+ b' ' => {}
+ b'\t' => spaces_only = false,
_ => break,
}
index -= 1;
}
- let diff = slice.chars.len() - index;
+ let diff = slice.bytes.len() - index;
let token_type = if spaces_only
&& hard_break
&& exit_index + 1 < tokenizer.events.len()
@@ -150,16 +150,16 @@ fn trim_data(
);
tokenizer.events[exit_index].point = enter_point;
- slice.chars = &slice.chars[..index];
+ slice.bytes = &slice.bytes[..index];
}
}
if trim_start {
let mut index = 0;
let vs = slice.before;
- while index < slice.chars.len() {
- match slice.chars[index] {
- ' ' | '\t' => {}
+ while index < slice.bytes.len() {
+ match slice.bytes[index] {
+ b' ' | b'\t' => {}
_ => break,
}
@@ -168,7 +168,7 @@ fn trim_data(
// The whole data is whitespace.
// We can be very fast: we only change the token types.
- if index == slice.chars.len() {
+ if index == slice.bytes.len() {
tokenizer.events[exit_index - 1].token_type = Token::SpaceOrTab;
tokenizer.events[exit_index].token_type = Token::SpaceOrTab;
return;
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index bed454b..4fc4dc4 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -83,25 +83,25 @@ enum Kind {
}
impl Kind {
- /// Turn the kind into a [char].
- fn as_char(&self) -> char {
+ /// Turn the kind into a byte ([u8]).
+ fn as_byte(&self) -> u8 {
match self {
- Kind::Asterisk => '*',
- Kind::Dash => '-',
- Kind::Underscore => '_',
+ Kind::Asterisk => b'*',
+ Kind::Dash => b'-',
+ Kind::Underscore => b'_',
}
}
- /// Turn a [char] into a kind.
+ /// Turn a byte ([u8]) into a kind.
///
/// ## Panics
///
- /// Panics if `char` is not `*`, `-`, or `_`.
- fn from_char(char: char) -> Kind {
- match char {
- '*' => Kind::Asterisk,
- '-' => Kind::Dash,
- '_' => Kind::Underscore,
- _ => unreachable!("invalid char"),
+ /// Panics if `byte` is not `*`, `-`, or `_`.
+ fn from_byte(byte: u8) -> Kind {
+ match byte {
+ b'*' => Kind::Asterisk,
+ b'-' => Kind::Dash,
+ b'_' => Kind::Underscore,
+ _ => unreachable!("invalid byte"),
}
}
}
@@ -144,10 +144,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(char) if matches!(char, '*' | '-' | '_') => at_break(
+ Some(byte) if matches!(byte, b'*' | b'-' | b'_') => at_break(
tokenizer,
Info {
- kind: Kind::from_char(char),
+ kind: Kind::from_byte(byte),
size: 0,
},
),
@@ -163,13 +163,13 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- None | Some('\n' | '\r') if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
+ None | Some(b'\n' | b'\r') if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
tokenizer.exit(Token::ThematicBreak);
// Feel free to interrupt.
tokenizer.interrupt = false;
State::Ok
}
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.enter(Token::ThematicBreakSequence);
sequence(tokenizer, info)
}
@@ -185,7 +185,7 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn sequence(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Some(char) if char == info.kind.as_char() => {
+ Some(byte) if byte == info.kind.as_byte() => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| sequence(t, info)))