aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/html_flow.rs
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/construct/html_flow.rs85
1 files changed, 43 insertions, 42 deletions
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index a1bddad..229b0ef 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -103,6 +103,7 @@ use crate::construct::{
blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::codes::{parse, serialize};
/// Kind of HTML (flow).
#[derive(Debug, PartialEq)]
@@ -164,6 +165,17 @@ impl QuoteKind {
_ => unreachable!("invalid char"),
}
}
+ /// Turn [Code] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `code` is not `Code::Char('"' | '\'')`.
+ fn from_code(code: Code) -> QuoteKind {
+ match code {
+ Code::Char(char) => QuoteKind::from_char(char),
+ _ => unreachable!("invalid code"),
+ }
+ }
}
/// State needed to parse HTML (flow).
@@ -175,7 +187,7 @@ struct Info {
start_tag: bool,
/// Used depending on `kind` to either collect all parsed characters, or to
/// store expected characters.
- buffer: Vec<char>,
+ buffer: Vec<Code>,
/// `index` into `buffer` when expecting certain characters.
index: usize,
/// Current quote, when in a double or single quoted attribute value.
@@ -254,7 +266,7 @@ fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
None,
)
}
- Code::Char(char) if char.is_ascii_alphabetic() => {
+ Code::Char('A'..='Z' | 'a'..='z') => {
info.start_tag = true;
tag_name(tokenizer, code, info)
}
@@ -282,14 +294,14 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> St
Code::Char('[') => {
tokenizer.consume(code);
info.kind = Kind::Cdata;
- info.buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
+ info.buffer = parse("CDATA[");
info.index = 0;
(
State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))),
None,
)
}
- Code::Char(char) if char.is_ascii_alphabetic() => {
+ Code::Char('A'..='Z' | 'a'..='z') => {
tokenizer.consume(code);
info.kind = Kind::Declaration;
(
@@ -329,22 +341,21 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta
/// <![CDATA|[>&<]]>
/// ```
fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
- match code {
- Code::Char(char) if char == info.buffer[info.index] => {
- info.index += 1;
- tokenizer.consume(code);
+ if code == info.buffer[info.index] {
+ info.index += 1;
+ tokenizer.consume(code);
- if info.index == info.buffer.len() {
- info.buffer.clear();
- (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
- } else {
- (
- State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))),
- None,
- )
- }
+ if info.index == info.buffer.len() {
+ info.buffer.clear();
+ (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
+ } else {
+ (
+ State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))),
+ None,
+ )
}
- _ => (State::Nok, None),
+ } else {
+ (State::Nok, None)
}
}
@@ -355,9 +366,9 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> S
/// ```
fn tag_close_start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::Char(char) if char.is_ascii_alphabetic() => {
+ Code::Char('A'..='Z' | 'a'..='z') => {
tokenizer.consume(code);
- info.buffer.push(char);
+ info.buffer.push(code);
(State::Fn(Box::new(|t, c| tag_name(t, c, info))), None)
}
_ => (State::Nok, None),
@@ -376,13 +387,9 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
| Code::CarriageReturnLineFeed
| Code::VirtualSpace
| Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => {
- let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase();
+ let tag_name_buffer = serialize(&info.buffer, false).to_lowercase();
let name = tag_name_buffer.as_str();
- let slash = if let Code::Char(char) = code {
- char == '/'
- } else {
- false
- };
+ let slash = matches!(code, Code::Char('/'));
info.buffer.clear();
@@ -413,9 +420,9 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
}
}
}
- Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
+ Code::Char('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
tokenizer.consume(code);
- info.buffer.push(char);
+ info.buffer.push(code);
(State::Fn(Box::new(|t, c| tag_name(t, c, info))), None)
}
Code::Char(_) => (State::Nok, None),
@@ -481,7 +488,7 @@ fn complete_attribute_name_before(
tokenizer.consume(code);
(State::Fn(Box::new(|t, c| complete_end(t, c, info))), None)
}
- Code::Char(char) if char == ':' || char == '_' || char.is_ascii_alphabetic() => {
+ Code::Char('0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
tokenizer.consume(code);
(
State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))),
@@ -508,13 +515,7 @@ fn complete_attribute_name_before(
/// ```
fn complete_attribute_name(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
- Code::Char(char)
- if char == '-'
- || char == '.'
- || char == ':'
- || char == '_'
- || char.is_ascii_alphanumeric() =>
- {
+ Code::Char('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
tokenizer.consume(code);
(
State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))),
@@ -571,9 +572,9 @@ fn complete_attribute_value_before(
) -> StateFnResult {
match code {
Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),
- Code::Char(char) if char == '"' || char == '\'' => {
+ Code::Char('"' | '\'') => {
tokenizer.consume(code);
- info.quote = Some(QuoteKind::from_char(char));
+ info.quote = Some(QuoteKind::from_code(code));
(
State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))),
None,
@@ -602,7 +603,7 @@ fn complete_attribute_value_quoted(
info: Info,
) -> StateFnResult {
match code {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
Code::Char(char) if char == info.quote.as_ref().unwrap().as_char() => {
tokenizer.consume(code);
(
@@ -860,7 +861,7 @@ fn continuation_raw_end_tag(
) -> StateFnResult {
match code {
Code::Char('>') => {
- let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase();
+ let tag_name_buffer = serialize(&info.buffer, false).to_lowercase();
info.buffer.clear();
if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) {
@@ -873,9 +874,9 @@ fn continuation_raw_end_tag(
continuation(tokenizer, code, info)
}
}
- Code::Char(char) if char.is_ascii_alphabetic() && info.buffer.len() < HTML_RAW_SIZE_MAX => {
+ Code::Char('A'..='Z' | 'a'..='z') if info.buffer.len() < HTML_RAW_SIZE_MAX => {
tokenizer.consume(code);
- info.buffer.push(char);
+ info.buffer.push(code);
(
State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))),
None,