//! To do. use crate::construct::partial_whitespace::start as whitespace; use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer}; /// Start of HTML (text) /// /// ```markdown /// a | b /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(TokenType::HtmlText); tokenizer.enter(TokenType::HtmlTextData); tokenizer.consume(code); (State::Fn(Box::new(open)), None) } /// To do. pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('!') => { tokenizer.consume(code); (State::Fn(Box::new(declaration_open)), None) } Code::Char('/') => { tokenizer.consume(code); (State::Fn(Box::new(tag_close_start)), None) } Code::Char('?') => { tokenizer.consume(code); (State::Fn(Box::new(instruction)), None) } Code::Char(char) if char.is_ascii_alphabetic() => { tokenizer.consume(code); (State::Fn(Box::new(tag_open)), None) } _ => (State::Nok, None), } } /// To do. pub fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); (State::Fn(Box::new(comment_open)), None) } Code::Char('[') => { tokenizer.consume(code); let buffer = vec!['C', 'D', 'A', 'T', 'A', '[']; ( State::Fn(Box::new(|tokenizer, code| { cdata_open(tokenizer, code, buffer, 0) })), None, ) } Code::Char(char) if char.is_ascii_alphabetic() => { tokenizer.consume(code); (State::Fn(Box::new(declaration)), None) } _ => (State::Nok, None), } } /// To do. pub fn comment_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); (State::Fn(Box::new(comment_start)), None) } _ => (State::Nok, None), } } /// To do. pub fn comment_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::Char('>') => (State::Nok, None), Code::Char('-') => { tokenizer.consume(code); (State::Fn(Box::new(comment_start_dash)), None) } _ => comment(tokenizer, code), } } /// To do. pub fn comment_start_dash(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::Char('>') => (State::Nok, None), _ => comment(tokenizer, code), } } /// To do. pub fn comment(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Nok, None), Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(comment)) } Code::Char('-') => { tokenizer.consume(code); (State::Fn(Box::new(comment_close)), None) } _ => { tokenizer.consume(code); (State::Fn(Box::new(comment)), None) } } } /// To do. pub fn comment_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); (State::Fn(Box::new(end)), None) } _ => comment(tokenizer, code), } } /// To do. pub fn cdata_open( tokenizer: &mut Tokenizer, code: Code, buffer: Vec, index: usize, ) -> StateFnResult { match code { Code::Char(char) if char == buffer[index] => { tokenizer.consume(code); if index + 1 == buffer.len() { (State::Fn(Box::new(cdata)), None) } else { ( State::Fn(Box::new(move |tokenizer, code| { cdata_open(tokenizer, code, buffer, index + 1) })), None, ) } } _ => (State::Nok, None), } } /// To do. pub fn cdata(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Nok, None), Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(cdata)) } Code::Char(']') => { tokenizer.consume(code); (State::Fn(Box::new(cdata_close)), None) } _ => { tokenizer.consume(code); (State::Fn(Box::new(cdata)), None) } } } /// To do. pub fn cdata_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char(']') => { tokenizer.consume(code); (State::Fn(Box::new(cdata_end)), None) } _ => cdata(tokenizer, code), } } /// To do. pub fn cdata_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('>') => end(tokenizer, code), Code::Char(']') => cdata_close(tokenizer, code), _ => cdata(tokenizer, code), } } /// To do. pub fn declaration(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::Char('>') => end(tokenizer, code), Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(declaration)) } _ => { tokenizer.consume(code); (State::Fn(Box::new(declaration)), None) } } } /// To do. pub fn instruction(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Nok, None), Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(instruction)) } Code::Char('?') => { tokenizer.consume(code); (State::Fn(Box::new(instruction_close)), None) } _ => { tokenizer.consume(code); (State::Fn(Box::new(instruction)), None) } } } /// To do. pub fn instruction_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('>') => end(tokenizer, code), _ => instruction(tokenizer, code), } } /// To do. pub fn tag_close_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char(char) if char.is_ascii_alphabetic() => { tokenizer.consume(code); (State::Fn(Box::new(tag_close)), None) } _ => (State::Nok, None), } } /// To do. pub fn tag_close(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => { tokenizer.consume(code); (State::Fn(Box::new(tag_close)), None) } _ => tag_close_between(tokenizer, code), } } /// To do. pub fn tag_close_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(tag_close_between)) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); (State::Fn(Box::new(tag_close_between)), None) } _ => end(tokenizer, code), } } /// To do. pub fn tag_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => { tokenizer.consume(code); (State::Fn(Box::new(tag_open)), None) } Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\r' | '\n' | '\t' | ' ' | '/' | '>') => tag_open_between(tokenizer, code), _ => (State::Nok, None), } } /// To do. pub fn tag_open_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(tag_open_between)) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_between)), None) } Code::Char('/') => { tokenizer.consume(code); (State::Fn(Box::new(end)), None) } Code::Char(char) if char == ':' || char == '_' || char.is_ascii_alphabetic() => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_name)), None) } _ => end(tokenizer, code), } } /// To do. pub fn tag_open_attribute_name(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char(char) if char == '-' || char == '.' || char == ':' || char == '_' || char.is_ascii_alphanumeric() => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_name)), None) } _ => tag_open_attribute_name_after(tokenizer, code), } } /// To do. pub fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(tag_open_attribute_name_after)) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_name_after)), None) } Code::Char('=') => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_value_before)), None) } _ => tag_open_between(tokenizer, code), } } /// To do. pub fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None), Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { at_line_ending(tokenizer, code, Box::new(tag_open_attribute_value_before)) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_value_before)), None) } Code::Char(char) if char == '"' || char == '\'' => { tokenizer.consume(code); ( State::Fn(Box::new(move |tokenizer, code| { tag_open_attribute_value_quoted(tokenizer, code, char) })), None, ) } Code::Char(_) => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None) } } } /// To do. pub fn tag_open_attribute_value_quoted( tokenizer: &mut Tokenizer, code: Code, marker: char, ) -> StateFnResult { match code { Code::None => (State::Nok, None), Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => at_line_ending( tokenizer, code, Box::new(move |tokenizer, code| { tag_open_attribute_value_quoted(tokenizer, code, marker) }), ), Code::Char(char) if char == marker => { tokenizer.consume(code); ( State::Fn(Box::new(tag_open_attribute_value_quoted_after)), None, ) } _ => { tokenizer.consume(code); ( State::Fn(Box::new(move |tokenizer, code| { tag_open_attribute_value_quoted(tokenizer, code, marker) })), None, ) } } } /// To do. pub fn tag_open_attribute_value_quoted_after( tokenizer: &mut Tokenizer, code: Code, ) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '>' | '/') => { tag_open_between(tokenizer, code) } _ => (State::Nok, None), } } /// To do. pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => (State::Nok, None), Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | ' ' | '>') => { tag_open_between(tokenizer, code) } Code::Char(_) => { tokenizer.consume(code); (State::Fn(Box::new(tag_open_attribute_value_unquoted)), None) } } } /// To do. // We can’t have blank lines in content, so no need to worry about empty // tokens. pub fn at_line_ending( tokenizer: &mut Tokenizer, code: Code, return_state: Box, ) -> StateFnResult { match code { Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { tokenizer.exit(TokenType::HtmlTextData); tokenizer.enter(TokenType::LineEnding); tokenizer.consume(code); tokenizer.exit(TokenType::LineEnding); ( State::Fn(Box::new(|t, c| after_line_ending(t, c, return_state))), None, ) } _ => unreachable!("expected line ending"), } } pub fn after_line_ending( tokenizer: &mut Tokenizer, code: Code, return_state: Box, ) -> StateFnResult { tokenizer.attempt( |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace), |_ok| Box::new(|t, c| after_line_ending_prefix(t, c, return_state)), )(tokenizer, code) } pub fn after_line_ending_prefix( tokenizer: &mut Tokenizer, code: Code, return_state: Box, ) -> StateFnResult { tokenizer.enter(TokenType::HtmlTextData); return_state(tokenizer, code) } /// To do. pub fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::Char('>') => { tokenizer.consume(code); tokenizer.exit(TokenType::HtmlTextData); tokenizer.exit(TokenType::HtmlText); (State::Ok, None) } _ => (State::Nok, None), } }