Diffstat (limited to 'src/construct')
-rw-r--r-- | src/construct/autolink.rs             |   7
-rw-r--r-- | src/construct/character_reference.rs  |  64
-rw-r--r-- | src/construct/code_fenced.rs          |  68
-rw-r--r-- | src/construct/code_text.rs            |  26
-rw-r--r-- | src/construct/definition.rs           |  12
-rw-r--r-- | src/construct/heading_setext.rs       |  56
-rw-r--r-- | src/construct/html_flow.rs            | 462
-rw-r--r-- | src/construct/html_text.rs            |  20
-rw-r--r-- | src/construct/paragraph.rs            |  46
-rw-r--r-- | src/construct/partial_label.rs        | 111
-rw-r--r-- | src/construct/partial_space_or_tab.rs |  70
-rw-r--r-- | src/construct/partial_title.rs        |  91
-rw-r--r-- | src/construct/thematic_break.rs       |  97
13 files changed, 572 insertions(+), 558 deletions(-)
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index c9596a6..8376b98 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -266,12 +266,11 @@ fn email_label(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnRes ) } Code::Char('>') => { - let tail_index = tokenizer.events.len(); - let head_index = tokenizer.events.len() - 1; + let index = tokenizer.events.len(); tokenizer.exit(TokenType::AutolinkProtocol); // Change the token type. - tokenizer.events[head_index].token_type = TokenType::AutolinkEmail; - tokenizer.events[tail_index].token_type = TokenType::AutolinkEmail; + tokenizer.events[index - 1].token_type = TokenType::AutolinkEmail; + tokenizer.events[index].token_type = TokenType::AutolinkEmail; end(tokenizer, code) } _ => email_value(tokenizer, code, size), diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index c946dae..decf852 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -59,7 +59,7 @@ use crate::constant::{ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; /// Kind of a character reference. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)] pub enum Kind { /// Numeric decimal character reference (`	`). Decimal, @@ -69,6 +69,28 @@ pub enum Kind { Named, } +impl Kind { + /// Get the maximum size of characters allowed in a character reference. + fn max(&self) -> usize { + match self { + Kind::Hexadecimal => CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, + Kind::Decimal => CHARACTER_REFERENCE_DECIMAL_SIZE_MAX, + Kind::Named => CHARACTER_REFERENCE_NAMED_SIZE_MAX, + } + } + + /// Check if a char is allowed. + fn allowed(&self, char: char) -> bool { + let check = match self { + Kind::Hexadecimal => char::is_ascii_hexdigit, + Kind::Decimal => char::is_ascii_digit, + Kind::Named => char::is_ascii_alphanumeric, + }; + + check(&char) + } +} + /// State needed to parse character references. 
#[derive(Debug, Clone)] struct Info { @@ -141,10 +163,10 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.enter(TokenType::CharacterReferenceValue); ( - State::Fn(Box::new(|tokenizer, code| { + State::Fn(Box::new(|t, c| { value( - tokenizer, - code, + t, + c, Info { buffer: vec![], kind: Kind::Hexadecimal, @@ -179,7 +201,7 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// a|23;b /// a&#x|9;b /// ``` -fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { +fn value(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::Char(';') if !info.buffer.is_empty() => { tokenizer.exit(TokenType::CharacterReferenceValue); @@ -198,36 +220,10 @@ fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { (State::Ok, None) } Code::Char(char) => { - let len = info.buffer.len(); - - let cont = match info.kind { - Kind::Hexadecimal - if char.is_ascii_hexdigit() - && len < CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX => - { - true - } - Kind::Decimal - if char.is_ascii_digit() && len < CHARACTER_REFERENCE_DECIMAL_SIZE_MAX => - { - true - } - Kind::Named - if char.is_ascii_alphanumeric() && len < CHARACTER_REFERENCE_NAMED_SIZE_MAX => - { - true - } - _ => false, - }; - - if cont { - let mut clone = info; - clone.buffer.push(char); + if info.buffer.len() < info.kind.max() && info.kind.allowed(char) { + info.buffer.push(char); tokenizer.consume(code); - ( - State::Fn(Box::new(|tokenizer, code| value(tokenizer, code, clone))), - None, - ) + (State::Fn(Box::new(|t, c| value(t, c, info))), None) } else { (State::Nok, None) } diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 30ec911..f79705c 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -99,11 +99,49 @@ use crate::util::span::from_exit_event; #[derive(Debug, Clone, PartialEq)] pub enum Kind { /// Grave accent (tick) code. + /// + /// ## Example + /// + /// ````markdown + /// ```rust + /// println!("I <3 🦀"); + /// ``` + /// ```` GraveAccent, /// Tilde code. + /// + /// ## Example + /// + /// ```markdown + /// ~~~rust + /// println!("I <3 🦀"); + /// ~~~ + /// ``` Tilde, } +impl Kind { + /// Turn the kind into a [char]. + fn as_char(&self) -> char { + match self { + Kind::GraveAccent => '`', + Kind::Tilde => '~', + } + } + /// Turn a [char] into a kind. + /// + /// ## Panics + /// + /// Panics if `char` is not `~` or `` ` ``. + fn from_char(char: char) -> Kind { + match char { + '`' => Kind::GraveAccent, + '~' => Kind::Tilde, + _ => unreachable!("invalid char"), + } + } +} + /// State needed to parse code (fenced). 
#[derive(Debug, Clone)] struct Info { @@ -160,11 +198,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult Info { prefix, size: 0, - kind: if char == '`' { - Kind::GraveAccent - } else { - Kind::Tilde - }, + kind: Kind::from_char(char), }, ) } @@ -180,14 +214,8 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult /// ~~~ /// ``` fn sequence_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { - let marker = if info.kind == Kind::GraveAccent { - '`' - } else { - '~' - }; - match code { - Code::Char(char) if char == marker => { + Code::Char(char) if char == info.kind.as_char() => { tokenizer.consume(code); ( State::Fn(Box::new(|t, c| { @@ -375,14 +403,8 @@ fn close_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu /// |~~~ /// ``` fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { - let marker = if info.kind == Kind::GraveAccent { - '`' - } else { - '~' - }; - match code { - Code::Char(char) if char == marker => { + Code::Char(char) if char == info.kind.as_char() => { tokenizer.enter(TokenType::CodeFencedFenceSequence); close_sequence(tokenizer, code, info, 0) } @@ -398,14 +420,8 @@ fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes /// ~|~~ /// ``` fn close_sequence(tokenizer: &mut Tokenizer, code: Code, info: Info, size: usize) -> StateFnResult { - let marker = if info.kind == Kind::GraveAccent { - '`' - } else { - '~' - }; - match code { - Code::Char(char) if char == marker => { + Code::Char(char) if char == info.kind.as_char() => { tokenizer.consume(code); ( State::Fn(Box::new(move |t, c| close_sequence(t, c, info, size + 1))), diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index 1f34e41..94e0106 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -113,9 +113,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnR if let Code::Char('`') = code { tokenizer.consume(code); ( - State::Fn(Box::new(move |tokenizer, code| { - sequence_open(tokenizer, code, size + 1) - })), + State::Fn(Box::new(move |t, c| sequence_open(t, c, size + 1))), None, ) } else { @@ -138,9 +136,7 @@ fn between(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnRe tokenizer.consume(code); tokenizer.exit(TokenType::CodeTextLineEnding); ( - State::Fn(Box::new(move |tokenizer, code| { - between(tokenizer, code, size_open) - })), + State::Fn(Box::new(move |t, c| between(t, c, size_open))), None, ) } @@ -168,12 +164,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResul } _ => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |tokenizer, code| { - data(tokenizer, code, size_open) - })), - None, - ) + (State::Fn(Box::new(move |t, c| data(t, c, size_open))), None) } } } @@ -193,8 +184,8 @@ fn sequence_close( Code::Char('`') => { tokenizer.consume(code); ( - State::Fn(Box::new(move |tokenizer, code| { - sequence_close(tokenizer, code, size_open, size + 1) + State::Fn(Box::new(move |t, c| { + sequence_close(t, c, size_open, size + 1) })), None, ) @@ -205,12 +196,11 @@ fn sequence_close( (State::Ok, Some(vec![code])) } _ => { - let tail_index = tokenizer.events.len(); - let head_index = tokenizer.events.len() - 1; + let index = tokenizer.events.len(); tokenizer.exit(TokenType::CodeTextSequence); // Change the token type. 
- tokenizer.events[head_index].token_type = TokenType::CodeTextData; - tokenizer.events[tail_index].token_type = TokenType::CodeTextData; + tokenizer.events[index - 1].token_type = TokenType::CodeTextData; + tokenizer.events[index].token_type = TokenType::CodeTextData; between(tokenizer, code, size_open) } } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 03baee6..61c4d34 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -144,16 +144,14 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let event = tokenizer.events.last().unwrap(); - // Blank line not ok. - let char_nok = matches!( - code, - Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') - ); // Whitespace. if (event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace) - && !char_nok - { + // Blank line not ok. + && !matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { tokenizer.go(destination, destination_after)(tokenizer, code) } else { (State::Nok, None) diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index a418041..7c41855 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -57,12 +57,49 @@ use crate::util::{link::link, span::from_exit_event}; /// Kind of underline. #[derive(Debug, Clone, PartialEq)] pub enum Kind { - /// Grave accent (tick) code. + /// Dash (rank 2) heading. + /// + /// ## Example + /// + /// ```markdown + /// alpha + /// ----- + /// ``` Dash, - /// Tilde code. + + /// Equals to (rank 1) heading. + /// + /// ## Example + /// + /// ```markdown + /// alpha + /// ===== + /// ``` EqualsTo, } +impl Kind { + /// Turn the kind into a [char]. + fn as_char(&self) -> char { + match self { + Kind::Dash => '-', + Kind::EqualsTo => '=', + } + } + /// Turn a [char] into a kind. + /// + /// ## Panics + /// + /// Panics if `char` is not `-` or `=`. + fn from_char(char: char) -> Kind { + match char { + '-' => Kind::Dash, + '=' => Kind::EqualsTo, + _ => unreachable!("invalid char"), + } + } +} + /// Start of a heading (setext). 
/// /// ```markdown @@ -232,13 +269,8 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes match code { Code::Char(char) if char == '-' || char == '=' => { - let marker = if char == '-' { - Kind::Dash - } else { - Kind::EqualsTo - }; tokenizer.enter(TokenType::HeadingSetextUnderline); - underline_sequence_inside(tokenizer, code, marker) + underline_sequence_inside(tokenizer, code, Kind::from_char(char)) } _ => (State::Nok, None), } @@ -251,15 +283,11 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes /// =|= /// ``` fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { - let marker = if kind == Kind::Dash { '-' } else { '=' }; - match code { - Code::Char(char) if char == marker => { + Code::Char(char) if char == kind.as_char() => { tokenizer.consume(code); ( - State::Fn(Box::new(move |tokenizer, code| { - underline_sequence_inside(tokenizer, code, kind) - })), + State::Fn(Box::new(move |t, c| underline_sequence_inside(t, c, kind))), None, ) } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 4819e63..d5937c5 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -97,10 +97,8 @@ use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::sp use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; /// Kind of HTML (flow). -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, PartialEq)] enum Kind { - /// Not yet known. - Unknown, /// Symbol for `<script>` (condition 1). Raw, /// Symbol for `<!---->` (condition 2). @@ -117,19 +115,51 @@ enum Kind { Complete, } -/// Type of quote, if we’re in an attribure, in complete (condition 7). -#[derive(Debug, Clone, PartialEq)] +/// Type of quote, if we’re in a quoted attribute, in complete (condition 7). +#[derive(Debug, PartialEq)] enum QuoteKind { - /// Not in a quoted attribute. - None, - /// In a double quoted (`"`) attribute. + /// In a double quoted (`"`) attribute value. + /// + /// ## Example + /// + /// ```markdown + /// <a b="c" /> + /// ``` Double, - /// In a single quoted (`"`) attribute. + /// In a single quoted (`'`) attribute value. + /// + /// ## Example + /// + /// ```markdown + /// <a b='c' /> + /// ``` Single, } +impl QuoteKind { + /// Turn the kind into a [char]. + fn as_char(&self) -> char { + match self { + QuoteKind::Double => '"', + QuoteKind::Single => '\'', + } + } + /// Turn a [char] into a kind. + /// + /// ## Panics + /// + /// Panics if `char` is not `"` or `'`. + fn from_char(char: char) -> QuoteKind { + match char { + '"' => QuoteKind::Double, + '\'' => QuoteKind::Single, + _ => unreachable!("invalid char"), + } + } +} + /// State needed to parse HTML (flow). -#[derive(Debug, Clone)] +#[derive(Debug)] struct Info { /// Kind of HTML (flow). kind: Kind, @@ -141,7 +171,7 @@ struct Info { /// `index` into `buffer` when expecting certain characters. index: usize, /// Current quote, when in a double or single quoted attribute value. - quote: QuoteKind, + quote: Option<QuoteKind>, } // To do: mark as concrete (block quotes or lists can’t “pierce” into HTML). 
@@ -166,22 +196,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { if Code::Char('<') == code { tokenizer.consume(code); - ( - State::Fn(Box::new(|tokenizer, code| { - open( - tokenizer, - Info { - kind: Kind::Unknown, - start_tag: false, - buffer: vec![], - index: 0, - quote: QuoteKind::None, - }, - code, - ) - })), - None, - ) + (State::Fn(Box::new(open)), None) } else { (State::Nok, None) } @@ -194,45 +209,44 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// <|!doctype> /// <|!--xxx--> /// ``` -fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let mut info = Info { + // Assume basic. + kind: Kind::Basic, + start_tag: false, + buffer: vec![], + index: 0, + quote: None, + }; + match code { Code::Char('!') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - declaration_open(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| declaration_open(t, c, info))), None, ) } Code::Char('/') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - tag_close_start(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| tag_close_start(t, c, info))), None, ) } Code::Char('?') => { - // To do: life times. - let mut clone = info; - clone.kind = Kind::Instruction; + info.kind = Kind::Instruction; tokenizer.consume(code); // While we’re in an instruction instead of a declaration, we’re on a `?` // right now, so we do need to search for `>`, similar to declarations. ( - State::Fn(Box::new(|tokenizer, code| { - continuation_declaration_inside(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, ) } Code::Char(char) if char.is_ascii_alphabetic() => { - // To do: life times. - let mut clone = info; - clone.start_tag = true; - tag_name(tokenizer, clone, code) + info.start_tag = true; + tag_name(tokenizer, code, info) } _ => (State::Nok, None), } @@ -245,41 +259,31 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { /// <!|--xxx--> /// <!|[CDATA[>&<]]> /// ``` -fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); - let mut clone = info; - clone.kind = Kind::Comment; + info.kind = Kind::Comment; ( - State::Fn(Box::new(|tokenizer, code| { - comment_open_inside(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| comment_open_inside(t, c, info))), None, ) } Code::Char('[') => { tokenizer.consume(code); - let mut clone = info; - clone.kind = Kind::Cdata; - clone.buffer = vec!['C', 'D', 'A', 'T', 'A', '[']; - clone.index = 0; + info.kind = Kind::Cdata; + info.buffer = vec!['C', 'D', 'A', 'T', 'A', '[']; + info.index = 0; ( - State::Fn(Box::new(|tokenizer, code| { - cdata_open_inside(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))), None, ) } Code::Char(char) if char.is_ascii_alphabetic() => { tokenizer.consume(code); - // To do: life times. 
- let mut clone = info; - clone.kind = Kind::Declaration; + info.kind = Kind::Declaration; ( - State::Fn(Box::new(|tokenizer, code| { - continuation_declaration_inside(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, ) } @@ -292,14 +296,12 @@ fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateF /// ```markdown /// <!-|-xxx--> /// ``` -fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char('-') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_declaration_inside(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, ) } @@ -316,26 +318,18 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta /// <![CDAT|A[>&<]]> /// <![CDATA|[>&<]]> /// ``` -fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::Char(char) if char == info.buffer[info.index] => { - let mut clone = info; - clone.index += 1; + info.index += 1; tokenizer.consume(code); - if clone.index == clone.buffer.len() { - clone.buffer.clear(); - ( - State::Fn(Box::new(|tokenizer, code| { - continuation(tokenizer, clone, code) - })), - None, - ) + if info.index == info.buffer.len() { + info.buffer.clear(); + (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } else { ( - State::Fn(Box::new(|tokenizer, code| { - cdata_open_inside(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))), None, ) } @@ -349,17 +343,12 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> State /// ```markdown /// </|x> /// ``` -fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn tag_close_start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::Char(char) if char.is_ascii_alphabetic() => { tokenizer.consume(code); - // To do: life times. - let mut clone = info; - clone.buffer.push(char); - ( - State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))), - None, - ) + info.buffer.push(char); + (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None) } _ => (State::Nok, None), } @@ -371,7 +360,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFn /// <a|b> /// </a|b> /// ``` -fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed @@ -385,50 +374,37 @@ fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult false }; + info.buffer.clear(); + if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) { - // To do: life times. - let mut clone = info; - clone.kind = Kind::Raw; - clone.buffer.clear(); - continuation(tokenizer, clone, code) + info.kind = Kind::Raw; + continuation(tokenizer, code, info) } else if HTML_BLOCK_NAMES.contains(&name) { - // To do: life times. - let mut clone = info; - clone.kind = Kind::Basic; - clone.buffer.clear(); - + // Basic is assumed, no need to set `kind`. 
if slash { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - basic_self_closing(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| basic_self_closing(t, c, info))), None, ) } else { - continuation(tokenizer, clone, code) + continuation(tokenizer, code, info) } } else { - // To do: life times. - let mut clone = info; - clone.kind = Kind::Complete; + info.kind = Kind::Complete; // To do: do not support complete HTML when interrupting. - if clone.start_tag { - complete_attribute_name_before(tokenizer, clone, code) + if info.start_tag { + complete_attribute_name_before(tokenizer, code, info) } else { - complete_closing_tag_after(tokenizer, clone, code) + complete_closing_tag_after(tokenizer, code, info) } } } Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => { tokenizer.consume(code); - let mut clone = info; - clone.buffer.push(char); - ( - State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))), - None, - ) + info.buffer.push(char); + (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None) } Code::Char(_) => (State::Nok, None), } @@ -439,16 +415,11 @@ fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult /// ```markdown /// <div/|> /// ``` -fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char('>') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|tokenizer, code| { - continuation(tokenizer, info, code) - })), - None, - ) + (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } _ => (State::Nok, None), } @@ -460,18 +431,16 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Stat /// <x/|> /// </x/|> /// ``` -fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_closing_tag_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_closing_tag_after(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_closing_tag_after(t, c, info))), None, ) } - _ => complete_end(tokenizer, info, code), + _ => complete_end(tokenizer, code, info), } } @@ -492,38 +461,29 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code) /// ``` fn complete_attribute_name_before( tokenizer: &mut Tokenizer, - info: Info, code: Code, + info: Info, ) -> StateFnResult { match code { Code::Char('/') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|tokenizer, code| { - complete_end(tokenizer, info, code) - })), - None, - ) + (State::Fn(Box::new(|t, c| complete_end(t, c, info))), None) } Code::Char(char) if char == ':' || char == '_' || char.is_ascii_alphabetic() => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_name(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))), None, ) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_name_before(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_name_before(t, c, info))), None, ) } - _ => complete_end(tokenizer, info, code), + _ => complete_end(tokenizer, code, info), } } @@ -534,7 +494,7 @@ fn complete_attribute_name_before( /// <x _|> /// <x a|> 
/// ``` -fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_attribute_name(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char(char) if char == '-' @@ -545,13 +505,11 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_name(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))), None, ) } - _ => complete_attribute_name_after(tokenizer, info, code), + _ => complete_attribute_name_after(tokenizer, code, info), } } @@ -565,29 +523,25 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> /// ``` fn complete_attribute_name_after( tokenizer: &mut Tokenizer, - info: Info, code: Code, + info: Info, ) -> StateFnResult { match code { Code::Char('=') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_value_before(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))), None, ) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_name_after(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_name_after(t, c, info))), None, ) } - _ => complete_attribute_name_before(tokenizer, info, code), + _ => complete_attribute_name_before(tokenizer, code, info), } } @@ -600,38 +554,27 @@ fn complete_attribute_name_after( /// ``` fn complete_attribute_value_before( tokenizer: &mut Tokenizer, - info: Info, code: Code, + mut info: Info, ) -> StateFnResult { match code { Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None), Code::Char(char) if char == '"' || char == '\'' => { tokenizer.consume(code); - // To do: life times. 
- let mut clone = info; - clone.quote = if char == '"' { - QuoteKind::Double - } else { - QuoteKind::Single - }; - + info.quote = Some(QuoteKind::from_char(char)); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_value_quoted(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))), None, ) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_value_before(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))), None, ) } - _ => complete_attribute_value_unquoted(tokenizer, info, code), + _ => complete_attribute_value_unquoted(tokenizer, code, info), } } @@ -643,22 +586,16 @@ fn complete_attribute_value_before( /// ``` fn complete_attribute_value_quoted( tokenizer: &mut Tokenizer, - info: Info, code: Code, + info: Info, ) -> StateFnResult { - let marker = if info.quote == QuoteKind::Double { - '"' - } else { - '\'' - }; - match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None), - Code::Char(char) if char == marker => { + Code::Char(char) if char == info.quote.as_ref().unwrap().as_char() => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_value_quoted_after(tokenizer, info, code) + State::Fn(Box::new(|t, c| { + complete_attribute_value_quoted_after(t, c, info) })), None, ) @@ -666,9 +603,7 @@ fn complete_attribute_value_quoted( _ => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_value_quoted(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))), None, ) } @@ -682,21 +617,21 @@ fn complete_attribute_value_quoted( /// ``` fn complete_attribute_value_unquoted( tokenizer: &mut Tokenizer, - info: Info, code: Code, + info: Info, ) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char('\t' | '\n' | '\r' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => { - complete_attribute_name_after(tokenizer, info, code) + complete_attribute_name_after(tokenizer, code, info) } Code::Char(_) => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - complete_attribute_value_unquoted(tokenizer, info, code) + State::Fn(Box::new(|t, c| { + complete_attribute_value_unquoted(t, c, info) })), None, ) @@ -712,12 +647,12 @@ fn complete_attribute_value_unquoted( /// ``` fn complete_attribute_value_quoted_after( tokenizer: &mut Tokenizer, - info: Info, code: Code, + info: Info, ) -> StateFnResult { match code { Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => { - complete_attribute_name_before(tokenizer, info, code) + complete_attribute_name_before(tokenizer, code, info) } _ => (State::Nok, None), } @@ -728,16 +663,11 @@ fn complete_attribute_value_quoted_after( /// ```markdown /// <x a="b"|> /// ``` -fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char('>') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|tokenizer, code| { - complete_after(tokenizer, info, code) - })), - None, - ) + (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None) } _ => (State::Nok, None), } @@ -748,19 +678,14 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes /// ```markdown /// <x>| /// ``` -fn 
complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - continuation(tokenizer, info, code) + continuation(tokenizer, code, info) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); - ( - State::Fn(Box::new(|tokenizer, code| { - complete_after(tokenizer, info, code) - })), - None, - ) + (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None) } Code::Char(_) => (State::Nok, None), } @@ -771,49 +696,41 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnR /// ```markdown /// <!--x|xx--> /// ``` -fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char('-') if info.kind == Kind::Comment => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_comment_inside(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_comment_inside(t, c, info))), None, ) } Code::Char('<') if info.kind == Kind::Raw => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_raw_tag_open(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_raw_tag_open(t, c, info))), None, ) } Code::Char('>') if info.kind == Kind::Declaration => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_close(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_close(t, c, info))), None, ) } Code::Char('?') if info.kind == Kind::Instruction => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_declaration_inside(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, ) } Code::Char(']') if info.kind == Kind::Cdata => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_character_data_inside(tokenizer, info, code) + State::Fn(Box::new(|t, c| { + continuation_character_data_inside(t, c, info) })), None, ) @@ -821,27 +738,21 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') if info.kind == Kind::Basic || info.kind == Kind::Complete => { - let clone = info; - tokenizer.check(blank_line_before, |ok| { - if ok { - Box::new(|tokenizer, code| continuation_close(tokenizer, clone, code)) + let func = if ok { + continuation_close } else { - Box::new(|tokenizer, code| continuation_at_line_ending(tokenizer, clone, code)) - } + continuation_at_line_ending + }; + Box::new(move |t, c| func(t, c, info)) })(tokenizer, code) } Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - continuation_at_line_ending(tokenizer, info, code) + continuation_at_line_ending(tokenizer, code, info) } _ => { tokenizer.consume(code); - ( - State::Fn(Box::new(|tokenizer, code| { - continuation(tokenizer, info, code) - })), - None, - ) + (State::Fn(Box::new(|t, c| continuation(t, c, info))), None) } } } @@ -851,9 +762,9 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes /// ```markdown /// <x>| /// ``` -fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: 
Info) -> StateFnResult { tokenizer.exit(TokenType::HtmlFlowData); - html_continue_start(tokenizer, info, code) + html_continue_start(tokenizer, code, info) } /// In continuation, after an eol. @@ -862,7 +773,7 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code /// <x>| /// asd /// ``` -fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None => { tokenizer.exit(TokenType::HtmlFlow); @@ -874,15 +785,13 @@ fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta tokenizer.consume(code); tokenizer.exit(TokenType::LineEnding); ( - State::Fn(Box::new(|tokenizer, code| { - html_continue_start(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| html_continue_start(t, c, info))), None, ) } _ => { tokenizer.enter(TokenType::HtmlFlowData); - continuation(tokenizer, info, code) + continuation(tokenizer, code, info) } } } @@ -892,18 +801,16 @@ fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta /// ```markdown /// <!--xxx-|-> /// ``` -fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_comment_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { - Code::Char('-') if info.kind == Kind::Comment => { + Code::Char('-') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_declaration_inside(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, ) } - _ => continuation(tokenizer, info, code), + _ => continuation(tokenizer, code, info), } } @@ -912,18 +819,16 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code /// ```markdown /// <script>console.log(1)<|/script> /// ``` -fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::Char('/') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_raw_end_tag(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))), None, ) } - _ => continuation(tokenizer, info, code), + _ => continuation(tokenizer, code, info), } } @@ -934,39 +839,38 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code) /// <script>console.log(1)</s|cript> /// <script>console.log(1)</script|> /// ``` -fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_raw_end_tag( + tokenizer: &mut Tokenizer, + code: Code, + mut info: Info, +) -> StateFnResult { match code { Code::Char('>') => { let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase(); - // To do: life times. 
- let mut clone = info; - clone.buffer.clear(); + info.buffer.clear(); if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_close(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| continuation_close(t, c, info))), None, ) } else { - continuation(tokenizer, clone, code) + continuation(tokenizer, code, info) } } Code::Char(char) if char.is_ascii_alphabetic() && info.buffer.len() < HTML_RAW_SIZE_MAX => { tokenizer.consume(code); - // To do: life times. - let mut clone = info; - clone.buffer.push(char); + info.buffer.push(char); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_raw_end_tag(tokenizer, clone, code) - })), + State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))), None, ) } - _ => continuation(tokenizer, info, code), + _ => { + info.buffer.clear(); + continuation(tokenizer, code, info) + } } } @@ -977,20 +881,18 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) - /// ``` fn continuation_character_data_inside( tokenizer: &mut Tokenizer, - info: Info, code: Code, + info: Info, ) -> StateFnResult { match code { Code::Char(']') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_declaration_inside(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, ) } - _ => continuation(tokenizer, info, code), + _ => continuation(tokenizer, code, info), } } @@ -1008,29 +910,25 @@ fn continuation_character_data_inside( /// ``` fn continuation_declaration_inside( tokenizer: &mut Tokenizer, - info: Info, code: Code, + info: Info, ) -> StateFnResult { match code { Code::Char('>') => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_close(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_close(t, c, info))), None, ) } Code::Char('-') if info.kind == Kind::Comment => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_declaration_inside(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))), None, ) } - _ => continuation(tokenizer, info, code), + _ => continuation(tokenizer, code, info), } } @@ -1039,7 +937,7 @@ fn continuation_declaration_inside( /// ```markdown /// <!doctype>| /// ``` -fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.exit(TokenType::HtmlFlowData); @@ -1049,9 +947,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Stat _ => { tokenizer.consume(code); ( - State::Fn(Box::new(|tokenizer, code| { - continuation_close(tokenizer, info, code) - })), + State::Fn(Box::new(|t, c| continuation_close(t, c, info))), None, ) } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index a91113f..c4d2353 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -114,9 +114,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); let buffer = vec!['C', 'D', 'A', 'T', 'A', '[']; ( - State::Fn(Box::new(|tokenizer, code| { - cdata_open_inside(tokenizer, code, buffer, 0) - })), + State::Fn(Box::new(|t, c| cdata_open_inside(t, c, buffer, 0))), None, ) } @@ -247,8 +245,8 @@ fn cdata_open_inside( 
(State::Fn(Box::new(cdata)), None) } else { ( - State::Fn(Box::new(move |tokenizer, code| { - cdata_open_inside(tokenizer, code, buffer, index + 1) + State::Fn(Box::new(move |t, c| { + cdata_open_inside(t, c, buffer, index + 1) })), None, ) @@ -526,8 +524,8 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> Sta Code::Char(char) if char == '"' || char == '\'' => { tokenizer.consume(code); ( - State::Fn(Box::new(move |tokenizer, code| { - tag_open_attribute_value_quoted(tokenizer, code, char) + State::Fn(Box::new(move |t, c| { + tag_open_attribute_value_quoted(t, c, char) })), None, ) @@ -555,9 +553,7 @@ fn tag_open_attribute_value_quoted( Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => at_line_ending( tokenizer, code, - Box::new(move |tokenizer, code| { - tag_open_attribute_value_quoted(tokenizer, code, marker) - }), + Box::new(move |t, c| tag_open_attribute_value_quoted(t, c, marker)), ), Code::Char(char) if char == marker => { tokenizer.consume(code); @@ -569,8 +565,8 @@ fn tag_open_attribute_value_quoted( _ => { tokenizer.consume(code); ( - State::Fn(Box::new(move |tokenizer, code| { - tag_open_attribute_value_quoted(tokenizer, code, marker) + State::Fn(Box::new(move |t, c| { + tag_open_attribute_value_quoted(t, c, marker) })), None, ) diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index b00188d..624ee8e 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -129,23 +129,33 @@ fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { // To do: If code is disabled, indented lines are allowed to interrupt. tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { - if ok { - Box::new(|_t, code| (State::Ok, Some(vec![code]))) - } else { - Box::new(|tokenizer, code| { - tokenizer.attempt_5( - blank_line, - code_fenced, - html_flow, - heading_atx, - thematic_break, - |ok| { - Box::new(move |_t, code| { - (if ok { State::Nok } else { State::Ok }, Some(vec![code])) - }) - }, - )(tokenizer, code) - }) - } + Box::new(if ok { interrupt_indent } else { interrupt_cont }) })(tokenizer, code) } + +/// At an indent. +/// +/// ```markdown +/// alpha +/// | +/// ``` +pub fn interrupt_indent(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + (State::Ok, Some(vec![code])) +} + +/// Not at an indented line. +/// +/// ```markdown +/// alpha +/// |<div> +/// ``` +pub fn interrupt_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.attempt_5( + blank_line, + code_fenced, + html_flow, + heading_atx, + thematic_break, + |ok| Box::new(move |_t, code| (if ok { State::Nok } else { State::Ok }, Some(vec![code]))), + )(tokenizer, code) +} diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 55efd13..194165c 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -60,6 +60,17 @@ use crate::construct::partial_space_or_tab::space_or_tab_opt; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; use crate::util::link::link; +/// State needed to parse labels. +#[derive(Debug)] +struct Info { + /// Whether we’ve seen our first `ChunkString`. + connect: bool, + /// Whether there are non-blank characters in the label. + data: bool, + /// Number of characters in the label. + size: usize, +} + /// Before a label. 
/// /// ```markdown @@ -73,10 +84,12 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { tokenizer.consume(code); tokenizer.exit(TokenType::DefinitionLabelMarker); tokenizer.enter(TokenType::DefinitionLabelData); - ( - State::Fn(Box::new(|t, c| at_break(t, c, false, 0, false))), - None, - ) + let info = Info { + connect: false, + data: false, + size: 0, + }; + (State::Fn(Box::new(|t, c| at_break(t, c, info))), None) } _ => (State::Nok, None), } @@ -88,17 +101,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// [|a] /// [a|] /// ``` -fn at_break( - tokenizer: &mut Tokenizer, - code: Code, - data: bool, - size: usize, - connect: bool, -) -> StateFnResult { +fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None | Code::Char('[') => (State::Nok, None), - Code::Char(']') if !data => (State::Nok, None), - _ if size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None), + Code::Char(']') if !info.data => (State::Nok, None), + _ if info.size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None), Code::Char(']') => { tokenizer.exit(TokenType::DefinitionLabelData); tokenizer.enter(TokenType::DefinitionLabelMarker); @@ -110,12 +117,12 @@ fn at_break( _ => { tokenizer.enter(TokenType::ChunkString); - if connect { + if info.connect { let index = tokenizer.events.len() - 1; link(&mut tokenizer.events, index); } - label(tokenizer, code, data, size) + label(tokenizer, code, info) } } } @@ -126,16 +133,8 @@ fn at_break( /// [a /// |b] /// ``` -fn line_start( - tokenizer: &mut Tokenizer, - code: Code, - data: bool, - size: usize, - connect: bool, -) -> StateFnResult { - tokenizer.go(space_or_tab_opt(), move |t, c| { - line_begin(t, c, data, size, connect) - })(tokenizer, code) +fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { + tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code) } /// After a line ending, after optional whitespace. @@ -144,17 +143,11 @@ fn line_start( /// [a /// |b] /// ``` -fn line_begin( - tokenizer: &mut Tokenizer, - code: Code, - data: bool, - size: usize, - connect: bool, -) -> StateFnResult { +fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { // Blank line not allowed. 
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None), - _ => at_break(tokenizer, code, data, size, connect), + _ => at_break(tokenizer, code, info), } } @@ -163,44 +156,46 @@ fn line_begin( /// ```markdown /// [a|b] /// ``` -fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { +fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { + if !info.connect { + info.connect = true; + } + match code { Code::None | Code::Char('[' | ']') => { tokenizer.exit(TokenType::ChunkString); - at_break(tokenizer, code, data, size, true) + at_break(tokenizer, code, info) } - _ if size > LINK_REFERENCE_SIZE_MAX => { + _ if info.size > LINK_REFERENCE_SIZE_MAX => { tokenizer.exit(TokenType::ChunkString); - at_break(tokenizer, code, data, size, true) + at_break(tokenizer, code, info) } Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { tokenizer.consume(code); + info.size += 1; tokenizer.exit(TokenType::ChunkString); - ( - State::Fn(Box::new(move |t, c| line_start(t, c, data, size + 1, true))), - None, - ) + (State::Fn(Box::new(|t, c| line_start(t, c, info))), None) } Code::VirtualSpace | Code::Char('\t' | ' ') => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| label(t, c, data, size + 1))), - None, - ) + info.size += 1; + (State::Fn(Box::new(|t, c| label(t, c, info))), None) } Code::Char('/') => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| escape(t, c, true, size + 1))), - None, - ) + info.size += 1; + if !info.data { + info.data = true; + } + (State::Fn(Box::new(|t, c| escape(t, c, info))), None) } Code::Char(_) => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))), - None, - ) + info.size += 1; + if !info.data { + info.data = true; + } + (State::Fn(Box::new(|t, c| label(t, c, info))), None) } } } @@ -210,15 +205,13 @@ fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> Stat /// ```markdown /// [a\|[b] /// ``` -fn escape(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { +fn escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::Char('[' | '\\' | ']') => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))), - None, - ) + info.size += 1; + (State::Fn(Box::new(|t, c| label(t, c, info))), None) } - _ => label(tokenizer, code, data, size), + _ => label(tokenizer, code, info), } } diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 40ece49..1c4b367 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -6,6 +6,19 @@ use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer}; +/// Options to parse whitespace. +#[derive(Debug)] +struct Info { + /// Current size. + size: usize, + /// Minimum allowed characters (inclusive). + min: usize, + /// Maximum allowed characters (inclusive). + max: usize, + /// Token type to use for whitespace events. + kind: TokenType, +} + /// Optional `space_or_tab` /// /// ```bnf @@ -30,7 +43,13 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> { /// space_or_tab ::= x*y( ' ' '\t' ) /// ``` pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> { - Box::new(move |t, c| start(t, c, kind, min, max)) + let info = Info { + size: 0, + min, + max, + kind, + }; + Box::new(|t, c| start(t, c, info)) } /// Before whitespace. 
@@ -38,26 +57,16 @@ pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> { /// ```markdown /// alpha| bravo /// ``` -fn start( - tokenizer: &mut Tokenizer, - code: Code, - kind: TokenType, - min: usize, - max: usize, -) -> StateFnResult { +fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { - Code::VirtualSpace | Code::Char('\t' | ' ') if max > 0 => { - tokenizer.enter(kind.clone()); + Code::VirtualSpace | Code::Char('\t' | ' ') if info.max > 0 => { + tokenizer.enter(info.kind.clone()); tokenizer.consume(code); - ( - State::Fn(Box::new(move |tokenizer, code| { - inside(tokenizer, code, kind, min, max, 1) - })), - None, - ) + info.size += 1; + (State::Fn(Box::new(|t, c| inside(t, c, info))), None) } _ => ( - if min == 0 { State::Ok } else { State::Nok }, + if info.min == 0 { State::Ok } else { State::Nok }, Some(vec![code]), ), } @@ -69,28 +78,21 @@ fn start( /// alpha |bravo /// alpha | bravo /// ``` -fn inside( - tokenizer: &mut Tokenizer, - code: Code, - kind: TokenType, - min: usize, - max: usize, - size: usize, -) -> StateFnResult { +fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { - Code::VirtualSpace | Code::Char('\t' | ' ') if size < max => { + Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.max => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |tokenizer, code| { - inside(tokenizer, code, kind, min, max, size + 1) - })), - None, - ) + info.size += 1; + (State::Fn(Box::new(|t, c| inside(t, c, info))), None) } _ => { - tokenizer.exit(kind); + tokenizer.exit(info.kind.clone()); ( - if size >= min { State::Ok } else { State::Nok }, + if info.size >= info.min { + State::Ok + } else { + State::Nok + }, Some(vec![code]), ) } diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 22c3209..aa1e067 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -38,22 +38,59 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; use crate::util::link::link; /// Type of title. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, PartialEq)] enum Kind { /// In a parenthesized (`(` and `)`) title. + /// + /// ## Example + /// + /// ```markdown + /// [a] b (c) + /// ``` Paren, /// In a double quoted (`"`) title. + /// + /// ## Example + /// + /// ```markdown + /// [a] b "c" + /// ``` Double, /// In a single quoted (`'`) title. + /// + /// ## Example + /// + /// ```markdown + /// [a] b 'c' + /// ``` Single, } -/// Display a marker. -fn kind_to_marker(kind: &Kind) -> char { - match kind { - Kind::Double => '"', - Kind::Single => '\'', - Kind::Paren => ')', +impl Kind { + /// Turn the kind into a [char]. + /// + /// > 👉 **Note**: a closing paren is used. + fn as_char(&self) -> char { + match self { + Kind::Paren => ')', + Kind::Double => '"', + Kind::Single => '\'', + } + } + /// Turn a [char] into a kind. + /// + /// > 👉 **Note**: an opening paren must be used. + /// + /// ## Panics + /// + /// Panics if `char` is not `(`, `"`, or `'`. 
+ fn from_char(char: char) -> Kind { + match char { + '(' => Kind::Paren, + '"' => Kind::Double, + '\'' => Kind::Single, + _ => unreachable!("invalid char"), + } } } @@ -65,21 +102,16 @@ fn kind_to_marker(kind: &Kind) -> char { /// |(a) /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - let kind = match code { - Code::Char('"') => Some(Kind::Double), - Code::Char('\'') => Some(Kind::Single), - Code::Char('(') => Some(Kind::Paren), - _ => None, - }; - - if let Some(kind) = kind { - tokenizer.enter(TokenType::DefinitionTitle); - tokenizer.enter(TokenType::DefinitionTitleMarker); - tokenizer.consume(code); - tokenizer.exit(TokenType::DefinitionTitleMarker); - (State::Fn(Box::new(|t, c| begin(t, c, kind))), None) - } else { - (State::Nok, None) + match code { + Code::Char(char) if char == '(' || char == '"' || char == '\'' => { + let kind = Kind::from_char(char); + tokenizer.enter(TokenType::DefinitionTitle); + tokenizer.enter(TokenType::DefinitionTitleMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::DefinitionTitleMarker); + (State::Fn(Box::new(|t, c| begin(t, c, kind))), None) + } + _ => (State::Nok, None), } } @@ -94,7 +126,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { match code { - Code::Char(char) if char == kind_to_marker(&kind) => { + Code::Char(char) if char == kind.as_char() => { tokenizer.enter(TokenType::DefinitionTitleMarker); tokenizer.consume(code); tokenizer.exit(TokenType::DefinitionTitleMarker); @@ -118,7 +150,7 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { /// ``` fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind, connect: bool) -> StateFnResult { match code { - Code::Char(char) if char == kind_to_marker(&kind) => { + Code::Char(char) if char == kind.as_char() => { tokenizer.exit(TokenType::DefinitionTitleString); begin(tokenizer, code, kind) } @@ -165,7 +197,7 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResul /// ``` fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { match code { - Code::Char(char) if char == kind_to_marker(&kind) => { + Code::Char(char) if char == kind.as_char() => { tokenizer.exit(TokenType::ChunkString); at_break(tokenizer, code, kind, true) } @@ -176,10 +208,7 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { tokenizer.consume(code); tokenizer.exit(TokenType::ChunkString); - ( - State::Fn(Box::new(move |t, c| line_start(t, c, kind))), - None, - ) + (State::Fn(Box::new(|t, c| line_start(t, c, kind))), None) } Code::Char('\\') => { tokenizer.consume(code); @@ -199,9 +228,9 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { /// ``` fn escape(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { match code { - Code::Char(char) if char == kind_to_marker(&kind) => { + Code::Char(char) if char == kind.as_char() => { tokenizer.consume(code); - (State::Fn(Box::new(move |t, c| title(t, c, kind))), None) + (State::Fn(Box::new(|t, c| title(t, c, kind))), None) } _ => title(tokenizer, code, kind), } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index abf733d..a9e5732 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -48,6 +48,68 @@ use super::partial_space_or_tab::space_or_tab_opt; use 
crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN; use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +/// Type of thematic break. +#[derive(Debug, PartialEq)] +enum Kind { + /// In a thematic break using asterisks (`*`). + /// + /// ## Example + /// + /// ```markdown + /// *** + /// ``` + Asterisk, + /// In a thematic break using dashes (`-`). + /// + /// ## Example + /// + /// ```markdown + /// --- + /// ``` + Dash, + /// In a thematic break using underscores (`_`). + /// + /// ## Example + /// + /// ```markdown + /// ___ + /// ``` + Underscore, +} + +impl Kind { + /// Turn the kind into a [char]. + fn as_char(&self) -> char { + match self { + Kind::Asterisk => '*', + Kind::Dash => '-', + Kind::Underscore => '_', + } + } + /// Turn a [char] into a kind. + /// + /// ## Panics + /// + /// Panics if `char` is not `*`, `_`, or `_`. + fn from_char(char: char) -> Kind { + match char { + '*' => Kind::Asterisk, + '-' => Kind::Dash, + '_' => Kind::Underscore, + _ => unreachable!("invalid char"), + } + } +} + +/// State needed to parse thematic breaks. +#[derive(Debug)] +struct Info { + /// Kind of marker. + kind: Kind, + /// Number of markers. + size: usize, +} + /// Start of a thematic break. /// /// ```markdown @@ -65,9 +127,14 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { - Code::Char(char) if char == '*' || char == '-' || char == '_' => { - at_break(tokenizer, code, char, 0) - } + Code::Char(char) if char == '*' || char == '-' || char == '_' => at_break( + tokenizer, + code, + Info { + kind: Kind::from_char(char), + size: 0, + }, + ), _ => (State::Nok, None), } } @@ -79,17 +146,17 @@ pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// *| * * /// * |* * /// ``` -fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult { +fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') - if size >= THEMATIC_BREAK_MARKER_COUNT_MIN => + if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => { tokenizer.exit(TokenType::ThematicBreak); (State::Ok, Some(vec![code])) } - Code::Char(char) if char == marker => { + Code::Char(char) if char == info.kind.as_char() => { tokenizer.enter(TokenType::ThematicBreakSequence); - sequence(tokenizer, code, marker, size) + sequence(tokenizer, code, info) } _ => (State::Nok, None), } @@ -102,22 +169,16 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> /// *|** /// **|* /// ``` -fn sequence(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult { +fn sequence(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { - Code::Char(char) if char == marker => { + Code::Char(char) if char == info.kind.as_char() => { tokenizer.consume(code); - ( - State::Fn(Box::new(move |tokenizer, code| { - sequence(tokenizer, code, marker, size + 1) - })), - None, - ) + info.size += 1; + (State::Fn(Box::new(|t, c| sequence(t, c, info))), None) } _ => { tokenizer.exit(TokenType::ThematicBreakSequence); - tokenizer.go(space_or_tab_opt(), move |t, c| at_break(t, c, marker, size))( - tokenizer, code, - ) + tokenizer.go(space_or_tab_opt(), |t, c| at_break(t, c, info))(tokenizer, code) } } } |
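
Not part of the commit above — the following is a minimal, standalone sketch of the two refactoring patterns this diff applies across `code_fenced.rs`, `heading_setext.rs`, `partial_title.rs`, and `thematic_break.rs`: a marker `Kind` enum with `as_char`/`from_char` replacing ad-hoc marker-`char` comparisons, and an `Info` struct grouping the loose state-function parameters. Only the `Kind`/`Info` names and the `*`/`-`/`_` markers come from the diff; the tokenizer-free scan and `main` driver are illustrative assumptions.

```rust
/// Marker kind, mirroring the enums introduced in the diff.
#[derive(Debug, PartialEq)]
enum Kind {
    Asterisk,
    Dash,
    Underscore,
}

impl Kind {
    /// Turn the kind into a [char].
    fn as_char(&self) -> char {
        match self {
            Kind::Asterisk => '*',
            Kind::Dash => '-',
            Kind::Underscore => '_',
        }
    }

    /// Turn a [char] into a kind (panics on anything else).
    fn from_char(char: char) -> Kind {
        match char {
            '*' => Kind::Asterisk,
            '-' => Kind::Dash,
            '_' => Kind::Underscore,
            _ => unreachable!("invalid char"),
        }
    }
}

/// Grouped parsing state, replacing loose `(marker, size)` parameters.
#[derive(Debug)]
struct Info {
    kind: Kind,
    size: usize,
}

fn main() {
    // Toy scan of a thematic-break-like line, without the real
    // tokenizer state machine: classify the first marker, then count
    // subsequent markers of the same kind via `as_char`.
    let line = "***";
    let mut chars = line.chars();
    let first = chars.next().unwrap();
    let mut info = Info {
        kind: Kind::from_char(first),
        size: 1,
    };
    for char in chars {
        if char == info.kind.as_char() {
            info.size += 1;
        }
    }
    println!("{:?}", info); // Info { kind: Asterisk, size: 3 }
}
```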