diff options
Diffstat (limited to '')
| -rw-r--r-- | src/compiler.rs | 58 | ||||
| -rw-r--r-- | src/construct/autolink.rs | 7 | ||||
| -rw-r--r-- | src/construct/character_reference.rs | 64 | ||||
| -rw-r--r-- | src/construct/code_fenced.rs | 68 | ||||
| -rw-r--r-- | src/construct/code_text.rs | 26 | ||||
| -rw-r--r-- | src/construct/definition.rs | 12 | ||||
| -rw-r--r-- | src/construct/heading_setext.rs | 56 | ||||
| -rw-r--r-- | src/construct/html_flow.rs | 462 | ||||
| -rw-r--r-- | src/construct/html_text.rs | 20 | ||||
| -rw-r--r-- | src/construct/paragraph.rs | 46 | ||||
| -rw-r--r-- | src/construct/partial_label.rs | 111 | ||||
| -rw-r--r-- | src/construct/partial_space_or_tab.rs | 70 | ||||
| -rw-r--r-- | src/construct/partial_title.rs | 91 | ||||
| -rw-r--r-- | src/construct/thematic_break.rs | 97 | ||||
| -rw-r--r-- | src/content/string.rs | 8 | ||||
| -rw-r--r-- | src/tokenizer.rs | 1 | 
16 files changed, 605 insertions, 592 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 5c7f6d8..9bc2488 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -9,13 +9,37 @@ use crate::util::{  };  /// To do. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)]  pub enum LineEnding {      CarriageReturnLineFeed,      CarriageReturn,      LineFeed,  } +impl LineEnding { +    /// Turn the line ending into a [str]. +    fn as_str(&self) -> &str { +        match self { +            LineEnding::CarriageReturnLineFeed => "\r\n", +            LineEnding::CarriageReturn => "\r", +            LineEnding::LineFeed => "\n", +        } +    } +    /// Turn a [Code] into a line ending. +    /// +    /// ## Panics +    /// +    /// Panics if `code` is not `\r\n`, `\r`, or `\n`. +    fn from_code(code: Code) -> LineEnding { +        match code { +            Code::CarriageReturnLineFeed => LineEnding::CarriageReturnLineFeed, +            Code::Char('\r') => LineEnding::CarriageReturn, +            Code::Char('\n') => LineEnding::LineFeed, +            _ => unreachable!("invalid code"), +        } +    } +} +  /// Configuration (optional).  
#[derive(Default, Debug)]  pub struct Options { @@ -120,29 +144,20 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {                  || event.token_type == TokenType::LineEnding)          {              let codes = codes_from_span(codes, &from_exit_event(events, index)); -            let code = *codes.first().unwrap(); -            line_ending_inferred = Some(if code == Code::CarriageReturnLineFeed { -                LineEnding::CarriageReturnLineFeed -            } else if code == Code::Char('\r') { -                LineEnding::CarriageReturn -            } else { -                LineEnding::LineFeed -            }); +            line_ending_inferred = Some(LineEnding::from_code(*codes.first().unwrap()));              break;          }          index += 1;      } -    let line_ending_default: LineEnding; - -    if let Some(value) = line_ending_inferred { -        line_ending_default = value; +    let line_ending_default = if let Some(value) = line_ending_inferred { +        value      } else if let Some(value) = &options.default_line_ending { -        line_ending_default = value.clone(); +        value.clone()      } else { -        line_ending_default = LineEnding::LineFeed; -    } +        LineEnding::LineFeed +    };      index = 0; @@ -557,17 +572,8 @@ fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> {  /// Add a line ending.  fn line_ending(buffers: &mut [Vec<String>], default: &LineEnding) {      let tail = buf_tail_mut(buffers); - -    println!("xxx: {:?}", default); - -    let line_ending = match default { -        LineEnding::CarriageReturnLineFeed => "\r\n", -        LineEnding::CarriageReturn => "\r", -        LineEnding::LineFeed => "\n", -    }; -      // lastWasTag = false -    tail.push(line_ending.to_string()); +    tail.push(default.as_str().to_string());  }  /// Add a line ending if needed (as in, there’s no eol/eof already). 
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index c9596a6..8376b98 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -266,12 +266,11 @@ fn email_label(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnRes              )          }          Code::Char('>') => { -            let tail_index = tokenizer.events.len(); -            let head_index = tokenizer.events.len() - 1; +            let index = tokenizer.events.len();              tokenizer.exit(TokenType::AutolinkProtocol);              // Change the token type. -            tokenizer.events[head_index].token_type = TokenType::AutolinkEmail; -            tokenizer.events[tail_index].token_type = TokenType::AutolinkEmail; +            tokenizer.events[index - 1].token_type = TokenType::AutolinkEmail; +            tokenizer.events[index].token_type = TokenType::AutolinkEmail;              end(tokenizer, code)          }          _ => email_value(tokenizer, code, size), diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index c946dae..decf852 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -59,7 +59,7 @@ use crate::constant::{  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// Kind of a character reference. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq)]  pub enum Kind {      /// Numeric decimal character reference (`	`).      Decimal, @@ -69,6 +69,28 @@ pub enum Kind {      Named,  } +impl Kind { +    /// Get the maximum size of characters allowed in a character reference. +    fn max(&self) -> usize { +        match self { +            Kind::Hexadecimal => CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, +            Kind::Decimal => CHARACTER_REFERENCE_DECIMAL_SIZE_MAX, +            Kind::Named => CHARACTER_REFERENCE_NAMED_SIZE_MAX, +        } +    } + +    /// Check if a char is allowed. 
+    fn allowed(&self, char: char) -> bool { +        let check = match self { +            Kind::Hexadecimal => char::is_ascii_hexdigit, +            Kind::Decimal => char::is_ascii_digit, +            Kind::Named => char::is_ascii_alphanumeric, +        }; + +        check(&char) +    } +} +  /// State needed to parse character references.  #[derive(Debug, Clone)]  struct Info { @@ -141,10 +163,10 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              tokenizer.enter(TokenType::CharacterReferenceValue);              ( -                State::Fn(Box::new(|tokenizer, code| { +                State::Fn(Box::new(|t, c| {                      value( -                        tokenizer, -                        code, +                        t, +                        c,                          Info {                              buffer: vec![],                              kind: Kind::Hexadecimal, @@ -179,7 +201,7 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// a|23;b  /// a&#x|9;b  /// ``` -fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { +fn value(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code {          Code::Char(';') if !info.buffer.is_empty() => {              tokenizer.exit(TokenType::CharacterReferenceValue); @@ -198,36 +220,10 @@ fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {              (State::Ok, None)          }          Code::Char(char) => { -            let len = info.buffer.len(); - -            let cont = match info.kind { -                Kind::Hexadecimal -                    if char.is_ascii_hexdigit() -                        && len < CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX => -                { -                    true -                } -                Kind::Decimal -                    if char.is_ascii_digit() && len < CHARACTER_REFERENCE_DECIMAL_SIZE_MAX => -                { - 
                   true -                } -                Kind::Named -                    if char.is_ascii_alphanumeric() && len < CHARACTER_REFERENCE_NAMED_SIZE_MAX => -                { -                    true -                } -                _ => false, -            }; - -            if cont { -                let mut clone = info; -                clone.buffer.push(char); +            if info.buffer.len() < info.kind.max() && info.kind.allowed(char) { +                info.buffer.push(char);                  tokenizer.consume(code); -                ( -                    State::Fn(Box::new(|tokenizer, code| value(tokenizer, code, clone))), -                    None, -                ) +                (State::Fn(Box::new(|t, c| value(t, c, info))), None)              } else {                  (State::Nok, None)              } diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 30ec911..f79705c 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -99,11 +99,49 @@ use crate::util::span::from_exit_event;  #[derive(Debug, Clone, PartialEq)]  pub enum Kind {      /// Grave accent (tick) code. +    /// +    /// ## Example +    /// +    /// ````markdown +    /// ```rust +    /// println!("I <3 🦀"); +    /// ``` +    /// ````      GraveAccent,      /// Tilde code. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// ~~~rust +    /// println!("I <3 🦀"); +    /// ~~~ +    /// ```      Tilde,  } +impl Kind { +    /// Turn the kind into a [char]. +    fn as_char(&self) -> char { +        match self { +            Kind::GraveAccent => '`', +            Kind::Tilde => '~', +        } +    } +    /// Turn a [char] into a kind. +    /// +    /// ## Panics +    /// +    /// Panics if `char` is not `~` or `` ` ``. 
+    fn from_char(char: char) -> Kind { +        match char { +            '`' => Kind::GraveAccent, +            '~' => Kind::Tilde, +            _ => unreachable!("invalid char"), +        } +    } +} +  /// State needed to parse code (fenced).  #[derive(Debug, Clone)]  struct Info { @@ -160,11 +198,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult                  Info {                      prefix,                      size: 0, -                    kind: if char == '`' { -                        Kind::GraveAccent -                    } else { -                        Kind::Tilde -                    }, +                    kind: Kind::from_char(char),                  },              )          } @@ -180,14 +214,8 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult  /// ~~~  /// ```  fn sequence_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { -    let marker = if info.kind == Kind::GraveAccent { -        '`' -    } else { -        '~' -    }; -      match code { -        Code::Char(char) if char == marker => { +        Code::Char(char) if char == info.kind.as_char() => {              tokenizer.consume(code);              (                  State::Fn(Box::new(|t, c| { @@ -375,14 +403,8 @@ fn close_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu  ///   |~~~  /// ```  fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { -    let marker = if info.kind == Kind::GraveAccent { -        '`' -    } else { -        '~' -    }; -      match code { -        Code::Char(char) if char == marker => { +        Code::Char(char) if char == info.kind.as_char() => {              tokenizer.enter(TokenType::CodeFencedFenceSequence);              close_sequence(tokenizer, code, info, 0)          } @@ -398,14 +420,8 @@ fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes  /// ~|~~  /// ```  fn 
close_sequence(tokenizer: &mut Tokenizer, code: Code, info: Info, size: usize) -> StateFnResult { -    let marker = if info.kind == Kind::GraveAccent { -        '`' -    } else { -        '~' -    }; -      match code { -        Code::Char(char) if char == marker => { +        Code::Char(char) if char == info.kind.as_char() => {              tokenizer.consume(code);              (                  State::Fn(Box::new(move |t, c| close_sequence(t, c, info, size + 1))), diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index 1f34e41..94e0106 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -113,9 +113,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnR      if let Code::Char('`') = code {          tokenizer.consume(code);          ( -            State::Fn(Box::new(move |tokenizer, code| { -                sequence_open(tokenizer, code, size + 1) -            })), +            State::Fn(Box::new(move |t, c| sequence_open(t, c, size + 1))),              None,          )      } else { @@ -138,9 +136,7 @@ fn between(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnRe              tokenizer.consume(code);              tokenizer.exit(TokenType::CodeTextLineEnding);              ( -                State::Fn(Box::new(move |tokenizer, code| { -                    between(tokenizer, code, size_open) -                })), +                State::Fn(Box::new(move |t, c| between(t, c, size_open))),                  None,              )          } @@ -168,12 +164,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResul          }          _ => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |tokenizer, code| { -                    data(tokenizer, code, size_open) -                })), -                None, -            ) +            (State::Fn(Box::new(move |t, c| data(t, c, size_open))), None)          }    
  }  } @@ -193,8 +184,8 @@ fn sequence_close(          Code::Char('`') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(move |tokenizer, code| { -                    sequence_close(tokenizer, code, size_open, size + 1) +                State::Fn(Box::new(move |t, c| { +                    sequence_close(t, c, size_open, size + 1)                  })),                  None,              ) @@ -205,12 +196,11 @@ fn sequence_close(              (State::Ok, Some(vec![code]))          }          _ => { -            let tail_index = tokenizer.events.len(); -            let head_index = tokenizer.events.len() - 1; +            let index = tokenizer.events.len();              tokenizer.exit(TokenType::CodeTextSequence);              // Change the token type. -            tokenizer.events[head_index].token_type = TokenType::CodeTextData; -            tokenizer.events[tail_index].token_type = TokenType::CodeTextData; +            tokenizer.events[index - 1].token_type = TokenType::CodeTextData; +            tokenizer.events[index].token_type = TokenType::CodeTextData;              between(tokenizer, code, size_open)          }      } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index 03baee6..61c4d34 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -144,16 +144,14 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      let event = tokenizer.events.last().unwrap(); -    // Blank line not ok. -    let char_nok = matches!( -        code, -        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') -    );      // Whitespace.      if (event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace) -        && !char_nok -    { +    // Blank line not ok. 
+        && !matches!( +        code, +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') +    ) {          tokenizer.go(destination, destination_after)(tokenizer, code)      } else {          (State::Nok, None) diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index a418041..7c41855 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -57,12 +57,49 @@ use crate::util::{link::link, span::from_exit_event};  /// Kind of underline.  #[derive(Debug, Clone, PartialEq)]  pub enum Kind { -    /// Grave accent (tick) code. +    /// Dash (rank 2) heading. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// alpha +    /// ----- +    /// ```      Dash, -    /// Tilde code. + +    /// Equals to (rank 1) heading. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// alpha +    /// ===== +    /// ```      EqualsTo,  } +impl Kind { +    /// Turn the kind into a [char]. +    fn as_char(&self) -> char { +        match self { +            Kind::Dash => '-', +            Kind::EqualsTo => '=', +        } +    } +    /// Turn a [char] into a kind. +    /// +    /// ## Panics +    /// +    /// Panics if `char` is not `-` or `=`. +    fn from_char(char: char) -> Kind { +        match char { +            '-' => Kind::Dash, +            '=' => Kind::EqualsTo, +            _ => unreachable!("invalid char"), +        } +    } +} +  /// Start of a heading (setext).  
///  /// ```markdown @@ -232,13 +269,8 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes      match code {          Code::Char(char) if char == '-' || char == '=' => { -            let marker = if char == '-' { -                Kind::Dash -            } else { -                Kind::EqualsTo -            };              tokenizer.enter(TokenType::HeadingSetextUnderline); -            underline_sequence_inside(tokenizer, code, marker) +            underline_sequence_inside(tokenizer, code, Kind::from_char(char))          }          _ => (State::Nok, None),      } @@ -251,15 +283,11 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes  /// =|=  /// ```  fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { -    let marker = if kind == Kind::Dash { '-' } else { '=' }; -      match code { -        Code::Char(char) if char == marker => { +        Code::Char(char) if char == kind.as_char() => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(move |tokenizer, code| { -                    underline_sequence_inside(tokenizer, code, kind) -                })), +                State::Fn(Box::new(move |t, c| underline_sequence_inside(t, c, kind))),                  None,              )          } diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index 4819e63..d5937c5 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -97,10 +97,8 @@ use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::sp  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// Kind of HTML (flow). -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, PartialEq)]  enum Kind { -    /// Not yet known. -    Unknown,      /// Symbol for `<script>` (condition 1).      Raw,      /// Symbol for `<!---->` (condition 2). 
@@ -117,19 +115,51 @@ enum Kind {      Complete,  } -/// Type of quote, if we’re in an attribure, in complete (condition 7). -#[derive(Debug, Clone, PartialEq)] +/// Type of quote, if we’re in a quoted attribute, in complete (condition 7). +#[derive(Debug, PartialEq)]  enum QuoteKind { -    /// Not in a quoted attribute. -    None, -    /// In a double quoted (`"`) attribute. +    /// In a double quoted (`"`) attribute value. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// <a b="c" /> +    /// ```      Double, -    /// In a single quoted (`"`) attribute. +    /// In a single quoted (`'`) attribute value. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// <a b='c' /> +    /// ```      Single,  } +impl QuoteKind { +    /// Turn the kind into a [char]. +    fn as_char(&self) -> char { +        match self { +            QuoteKind::Double => '"', +            QuoteKind::Single => '\'', +        } +    } +    /// Turn a [char] into a kind. +    /// +    /// ## Panics +    /// +    /// Panics if `char` is not `"` or `'`. +    fn from_char(char: char) -> QuoteKind { +        match char { +            '"' => QuoteKind::Double, +            '\'' => QuoteKind::Single, +            _ => unreachable!("invalid char"), +        } +    } +} +  /// State needed to parse HTML (flow). -#[derive(Debug, Clone)] +#[derive(Debug)]  struct Info {      /// Kind of HTML (flow).      kind: Kind, @@ -141,7 +171,7 @@ struct Info {      /// `index` into `buffer` when expecting certain characters.      index: usize,      /// Current quote, when in a double or single quoted attribute value. -    quote: QuoteKind, +    quote: Option<QuoteKind>,  }  // To do: mark as concrete (block quotes or lists can’t “pierce” into HTML). 
@@ -166,22 +196,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      if Code::Char('<') == code {          tokenizer.consume(code); -        ( -            State::Fn(Box::new(|tokenizer, code| { -                open( -                    tokenizer, -                    Info { -                        kind: Kind::Unknown, -                        start_tag: false, -                        buffer: vec![], -                        index: 0, -                        quote: QuoteKind::None, -                    }, -                    code, -                ) -            })), -            None, -        ) +        (State::Fn(Box::new(open)), None)      } else {          (State::Nok, None)      } @@ -194,45 +209,44 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// <|!doctype>  /// <|!--xxx-->  /// ``` -fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    let mut info = Info { +        // Assume basic. 
+        kind: Kind::Basic, +        start_tag: false, +        buffer: vec![], +        index: 0, +        quote: None, +    }; +      match code {          Code::Char('!') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    declaration_open(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| declaration_open(t, c, info))),                  None,              )          }          Code::Char('/') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    tag_close_start(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| tag_close_start(t, c, info))),                  None,              )          }          Code::Char('?') => { -            // To do: life times. -            let mut clone = info; -            clone.kind = Kind::Instruction; +            info.kind = Kind::Instruction;              tokenizer.consume(code);              // While we’re in an instruction instead of a declaration, we’re on a `?`              // right now, so we do need to search for `>`, similar to declarations.              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_declaration_inside(tokenizer, clone, code) -                })), +                State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),                  None,              )          }          Code::Char(char) if char.is_ascii_alphabetic() => { -            // To do: life times. 
-            let mut clone = info; -            clone.start_tag = true; -            tag_name(tokenizer, clone, code) +            info.start_tag = true; +            tag_name(tokenizer, code, info)          }          _ => (State::Nok, None),      } @@ -245,41 +259,31 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {  /// <!|--xxx-->  /// <!|[CDATA[>&<]]>  /// ``` -fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code {          Code::Char('-') => {              tokenizer.consume(code); -            let mut clone = info; -            clone.kind = Kind::Comment; +            info.kind = Kind::Comment;              ( -                State::Fn(Box::new(|tokenizer, code| { -                    comment_open_inside(tokenizer, clone, code) -                })), +                State::Fn(Box::new(|t, c| comment_open_inside(t, c, info))),                  None,              )          }          Code::Char('[') => {              tokenizer.consume(code); -            let mut clone = info; -            clone.kind = Kind::Cdata; -            clone.buffer = vec!['C', 'D', 'A', 'T', 'A', '[']; -            clone.index = 0; +            info.kind = Kind::Cdata; +            info.buffer = vec!['C', 'D', 'A', 'T', 'A', '[']; +            info.index = 0;              ( -                State::Fn(Box::new(|tokenizer, code| { -                    cdata_open_inside(tokenizer, clone, code) -                })), +                State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))),                  None,              )          }          Code::Char(char) if char.is_ascii_alphabetic() => {              tokenizer.consume(code); -            // To do: life times. 
-            let mut clone = info; -            clone.kind = Kind::Declaration; +            info.kind = Kind::Declaration;              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_declaration_inside(tokenizer, clone, code) -                })), +                State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),                  None,              )          } @@ -292,14 +296,12 @@ fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateF  /// ```markdown  /// <!-|-xxx-->  /// ``` -fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::Char('-') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_declaration_inside(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),                  None,              )          } @@ -316,26 +318,18 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta  /// <![CDAT|A[>&<]]>  /// <![CDATA|[>&<]]>  /// ``` -fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code {          Code::Char(char) if char == info.buffer[info.index] => { -            let mut clone = info; -            clone.index += 1; +            info.index += 1;              tokenizer.consume(code); -            if clone.index == clone.buffer.len() { -                clone.buffer.clear(); -                ( -                    State::Fn(Box::new(|tokenizer, code| { -                        continuation(tokenizer, clone, code) -                    })), -                  
  None, -                ) +            if info.index == info.buffer.len() { +                info.buffer.clear(); +                (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)              } else {                  ( -                    State::Fn(Box::new(|tokenizer, code| { -                        cdata_open_inside(tokenizer, clone, code) -                    })), +                    State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))),                      None,                  )              } @@ -349,17 +343,12 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> State  /// ```markdown  /// </|x>  /// ``` -fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn tag_close_start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code {          Code::Char(char) if char.is_ascii_alphabetic() => {              tokenizer.consume(code); -            // To do: life times. 
-            let mut clone = info; -            clone.buffer.push(char); -            ( -                State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))), -                None, -            ) +            info.buffer.push(char); +            (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None)          }          _ => (State::Nok, None),      } @@ -371,7 +360,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFn  /// <a|b>  /// </a|b>  /// ``` -fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code {          Code::None          | Code::CarriageReturnLineFeed @@ -385,50 +374,37 @@ fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult                  false              }; +            info.buffer.clear(); +              if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) { -                // To do: life times. -                let mut clone = info; -                clone.kind = Kind::Raw; -                clone.buffer.clear(); -                continuation(tokenizer, clone, code) +                info.kind = Kind::Raw; +                continuation(tokenizer, code, info)              } else if HTML_BLOCK_NAMES.contains(&name) { -                // To do: life times. -                let mut clone = info; -                clone.kind = Kind::Basic; -                clone.buffer.clear(); - +                // Basic is assumed, no need to set `kind`.                  
if slash {                      tokenizer.consume(code);                      ( -                        State::Fn(Box::new(|tokenizer, code| { -                            basic_self_closing(tokenizer, clone, code) -                        })), +                        State::Fn(Box::new(|t, c| basic_self_closing(t, c, info))),                          None,                      )                  } else { -                    continuation(tokenizer, clone, code) +                    continuation(tokenizer, code, info)                  }              } else { -                // To do: life times. -                let mut clone = info; -                clone.kind = Kind::Complete; +                info.kind = Kind::Complete;                  // To do: do not support complete HTML when interrupting. -                if clone.start_tag { -                    complete_attribute_name_before(tokenizer, clone, code) +                if info.start_tag { +                    complete_attribute_name_before(tokenizer, code, info)                  } else { -                    complete_closing_tag_after(tokenizer, clone, code) +                    complete_closing_tag_after(tokenizer, code, info)                  }              }          }          Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {              tokenizer.consume(code); -            let mut clone = info; -            clone.buffer.push(char); -            ( -                State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))), -                None, -            ) +            info.buffer.push(char); +            (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None)          }          Code::Char(_) => (State::Nok, None),      } @@ -439,16 +415,11 @@ fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult  /// ```markdown  /// <div/|>  /// ``` -fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn 
basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::Char('>') => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation(tokenizer, info, code) -                })), -                None, -            ) +            (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)          }          _ => (State::Nok, None),      } @@ -460,18 +431,16 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Stat  /// <x/|>  /// </x/|>  /// ``` -fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_closing_tag_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::VirtualSpace | Code::Char('\t' | ' ') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_closing_tag_after(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_closing_tag_after(t, c, info))),                  None,              )          } -        _ => complete_end(tokenizer, info, code), +        _ => complete_end(tokenizer, code, info),      }  } @@ -492,38 +461,29 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code)  /// ```  fn complete_attribute_name_before(      tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    info: Info,  ) -> StateFnResult {      match code {          Code::Char('/') => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_end(tokenizer, info, code) -                })), -                None, -            ) +            (State::Fn(Box::new(|t, c| complete_end(t, c, info))), None)          }          Code::Char(char) if char == 
':' || char == '_' || char.is_ascii_alphabetic() => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_name(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))),                  None,              )          }          Code::VirtualSpace | Code::Char('\t' | ' ') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_name_before(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_name_before(t, c, info))),                  None,              )          } -        _ => complete_end(tokenizer, info, code), +        _ => complete_end(tokenizer, code, info),      }  } @@ -534,7 +494,7 @@ fn complete_attribute_name_before(  /// <x _|>  /// <x a|>  /// ``` -fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_attribute_name(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::Char(char)              if char == '-' @@ -545,13 +505,11 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) ->          {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_name(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))),                  None,              )          } -        _ => complete_attribute_name_after(tokenizer, info, code), +        _ => complete_attribute_name_after(tokenizer, code, info),      }  } @@ -565,29 +523,25 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) ->  /// ```  fn complete_attribute_name_after(      
tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    info: Info,  ) -> StateFnResult {      match code {          Code::Char('=') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_value_before(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))),                  None,              )          }          Code::VirtualSpace | Code::Char('\t' | ' ') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_name_after(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_name_after(t, c, info))),                  None,              )          } -        _ => complete_attribute_name_before(tokenizer, info, code), +        _ => complete_attribute_name_before(tokenizer, code, info),      }  } @@ -600,38 +554,27 @@ fn complete_attribute_name_after(  /// ```  fn complete_attribute_value_before(      tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    mut info: Info,  ) -> StateFnResult {      match code {          Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),          Code::Char(char) if char == '"' || char == '\'' => {              tokenizer.consume(code); -            // To do: life times. 
-            let mut clone = info; -            clone.quote = if char == '"' { -                QuoteKind::Double -            } else { -                QuoteKind::Single -            }; - +            info.quote = Some(QuoteKind::from_char(char));              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_value_quoted(tokenizer, clone, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))),                  None,              )          }          Code::VirtualSpace | Code::Char('\t' | ' ') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_value_before(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))),                  None,              )          } -        _ => complete_attribute_value_unquoted(tokenizer, info, code), +        _ => complete_attribute_value_unquoted(tokenizer, code, info),      }  } @@ -643,22 +586,16 @@ fn complete_attribute_value_before(  /// ```  fn complete_attribute_value_quoted(      tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    info: Info,  ) -> StateFnResult { -    let marker = if info.quote == QuoteKind::Double { -        '"' -    } else { -        '\'' -    }; -      match code {          Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None), -        Code::Char(char) if char == marker => { +        Code::Char(char) if char == info.quote.as_ref().unwrap().as_char() => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_value_quoted_after(tokenizer, info, code) +                State::Fn(Box::new(|t, c| { +                    complete_attribute_value_quoted_after(t, c, info)               
   })),                  None,              ) @@ -666,9 +603,7 @@ fn complete_attribute_value_quoted(          _ => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_value_quoted(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))),                  None,              )          } @@ -682,21 +617,21 @@ fn complete_attribute_value_quoted(  /// ```  fn complete_attribute_value_unquoted(      tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    info: Info,  ) -> StateFnResult {      match code {          Code::None          | Code::CarriageReturnLineFeed          | Code::VirtualSpace          | Code::Char('\t' | '\n' | '\r' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => { -            complete_attribute_name_after(tokenizer, info, code) +            complete_attribute_name_after(tokenizer, code, info)          }          Code::Char(_) => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_attribute_value_unquoted(tokenizer, info, code) +                State::Fn(Box::new(|t, c| { +                    complete_attribute_value_unquoted(t, c, info)                  })),                  None,              ) @@ -712,12 +647,12 @@ fn complete_attribute_value_unquoted(  /// ```  fn complete_attribute_value_quoted_after(      tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    info: Info,  ) -> StateFnResult {      match code {          Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => { -            complete_attribute_name_before(tokenizer, info, code) +            complete_attribute_name_before(tokenizer, code, info)          }          _ => (State::Nok, None),      } @@ -728,16 +663,11 @@ fn complete_attribute_value_quoted_after(  /// ```markdown  /// <x a="b"|>  
/// ``` -fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::Char('>') => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_after(tokenizer, info, code) -                })), -                None, -            ) +            (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None)          }          _ => (State::Nok, None),      } @@ -748,19 +678,14 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes  /// ```markdown  /// <x>|  /// ``` -fn complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            continuation(tokenizer, info, code) +            continuation(tokenizer, code, info)          }          Code::VirtualSpace | Code::Char('\t' | ' ') => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(|tokenizer, code| { -                    complete_after(tokenizer, info, code) -                })), -                None, -            ) +            (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None)          }          Code::Char(_) => (State::Nok, None),      } @@ -771,49 +696,41 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnR  /// ```markdown  /// <!--x|xx-->  /// ``` -fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::Char('-') if info.kind == Kind::Comment => {              tokenizer.consume(code);              ( -          
      State::Fn(Box::new(|tokenizer, code| { -                    continuation_comment_inside(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_comment_inside(t, c, info))),                  None,              )          }          Code::Char('<') if info.kind == Kind::Raw => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_raw_tag_open(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_raw_tag_open(t, c, info))),                  None,              )          }          Code::Char('>') if info.kind == Kind::Declaration => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_close(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_close(t, c, info))),                  None,              )          }          Code::Char('?') if info.kind == Kind::Instruction => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_declaration_inside(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),                  None,              )          }          Code::Char(']') if info.kind == Kind::Cdata => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_character_data_inside(tokenizer, info, code) +                State::Fn(Box::new(|t, c| { +                    continuation_character_data_inside(t, c, info)                  })),                  None,              ) @@ -821,27 +738,21 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes          
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')              if info.kind == Kind::Basic || info.kind == Kind::Complete =>          { -            let clone = info; -              tokenizer.check(blank_line_before, |ok| { -                if ok { -                    Box::new(|tokenizer, code| continuation_close(tokenizer, clone, code)) +                let func = if ok { +                    continuation_close                  } else { -                    Box::new(|tokenizer, code| continuation_at_line_ending(tokenizer, clone, code)) -                } +                    continuation_at_line_ending +                }; +                Box::new(move |t, c| func(t, c, info))              })(tokenizer, code)          }          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            continuation_at_line_ending(tokenizer, info, code) +            continuation_at_line_ending(tokenizer, code, info)          }          _ => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation(tokenizer, info, code) -                })), -                None, -            ) +            (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)          }      }  } @@ -851,9 +762,9 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes  /// ```markdown  /// <x>|  /// ``` -fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      tokenizer.exit(TokenType::HtmlFlowData); -    html_continue_start(tokenizer, info, code) +    html_continue_start(tokenizer, code, info)  }  /// In continuation, after an eol. 
@@ -862,7 +773,7 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code  /// <x>|  /// asd  /// ``` -fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::None => {              tokenizer.exit(TokenType::HtmlFlow); @@ -874,15 +785,13 @@ fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta              tokenizer.consume(code);              tokenizer.exit(TokenType::LineEnding);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    html_continue_start(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| html_continue_start(t, c, info))),                  None,              )          }          _ => {              tokenizer.enter(TokenType::HtmlFlowData); -            continuation(tokenizer, info, code) +            continuation(tokenizer, code, info)          }      }  } @@ -892,18 +801,16 @@ fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta  /// ```markdown  /// <!--xxx-|->  /// ``` -fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_comment_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code { -        Code::Char('-') if info.kind == Kind::Comment => { +        Code::Char('-') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_declaration_inside(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),                  None,              )          } -        _ => continuation(tokenizer, info, code), +        _ => continuation(tokenizer, code, info),      }  } @@ 
-912,18 +819,16 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code  /// ```markdown  /// <script>console.log(1)<|/script>  /// ``` -fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::Char('/') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_raw_end_tag(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))),                  None,              )          } -        _ => continuation(tokenizer, info, code), +        _ => continuation(tokenizer, code, info),      }  } @@ -934,39 +839,38 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code)  /// <script>console.log(1)</s|cript>  /// <script>console.log(1)</script|>  /// ``` -fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_raw_end_tag( +    tokenizer: &mut Tokenizer, +    code: Code, +    mut info: Info, +) -> StateFnResult {      match code {          Code::Char('>') => {              let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase(); -            // To do: life times. 
-            let mut clone = info; -            clone.buffer.clear(); +            info.buffer.clear();              if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) {                  tokenizer.consume(code);                  ( -                    State::Fn(Box::new(|tokenizer, code| { -                        continuation_close(tokenizer, clone, code) -                    })), +                    State::Fn(Box::new(|t, c| continuation_close(t, c, info))),                      None,                  )              } else { -                continuation(tokenizer, clone, code) +                continuation(tokenizer, code, info)              }          }          Code::Char(char) if char.is_ascii_alphabetic() && info.buffer.len() < HTML_RAW_SIZE_MAX => {              tokenizer.consume(code); -            // To do: life times. -            let mut clone = info; -            clone.buffer.push(char); +            info.buffer.push(char);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_raw_end_tag(tokenizer, clone, code) -                })), +                State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))),                  None,              )          } -        _ => continuation(tokenizer, info, code), +        _ => { +            info.buffer.clear(); +            continuation(tokenizer, code, info) +        }      }  } @@ -977,20 +881,18 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -  /// ```  fn continuation_character_data_inside(      tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    info: Info,  ) -> StateFnResult {      match code {          Code::Char(']') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_declaration_inside(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| 
continuation_declaration_inside(t, c, info))),                  None,              )          } -        _ => continuation(tokenizer, info, code), +        _ => continuation(tokenizer, code, info),      }  } @@ -1008,29 +910,25 @@ fn continuation_character_data_inside(  /// ```  fn continuation_declaration_inside(      tokenizer: &mut Tokenizer, -    info: Info,      code: Code, +    info: Info,  ) -> StateFnResult {      match code {          Code::Char('>') => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_close(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_close(t, c, info))),                  None,              )          }          Code::Char('-') if info.kind == Kind::Comment => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    continuation_declaration_inside(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),                  None,              )          } -        _ => continuation(tokenizer, info, code), +        _ => continuation(tokenizer, code, info),      }  } @@ -1039,7 +937,7 @@ fn continuation_declaration_inside(  /// ```markdown  /// <!doctype>|  /// ``` -fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult { +fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {              tokenizer.exit(TokenType::HtmlFlowData); @@ -1049,9 +947,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Stat          _ => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(|tokenizer, code| { -                    
continuation_close(tokenizer, info, code) -                })), +                State::Fn(Box::new(|t, c| continuation_close(t, c, info))),                  None,              )          } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index a91113f..c4d2353 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -114,9 +114,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              tokenizer.consume(code);              let buffer = vec!['C', 'D', 'A', 'T', 'A', '['];              ( -                State::Fn(Box::new(|tokenizer, code| { -                    cdata_open_inside(tokenizer, code, buffer, 0) -                })), +                State::Fn(Box::new(|t, c| cdata_open_inside(t, c, buffer, 0))),                  None,              )          } @@ -247,8 +245,8 @@ fn cdata_open_inside(                  (State::Fn(Box::new(cdata)), None)              } else {                  ( -                    State::Fn(Box::new(move |tokenizer, code| { -                        cdata_open_inside(tokenizer, code, buffer, index + 1) +                    State::Fn(Box::new(move |t, c| { +                        cdata_open_inside(t, c, buffer, index + 1)                      })),                      None,                  ) @@ -526,8 +524,8 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> Sta          Code::Char(char) if char == '"' || char == '\'' => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(move |tokenizer, code| { -                    tag_open_attribute_value_quoted(tokenizer, code, char) +                State::Fn(Box::new(move |t, c| { +                    tag_open_attribute_value_quoted(t, c, char)                  })),                  None,              ) @@ -555,9 +553,7 @@ fn tag_open_attribute_value_quoted(          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => at_line_ending(              
tokenizer,              code, -            Box::new(move |tokenizer, code| { -                tag_open_attribute_value_quoted(tokenizer, code, marker) -            }), +            Box::new(move |t, c| tag_open_attribute_value_quoted(t, c, marker)),          ),          Code::Char(char) if char == marker => {              tokenizer.consume(code); @@ -569,8 +565,8 @@ fn tag_open_attribute_value_quoted(          _ => {              tokenizer.consume(code);              ( -                State::Fn(Box::new(move |tokenizer, code| { -                    tag_open_attribute_value_quoted(tokenizer, code, marker) +                State::Fn(Box::new(move |t, c| { +                    tag_open_attribute_value_quoted(t, c, marker)                  })),                  None,              ) diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index b00188d..624ee8e 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -129,23 +129,33 @@ fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      // To do: If code is disabled, indented lines are allowed to interrupt.      
tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { -        if ok { -            Box::new(|_t, code| (State::Ok, Some(vec![code]))) -        } else { -            Box::new(|tokenizer, code| { -                tokenizer.attempt_5( -                    blank_line, -                    code_fenced, -                    html_flow, -                    heading_atx, -                    thematic_break, -                    |ok| { -                        Box::new(move |_t, code| { -                            (if ok { State::Nok } else { State::Ok }, Some(vec![code])) -                        }) -                    }, -                )(tokenizer, code) -            }) -        } +        Box::new(if ok { interrupt_indent } else { interrupt_cont })      })(tokenizer, code)  } + +/// At an indent. +/// +/// ```markdown +/// alpha +///     | +/// ``` +pub fn interrupt_indent(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    (State::Ok, Some(vec![code])) +} + +/// Not at an indented line. +/// +/// ```markdown +/// alpha +/// |<div> +/// ``` +pub fn interrupt_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    tokenizer.attempt_5( +        blank_line, +        code_fenced, +        html_flow, +        heading_atx, +        thematic_break, +        |ok| Box::new(move |_t, code| (if ok { State::Nok } else { State::Ok }, Some(vec![code]))), +    )(tokenizer, code) +} diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 55efd13..194165c 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -60,6 +60,17 @@ use crate::construct::partial_space_or_tab::space_or_tab_opt;  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  use crate::util::link::link; +/// State needed to parse labels. +#[derive(Debug)] +struct Info { +    /// Whether we’ve seen our first `ChunkString`. +    connect: bool, +    /// Whether there are non-blank characters in the label. 
+    data: bool, +    /// Number of characters in the label. +    size: usize, +} +  /// Before a label.  ///  /// ```markdown @@ -73,10 +84,12 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              tokenizer.consume(code);              tokenizer.exit(TokenType::DefinitionLabelMarker);              tokenizer.enter(TokenType::DefinitionLabelData); -            ( -                State::Fn(Box::new(|t, c| at_break(t, c, false, 0, false))), -                None, -            ) +            let info = Info { +                connect: false, +                data: false, +                size: 0, +            }; +            (State::Fn(Box::new(|t, c| at_break(t, c, info))), None)          }          _ => (State::Nok, None),      } @@ -88,17 +101,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// [|a]  /// [a|]  /// ``` -fn at_break( -    tokenizer: &mut Tokenizer, -    code: Code, -    data: bool, -    size: usize, -    connect: bool, -) -> StateFnResult { +fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::None | Code::Char('[') => (State::Nok, None), -        Code::Char(']') if !data => (State::Nok, None), -        _ if size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None), +        Code::Char(']') if !info.data => (State::Nok, None), +        _ if info.size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None),          Code::Char(']') => {              tokenizer.exit(TokenType::DefinitionLabelData);              tokenizer.enter(TokenType::DefinitionLabelMarker); @@ -110,12 +117,12 @@ fn at_break(          _ => {              tokenizer.enter(TokenType::ChunkString); -            if connect { +            if info.connect {                  let index = tokenizer.events.len() - 1;                  link(&mut tokenizer.events, index);              } -            label(tokenizer, code, data, size) +            label(tokenizer, code, info)          }     
 }  } @@ -126,16 +133,8 @@ fn at_break(  /// [a  /// |b]  /// ``` -fn line_start( -    tokenizer: &mut Tokenizer, -    code: Code, -    data: bool, -    size: usize, -    connect: bool, -) -> StateFnResult { -    tokenizer.go(space_or_tab_opt(), move |t, c| { -        line_begin(t, c, data, size, connect) -    })(tokenizer, code) +fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult { +    tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code)  }  /// After a line ending, after optional whitespace. @@ -144,17 +143,11 @@ fn line_start(  /// [a  /// |b]  /// ``` -fn line_begin( -    tokenizer: &mut Tokenizer, -    code: Code, -    data: bool, -    size: usize, -    connect: bool, -) -> StateFnResult { +fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          // Blank line not allowed.          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None), -        _ => at_break(tokenizer, code, data, size, connect), +        _ => at_break(tokenizer, code, info),      }  } @@ -163,44 +156,46 @@ fn line_begin(  /// ```markdown  /// [a|b]  /// ``` -fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { +fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { +    if !info.connect { +        info.connect = true; +    } +      match code {          Code::None | Code::Char('[' | ']') => {              tokenizer.exit(TokenType::ChunkString); -            at_break(tokenizer, code, data, size, true) +            at_break(tokenizer, code, info)          } -        _ if size > LINK_REFERENCE_SIZE_MAX => { +        _ if info.size > LINK_REFERENCE_SIZE_MAX => {              tokenizer.exit(TokenType::ChunkString); -            at_break(tokenizer, code, data, size, true) +            at_break(tokenizer, code, info)          }          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {     
         tokenizer.consume(code); +            info.size += 1;              tokenizer.exit(TokenType::ChunkString); -            ( -                State::Fn(Box::new(move |t, c| line_start(t, c, data, size + 1, true))), -                None, -            ) +            (State::Fn(Box::new(|t, c| line_start(t, c, info))), None)          }          Code::VirtualSpace | Code::Char('\t' | ' ') => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |t, c| label(t, c, data, size + 1))), -                None, -            ) +            info.size += 1; +            (State::Fn(Box::new(|t, c| label(t, c, info))), None)          }          Code::Char('/') => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |t, c| escape(t, c, true, size + 1))), -                None, -            ) +            info.size += 1; +            if !info.data { +                info.data = true; +            } +            (State::Fn(Box::new(|t, c| escape(t, c, info))), None)          }          Code::Char(_) => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))), -                None, -            ) +            info.size += 1; +            if !info.data { +                info.data = true; +            } +            (State::Fn(Box::new(|t, c| label(t, c, info))), None)          }      }  } @@ -210,15 +205,13 @@ fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> Stat  /// ```markdown  /// [a\|[b]  /// ``` -fn escape(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult { +fn escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code {          Code::Char('[' | '\\' | ']') => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))), -                None, -   
         ) +            info.size += 1; +            (State::Fn(Box::new(|t, c| label(t, c, info))), None)          } -        _ => label(tokenizer, code, data, size), +        _ => label(tokenizer, code, info),      }  } diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 40ece49..1c4b367 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -6,6 +6,19 @@  use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer}; +/// Options to parse whitespace. +#[derive(Debug)] +struct Info { +    /// Current size. +    size: usize, +    /// Minimum allowed characters (inclusive). +    min: usize, +    /// Maximum allowed characters (inclusive). +    max: usize, +    /// Token type to use for whitespace events. +    kind: TokenType, +} +  /// Optional `space_or_tab`  ///  /// ```bnf @@ -30,7 +43,13 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {  /// space_or_tab ::= x*y( ' ' '\t' )  /// ```  pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> { -    Box::new(move |t, c| start(t, c, kind, min, max)) +    let info = Info { +        size: 0, +        min, +        max, +        kind, +    }; +    Box::new(|t, c| start(t, c, info))  }  /// Before whitespace. 
@@ -38,26 +57,16 @@ pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {  /// ```markdown  /// alpha| bravo  /// ``` -fn start( -    tokenizer: &mut Tokenizer, -    code: Code, -    kind: TokenType, -    min: usize, -    max: usize, -) -> StateFnResult { +fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code { -        Code::VirtualSpace | Code::Char('\t' | ' ') if max > 0 => { -            tokenizer.enter(kind.clone()); +        Code::VirtualSpace | Code::Char('\t' | ' ') if info.max > 0 => { +            tokenizer.enter(info.kind.clone());              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |tokenizer, code| { -                    inside(tokenizer, code, kind, min, max, 1) -                })), -                None, -            ) +            info.size += 1; +            (State::Fn(Box::new(|t, c| inside(t, c, info))), None)          }          _ => ( -            if min == 0 { State::Ok } else { State::Nok }, +            if info.min == 0 { State::Ok } else { State::Nok },              Some(vec![code]),          ),      } @@ -69,28 +78,21 @@ fn start(  /// alpha |bravo  /// alpha | bravo  /// ``` -fn inside( -    tokenizer: &mut Tokenizer, -    code: Code, -    kind: TokenType, -    min: usize, -    max: usize, -    size: usize, -) -> StateFnResult { +fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code { -        Code::VirtualSpace | Code::Char('\t' | ' ') if size < max => { +        Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.max => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |tokenizer, code| { -                    inside(tokenizer, code, kind, min, max, size + 1) -                })), -                None, -            ) +            info.size += 1; +            (State::Fn(Box::new(|t, c| inside(t, c, info))), None)          }  
        _ => { -            tokenizer.exit(kind); +            tokenizer.exit(info.kind.clone());              ( -                if size >= min { State::Ok } else { State::Nok }, +                if info.size >= info.min { +                    State::Ok +                } else { +                    State::Nok +                },                  Some(vec![code]),              )          } diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 22c3209..aa1e067 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -38,22 +38,59 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  use crate::util::link::link;  /// Type of title. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, PartialEq)]  enum Kind {      /// In a parenthesized (`(` and `)`) title. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// [a] b (c) +    /// ```      Paren,      /// In a double quoted (`"`) title. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// [a] b "c" +    /// ```      Double,      /// In a single quoted (`'`) title. +    /// +    /// ## Example +    /// +    /// ```markdown +    /// [a] b 'c' +    /// ```      Single,  } -/// Display a marker. -fn kind_to_marker(kind: &Kind) -> char { -    match kind { -        Kind::Double => '"', -        Kind::Single => '\'', -        Kind::Paren => ')', +impl Kind { +    /// Turn the kind into a [char]. +    /// +    /// > 👉 **Note**: a closing paren is used. +    fn as_char(&self) -> char { +        match self { +            Kind::Paren => ')', +            Kind::Double => '"', +            Kind::Single => '\'', +        } +    } +    /// Turn a [char] into a kind. +    /// +    /// > 👉 **Note**: an opening paren must be used. +    /// +    /// ## Panics +    /// +    /// Panics if `char` is not `(`, `"`, or `'`. 
+    fn from_char(char: char) -> Kind { +        match char { +            '(' => Kind::Paren, +            '"' => Kind::Double, +            '\'' => Kind::Single, +            _ => unreachable!("invalid char"), +        }      }  } @@ -65,21 +102,16 @@ fn kind_to_marker(kind: &Kind) -> char {  /// |(a)  /// ```  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    let kind = match code { -        Code::Char('"') => Some(Kind::Double), -        Code::Char('\'') => Some(Kind::Single), -        Code::Char('(') => Some(Kind::Paren), -        _ => None, -    }; - -    if let Some(kind) = kind { -        tokenizer.enter(TokenType::DefinitionTitle); -        tokenizer.enter(TokenType::DefinitionTitleMarker); -        tokenizer.consume(code); -        tokenizer.exit(TokenType::DefinitionTitleMarker); -        (State::Fn(Box::new(|t, c| begin(t, c, kind))), None) -    } else { -        (State::Nok, None) +    match code { +        Code::Char(char) if char == '(' || char == '"' || char == '\'' => { +            let kind = Kind::from_char(char); +            tokenizer.enter(TokenType::DefinitionTitle); +            tokenizer.enter(TokenType::DefinitionTitleMarker); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::DefinitionTitleMarker); +            (State::Fn(Box::new(|t, c| begin(t, c, kind))), None) +        } +        _ => (State::Nok, None),      }  } @@ -94,7 +126,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {      match code { -        Code::Char(char) if char == kind_to_marker(&kind) => { +        Code::Char(char) if char == kind.as_char() => {              tokenizer.enter(TokenType::DefinitionTitleMarker);              tokenizer.consume(code);              tokenizer.exit(TokenType::DefinitionTitleMarker); @@ -118,7 +150,7 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult 
{  /// ```  fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind, connect: bool) -> StateFnResult {      match code { -        Code::Char(char) if char == kind_to_marker(&kind) => { +        Code::Char(char) if char == kind.as_char() => {              tokenizer.exit(TokenType::DefinitionTitleString);              begin(tokenizer, code, kind)          } @@ -165,7 +197,7 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResul  /// ```  fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {      match code { -        Code::Char(char) if char == kind_to_marker(&kind) => { +        Code::Char(char) if char == kind.as_char() => {              tokenizer.exit(TokenType::ChunkString);              at_break(tokenizer, code, kind, true)          } @@ -176,10 +208,7 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {              tokenizer.consume(code);              tokenizer.exit(TokenType::ChunkString); -            ( -                State::Fn(Box::new(move |t, c| line_start(t, c, kind))), -                None, -            ) +            (State::Fn(Box::new(|t, c| line_start(t, c, kind))), None)          }          Code::Char('\\') => {              tokenizer.consume(code); @@ -199,9 +228,9 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {  /// ```  fn escape(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {      match code { -        Code::Char(char) if char == kind_to_marker(&kind) => { +        Code::Char(char) if char == kind.as_char() => {              tokenizer.consume(code); -            (State::Fn(Box::new(move |t, c| title(t, c, kind))), None) +            (State::Fn(Box::new(|t, c| title(t, c, kind))), None)          }          _ => title(tokenizer, code, kind),      } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 
abf733d..a9e5732 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -48,6 +48,68 @@ use super::partial_space_or_tab::space_or_tab_opt;  use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +/// Type of thematic break. +#[derive(Debug, PartialEq)] +enum Kind { +    /// In a thematic break using asterisks (`*`). +    /// +    /// ## Example +    /// +    /// ```markdown +    /// *** +    /// ``` +    Asterisk, +    /// In a thematic break using dashes (`-`). +    /// +    /// ## Example +    /// +    /// ```markdown +    /// --- +    /// ``` +    Dash, +    /// In a thematic break using underscores (`_`). +    /// +    /// ## Example +    /// +    /// ```markdown +    /// ___ +    /// ``` +    Underscore, +} + +impl Kind { +    /// Turn the kind into a [char]. +    fn as_char(&self) -> char { +        match self { +            Kind::Asterisk => '*', +            Kind::Dash => '-', +            Kind::Underscore => '_', +        } +    } +    /// Turn a [char] into a kind. +    /// +    /// ## Panics +    /// +    /// Panics if `char` is not `*`, `-`, or `_`. +    fn from_char(char: char) -> Kind { +        match char { +            '*' => Kind::Asterisk, +            '-' => Kind::Dash, +            '_' => Kind::Underscore, +            _ => unreachable!("invalid char"), +        } +    } +} + +/// State needed to parse thematic breaks. +#[derive(Debug)] +struct Info { +    /// Kind of marker. +    kind: Kind, +    /// Number of markers. +    size: usize, +} +  /// Start of a thematic break.  
///  /// ```markdown @@ -65,9 +127,14 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code { -        Code::Char(char) if char == '*' || char == '-' || char == '_' => { -            at_break(tokenizer, code, char, 0) -        } +        Code::Char(char) if char == '*' || char == '-' || char == '_' => at_break( +            tokenizer, +            code, +            Info { +                kind: Kind::from_char(char), +                size: 0, +            }, +        ),          _ => (State::Nok, None),      }  } @@ -79,17 +146,17 @@ pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// *| * *  /// * |* *  /// ``` -fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult { +fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {      match code {          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') -            if size >= THEMATIC_BREAK_MARKER_COUNT_MIN => +            if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>          {              tokenizer.exit(TokenType::ThematicBreak);              (State::Ok, Some(vec![code]))          } -        Code::Char(char) if char == marker => { +        Code::Char(char) if char == info.kind.as_char() => {              tokenizer.enter(TokenType::ThematicBreakSequence); -            sequence(tokenizer, code, marker, size) +            sequence(tokenizer, code, info)          }          _ => (State::Nok, None),      } @@ -102,22 +169,16 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) ->  /// *|**  /// **|*  /// ``` -fn sequence(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult { +fn sequence(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {      match code { -        Code::Char(char) if char == marker => { +       
 Code::Char(char) if char == info.kind.as_char() => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |tokenizer, code| { -                    sequence(tokenizer, code, marker, size + 1) -                })), -                None, -            ) +            info.size += 1; +            (State::Fn(Box::new(|t, c| sequence(t, c, info))), None)          }          _ => {              tokenizer.exit(TokenType::ThematicBreakSequence); -            tokenizer.go(space_or_tab_opt(), move |t, c| at_break(t, c, marker, size))( -                tokenizer, code, -            ) +            tokenizer.go(space_or_tab_opt(), |t, c| at_break(t, c, info))(tokenizer, code)          }      }  } diff --git a/src/content/string.rs b/src/content/string.rs index c3e825e..bae2646 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -55,14 +55,6 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              (State::Fn(Box::new(in_data)), None)          }      } - -    // if let Code::None = code { -    //     (State::Ok, None) -    // } else { -    //     tokenizer.enter(TokenType::Data); -    //     tokenizer.consume(code); -    //     (State::Fn(Box::new(in_data)), None) -    // }  }  /// In data. diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 909a1d1..8e9c7c6 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -142,6 +142,7 @@ pub struct Event {  /// It’s responsible for dealing with that single passed [`Code`][].  /// It yields a [`StateFnResult`][].  pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult; +  /// Each [`StateFn`][] yields something back: primarily the state.  /// In certain cases, it can also yield back up parsed codes that were passed down.  pub type StateFnResult = (State, Option<Vec<Code>>);  | 
