author     Titus Wormer <tituswormer@gmail.com>  2022-06-22 15:20:33 +0200
committer  Titus Wormer <tituswormer@gmail.com>  2022-06-22 15:20:33 +0200
commit     227e844154d9a592b80a88d7b8731d3d2f2fb3e2 (patch)
tree       09d4243db0ae3d51c2420e70b9e364d470d520bd /src
parent     6fdaffb3a8b4517a3b5c1e39dc1e16649c6eb0da (diff)
Add `attempt_opt` to tokenizer
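
The change replaces the dedicated `space_or_tab_opt()` construct with a small tokenizer helper: `attempt_opt` wraps the existing `attempt` and ignores the `ok` flag, so call sites that only need "continue here afterwards" can pass a plain state function instead of a boxing closure. Condensed from the `src/tokenizer.rs` hunk at the end of the diff below:

    /// Just like `attempt`, but for when you do not care about `ok`.
    pub fn attempt_opt(
        &mut self,
        state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
        after: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
    ) -> Box<StateFn> {
        self.attempt(state_fn, |_ok| Box::new(after))
    }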
Diffstat (limited to 'src')
-rw-r--r--  src/construct/blank_line.rs            |  4
-rw-r--r--  src/construct/code_fenced.rs           | 56
-rw-r--r--  src/construct/code_indented.rs         |  4
-rw-r--r--  src/construct/definition.rs            | 20
-rw-r--r--  src/construct/heading_atx.rs           | 18
-rw-r--r--  src/construct/heading_setext.rs        | 12
-rw-r--r--  src/construct/html_flow.rs             |  4
-rw-r--r--  src/construct/html_text.rs             |  4
-rw-r--r--  src/construct/partial_label.rs         |  4
-rw-r--r--  src/construct/partial_space_or_tab.rs  | 59
-rw-r--r--  src/construct/partial_title.rs         |  4
-rw-r--r--  src/construct/thematic_break.rs        |  6
-rw-r--r--  src/tokenizer.rs                       | 65
13 files changed, 150 insertions, 110 deletions
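
Alongside the new helper, `partial_space_or_tab` switches from positional arguments to an options struct, and the old optional variant is dropped in favour of a one-or-more `space_or_tab()`. Roughly, per the `src/construct/partial_space_or_tab.rs` hunks below:

    /// Options to parse whitespace.
    #[derive(Debug)]
    pub struct Options {
        pub min: usize,
        pub max: usize,
        pub kind: TokenType,
    }

    /// One or more `space_or_tab`.
    pub fn space_or_tab() -> Box<StateFn> {
        space_or_tab_min_max(1, usize::MAX)
    }

    pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
        Box::new(|t, c| start(t, c, Info { size: 0, options }))
    }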
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 153a008..3ca3266 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -33,7 +33,7 @@
 //!
 //! <!-- To do: link `list` -->
 
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
 
 /// Start of a blank line.
@@ -45,7 +45,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
 /// |
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), after)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code)
 }
 
 /// After zero or more spaces or tabs, before a line ending or EOF.
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index d71c01e..3b220b9 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,7 +102,7 @@
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 
 use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 use crate::util::span::from_exit_event;
 
@@ -179,7 +179,7 @@ struct Info {
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::CodeFenced);
     tokenizer.enter(TokenType::CodeFencedFence);
-    tokenizer.go(space_or_tab_opt(), before_sequence_open)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), before_sequence_open)(tokenizer, code)
 }
 
 /// Inside the opening fence, after an optional prefix, before a sequence.
@@ -240,7 +240,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRe
         _ if info.size < CODE_FENCED_SEQUENCE_SIZE_MIN => (State::Nok, None),
         _ => {
             tokenizer.exit(TokenType::CodeFencedFenceSequence);
-            tokenizer.go(space_or_tab_opt(), |t, c| info_before(t, c, info))(tokenizer, code)
+            tokenizer.attempt_opt(space_or_tab(), |t, c| info_before(t, c, info))(tokenizer, code)
         }
     }
 }
@@ -289,7 +289,7 @@ fn info_inside(
         Code::VirtualSpace | Code::Char('\t' | ' ') => {
             tokenizer.exit(TokenType::ChunkString);
             tokenizer.exit(TokenType::CodeFencedFenceInfo);
-            tokenizer.go(space_or_tab_opt(), |t, c| meta_before(t, c, info))(tokenizer, code)
+            tokenizer.attempt_opt(space_or_tab(), |t, c| meta_before(t, c, info))(tokenizer, code)
         }
         Code::Char(char) if char == '`' && info.kind == Kind::GraveAccent => (State::Nok, None),
         Code::Char(_) => {
@@ -361,22 +361,12 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult
     match code {
         Code::None => after(tokenizer, code),
         Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
-            |tokenizer, code| {
-                tokenizer.enter(TokenType::LineEnding);
-                tokenizer.consume(code);
-                tokenizer.exit(TokenType::LineEnding);
-                (State::Fn(Box::new(|t, c| close_start(t, c, info))), None)
-            },
+            |t, c| close_begin(t, c, info),
             |ok| {
                 if ok {
                     Box::new(after)
                 } else {
-                    Box::new(|tokenizer, code| {
-                        tokenizer.enter(TokenType::LineEnding);
-                        tokenizer.consume(code);
-                        tokenizer.exit(TokenType::LineEnding);
-                        (State::Fn(Box::new(|t, c| content_start(t, c, clone))), None)
-                    })
+                    Box::new(|t, c| content_before(t, c, clone))
                 }
             },
         )(tokenizer, code),
@@ -384,6 +374,25 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult
     }
 }
 
+/// Before a closing fence, at the line ending.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')|
+/// ~~~
+/// ```
+fn close_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (State::Fn(Box::new(|t, c| close_start(t, c, info))), None)
+        }
+        _ => unreachable!("expected eol"),
+    }
+}
+
 /// Before a closing fence, before optional whitespace.
 ///
 /// ```markdown
@@ -441,7 +450,7 @@ fn close_sequence(tokenizer: &mut Tokenizer, code: Code, info: Info, size: usize
         }
         _ if size >= CODE_FENCED_SEQUENCE_SIZE_MIN && size >= info.size => {
             tokenizer.exit(TokenType::CodeFencedFenceSequence);
-            tokenizer.go(space_or_tab_opt(), close_sequence_after)(tokenizer, code)
+            tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer, code)
         }
         _ => (State::Nok, None),
     }
@@ -464,6 +473,19 @@ fn close_sequence_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
     }
 }
 
+/// Before a closing fence, at the line ending.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')|
+/// ~~~
+/// ```
+fn content_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    tokenizer.enter(TokenType::LineEnding);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::LineEnding);
+    (State::Fn(Box::new(|t, c| content_start(t, c, info))), None)
+}
 /// Before code content, definitely not before a closing fence.
 ///
 /// ```markdown
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index f476965..99445b9 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -45,7 +45,7 @@
 //! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 
-use super::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt};
+use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
 use crate::constant::TAB_SIZE;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -154,7 +154,7 @@ fn further_end(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// asd
 /// ```
 fn further_begin(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), further_after)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), further_after)(tokenizer, code)
 }
 
 /// After whitespace.
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 7f32858..b545643 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -77,7 +77,7 @@
 use crate::construct::{
     partial_destination::{start as destination, Options as DestinationOptions},
     partial_label::{start as label, Options as LabelOptions},
-    partial_space_or_tab::space_or_tab_opt,
+    partial_space_or_tab::space_or_tab,
     partial_title::{start as title, Options as TitleOptions},
 };
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -89,7 +89,7 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::Definition);
-    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
 }
 
 /// At the start of a definition, after whitespace.
@@ -134,7 +134,9 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.consume(code);
             tokenizer.exit(TokenType::DefinitionMarker);
             (
-                State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), marker_after))),
+                State::Fn(Box::new(
+                    tokenizer.attempt_opt(space_or_tab(), marker_after),
+                )),
                 None,
             )
         }
@@ -158,7 +160,7 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.exit(TokenType::LineEnding);
             (
                 State::Fn(Box::new(
-                    tokenizer.go(space_or_tab_opt(), destination_before),
+                    tokenizer.attempt_opt(space_or_tab(), destination_before),
                 )),
                 None,
             )
@@ -216,7 +218,7 @@ fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// "c"
 /// ```
 fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(title_before, |_ok| Box::new(after))(tokenizer, code)
+    tokenizer.attempt_opt(title_before, after)(tokenizer, code)
 }
 
 /// After a definition.
@@ -226,7 +228,7 @@ fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// [a]: b "c"|
 /// ```
 fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), after_whitespace)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), after_whitespace)(tokenizer, code)
 }
 
 /// After a definition, after optional whitespace.
@@ -254,7 +256,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// "c"
 /// ```
 fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), title_before_after_optional_whitespace)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), title_before_after_optional_whitespace)(tokenizer, code)
 }
 
 /// Before a title, after optional whitespace.
@@ -273,7 +275,7 @@ fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code)
             tokenizer.exit(TokenType::LineEnding);
             (
                 State::Fn(Box::new(
-                    tokenizer.go(space_or_tab_opt(), title_before_marker),
+                    tokenizer.attempt_opt(space_or_tab(), title_before_marker),
                 )),
                 None,
             )
@@ -320,7 +322,7 @@ fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// "c"|
 /// ```
 fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), title_after_after_optional_whitespace)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer, code)
 }
 
 /// After a title, after optional whitespace.
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index ae16d3d..93c57f9 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -54,7 +54,9 @@
 //! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
 //! [atx]: http://www.aaronsw.com/2002/atx/
 
-use super::partial_space_or_tab::{space_or_tab, space_or_tab_opt};
+use super::partial_space_or_tab::{
+    space_or_tab, space_or_tab_with_options, Options as SpaceOrTabOptions,
+};
 use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -65,7 +67,7 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::HeadingAtx);
-    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
 }
 
 /// Start of a heading (atx), after whitespace.
@@ -105,7 +107,11 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR
         _ if rank > 0 => {
             tokenizer.exit(TokenType::HeadingAtxSequence);
             tokenizer.go(
-                space_or_tab(TokenType::HeadingAtxSpaceOrTab, 1, usize::MAX),
+                space_or_tab_with_options(SpaceOrTabOptions {
+                    kind: TokenType::HeadingAtxSpaceOrTab,
+                    min: 1,
+                    max: usize::MAX,
+                }),
                 at_break,
             )(tokenizer, code)
         }
@@ -129,7 +135,11 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             (State::Ok, Some(vec![code]))
         }
         Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.go(
-            space_or_tab(TokenType::HeadingAtxSpaceOrTab, 1, usize::MAX),
+            space_or_tab_with_options(SpaceOrTabOptions {
+                kind: TokenType::HeadingAtxSpaceOrTab,
+                min: 1,
+                max: usize::MAX,
+            }),
             at_break,
         )(tokenizer, code),
         Code::Char('#') => {
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 0cb8687..91e494d 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -56,7 +56,7 @@
 //! [atx]: http://www.aaronsw.com/2002/atx/
 
 use crate::constant::TAB_SIZE;
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::subtokenize::link;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 use crate::util::span::from_exit_event;
@@ -115,7 +115,7 @@ impl Kind {
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::HeadingSetext);
-    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
 }
 
 /// Start of a heading (setext), after whitespace.
@@ -183,7 +183,9 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.exit(TokenType::LineEnding);
             (
-                State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), text_line_start))),
+                State::Fn(Box::new(
+                    tokenizer.attempt_opt(space_or_tab(), text_line_start),
+                )),
                 None,
             )
         }
@@ -243,7 +245,7 @@ fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.exit(TokenType::LineEnding);
             (
                 State::Fn(Box::new(
-                    tokenizer.go(space_or_tab_opt(), underline_sequence_start),
+                    tokenizer.attempt_opt(space_or_tab(), underline_sequence_start),
                 )),
                 None,
             )
@@ -298,7 +300,7 @@ fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind)
                 None,
             )
         }
-        _ => tokenizer.go(space_or_tab_opt(), underline_after)(tokenizer, code),
+        _ => tokenizer.attempt_opt(space_or_tab(), underline_after)(tokenizer, code),
     }
 }
 
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 0af9e3c..bb7457d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -99,7 +99,7 @@
 //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
 use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
-use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::space_or_tab_opt};
+use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::space_or_tab};
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
 /// Kind of HTML (flow).
@@ -191,7 +191,7 @@ struct Info {
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::HtmlFlow);
     tokenizer.enter(TokenType::HtmlFlowData);
-    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
 }
 
 /// After optional whitespace, before `<`.
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 18c5f9c..2ac0ccd 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -54,7 +54,7 @@
 //! [html_flow]: crate::construct::html_flow
 //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
 
 /// Start of HTML (text)
@@ -674,7 +674,7 @@ fn after_line_ending(
     code: Code,
     return_state: Box<StateFn>,
 ) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), |t, c| {
+    tokenizer.attempt_opt(space_or_tab(), |t, c| {
         after_line_ending_prefix(t, c, return_state)
     })(tokenizer, code)
 }
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index c78278e..1cb7d4b 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -56,7 +56,7 @@
 // To do: pass token types in.
 use crate::constant::LINK_REFERENCE_SIZE_MAX;
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::subtokenize::link;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -152,7 +152,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
 /// |b]
 /// ```
 fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
 }
 
 /// After a line ending, after optional whitespace.
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index cbb2cf3..024a4b2 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -8,24 +8,31 @@ use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer
 
 /// Options to parse whitespace.
 #[derive(Debug)]
-struct Info {
-    /// Current size.
-    size: usize,
+pub struct Options {
     /// Minimum allowed characters (inclusive).
-    min: usize,
+    pub min: usize,
     /// Maximum allowed characters (inclusive).
-    max: usize,
+    pub max: usize,
     /// Token type to use for whitespace events.
-    kind: TokenType,
+    pub kind: TokenType,
+}
+
+/// Options to parse whitespace.
+#[derive(Debug)]
+struct Info {
+    /// Current size.
+    size: usize,
+    /// Configuration.
+    options: Options,
 }
 
-/// Optional `space_or_tab`
+/// One or more `space_or_tab`.
 ///
 /// ```bnf
-/// space_or_tab_opt ::= *( ' ' '\t' )
+/// space_or_tab ::= 1*( ' ' '\t' )
 /// ```
-pub fn space_or_tab_opt() -> Box<StateFn> {
-    space_or_tab_min_max(0, usize::MAX)
+pub fn space_or_tab() -> Box<StateFn> {
+    space_or_tab_min_max(1, usize::MAX)
 }
 
 /// Between `x` and `y` `space_or_tab`
@@ -34,7 +41,11 @@ pub fn space_or_tab_opt() -> Box<StateFn> {
 /// space_or_tab_min_max ::= x*y( ' ' '\t' )
 /// ```
 pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
-    space_or_tab(TokenType::SpaceOrTab, min, max)
+    space_or_tab_with_options(Options {
+        kind: TokenType::SpaceOrTab,
+        min,
+        max,
+    })
 }
 
 /// Between `x` and `y` `space_or_tab`, with the given token type.
@@ -42,14 +53,8 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
 /// ```bnf
 /// space_or_tab ::= x*y( ' ' '\t' )
 /// ```
-pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
-    let info = Info {
-        size: 0,
-        min,
-        max,
-        kind,
-    };
-    Box::new(|t, c| start(t, c, info))
+pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
+    Box::new(|t, c| start(t, c, Info { size: 0, options }))
 }
 
 /// Before whitespace.
@@ -59,14 +64,18 @@ pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
 /// ```
 fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
     match code {
-        Code::VirtualSpace | Code::Char('\t' | ' ') if info.max > 0 => {
-            tokenizer.enter(info.kind.clone());
+        Code::VirtualSpace | Code::Char('\t' | ' ') if info.options.max > 0 => {
+            tokenizer.enter(info.options.kind.clone());
             tokenizer.consume(code);
             info.size += 1;
             (State::Fn(Box::new(|t, c| inside(t, c, info))), None)
         }
         _ => (
-            if info.min == 0 { State::Ok } else { State::Nok },
+            if info.options.min == 0 {
+                State::Ok
+            } else {
+                State::Nok
+            },
             Some(vec![code]),
         ),
     }
@@ -80,15 +89,15 @@ fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult
 /// ```
 fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
     match code {
-        Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.max => {
+        Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.options.max => {
             tokenizer.consume(code);
             info.size += 1;
             (State::Fn(Box::new(|t, c| inside(t, c, info))), None)
         }
         _ => {
-            tokenizer.exit(info.kind.clone());
+            tokenizer.exit(info.options.kind.clone());
             (
-                if info.size >= info.min {
+                if info.size >= info.options.min {
                     State::Ok
                 } else {
                     State::Nok
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index f2278c0..3e61788 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -31,7 +31,7 @@
 //!
 //! <!-- To do: link label end. -->
 
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
 use crate::subtokenize::link;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -203,7 +203,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
 /// |b"
 /// ```
 fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
-    tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
 }
 
 /// After a line ending, after optional whitespace.
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index f33f8f3..3d24566 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -49,7 +49,7 @@
 //!
 //! <!-- To do: link `lists` -->
 
-use super::partial_space_or_tab::space_or_tab_opt;
+use super::partial_space_or_tab::space_or_tab;
 use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -122,7 +122,7 @@ struct Info {
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::ThematicBreak);
-    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
 }
 
 /// Start of a thematic break, after whitespace.
@@ -183,7 +183,7 @@ fn sequence(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
         }
         _ => {
             tokenizer.exit(TokenType::ThematicBreakSequence);
-            tokenizer.go(space_or_tab_opt(), |t, c| at_break(t, c, info))(tokenizer, code)
+            tokenizer.attempt_opt(space_or_tab(), |t, c| at_break(t, c, info))(tokenizer, code)
        }
     }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index e8bf21b..f76a0f8 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1457,8 +1457,8 @@ impl Tokenizer {
         self.stack.truncate(previous.stack_len);
     }
 
-    /// Parse with `state` and its future states, switching to `ok` when
-    /// successful, and passing [`State::Nok`][] back if it occurs.
+    /// Parse with `state_fn` and its future states, switching to `ok` when
+    /// successful, and passing [`State::Nok`][] back up if it occurs.
     ///
     /// This function does not capture the current state, in case of
    /// `State::Nok`, as it is assumed that this `go` is itself wrapped in
@@ -1466,23 +1466,15 @@ impl Tokenizer {
     #[allow(clippy::unused_self)]
     pub fn go(
         &mut self,
-        state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
-        ok: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        after: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
     ) -> Box<StateFn> {
         attempt_impl(
-            state,
+            state_fn,
             vec![],
-            |result: (Vec<Code>, Vec<Code>), is_ok, tokenizer: &mut Tokenizer| {
-                let codes = if is_ok { result.1 } else { result.0 };
-                log::debug!(
-                    "go: {:?}, codes: {:?}, at {:?}",
-                    is_ok,
-                    codes,
-                    tokenizer.point
-                );
-
-                if is_ok {
-                    tokenizer.feed(&codes, ok, false)
+            |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
+                if ok {
+                    tokenizer.feed(&if ok { result.1 } else { result.0 }, after, false)
                 } else {
                     (State::Nok, None)
                 }
@@ -1490,59 +1482,52 @@
-    /// Parse with `state` and its future states, to check if it result in
+    /// Parse with `state_fn` and its future states, to check if it result in
     /// [`State::Ok`][] or [`State::Nok`][], revert on both cases, and then
     /// call `done` with whether it was successful or not.
     ///
     /// This captures the current state of the tokenizer, returns a wrapped
-    /// state that captures all codes and feeds them to `state` and its future
-    /// states until it yields `State::Ok` or `State::Nok`.
+    /// state that captures all codes and feeds them to `state_fn` and its
+    /// future states until it yields `State::Ok` or `State::Nok`.
     /// It then applies the captured state, calls `done`, and feeds all
     /// captured codes to its future states.
     pub fn check(
         &mut self,
-        state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         done: impl FnOnce(bool) -> Box<StateFn> + 'static,
     ) -> Box<StateFn> {
         let previous = self.capture();
         attempt_impl(
-            state,
+            state_fn,
             vec![],
             |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
-                let codes = result.0;
                 tokenizer.free(previous);
-                log::debug!(
-                    "check: {:?}, codes: {:?}, at {:?}",
-                    ok,
-                    codes,
-                    tokenizer.point
-                );
-                tokenizer.feed(&codes, done(ok), false)
+                tokenizer.feed(&result.0, done(ok), false)
             },
         )
     }
 
-    /// Parse with `state` and its future states, to check if it result in
+    /// Parse with `state_fn` and its future states, to check if it results in
     /// [`State::Ok`][] or [`State::Nok`][], revert on the case of
     /// `State::Nok`, and then call `done` with whether it was successful or
     /// not.
     ///
     /// This captures the current state of the tokenizer, returns a wrapped
-    /// state that captures all codes and feeds them to `state` and its future
-    /// states until it yields `State::Ok`, at which point it calls `done` and
-    /// yields its result.
+    /// state that captures all codes and feeds them to `state_fn` and its
+    /// future states until it yields `State::Ok`, at which point it calls
+    /// `done` and yields its result.
     /// If instead `State::Nok` was yielded, the captured state is applied,
     /// `done` is called, and all captured codes are fed to its future states.
     pub fn attempt(
         &mut self,
-        state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         done: impl FnOnce(bool) -> Box<StateFn> + 'static,
     ) -> Box<StateFn> {
         let previous = self.capture();
         attempt_impl(
-            state,
+            state_fn,
             vec![],
             |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
                 if !ok {
@@ -1582,6 +1567,16 @@ impl Tokenizer {
         }
     }
 
+    /// Just like [`attempt`][Tokenizer::attempt], but for when you don’t care
+    /// about `ok`.
+    pub fn attempt_opt(
+        &mut self,
+        state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        after: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+    ) -> Box<StateFn> {
+        self.attempt(state_fn, |_ok| Box::new(after))
+    }
+
     /// Feed a list of `codes` into `start`.
     ///
     /// This is set up to support repeatedly calling `feed`, and thus streaming
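
At call sites the rewrite is mechanical; for example, the optional-whitespace prefix in `src/construct/blank_line.rs` above changes from

    tokenizer.go(space_or_tab_opt(), after)(tokenizer, code)

to

    tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code)

while places that previously passed a token type and bounds positionally (as in `heading_atx.rs`) now build a `SpaceOrTabOptions` and keep using `tokenizer.go` with `space_or_tab_with_options`.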