author    Titus Wormer <tituswormer@gmail.com>  2022-06-22 15:20:33 +0200
committer Titus Wormer <tituswormer@gmail.com>  2022-06-22 15:20:33 +0200
commit    227e844154d9a592b80a88d7b8731d3d2f2fb3e2 (patch)
tree      09d4243db0ae3d51c2420e70b9e364d470d520bd
parent    6fdaffb3a8b4517a3b5c1e39dc1e16649c6eb0da (diff)
Add `attempt_opt` to tokenizer
-rw-r--r--  src/construct/blank_line.rs            4
-rw-r--r--  src/construct/code_fenced.rs          56
-rw-r--r--  src/construct/code_indented.rs         4
-rw-r--r--  src/construct/definition.rs           20
-rw-r--r--  src/construct/heading_atx.rs          18
-rw-r--r--  src/construct/heading_setext.rs       12
-rw-r--r--  src/construct/html_flow.rs             4
-rw-r--r--  src/construct/html_text.rs             4
-rw-r--r--  src/construct/partial_label.rs         4
-rw-r--r--  src/construct/partial_space_or_tab.rs 59
-rw-r--r--  src/construct/partial_title.rs         4
-rw-r--r--  src/construct/thematic_break.rs        6
-rw-r--r--  src/tokenizer.rs                      65
13 files changed, 150 insertions, 110 deletions
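
Most of this patch is one mechanical substitution, repeated across the constructs below: the zero-or-more `space_or_tab_opt()` partial driven through `tokenizer.go(...)` becomes the one-or-more `space_or_tab()` partial driven through the new `tokenizer.attempt_opt(...)`, which continues to the next state whether or not the partial matched. A before/after sketch of that call-site pattern (illustrative only, it compiles only inside the crate; `after` stands for whichever state follows in a given construct):

```rust
// Before: optionality was baked into the whitespace partial itself.
// tokenizer.go(space_or_tab_opt(), after)(tokenizer, code)

// After: optionality moves into the combinator, so the partial can
// require at least one space or tab, and `after` runs either way.
tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code)
```
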
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 153a008..3ca3266 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -33,7 +33,7 @@
//!
//! <!-- To do: link `list` -->
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
/// Start of a blank line.
@@ -45,7 +45,7 @@ use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
/// |
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), after)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code)
}
/// After zero or more spaces or tabs, before a line ending or EOF.
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index d71c01e..3b220b9 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,7 +102,7 @@
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt};
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
use crate::util::span::from_exit_event;
@@ -179,7 +179,7 @@ struct Info {
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::CodeFenced);
tokenizer.enter(TokenType::CodeFencedFence);
- tokenizer.go(space_or_tab_opt(), before_sequence_open)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), before_sequence_open)(tokenizer, code)
}
/// Inside the opening fence, after an optional prefix, before a sequence.
@@ -240,7 +240,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRe
_ if info.size < CODE_FENCED_SEQUENCE_SIZE_MIN => (State::Nok, None),
_ => {
tokenizer.exit(TokenType::CodeFencedFenceSequence);
- tokenizer.go(space_or_tab_opt(), |t, c| info_before(t, c, info))(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), |t, c| info_before(t, c, info))(tokenizer, code)
}
}
}
@@ -289,7 +289,7 @@ fn info_inside(
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.exit(TokenType::ChunkString);
tokenizer.exit(TokenType::CodeFencedFenceInfo);
- tokenizer.go(space_or_tab_opt(), |t, c| meta_before(t, c, info))(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), |t, c| meta_before(t, c, info))(tokenizer, code)
}
Code::Char(char) if char == '`' && info.kind == Kind::GraveAccent => (State::Nok, None),
Code::Char(_) => {
@@ -361,22 +361,12 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult
match code {
Code::None => after(tokenizer, code),
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
- |tokenizer, code| {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (State::Fn(Box::new(|t, c| close_start(t, c, info))), None)
- },
+ |t, c| close_begin(t, c, info),
|ok| {
if ok {
Box::new(after)
} else {
- Box::new(|tokenizer, code| {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (State::Fn(Box::new(|t, c| content_start(t, c, clone))), None)
- })
+ Box::new(|t, c| content_before(t, c, clone))
}
},
)(tokenizer, code),
@@ -384,6 +374,25 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult
}
}
+/// Before a closing fence, at the line ending.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')|
+/// ~~~
+/// ```
+fn close_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ tokenizer.enter(TokenType::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (State::Fn(Box::new(|t, c| close_start(t, c, info))), None)
+ }
+ _ => unreachable!("expected eol"),
+ }
+}
+
/// Before a closing fence, before optional whitespace.
///
/// ```markdown
@@ -441,7 +450,7 @@ fn close_sequence(tokenizer: &mut Tokenizer, code: Code, info: Info, size: usize
}
_ if size >= CODE_FENCED_SEQUENCE_SIZE_MIN && size >= info.size => {
tokenizer.exit(TokenType::CodeFencedFenceSequence);
- tokenizer.go(space_or_tab_opt(), close_sequence_after)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), close_sequence_after)(tokenizer, code)
}
_ => (State::Nok, None),
}
@@ -464,6 +473,19 @@ fn close_sequence_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
}
}
+/// Before code content, at the line ending, definitely not before a closing fence.
+///
+/// ```markdown
+/// ~~~js
+/// console.log('1')|
+/// ~~~
+/// ```
+fn content_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ tokenizer.enter(TokenType::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (State::Fn(Box::new(|t, c| content_start(t, c, info))), None)
+}
/// Before code content, definitely not before a closing fence.
///
/// ```markdown
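
Besides the whitespace change, `code_fenced.rs` lifts the two inline closures in `at_break` into the named states `close_begin` and `content_before`. The closing-fence check stays a full `attempt` rather than an `attempt_opt`, because on failure everything consumed while probing (the line ending and any partial fence) has to be reverted and re-fed as code content. A sketch of the resulting call, using the identifiers from the hunks above (illustrative only):

```rust
tokenizer.attempt(
    // Probe: tokenize the line ending, then try to parse a closing fence.
    |t, c| close_begin(t, c, info),
    // Decide: on success the fence ends the block; on failure the
    // tokenizer has reverted, and the same line ending is re-fed to
    // `content_before`, which tokenizes it as the start of more content.
    |ok| {
        if ok {
            Box::new(after)
        } else {
            Box::new(|t, c| content_before(t, c, clone))
        }
    },
)(tokenizer, code)
```
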
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index f476965..99445b9 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -45,7 +45,7 @@
//! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
-use super::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt};
+use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::TAB_SIZE;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -154,7 +154,7 @@ fn further_end(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// asd
/// ```
fn further_begin(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), further_after)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), further_after)(tokenizer, code)
}
/// After whitespace.
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 7f32858..b545643 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -77,7 +77,7 @@
use crate::construct::{
partial_destination::{start as destination, Options as DestinationOptions},
partial_label::{start as label, Options as LabelOptions},
- partial_space_or_tab::space_or_tab_opt,
+ partial_space_or_tab::space_or_tab,
partial_title::{start as title, Options as TitleOptions},
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -89,7 +89,7 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::Definition);
- tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
}
/// At the start of a definition, after whitespace.
@@ -134,7 +134,9 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.consume(code);
tokenizer.exit(TokenType::DefinitionMarker);
(
- State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), marker_after))),
+ State::Fn(Box::new(
+ tokenizer.attempt_opt(space_or_tab(), marker_after),
+ )),
None,
)
}
@@ -158,7 +160,7 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::LineEnding);
(
State::Fn(Box::new(
- tokenizer.go(space_or_tab_opt(), destination_before),
+ tokenizer.attempt_opt(space_or_tab(), destination_before),
)),
None,
)
@@ -216,7 +218,7 @@ fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// "c"
/// ```
fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt(title_before, |_ok| Box::new(after))(tokenizer, code)
+ tokenizer.attempt_opt(title_before, after)(tokenizer, code)
}
/// After a definition.
@@ -226,7 +228,7 @@ fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// [a]: b "c"|
/// ```
fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), after_whitespace)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), after_whitespace)(tokenizer, code)
}
/// After a definition, after optional whitespace.
@@ -254,7 +256,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// "c"
/// ```
fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), title_before_after_optional_whitespace)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), title_before_after_optional_whitespace)(tokenizer, code)
}
/// Before a title, after optional whitespace.
@@ -273,7 +275,7 @@ fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code)
tokenizer.exit(TokenType::LineEnding);
(
State::Fn(Box::new(
- tokenizer.go(space_or_tab_opt(), title_before_marker),
+ tokenizer.attempt_opt(space_or_tab(), title_before_marker),
)),
None,
)
@@ -320,7 +322,7 @@ fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// "c"|
/// ```
fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), title_after_after_optional_whitespace)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), title_after_after_optional_whitespace)(tokenizer, code)
}
/// After a title, after optional whitespace.
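
`definition.rs` also shows the other way these combinators get used: when a state has just consumed a character (the `:` marker here, or a line ending), it cannot re-run on the current code, so it returns the boxed state that `attempt_opt` builds and lets the tokenizer feed the next code into it. A sketch of that shape, with the identifiers from the hunk above (illustrative only):

```rust
// The `:` marker was just consumed, so return the state built by
// `attempt_opt`: the next code goes through the optional whitespace
// partial and then on to `marker_after`.
(
    State::Fn(Box::new(
        tokenizer.attempt_opt(space_or_tab(), marker_after),
    )),
    None,
)
```

The `destination_after` hunk also shows that `attempt_opt` is not whitespace-specific: `attempt(title_before, |_ok| Box::new(after))` collapses to `attempt_opt(title_before, after)`.
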
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index ae16d3d..93c57f9 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -54,7 +54,9 @@
//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
//! [atx]: http://www.aaronsw.com/2002/atx/
-use super::partial_space_or_tab::{space_or_tab, space_or_tab_opt};
+use super::partial_space_or_tab::{
+ space_or_tab, space_or_tab_with_options, Options as SpaceOrTabOptions,
+};
use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -65,7 +67,7 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::HeadingAtx);
- tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
}
/// Start of a heading (atx), after whitespace.
@@ -105,7 +107,11 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR
_ if rank > 0 => {
tokenizer.exit(TokenType::HeadingAtxSequence);
tokenizer.go(
- space_or_tab(TokenType::HeadingAtxSpaceOrTab, 1, usize::MAX),
+ space_or_tab_with_options(SpaceOrTabOptions {
+ kind: TokenType::HeadingAtxSpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ }),
at_break,
)(tokenizer, code)
}
@@ -129,7 +135,11 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
(State::Ok, Some(vec![code]))
}
Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.go(
- space_or_tab(TokenType::HeadingAtxSpaceOrTab, 1, usize::MAX),
+ space_or_tab_with_options(SpaceOrTabOptions {
+ kind: TokenType::HeadingAtxSpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ }),
at_break,
)(tokenizer, code),
Code::Char('#') => {
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 0cb8687..91e494d 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -56,7 +56,7 @@
//! [atx]: http://www.aaronsw.com/2002/atx/
use crate::constant::TAB_SIZE;
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
use crate::subtokenize::link;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
use crate::util::span::from_exit_event;
@@ -115,7 +115,7 @@ impl Kind {
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::HeadingSetext);
- tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
}
/// Start of a heading (setext), after whitespace.
@@ -183,7 +183,9 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::LineEnding);
(
- State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), text_line_start))),
+ State::Fn(Box::new(
+ tokenizer.attempt_opt(space_or_tab(), text_line_start),
+ )),
None,
)
}
@@ -243,7 +245,7 @@ fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::LineEnding);
(
State::Fn(Box::new(
- tokenizer.go(space_or_tab_opt(), underline_sequence_start),
+ tokenizer.attempt_opt(space_or_tab(), underline_sequence_start),
)),
None,
)
@@ -298,7 +300,7 @@ fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind)
None,
)
}
- _ => tokenizer.go(space_or_tab_opt(), underline_after)(tokenizer, code),
+ _ => tokenizer.attempt_opt(space_or_tab(), underline_after)(tokenizer, code),
}
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 0af9e3c..bb7457d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -99,7 +99,7 @@
//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
-use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::space_or_tab_opt};
+use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::space_or_tab};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Kind of HTML (flow).
@@ -191,7 +191,7 @@ struct Info {
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::HtmlFlow);
tokenizer.enter(TokenType::HtmlFlowData);
- tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
}
/// After optional whitespace, before `<`.
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 18c5f9c..2ac0ccd 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -54,7 +54,7 @@
//! [html_flow]: crate::construct::html_flow
//! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
/// Start of HTML (text)
@@ -674,7 +674,7 @@ fn after_line_ending(
code: Code,
return_state: Box<StateFn>,
) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), |t, c| {
+ tokenizer.attempt_opt(space_or_tab(), |t, c| {
after_line_ending_prefix(t, c, return_state)
})(tokenizer, code)
}
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index c78278e..1cb7d4b 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -56,7 +56,7 @@
// To do: pass token types in.
use crate::constant::LINK_REFERENCE_SIZE_MAX;
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
use crate::subtokenize::link;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -152,7 +152,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
/// |b]
/// ```
fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
}
/// After a line ending, after optional whitespace.
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index cbb2cf3..024a4b2 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -8,24 +8,31 @@ use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer
/// Options to parse whitespace.
#[derive(Debug)]
-struct Info {
- /// Current size.
- size: usize,
+pub struct Options {
/// Minimum allowed characters (inclusive).
- min: usize,
+ pub min: usize,
/// Maximum allowed characters (inclusive).
- max: usize,
+ pub max: usize,
/// Token type to use for whitespace events.
- kind: TokenType,
+ pub kind: TokenType,
+}
+
+/// State needed to parse whitespace.
+#[derive(Debug)]
+struct Info {
+ /// Current size.
+ size: usize,
+ /// Configuration.
+ options: Options,
}
-/// Optional `space_or_tab`
+/// One or more `space_or_tab`.
///
/// ```bnf
-/// space_or_tab_opt ::= *( ' ' '\t' )
+/// space_or_tab ::= 1*( ' ' '\t' )
/// ```
-pub fn space_or_tab_opt() -> Box<StateFn> {
- space_or_tab_min_max(0, usize::MAX)
+pub fn space_or_tab() -> Box<StateFn> {
+ space_or_tab_min_max(1, usize::MAX)
}
/// Between `x` and `y` `space_or_tab`
@@ -34,7 +41,11 @@ pub fn space_or_tab_opt() -> Box<StateFn> {
/// space_or_tab_min_max ::= x*y( ' ' '\t' )
/// ```
pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
- space_or_tab(TokenType::SpaceOrTab, min, max)
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min,
+ max,
+ })
}
/// Between `x` and `y` `space_or_tab`, with the given token type.
@@ -42,14 +53,8 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
/// ```bnf
/// space_or_tab ::= x*y( ' ' '\t' )
/// ```
-pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
- let info = Info {
- size: 0,
- min,
- max,
- kind,
- };
- Box::new(|t, c| start(t, c, info))
+pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
+ Box::new(|t, c| start(t, c, Info { size: 0, options }))
}
/// Before whitespace.
@@ -59,14 +64,18 @@ pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
/// ```
fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::VirtualSpace | Code::Char('\t' | ' ') if info.max > 0 => {
- tokenizer.enter(info.kind.clone());
+ Code::VirtualSpace | Code::Char('\t' | ' ') if info.options.max > 0 => {
+ tokenizer.enter(info.options.kind.clone());
tokenizer.consume(code);
info.size += 1;
(State::Fn(Box::new(|t, c| inside(t, c, info))), None)
}
_ => (
- if info.min == 0 { State::Ok } else { State::Nok },
+ if info.options.min == 0 {
+ State::Ok
+ } else {
+ State::Nok
+ },
Some(vec![code]),
),
}
@@ -80,15 +89,15 @@ fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult
/// ```
fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.max => {
+ Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.options.max => {
tokenizer.consume(code);
info.size += 1;
(State::Fn(Box::new(|t, c| inside(t, c, info))), None)
}
_ => {
- tokenizer.exit(info.kind.clone());
+ tokenizer.exit(info.options.kind.clone());
(
- if info.size >= info.min {
+ if info.size >= info.options.min {
State::Ok
} else {
State::Nok
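
`partial_space_or_tab.rs` now separates configuration from parser state: a public `Options` (token kind plus inclusive `min`/`max`) travels inside the private `Info` that also tracks the current size. The three entry points layer on top of one another; a sketch using only the items from the hunks above (illustrative, not additional API):

```rust
// Fully explicit: choose the token type and the bounds.
let explicit = space_or_tab_with_options(Options {
    kind: TokenType::SpaceOrTab,
    min: 1,
    max: usize::MAX,
});

// Default token type, custom bounds.
let bounded = space_or_tab_min_max(1, usize::MAX);

// Shorthand for the common case: one or more spaces or tabs.
let shorthand = space_or_tab();
```

Each of these returns a `Box<StateFn>`, which is what the call sites hand to `go`, `attempt`, and `attempt_opt`.
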
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index f2278c0..3e61788 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -31,7 +31,7 @@
//!
//! <!-- To do: link label end. -->
-use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::construct::partial_space_or_tab::space_or_tab;
use crate::subtokenize::link;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -203,7 +203,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
/// |b"
/// ```
fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
}
/// After a line ending, after optional whitespace.
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index f33f8f3..3d24566 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -49,7 +49,7 @@
//!
//! <!-- To do: link `lists` -->
-use super::partial_space_or_tab::space_or_tab_opt;
+use super::partial_space_or_tab::space_or_tab;
use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -122,7 +122,7 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::ThematicBreak);
- tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code)
}
/// Start of a thematic break, after whitespace.
@@ -183,7 +183,7 @@ fn sequence(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
}
_ => {
tokenizer.exit(TokenType::ThematicBreakSequence);
- tokenizer.go(space_or_tab_opt(), |t, c| at_break(t, c, info))(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab(), |t, c| at_break(t, c, info))(tokenizer, code)
}
}
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index e8bf21b..f76a0f8 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1457,8 +1457,8 @@ impl Tokenizer {
self.stack.truncate(previous.stack_len);
}
- /// Parse with `state` and its future states, switching to `ok` when
- /// successful, and passing [`State::Nok`][] back if it occurs.
+ /// Parse with `state_fn` and its future states, switching to `after` when
+ /// successful, and passing [`State::Nok`][] back up if it occurs.
///
/// This function does not capture the current state, in case of
/// `State::Nok`, as it is assumed that this `go` is itself wrapped in
@@ -1466,23 +1466,15 @@ impl Tokenizer {
#[allow(clippy::unused_self)]
pub fn go(
&mut self,
- state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
- ok: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ after: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
) -> Box<StateFn> {
attempt_impl(
- state,
+ state_fn,
vec![],
- |result: (Vec<Code>, Vec<Code>), is_ok, tokenizer: &mut Tokenizer| {
- let codes = if is_ok { result.1 } else { result.0 };
- log::debug!(
- "go: {:?}, codes: {:?}, at {:?}",
- is_ok,
- codes,
- tokenizer.point
- );
-
- if is_ok {
- tokenizer.feed(&codes, ok, false)
+ |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
+ if ok {
+ tokenizer.feed(&if ok { result.1 } else { result.0 }, after, false)
} else {
(State::Nok, None)
}
@@ -1490,59 +1482,52 @@ impl Tokenizer {
)
}
- /// Parse with `state` and its future states, to check if it result in
+ /// Parse with `state_fn` and its future states, to check if it results in
/// [`State::Ok`][] or [`State::Nok`][], revert on both cases, and then
/// call `done` with whether it was successful or not.
///
/// This captures the current state of the tokenizer, returns a wrapped
- /// state that captures all codes and feeds them to `state` and its future
- /// states until it yields `State::Ok` or `State::Nok`.
+ /// state that captures all codes and feeds them to `state_fn` and its
+ /// future states until it yields `State::Ok` or `State::Nok`.
/// It then applies the captured state, calls `done`, and feeds all
/// captured codes to its future states.
pub fn check(
&mut self,
- state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
let previous = self.capture();
attempt_impl(
- state,
+ state_fn,
vec![],
|result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
- let codes = result.0;
tokenizer.free(previous);
- log::debug!(
- "check: {:?}, codes: {:?}, at {:?}",
- ok,
- codes,
- tokenizer.point
- );
- tokenizer.feed(&codes, done(ok), false)
+ tokenizer.feed(&result.0, done(ok), false)
},
)
}
- /// Parse with `state` and its future states, to check if it result in
+ /// Parse with `state_fn` and its future states, to check if it results in
/// [`State::Ok`][] or [`State::Nok`][], revert on the case of
/// `State::Nok`, and then call `done` with whether it was successful or
/// not.
///
/// This captures the current state of the tokenizer, returns a wrapped
- /// state that captures all codes and feeds them to `state` and its future
- /// states until it yields `State::Ok`, at which point it calls `done` and
- /// yields its result.
+ /// state that captures all codes and feeds them to `state_fn` and its
+ /// future states until it yields `State::Ok`, at which point it calls
+ /// `done` and yields its result.
/// If instead `State::Nok` was yielded, the captured state is applied,
/// `done` is called, and all captured codes are fed to its future states.
pub fn attempt(
&mut self,
- state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
let previous = self.capture();
attempt_impl(
- state,
+ state_fn,
vec![],
|result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {
if !ok {
@@ -1582,6 +1567,16 @@ impl Tokenizer {
}
}
+ /// Just like [`attempt`][Tokenizer::attempt], but for when you don’t care
+ /// about `ok`.
+ pub fn attempt_opt(
+ &mut self,
+ state_fn: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ after: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ ) -> Box<StateFn> {
+ self.attempt(state_fn, |_ok| Box::new(after))
+ }
+
/// Feed a list of `codes` into `start`.
///
/// This is set up to support repeatedly calling `feed`, and thus streaming
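
With `attempt_opt` in place the tokenizer has four ways to drive a sub-state machine, and the new one is literally `attempt` with the `ok` flag ignored; the differences are only in what gets captured and what happens on `State::Nok`. A summary sketch using the names defined in this file (`state_fn`, `after`, and `other` are placeholder state functions; illustrative only):

```rust
// `go`: no capture; a `State::Nok` from `state_fn` is passed straight back up.
tokenizer.go(state_fn, after);

// `check`: capture, run `state_fn`, then revert whether it succeeded or
// not; only the `ok` verdict reaches `done`.
tokenizer.check(state_fn, |ok| {
    if ok { Box::new(after) } else { Box::new(other) }
});

// `attempt`: capture, and revert only on failure; `done(ok)` picks which
// state the captured codes are fed to next.
tokenizer.attempt(state_fn, |ok| {
    if ok { Box::new(after) } else { Box::new(other) }
});

// `attempt_opt`: attempt, but always continue to `after`, which is why the
// optional-whitespace call sites above now read so uniformly.
tokenizer.attempt_opt(state_fn, after);
```
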