-rw-r--r--  readme.md                              |   6
-rw-r--r--  src/construct/definition.rs            |   6
-rw-r--r--  src/construct/label_end.rs             |   8
-rw-r--r--  src/construct/partial_label.rs         |   6
-rw-r--r--  src/construct/partial_space_or_tab.rs  | 218
-rw-r--r--  src/construct/partial_title.rs         |   6
-rw-r--r--  src/construct/partial_whitespace.rs    |   6
7 files changed, 146 insertions(+), 110 deletions(-)
diff --git a/readme.md b/readme.md
index 4144440..765c40a 100644
--- a/readme.md
+++ b/readme.md
@@ -122,7 +122,6 @@ cargo doc --document-private-items
#### Docs
-- [ ] (1) `space_or_tab_one_line_ending`
- [ ] (1) `edit_map`
- [ ] (1) Go through all bnf
- [ ] (1) Go through all docs
@@ -131,9 +130,7 @@ cargo doc --document-private-items
#### Refactor
- [ ] (1) Clean shifting, assertions in `edit_map`
-- [ ] (1) Clean `space_or_tab_one_line_ending`
-- [ ] (1) Use `link_to` (and `space_or_tab_one_line_ending`) in more places?
- It’s probably better
+- [ ] (1) Use `link_to` in more places? It’s probably better
- [ ] (1) Use `edit_map` in `subtokenize`
#### Parse
@@ -278,3 +275,4 @@ important.
- [x] (1) Add docs on resolver, clean feed
- [x] (3) Clean compiler
- [x] (1) Parse initial and final space_or_tab of paragraphs (in string, text)
+- [x] (1) Refactor to clean and document `space_or_tab`
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index af94d12..2b3e4b3 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -96,7 +96,7 @@
use crate::construct::{
partial_destination::{start as destination, Options as DestinationOptions},
partial_label::{start as label, Options as LabelOptions},
- partial_space_or_tab::{space_or_tab, space_or_tab_one_line_ending},
+ partial_space_or_tab::{space_or_tab, space_or_tab_eol},
partial_title::{start as title, Options as TitleOptions},
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -149,7 +149,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::DefinitionMarker);
(
State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab_one_line_ending(), destination_before),
+ tokenizer.attempt_opt(space_or_tab_eol(), destination_before),
)),
None,
)
@@ -233,7 +233,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// "c"
/// ```
fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.go(space_or_tab_one_line_ending(), title_before_marker)(tokenizer, code)
+ tokenizer.go(space_or_tab_eol(), title_before_marker)(tokenizer, code)
}
/// Before a title, after a line ending.
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 0da12b8..6901cb3 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -150,7 +150,7 @@ use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;
use crate::construct::{
partial_destination::{start as destination, Options as DestinationOptions},
partial_label::{start as label, Options as LabelOptions},
- partial_space_or_tab::space_or_tab_one_line_ending,
+ partial_space_or_tab::space_or_tab_eol,
partial_title::{start as title, Options as TitleOptions},
};
use crate::tokenizer::{
@@ -561,7 +561,7 @@ fn resource(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// [a](|b) c
/// ```
fn resource_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_open)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab_eol(), resource_open)(tokenizer, code)
}
/// At the start of a resource, after optional whitespace.
@@ -599,7 +599,7 @@ fn resource_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// [a](b| "c") d
/// ```
fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt(space_or_tab_one_line_ending(), |ok| {
+ tokenizer.attempt(space_or_tab_eol(), |ok| {
Box::new(if ok { resource_between } else { resource_end })
})(tokenizer, code)
}
@@ -636,7 +636,7 @@ fn resource_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// [a](b "c"|) d
/// ```
fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_end)(tokenizer, code)
+ tokenizer.attempt_opt(space_or_tab_eol(), resource_end)(tokenizer, code)
}
/// In a resource, at the `)`.
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 1e4d7f2..e505997 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -59,9 +59,7 @@
//!
//! <!-- To do: link attention. -->
-use super::partial_space_or_tab::{
- space_or_tab_one_line_ending_with_options, OneLineEndingOptions,
-};
+use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::constant::LINK_REFERENCE_SIZE_MAX;
use crate::subtokenize::link;
use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
@@ -137,7 +135,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
(State::Ok, None)
}
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go(
- space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
connect: info.connect,
}),
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 8df7601..d2934b3 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -1,4 +1,4 @@
-//! Several helpers to parse whitespace (`space_or_tab`).
+//! Several helpers to parse whitespace (`space_or_tab`, `space_or_tab_eol`).
//!
//! ## References
//!
@@ -7,7 +7,7 @@
use crate::subtokenize::link;
use crate::tokenizer::{Code, ContentType, State, StateFn, StateFnResult, TokenType, Tokenizer};
-/// Options to parse whitespace.
+/// Options to parse `space_or_tab`.
#[derive(Debug)]
pub struct Options {
/// Minimum allowed characters (inclusive).
@@ -16,28 +16,22 @@ pub struct Options {
pub max: usize,
/// Token type to use for whitespace events.
pub kind: TokenType,
- /// To do.
- pub content_type: Option<ContentType>,
+ /// Connect this whitespace to the previous.
pub connect: bool,
-}
-
-#[derive(Debug)]
-pub struct OneLineEndingOptions {
- /// To do.
+ /// Embedded content type to use.
pub content_type: Option<ContentType>,
- pub connect: bool,
}
-/// Options to parse whitespace.
+/// Options to parse `space_or_tab` and one optional eol, but no blank line.
#[derive(Debug)]
-struct OneLineInfo {
- /// Whether something was seen.
- connect: bool,
- /// Configuration.
- options: OneLineEndingOptions,
+pub struct EolOptions {
+ /// Connect this whitespace to the previous.
+ pub connect: bool,
+ /// Embedded content type to use.
+ pub content_type: Option<ContentType>,
}
-/// Options to parse whitespace.
+/// State needed to parse `space_or_tab`.
#[derive(Debug)]
struct Info {
/// Current size.
@@ -46,6 +40,17 @@ struct Info {
options: Options,
}
+/// State needed to parse `space_or_tab_eol`.
+#[derive(Debug)]
+struct EolInfo {
+ /// Whether to connect the next whitespace to the event before.
+ connect: bool,
+ /// Whether there was initial whitespace.
+ ok: bool,
+ /// Configuration.
+ options: EolOptions,
+}
+
/// One or more `space_or_tab`.
///
/// ```bnf
@@ -55,7 +60,7 @@ pub fn space_or_tab() -> Box<StateFn> {
space_or_tab_min_max(1, usize::MAX)
}
-/// Between `x` and `y` `space_or_tab`
+/// Between `x` and `y` `space_or_tab`.
///
/// ```bnf
/// space_or_tab_min_max ::= x*y( ' ' '\t' )
@@ -70,16 +75,57 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
})
}
-/// Between `x` and `y` `space_or_tab`, with the given token type.
+/// `space_or_tab`, with the given options.
+pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
+ Box::new(|t, c| start(t, c, Info { size: 0, options }))
+}
+
+/// Either one or more `space_or_tab`, or optional `space_or_tab`, one `eol`,
+/// and optional `space_or_tab`.
///
/// ```bnf
-/// space_or_tab ::= x*y( ' ' '\t' )
+/// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' )
/// ```
-pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
- Box::new(|t, c| start(t, c, Info { size: 0, options }))
+pub fn space_or_tab_eol() -> Box<StateFn> {
+ space_or_tab_eol_with_options(EolOptions {
+ content_type: None,
+ connect: false,
+ })
}
-/// Before whitespace.
+/// `space_or_tab_eol`, with the given options.
+pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
+ Box::new(move |tokenizer, code| {
+ let mut info = EolInfo {
+ connect: false,
+ ok: false,
+ options,
+ };
+
+ tokenizer.attempt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: info.options.content_type,
+ connect: info.options.connect,
+ }),
+ move |ok| {
+ if ok {
+ info.ok = ok;
+
+ if info.options.content_type.is_some() {
+ info.connect = true;
+ }
+ }
+
+ Box::new(|t, c| after_space_or_tab(t, c, info))
+ },
+ )(tokenizer, code)
+ })
+}
+
+/// Before `space_or_tab`.
///
/// ```markdown
/// alpha| bravo
@@ -109,7 +155,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult
}
}
-/// In whitespace.
+/// In `space_or_tab`.
///
/// ```markdown
/// alpha |bravo
@@ -136,85 +182,75 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul
}
}
-pub fn space_or_tab_one_line_ending() -> Box<StateFn> {
- space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
- content_type: None,
- connect: false,
- })
-}
-
-pub fn space_or_tab_one_line_ending_with_options(options: OneLineEndingOptions) -> Box<StateFn> {
- Box::new(move |tokenizer, code| {
- let mut info = OneLineInfo {
- connect: false,
- options,
- };
-
- tokenizer.attempt(
- space_or_tab_with_options(Options {
- kind: TokenType::SpaceOrTab,
- min: 1,
- max: usize::MAX,
- content_type: info.options.content_type,
- connect: info.options.connect,
- }),
- move |ok| {
- if ok && info.options.content_type.is_some() {
- info.connect = true;
- }
-
- Box::new(move |tokenizer, code| match code {
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- at_eol(tokenizer, code, info)
- }
- _ => {
- if ok {
- (State::Ok, Some(vec![code]))
- } else {
- (State::Nok, None)
- }
- }
- })
- },
- )(tokenizer, code)
- })
-}
-
-fn at_eol(tokenizer: &mut Tokenizer, code: Code, mut info: OneLineInfo) -> StateFnResult {
+/// `space_or_tab_eol`: after the optional first `space_or_tab`.
+///
+/// ```markdown
+/// alpha |
+/// bravo
+/// ```
+///
+/// ```markdown
+/// alpha|
+/// bravo
+/// ```
+fn after_space_or_tab(tokenizer: &mut Tokenizer, code: Code, mut info: EolInfo) -> StateFnResult {
match code {
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
tokenizer.enter_with_content(TokenType::LineEnding, info.options.content_type);
- if info.options.content_type.is_some() {
- if info.connect {
- let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
- } else {
- info.connect = true;
- }
+ if info.connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else if info.options.content_type.is_some() {
+ info.connect = true;
}
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(tokenizer.attempt_opt(
- space_or_tab_with_options(Options {
- kind: TokenType::SpaceOrTab,
- min: 1,
- max: usize::MAX,
- content_type: info.options.content_type,
- connect: info.connect,
- }),
- after_eol,
- ))),
- None,
- )
+ (State::Fn(Box::new(|t, c| after_eol(t, c, info))), None)
}
- _ => unreachable!("expected eol"),
+ _ if info.ok => (State::Ok, Some(vec![code])),
+ _ => (State::Nok, None),
}
}
-fn after_eol(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+/// `space_or_tab_eol`: after eol.
+///
+/// ```markdown
+/// alpha
+/// |bravo
+/// ```
+///
+/// ```markdown
+/// alpha
+/// |bravo
+/// ```
+#[allow(clippy::needless_pass_by_value)]
+fn after_eol(tokenizer: &mut Tokenizer, code: Code, info: EolInfo) -> StateFnResult {
+ tokenizer.attempt_opt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: info.options.content_type,
+ connect: info.connect,
+ }),
+ after_more_space_or_tab,
+ )(tokenizer, code)
+}
+
+/// `space_or_tab_eol`: after more (optional) `space_or_tab`.
+///
+/// ```markdown
+/// alpha
+/// |bravo
+/// ```
+///
+/// ```markdown
+/// alpha
+/// |bravo
+/// ```
+fn after_more_space_or_tab(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// Blank line not allowed.
if matches!(
code,
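Note: as a reference for the construct introduced above, the following is a minimal standalone sketch of the input accepted by `space_or_tab_eol`. It operates on plain strings rather than the crate's `Tokenizer`/`Code`/`StateFn` machinery, so it only illustrates the grammar, not the state functions in the diff; the function name `matches_space_or_tab_eol` is a hypothetical helper, not part of the crate.

```rust
// A minimal standalone sketch (not the crate's implementation) of the input
// accepted by `space_or_tab_eol`:
//
//   space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' )
//
// At most one line ending is allowed, so a blank line never matches.
fn matches_space_or_tab_eol(input: &str) -> bool {
    let bytes = input.as_bytes();
    let mut index = 0;

    // Leading `space_or_tab` (required when no line ending follows).
    while index < bytes.len() && (bytes[index] == b' ' || bytes[index] == b'\t') {
        index += 1;
    }
    let has_leading = index > 0;

    // At most one line ending: `\n`, `\r`, or `\r\n`.
    let mut has_eol = false;
    if index < bytes.len() && (bytes[index] == b'\r' || bytes[index] == b'\n') {
        has_eol = true;
        if bytes[index] == b'\r' && bytes.get(index + 1) == Some(&b'\n') {
            index += 1;
        }
        index += 1;

        // Trailing `space_or_tab` after the line ending.
        while index < bytes.len() && (bytes[index] == b' ' || bytes[index] == b'\t') {
            index += 1;
        }
    }

    // Everything must be consumed; without an eol, at least one space or tab
    // is required; a second eol (a blank line) is never consumed, so it fails.
    index == bytes.len() && (has_eol || has_leading)
}

fn main() {
    assert!(matches_space_or_tab_eol("  \t"));      // only `space_or_tab`
    assert!(matches_space_or_tab_eol("  \n  "));    // eol surrounded by whitespace
    assert!(matches_space_or_tab_eol("\r\n"));      // bare eol
    assert!(!matches_space_or_tab_eol(""));         // empty: needs at least something
    assert!(!matches_space_or_tab_eol("  \n\n  ")); // blank line is rejected
}
```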
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 044a8db..3d0bfb6 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -30,9 +30,7 @@
//! [character_reference]: crate::construct::character_reference
//! [label_end]: crate::construct::label_end
-use super::partial_space_or_tab::{
- space_or_tab_one_line_ending_with_options, OneLineEndingOptions,
-};
+use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::subtokenize::link;
use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
@@ -183,7 +181,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
}
Code::None => (State::Nok, None),
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go(
- space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
connect: info.connect,
}),
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index 9a7a54d..62b1205 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -10,12 +10,18 @@
//! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab
//! ```
//!
+//! This is similar to [`space_or_tab_eol`][space_or_tab_eol], the main
+//! difference being that `space_or_tab_eol` does *not* require a line ending
+//! and parses `space_or_tab` around at most one line ending, whereas this
+//! construct *requires* the line ending (eol).
+//!
//! ## References
//!
//! * [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)
//!
//! [string]: crate::content::string
//! [text]: crate::content::text
+//! [space_or_tab_eol]: crate::construct::partial_space_or_tab::space_or_tab_eol
use super::partial_space_or_tab::space_or_tab;
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
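Note: to make the distinction in the doc comment above concrete, here is a minimal standalone sketch of the `whitespace` grammar from this module, where the line ending is required. As before, this is plain-string illustration code under that assumption, not the crate's tokenizer-based implementation, and `matches_whitespace` is a hypothetical helper name.

```rust
// A minimal standalone sketch (not the crate's implementation) of the
// `whitespace` grammar from this module:
//
//   whitespace ::= 0*( ' ' '\t' ) eol 0*( ' ' '\t' )
//
// Unlike `space_or_tab_eol`, the line ending is mandatory here.
fn matches_whitespace(input: &str) -> bool {
    let bytes = input.as_bytes();
    let mut index = 0;

    // Optional `space_or_tab` before the line ending.
    while index < bytes.len() && (bytes[index] == b' ' || bytes[index] == b'\t') {
        index += 1;
    }

    // The line ending itself is required.
    match bytes.get(index) {
        Some(&b'\n') => index += 1,
        Some(&b'\r') => {
            index += 1;
            if bytes.get(index) == Some(&b'\n') {
                index += 1;
            }
        }
        _ => return false,
    }

    // Optional `space_or_tab` after the line ending.
    while index < bytes.len() && (bytes[index] == b' ' || bytes[index] == b'\t') {
        index += 1;
    }

    index == bytes.len()
}

fn main() {
    // With a line ending, `whitespace` matches.
    assert!(matches_whitespace(" \t\n "));
    // Without one it does not, whereas `space_or_tab_eol` would accept this.
    assert!(!matches_whitespace("  \t"));
}
```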