diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/construct/definition.rs | 6 | ||||
| -rw-r--r-- | src/construct/label_end.rs | 8 | ||||
| -rw-r--r-- | src/construct/partial_label.rs | 6 | ||||
| -rw-r--r-- | src/construct/partial_space_or_tab.rs | 218 | ||||
| -rw-r--r-- | src/construct/partial_title.rs | 6 | ||||
| -rw-r--r-- | src/construct/partial_whitespace.rs | 6 | 
6 files changed, 144 insertions, 106 deletions
| diff --git a/src/construct/definition.rs b/src/construct/definition.rs index af94d12..2b3e4b3 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -96,7 +96,7 @@  use crate::construct::{      partial_destination::{start as destination, Options as DestinationOptions},      partial_label::{start as label, Options as LabelOptions}, -    partial_space_or_tab::{space_or_tab, space_or_tab_one_line_ending}, +    partial_space_or_tab::{space_or_tab, space_or_tab_eol},      partial_title::{start as title, Options as TitleOptions},  };  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; @@ -149,7 +149,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              tokenizer.exit(TokenType::DefinitionMarker);              (                  State::Fn(Box::new( -                    tokenizer.attempt_opt(space_or_tab_one_line_ending(), destination_before), +                    tokenizer.attempt_opt(space_or_tab_eol(), destination_before),                  )),                  None,              ) @@ -233,7 +233,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  ///  "c"  /// ```  fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.go(space_or_tab_one_line_ending(), title_before_marker)(tokenizer, code) +    tokenizer.go(space_or_tab_eol(), title_before_marker)(tokenizer, code)  }  /// Before a title, after a line ending. diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 0da12b8..6901cb3 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -150,7 +150,7 @@ use crate::constant::RESOURCE_DESTINATION_BALANCE_MAX;  use crate::construct::{      partial_destination::{start as destination, Options as DestinationOptions},      partial_label::{start as label, Options as LabelOptions}, -    partial_space_or_tab::space_or_tab_one_line_ending, +    partial_space_or_tab::space_or_tab_eol,      partial_title::{start as title, Options as TitleOptions},  };  use crate::tokenizer::{ @@ -561,7 +561,7 @@ fn resource(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// [a](|b) c  /// ```  fn resource_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_open)(tokenizer, code) +    tokenizer.attempt_opt(space_or_tab_eol(), resource_open)(tokenizer, code)  }  /// At the start of a resource, after optional whitespace. @@ -599,7 +599,7 @@ fn resource_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// [a](b| "c") d  /// ```  fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.attempt(space_or_tab_one_line_ending(), |ok| { +    tokenizer.attempt(space_or_tab_eol(), |ok| {          Box::new(if ok { resource_between } else { resource_end })      })(tokenizer, code)  } @@ -636,7 +636,7 @@ fn resource_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// [a](b "c"|) d  /// ```  fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_end)(tokenizer, code) +    tokenizer.attempt_opt(space_or_tab_eol(), resource_end)(tokenizer, code)  }  /// In a resource, at the `)`. diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 1e4d7f2..e505997 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -59,9 +59,7 @@  //!  //! <!-- To do: link attention. --> -use super::partial_space_or_tab::{ -    space_or_tab_one_line_ending_with_options, OneLineEndingOptions, -}; +use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};  use crate::constant::LINK_REFERENCE_SIZE_MAX;  use crate::subtokenize::link;  use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer}; @@ -137,7 +135,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes              (State::Ok, None)          }          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go( -            space_or_tab_one_line_ending_with_options(OneLineEndingOptions { +            space_or_tab_eol_with_options(EolOptions {                  content_type: Some(ContentType::String),                  connect: info.connect,              }), diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs index 8df7601..d2934b3 100644 --- a/src/construct/partial_space_or_tab.rs +++ b/src/construct/partial_space_or_tab.rs @@ -1,4 +1,4 @@ -//! Several helpers to parse whitespace (`space_or_tab`). +//! Several helpers to parse whitespace (`space_or_tab`, `space_or_tab_eol`).  //!  //! ## References  //! @@ -7,7 +7,7 @@  use crate::subtokenize::link;  use crate::tokenizer::{Code, ContentType, State, StateFn, StateFnResult, TokenType, Tokenizer}; -/// Options to parse whitespace. +/// Options to parse `space_or_tab`.  #[derive(Debug)]  pub struct Options {      /// Minimum allowed characters (inclusive). @@ -16,28 +16,22 @@ pub struct Options {      pub max: usize,      /// Token type to use for whitespace events.      pub kind: TokenType, -    /// To do. -    pub content_type: Option<ContentType>, +    /// Connect this whitespace to the previous.      pub connect: bool, -} - -#[derive(Debug)] -pub struct OneLineEndingOptions { -    /// To do. +    /// Embedded content type to use.      pub content_type: Option<ContentType>, -    pub connect: bool,  } -/// Options to parse whitespace. +/// Options to parse `space_or_tab` and one optional eol, but no blank line.  #[derive(Debug)] -struct OneLineInfo { -    /// Whether something was seen. -    connect: bool, -    /// Configuration. -    options: OneLineEndingOptions, +pub struct EolOptions { +    /// Connect this whitespace to the previous. +    pub connect: bool, +    /// Embedded content type to use. +    pub content_type: Option<ContentType>,  } -/// Options to parse whitespace. +/// State needed to parse `space_or_tab`.  #[derive(Debug)]  struct Info {      /// Current size. @@ -46,6 +40,17 @@ struct Info {      options: Options,  } +/// State needed to parse `space_or_tab_eol`. +#[derive(Debug)] +struct EolInfo { +    /// Whether to connect the next whitespace to the event before. +    connect: bool, +    /// Whether there was initial whitespace. +    ok: bool, +    /// Configuration. +    options: EolOptions, +} +  /// One or more `space_or_tab`.  ///  /// ```bnf @@ -55,7 +60,7 @@ pub fn space_or_tab() -> Box<StateFn> {      space_or_tab_min_max(1, usize::MAX)  } -/// Between `x` and `y` `space_or_tab` +/// Between `x` and `y` `space_or_tab`.  ///  /// ```bnf  /// space_or_tab_min_max ::= x*y( ' ' '\t' ) @@ -70,16 +75,57 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {      })  } -/// Between `x` and `y` `space_or_tab`, with the given token type. +/// `space_or_tab`, with the given options. +pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> { +    Box::new(|t, c| start(t, c, Info { size: 0, options })) +} + +/// `space_or_tab`, or optionally `space_or_tab`, one `eol`, and +/// optionally `space_or_tab`.  ///  /// ```bnf -/// space_or_tab ::= x*y( ' ' '\t' ) +/// space_or_tab_eol ::= 1*( ' ' '\t' ) | 0*( ' ' '\t' ) eol 0*( ' ' '\t' )  /// ``` -pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> { -    Box::new(|t, c| start(t, c, Info { size: 0, options })) +pub fn space_or_tab_eol() -> Box<StateFn> { +    space_or_tab_eol_with_options(EolOptions { +        content_type: None, +        connect: false, +    })  } -/// Before whitespace. +/// `space_or_tab_eol`, with the given options. +pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> { +    Box::new(move |tokenizer, code| { +        let mut info = EolInfo { +            connect: false, +            ok: false, +            options, +        }; + +        tokenizer.attempt( +            space_or_tab_with_options(Options { +                kind: TokenType::SpaceOrTab, +                min: 1, +                max: usize::MAX, +                content_type: info.options.content_type, +                connect: info.options.connect, +            }), +            move |ok| { +                if ok { +                    info.ok = ok; + +                    if info.options.content_type.is_some() { +                        info.connect = true; +                    } +                } + +                Box::new(|t, c| after_space_or_tab(t, c, info)) +            }, +        )(tokenizer, code) +    }) +} + +/// Before `space_or_tab`.  ///  /// ```markdown  /// alpha| bravo @@ -109,7 +155,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult      }  } -/// In whitespace. +/// In `space_or_tab`.  ///  /// ```markdown  /// alpha |bravo @@ -136,85 +182,75 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul      }  } -pub fn space_or_tab_one_line_ending() -> Box<StateFn> { -    space_or_tab_one_line_ending_with_options(OneLineEndingOptions { -        content_type: None, -        connect: false, -    }) -} - -pub fn space_or_tab_one_line_ending_with_options(options: OneLineEndingOptions) -> Box<StateFn> { -    Box::new(move |tokenizer, code| { -        let mut info = OneLineInfo { -            connect: false, -            options, -        }; - -        tokenizer.attempt( -            space_or_tab_with_options(Options { -                kind: TokenType::SpaceOrTab, -                min: 1, -                max: usize::MAX, -                content_type: info.options.content_type, -                connect: info.options.connect, -            }), -            move |ok| { -                if ok && info.options.content_type.is_some() { -                    info.connect = true; -                } - -                Box::new(move |tokenizer, code| match code { -                    Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { -                        at_eol(tokenizer, code, info) -                    } -                    _ => { -                        if ok { -                            (State::Ok, Some(vec![code])) -                        } else { -                            (State::Nok, None) -                        } -                    } -                }) -            }, -        )(tokenizer, code) -    }) -} - -fn at_eol(tokenizer: &mut Tokenizer, code: Code, mut info: OneLineInfo) -> StateFnResult { +/// `space_or_tab_eol`: after optionally first `space_or_tab`. +/// +/// ```markdown +/// alpha | +/// bravo +/// ``` +/// +/// ```markdown +/// alpha| +/// bravo +/// ``` +fn after_space_or_tab(tokenizer: &mut Tokenizer, code: Code, mut info: EolInfo) -> StateFnResult {      match code {          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {              tokenizer.enter_with_content(TokenType::LineEnding, info.options.content_type); -            if info.options.content_type.is_some() { -                if info.connect { -                    let index = tokenizer.events.len() - 1; -                    link(&mut tokenizer.events, index); -                } else { -                    info.connect = true; -                } +            if info.connect { +                let index = tokenizer.events.len() - 1; +                link(&mut tokenizer.events, index); +            } else if info.options.content_type.is_some() { +                info.connect = true;              }              tokenizer.consume(code);              tokenizer.exit(TokenType::LineEnding); -            ( -                State::Fn(Box::new(tokenizer.attempt_opt( -                    space_or_tab_with_options(Options { -                        kind: TokenType::SpaceOrTab, -                        min: 1, -                        max: usize::MAX, -                        content_type: info.options.content_type, -                        connect: info.connect, -                    }), -                    after_eol, -                ))), -                None, -            ) +            (State::Fn(Box::new(|t, c| after_eol(t, c, info))), None)          } -        _ => unreachable!("expected eol"), +        _ if info.ok => (State::Ok, Some(vec![code])), +        _ => (State::Nok, None),      }  } -fn after_eol(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +/// `space_or_tab_eol`: after eol. +/// +/// ```markdown +/// alpha +/// |bravo +/// ``` +/// +/// ```markdown +/// alpha +/// |bravo +/// ``` +#[allow(clippy::needless_pass_by_value)] +fn after_eol(tokenizer: &mut Tokenizer, code: Code, info: EolInfo) -> StateFnResult { +    tokenizer.attempt_opt( +        space_or_tab_with_options(Options { +            kind: TokenType::SpaceOrTab, +            min: 1, +            max: usize::MAX, +            content_type: info.options.content_type, +            connect: info.connect, +        }), +        after_more_space_or_tab, +    )(tokenizer, code) +} + +/// `space_or_tab_eol`: after more (optional) `space_or_tab`. +/// +/// ```markdown +/// alpha +/// |bravo +/// ``` +/// +/// ```markdown +/// alpha +///  |bravo +/// ``` +fn after_more_space_or_tab(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      // Blank line not allowed.      if matches!(          code, diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs index 044a8db..3d0bfb6 100644 --- a/src/construct/partial_title.rs +++ b/src/construct/partial_title.rs @@ -30,9 +30,7 @@  //! [character_reference]: crate::construct::character_reference  //! [label_end]: crate::construct::label_end -use super::partial_space_or_tab::{ -    space_or_tab_one_line_ending_with_options, OneLineEndingOptions, -}; +use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};  use crate::subtokenize::link;  use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer}; @@ -183,7 +181,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes          }          Code::None => (State::Nok, None),          Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go( -            space_or_tab_one_line_ending_with_options(OneLineEndingOptions { +            space_or_tab_eol_with_options(EolOptions {                  content_type: Some(ContentType::String),                  connect: info.connect,              }), diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs index 9a7a54d..62b1205 100644 --- a/src/construct/partial_whitespace.rs +++ b/src/construct/partial_whitespace.rs @@ -10,12 +10,18 @@  //! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab  //! ```  //! +//! This is similar to [`space_or_tab_eol`][space_or_tab_eol], with the main +//! difference that that *does not* require a line ending and parses any +//! `space_or_tab` with one line ending. +//! This instead *requires* the line ending (or eol). +//!  //! ## References  //!  //! *   [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)  //!  //! [string]: crate::content::string  //! [text]: crate::content::text +//! [space_or_tab_eol]: crate::construct::partial_space_or_tab::space_or_tab_eol  use super::partial_space_or_tab::space_or_tab;  use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; | 
