diff options
Diffstat (limited to '')
| -rw-r--r-- | src/construct/mod.rs | 20 | ||||
| -rw-r--r-- | src/construct/partial_data.rs | 69 | ||||
| -rw-r--r-- | src/content/string.rs | 50 | ||||
| -rw-r--r-- | src/content/text.rs | 53 | 
4 files changed, 105 insertions, 87 deletions
diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 407dc6b..9e5da0e 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -7,11 +7,10 @@  //! For example, [code (fenced)][code_fenced] and  //! [code (indented)][code_indented] are considered different constructs  //! -//! <!-- To do: can these rest things be made into constructs? --> -//! -//! Content types also have a *rest* thing: after all character escapes and -//! character references are parsed, there’s something left. -//! This remainder is, currently, not called a constructs. +//! Content types also have a *rest* thing: after all things are parsed, +//! there’s something left. +//! In flow, that is a [paragraph][]. +//! In string and text, that is [data][partial_data].  //!  //! The following constructs are found in markdown:  //! @@ -38,6 +37,14 @@  //! *   [paragraph][]  //! *   [thematic break][thematic_break]  //! +//! There are also several routines used in different places: +//! +//! *   [data][partial_data] +//! *   [destination][partial_destination] +//! *   [label][partial_label] +//! *   [space or tab][partial_space_or_tab] +//! *   [title][partial_title] +//!  //! Each construct maintained here is explained with a BNF diagram.  //! For example, the docs for [character escape][character_escape] contain:  //! @@ -52,6 +59,8 @@  //! They also contain references to character as defined by [char][], so for  //! example `ascii_punctuation` refers to  //! [`char::is_ascii_punctuation`][char::is_ascii_punctuation]. +//! +//!  pub mod autolink;  pub mod blank_line; @@ -68,6 +77,7 @@ pub mod heading_setext;  pub mod html_flow;  pub mod html_text;  pub mod paragraph; +pub mod partial_data;  pub mod partial_destination;  pub mod partial_label;  pub mod partial_space_or_tab; diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs new file mode 100644 index 0000000..d83787a --- /dev/null +++ b/src/construct/partial_data.rs @@ -0,0 +1,69 @@ +//! Data occurs in [text][] and [string][]. +//! +//! It can include anything (including line endings), and stops at certain +//! characters. +//! +//! [string]: crate::content::string +//! [text]: crate::content::text + +// To do: pass token types in? + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// At the beginning of data. +/// +/// ```markdown +/// |&qwe +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult { +    if stop.contains(&code) { +        tokenizer.enter(TokenType::Data); +        tokenizer.consume(code); +        (State::Fn(Box::new(|t, c| data(t, c, stop))), None) +    } else { +        at_break(tokenizer, code, stop) +    } +} + +/// Before something. +/// +/// ```markdown +/// |qwe| |& +/// ``` +fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult { +    match code { +        Code::None => (State::Ok, None), +        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { +            tokenizer.enter(TokenType::LineEnding); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::LineEnding); +            (State::Fn(Box::new(|t, c| at_break(t, c, stop))), None) +        } +        _ if stop.contains(&code) => (State::Ok, Some(vec![code])), +        _ => { +            tokenizer.enter(TokenType::Data); +            data(tokenizer, code, stop) +        } +    } +} + +/// In data. +/// +/// ```markdown +/// q|w|e +/// ``` +fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult { +    let done = match code { +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => true, +        _ if stop.contains(&code) => true, +        _ => false, +    }; + +    if done { +        tokenizer.exit(TokenType::Data); +        at_break(tokenizer, code, stop) +    } else { +        tokenizer.consume(code); +        (State::Fn(Box::new(|t, c| data(t, c, stop))), None) +    } +} diff --git a/src/content/string.rs b/src/content/string.rs index bae2646..3338c90 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -14,8 +14,14 @@  use crate::construct::{      character_escape::start as character_escape, character_reference::start as character_reference, +    partial_data::start as data,  }; -use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +const MARKERS: [Code; 2] = [ +    Code::Char('&'),  // `character_reference` +    Code::Char('\\'), // `character_escape` +];  /// Before string.  /// @@ -33,49 +39,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      }  } -/// Before string, not at a character reference or character escape. -/// -/// We’re at data. +/// At data.  ///  /// ```markdown  /// |qwe  /// ```  fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None => (State::Ok, None), -        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { -            tokenizer.enter(TokenType::LineEnding); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::LineEnding); -            (State::Fn(Box::new(start)), None) -        } -        _ => { -            tokenizer.enter(TokenType::Data); -            tokenizer.consume(code); -            (State::Fn(Box::new(in_data)), None) -        } -    } -} - -/// In data. -/// -/// ```markdown -/// q|w|e -/// ``` -fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { -            tokenizer.exit(TokenType::Data); -            before_data(tokenizer, code) -        } -        // To do: somehow get these markers from constructs. -        Code::Char('&' | '\\') => { -            tokenizer.exit(TokenType::Data); -            start(tokenizer, code) -        } -        _ => { -            tokenizer.consume(code); -            (State::Fn(Box::new(in_data)), None) -        } -    } +    tokenizer.go(|t, c| data(t, c, MARKERS.to_vec()), start)(tokenizer, code)  } diff --git a/src/content/text.rs b/src/content/text.rs index 6a30d4c..857e9a0 100644 --- a/src/content/text.rs +++ b/src/content/text.rs @@ -21,8 +21,17 @@ use crate::construct::{      character_reference::start as character_reference, code_text::start as code_text,      hard_break_escape::start as hard_break_escape,      hard_break_trailing::start as hard_break_trailing, html_text::start as html_text, +    partial_data::start as data,  }; -use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +const MARKERS: [Code; 5] = [ +    Code::Char(' '),  // `hard_break_trailing` +    Code::Char('&'),  // `character_reference` +    Code::Char('<'),  // `autolink`, `html_text` +    Code::Char('\\'), // `character_escape`, `hard_break_escape` +    Code::Char('`'),  // `code_text` +];  /// Before text.  /// @@ -49,49 +58,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      }  } -/// Before text. -/// -/// We’re at data. +/// At data.  ///  /// ```markdown  /// |qwe  /// ```  fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None => (State::Ok, None), -        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { -            tokenizer.enter(TokenType::LineEnding); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::LineEnding); -            (State::Fn(Box::new(start)), None) -        } -        _ => { -            tokenizer.enter(TokenType::Data); -            tokenizer.consume(code); -            (State::Fn(Box::new(in_data)), None) -        } -    } -} - -/// In data. -/// -/// ```markdown -/// q|w|e -/// ``` -fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { -            tokenizer.exit(TokenType::Data); -            before_data(tokenizer, code) -        } -        // To do: somehow get these markers from constructs. -        Code::Char(' ' | '&' | '<' | '\\' | '`') => { -            tokenizer.exit(TokenType::Data); -            start(tokenizer, code) -        } -        _ => { -            tokenizer.consume(code); -            (State::Fn(Box::new(in_data)), None) -        } -    } +    tokenizer.go(|t, c| data(t, c, MARKERS.to_vec()), start)(tokenizer, code)  }  | 
