diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-21 17:24:56 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-21 17:24:56 +0200 |
commit | 56ff5c73c7ec19b349e7d60d04ce1057c006d6ec (patch) | |
tree | b4107ae0e0219f871a2f2764215ad979b2b0d75f /src/construct | |
parent | 7effd171218fff68f051671f1373cee467a8f921 (diff) | |
download | markdown-rs-56ff5c73c7ec19b349e7d60d04ce1057c006d6ec.tar.gz markdown-rs-56ff5c73c7ec19b349e7d60d04ce1057c006d6ec.tar.bz2 markdown-rs-56ff5c73c7ec19b349e7d60d04ce1057c006d6ec.zip |
Make data a construct
Diffstat (limited to '')
-rw-r--r-- | src/construct/mod.rs | 20 | ||||
-rw-r--r-- | src/construct/partial_data.rs | 69 |
2 files changed, 84 insertions, 5 deletions
diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 407dc6b..9e5da0e 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -7,11 +7,10 @@ //! For example, [code (fenced)][code_fenced] and //! [code (indented)][code_indented] are considered different constructs //! -//! <!-- To do: can these rest things be made into constructs? --> -//! -//! Content types also have a *rest* thing: after all character escapes and -//! character references are parsed, there’s something left. -//! This remainder is, currently, not called a constructs. +//! Content types also have a *rest* thing: after all things are parsed, +//! there’s something left. +//! In flow, that is a [paragraph][]. +//! In string and text, that is [data][partial_data]. //! //! The following constructs are found in markdown: //! @@ -38,6 +37,14 @@ //! * [paragraph][] //! * [thematic break][thematic_break] //! +//! There are also several routines used in different places: +//! +//! * [data][partial_data] +//! * [destination][partial_destination] +//! * [label][partial_label] +//! * [space or tab][partial_space_or_tab] +//! * [title][partial_title] +//! //! Each construct maintained here is explained with a BNF diagram. //! For example, the docs for [character escape][character_escape] contain: //! @@ -52,6 +59,8 @@ //! They also contain references to character as defined by [char][], so for //! example `ascii_punctuation` refers to //! [`char::is_ascii_punctuation`][char::is_ascii_punctuation]. +//! +//! pub mod autolink; pub mod blank_line; @@ -68,6 +77,7 @@ pub mod heading_setext; pub mod html_flow; pub mod html_text; pub mod paragraph; +pub mod partial_data; pub mod partial_destination; pub mod partial_label; pub mod partial_space_or_tab; diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs new file mode 100644 index 0000000..d83787a --- /dev/null +++ b/src/construct/partial_data.rs @@ -0,0 +1,69 @@ +//! Data occurs in [text][] and [string][]. +//! +//! 
It can include anything (including line endings), and stops at certain +//! characters. +//! +//! [string]: crate::content::string +//! [text]: crate::content::text + +// To do: pass token types in? + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// At the beginning of data: a code listed in `stop` is consumed as data here; any other code is handed to `at_break`. +/// +/// ```markdown +/// |&qwe +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult { + if stop.contains(&code) { + tokenizer.enter(TokenType::Data); + tokenizer.consume(code); + (State::Fn(Box::new(|t, c| data(t, c, stop))), None) + } else { + at_break(tokenizer, code, stop) + } +} + +/// Before a line ending, a code listed in `stop`, the end (`Code::None`), or more data. +/// +/// ```markdown +/// |qwe| |& +/// ``` +fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => { + tokenizer.enter(TokenType::LineEnding); + tokenizer.consume(code); + tokenizer.exit(TokenType::LineEnding); + (State::Fn(Box::new(|t, c| at_break(t, c, stop))), None) + } + _ if stop.contains(&code) => (State::Ok, Some(vec![code])), + _ => { + tokenizer.enter(TokenType::Data); + data(tokenizer, code, stop) + } + } +} + +/// In data: exits the `Data` token at a line ending, a code listed in `stop`, or the end (`Code::None`); consumes any other code. +/// +/// ```markdown +/// q|w|e +/// ``` +fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult { + let done = match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => true, + _ if stop.contains(&code) => true, + _ => false, + }; + + if done { + tokenizer.exit(TokenType::Data); + at_break(tokenizer, code, stop) + } else { + tokenizer.consume(code); + (State::Fn(Box::new(|t, c| data(t, c, stop))), None) + } +} |