author     Titus Wormer <tituswormer@gmail.com>   2022-06-21 17:24:56 +0200
committer  Titus Wormer <tituswormer@gmail.com>   2022-06-21 17:24:56 +0200
commit     56ff5c73c7ec19b349e7d60d04ce1057c006d6ec (patch)
tree       b4107ae0e0219f871a2f2764215ad979b2b0d75f
parent     7effd171218fff68f051671f1373cee467a8f921 (diff)
Make data a construct
Diffstat
-rw-r--r--  readme.md                      4
-rw-r--r--  src/construct/mod.rs          20
-rw-r--r--  src/construct/partial_data.rs 69
-rw-r--r--  src/content/string.rs         50
-rw-r--r--  src/content/text.rs           53
-rw-r--r--  tests/autolink.rs              2
6 files changed, 108 insertions, 90 deletions
diff --git a/readme.md b/readme.md
--- a/readme.md
+++ b/readme.md
@@ -84,8 +84,6 @@ cargo doc --document-private-items
 
 #### Refactor
 
-- [ ] (1) Make text data, string data constructs (document in
-  `construct/mod.rs`)
 - [ ] (1) Configurable tokens (destination, label, title)
 - [ ] (1) Configurable limit (destination)
 
@@ -237,6 +235,8 @@ cargo doc --document-private-items
 - [x] (1) Figure out lifetimes of things (see `life time` in source)
 - [x] (1) Use traits for a bunch of enums, e.g., markers
 - [x] (1) Move safe protocols to constants
+- [x] (1) Make text data, string data constructs (document in
+  `construct/mod.rs`)
 
 ### Extensions
 
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 407dc6b..9e5da0e 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -7,11 +7,10 @@
 //! For example, [code (fenced)][code_fenced] and
 //! [code (indented)][code_indented] are considered different constructs
 //!
-//! <!-- To do: can these rest things be made into constructs? -->
-//!
-//! Content types also have a *rest* thing: after all character escapes and
-//! character references are parsed, there’s something left.
-//! This remainder is, currently, not called a constructs.
+//! Content types also have a *rest* thing: after all things are parsed,
+//! there’s something left.
+//! In flow, that is a [paragraph][].
+//! In string and text, that is [data][partial_data].
 //!
 //! The following constructs are found in markdown:
 //!
@@ -38,6 +37,14 @@
 //! * [paragraph][]
 //! * [thematic break][thematic_break]
 //!
+//! There are also several routines used in different places:
+//!
+//! * [data][partial_data]
+//! * [destination][partial_destination]
+//! * [label][partial_label]
+//! * [space or tab][partial_space_or_tab]
+//! * [title][partial_title]
+//!
 //! Each construct maintained here is explained with a BNF diagram.
 //! For example, the docs for [character escape][character_escape] contain:
 //!
@@ -52,6 +59,8 @@
 //! They also contain references to character as defined by [char][], so for
 //! example `ascii_punctuation` refers to
 //! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
+//!
+//!
 
 pub mod autolink;
 pub mod blank_line;
@@ -68,6 +77,7 @@ pub mod heading_setext;
 pub mod html_flow;
 pub mod html_text;
 pub mod paragraph;
+pub mod partial_data;
 pub mod partial_destination;
 pub mod partial_label;
 pub mod partial_space_or_tab;
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
new file mode 100644
index 0000000..d83787a
--- /dev/null
+++ b/src/construct/partial_data.rs
@@ -0,0 +1,69 @@
+//! Data occurs in [text][] and [string][].
+//!
+//! It can include anything (including line endings), and stops at certain
+//! characters.
+//!
+//! [string]: crate::content::string
+//! [text]: crate::content::text
+
+// To do: pass token types in?
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// At the beginning of data.
+///
+/// ```markdown
+/// |&qwe
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult {
+    if stop.contains(&code) {
+        tokenizer.enter(TokenType::Data);
+        tokenizer.consume(code);
+        (State::Fn(Box::new(|t, c| data(t, c, stop))), None)
+    } else {
+        at_break(tokenizer, code, stop)
+    }
+}
+
+/// Before something.
+///
+/// ```markdown
+/// |qwe| |&
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (State::Fn(Box::new(|t, c| at_break(t, c, stop))), None)
+        }
+        _ if stop.contains(&code) => (State::Ok, Some(vec![code])),
+        _ => {
+            tokenizer.enter(TokenType::Data);
+            data(tokenizer, code, stop)
+        }
+    }
+}
+
+/// In data.
+///
+/// ```markdown
+/// q|w|e
+/// ```
+fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult {
+    let done = match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => true,
+        _ if stop.contains(&code) => true,
+        _ => false,
+    };
+
+    if done {
+        tokenizer.exit(TokenType::Data);
+        at_break(tokenizer, code, stop)
+    } else {
+        tokenizer.consume(code);
+        (State::Fn(Box::new(|t, c| data(t, c, stop))), None)
+    }
+}
diff --git a/src/content/string.rs b/src/content/string.rs
index bae2646..3338c90 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -14,8 +14,14 @@
 use crate::construct::{
     character_escape::start as character_escape,
     character_reference::start as character_reference,
+    partial_data::start as data,
 };
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
+const MARKERS: [Code; 2] = [
+    Code::Char('&'),  // `character_reference`
+    Code::Char('\\'), // `character_escape`
+];
 
 /// Before string.
 ///
@@ -33,49 +39,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// Before string, not at a character reference or character escape.
-///
-/// We’re at data.
+/// At data.
 ///
 /// ```markdown
 /// |qwe
 /// ```
 fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::None => (State::Ok, None),
-        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
-            tokenizer.enter(TokenType::LineEnding);
-            tokenizer.consume(code);
-            tokenizer.exit(TokenType::LineEnding);
-            (State::Fn(Box::new(start)), None)
-        }
-        _ => {
-            tokenizer.enter(TokenType::Data);
-            tokenizer.consume(code);
-            (State::Fn(Box::new(in_data)), None)
-        }
-    }
-}
-
-/// In data.
-///
-/// ```markdown
-/// q|w|e
-/// ```
-fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
-            tokenizer.exit(TokenType::Data);
-            before_data(tokenizer, code)
-        }
-        // To do: somehow get these markers from constructs.
-        Code::Char('&' | '\\') => {
-            tokenizer.exit(TokenType::Data);
-            start(tokenizer, code)
-        }
-        _ => {
-            tokenizer.consume(code);
-            (State::Fn(Box::new(in_data)), None)
-        }
-    }
+    tokenizer.go(|t, c| data(t, c, MARKERS.to_vec()), start)(tokenizer, code)
 }
diff --git a/src/content/text.rs b/src/content/text.rs
index 6a30d4c..857e9a0 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -21,8 +21,17 @@ use crate::construct::{
     character_reference::start as character_reference, code_text::start as code_text,
     hard_break_escape::start as hard_break_escape,
     hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
+    partial_data::start as data,
 };
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
+const MARKERS: [Code; 5] = [
+    Code::Char(' '),  // `hard_break_trailing`
+    Code::Char('&'),  // `character_reference`
+    Code::Char('<'),  // `autolink`, `html_text`
+    Code::Char('\\'), // `character_escape`, `hard_break_escape`
+    Code::Char('`'),  // `code_text`
+];
 
 /// Before text.
 ///
@@ -49,49 +58,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// Before text.
-///
-/// We’re at data.
+/// At data.
 ///
 /// ```markdown
 /// |qwe
 /// ```
 fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::None => (State::Ok, None),
-        Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
-            tokenizer.enter(TokenType::LineEnding);
-            tokenizer.consume(code);
-            tokenizer.exit(TokenType::LineEnding);
-            (State::Fn(Box::new(start)), None)
-        }
-        _ => {
-            tokenizer.enter(TokenType::Data);
-            tokenizer.consume(code);
-            (State::Fn(Box::new(in_data)), None)
-        }
-    }
-}
-
-/// In data.
-///
-/// ```markdown
-/// q|w|e
-/// ```
-fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
-            tokenizer.exit(TokenType::Data);
-            before_data(tokenizer, code)
-        }
-        // To do: somehow get these markers from constructs.
-        Code::Char(' ' | '&' | '<' | '\\' | '`') => {
-            tokenizer.exit(TokenType::Data);
-            start(tokenizer, code)
-        }
-        _ => {
-            tokenizer.consume(code);
-            (State::Fn(Box::new(in_data)), None)
-        }
-    }
+    tokenizer.go(|t, c| data(t, c, MARKERS.to_vec()), start)(tokenizer, code)
 }
diff --git a/tests/autolink.rs b/tests/autolink.rs
index 3882264..f0486ef 100644
--- a/tests/autolink.rs
+++ b/tests/autolink.rs
@@ -8,7 +8,7 @@ const DANGER: &Options = &Options {
 };
 
 #[test]
-fn code_fenced() {
+fn autolink() {
     assert_eq!(
         micromark("```\n<\n >\n```"),
         "<pre><code>&lt;\n &gt;\n</code></pre>",
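In short: the duplicated `before_data`/`in_data` state machines in `src/content/string.rs` and `src/content/text.rs` are replaced by one shared `partial_data` construct, and each content type passes its own list of stop characters so the routine knows where data ends. Below is a minimal sketch of that wiring, not part of the commit; the names come from the diff above, and the `Tokenizer::go` signature is assumed from its use in `string.rs` and `text.rs`.

```rust
use crate::construct::partial_data::start as data;
use crate::tokenizer::{Code, StateFnResult, Tokenizer};

// Characters at which `string` content stops data so its constructs can run.
const MARKERS: [Code; 2] = [
    Code::Char('&'),  // `character_reference`
    Code::Char('\\'), // `character_escape`
];

/// Everything else is consumed as `Data` (line endings become `LineEnding`
/// tokens); at a stop character or EOF the data construct yields and hands the
/// stop character back, so the content type's own `start` (as in `string.rs`)
/// can try its constructs again.
fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
    tokenizer.go(|t, c| data(t, c, MARKERS.to_vec()), start)(tokenizer, code)
}
```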