diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-10 16:47:43 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-10 16:47:43 +0200 |
commit | 17f4eec55ad0a5f74aedbcff6c2f0119ad52e584 (patch) | |
tree | 1839c796de977421456d1b9006f2f2c1e23cf809 /src/content/text.rs | |
parent | 5133042973f31a3992f216e591d840bb491bfd45 (diff) | |
download | markdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.tar.gz markdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.tar.bz2 markdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.zip |
Add text content type
* Add character reference and character escapes in text
* Add recursive subtokenization
Diffstat (limited to 'src/content/text.rs')
-rw-r--r-- | src/content/text.rs | 80 |
1 files changed, 80 insertions, 0 deletions
diff --git a/src/content/text.rs b/src/content/text.rs new file mode 100644 index 0000000..2c93b18 --- /dev/null +++ b/src/content/text.rs @@ -0,0 +1,80 @@ +//! The text content type. +//! +//! **Text** contains phrasing content such as attention (emphasis, strong), +//! media (links, images), and actual text. +//! +//! The constructs found in text are: +//! +//! * Autolink +//! * Attention +//! * HTML (text) +//! * Hard break escape +//! * Code (text) +//! * Line ending +//! * Label start (image) +//! * Label start (link) +//! * [Character escape][crate::construct::character_escape] +//! * [Character reference][crate::construct::character_reference] + +use crate::construct::{ + character_escape::start as character_escape, character_reference::start as character_reference, +}; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Before text. +/// +/// First we assume character reference. +/// +/// ```markdown +/// |& +/// |\& +/// |qwe +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + _ => tokenizer.attempt_2(character_reference, character_escape, |ok| { + Box::new(if ok { start } else { before_data }) + })(tokenizer, code), + } +} + +/// Before text. +/// +/// We’re at data. +/// +/// ```markdown +/// |qwe +/// ``` +fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if let Code::None = code { + (State::Ok, None) + } else { + tokenizer.enter(TokenType::Data); + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } +} + +/// In data. +/// +/// ```markdown +/// q|w|e +/// ``` +fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => { + tokenizer.exit(TokenType::Data); + (State::Ok, None) + } + // To do: somehow get these markers from constructs. + Code::Char('&' | '\\') => { + tokenizer.exit(TokenType::Data); + start(tokenizer, code) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } + } +} |