From 17f4eec55ad0a5f74aedbcff6c2f0119ad52e584 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 10 Jun 2022 16:47:43 +0200 Subject: Add text content type * Add character reference and character escapes in text * Add recursive subtokenization --- src/content/text.rs | 80 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) create mode 100644 src/content/text.rs (limited to 'src/content/text.rs') diff --git a/src/content/text.rs b/src/content/text.rs new file mode 100644 index 0000000..2c93b18 --- /dev/null +++ b/src/content/text.rs @@ -0,0 +1,80 @@ +//! The text content type. +//! +//! **Text** contains phrasing content such as attention (emphasis, strong), +//! media (links, images), and actual text. +//! +//! The constructs found in text are: +//! +//! * Autolink +//! * Attention +//! * HTML (text) +//! * Hard break escape +//! * Code (text) +//! * Line ending +//! * Label start (image) +//! * Label start (link) +//! * [Character escape][crate::construct::character_escape] +//! * [Character reference][crate::construct::character_reference] + +use crate::construct::{ + character_escape::start as character_escape, character_reference::start as character_reference, +}; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Before text. +/// +/// First we assume character reference. +/// +/// ```markdown +/// |& +/// |\& +/// |qwe +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + _ => tokenizer.attempt_2(character_reference, character_escape, |ok| { + Box::new(if ok { start } else { before_data }) + })(tokenizer, code), + } +} + +/// Before text. +/// +/// We’re at data. +/// +/// ```markdown +/// |qwe +/// ``` +fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if let Code::None = code { + (State::Ok, None) + } else { + tokenizer.enter(TokenType::Data); + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } +} + +/// In data. +/// +/// ```markdown +/// q|w|e +/// ``` +fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => { + tokenizer.exit(TokenType::Data); + (State::Ok, None) + } + // To do: somehow get these markers from constructs. + Code::Char('&' | '\\') => { + tokenizer.exit(TokenType::Data); + start(tokenizer, code) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } + } +} -- cgit