diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-08 15:52:16 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-08 15:52:16 +0200 |
commit | 4c06c8554c35887f8f5147783953b2b7e7c2327f (patch) | |
tree | 1b2463848a3ae4c645f7f1a325877ee829ab65c5 /src/content/string.rs | |
download | markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.tar.gz markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.tar.bz2 markdown-rs-4c06c8554c35887f8f5147783953b2b7e7c2327f.zip |
.
Diffstat (limited to '')
-rw-r--r-- | src/content/string.rs | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/src/content/string.rs b/src/content/string.rs new file mode 100644 index 0000000..a8a81b2 --- /dev/null +++ b/src/content/string.rs @@ -0,0 +1,120 @@ +//! The string content type. +//! +//! **String** is a limited **text** like content type which only allows +//! character escapes and character references. +//! It exists in things such as identifiers (media references, definitions), +//! titles, URLs, code (fenced) info and meta parts. +//! +//! The constructs found in strin are: +//! +//! * [Character escape][crate::construct::character_escape] +//! * [Character reference][crate::construct::character_reference] + +use crate::construct::{ + character_escape::start as character_escape, character_reference::start as character_reference, +}; +use crate::tokenizer::{Code, Event, State, StateFnResult, TokenType, Tokenizer}; + +/// Turn `codes` as the string content type into events. +// To do: remove this `allow` when all the content types are glued together. +#[allow(dead_code)] +pub fn string(codes: Vec<Code>) -> Vec<Event> { + let mut tokenizer = Tokenizer::new(); + let (state, remainder) = tokenizer.feed(codes, Box::new(before), true); + + if let Some(ref x) = remainder { + if !x.is_empty() { + unreachable!("expected no final remainder {:?}", x); + } + } + + match state { + State::Ok => {} + _ => unreachable!("expected final state to be `State::Ok`"), + } + + tokenizer.events +} + +/// Before string. +/// +/// First we assume character reference. +/// +/// ```markdown +/// |& +/// |\& +/// |qwe +/// ``` +fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + _ => tokenizer.attempt(character_reference, |ok| { + Box::new(if ok { + before + } else { + before_not_character_reference + }) + })(tokenizer, code), + } +} + +/// Before string, not at a character reference. +/// +/// Assume character escape. +/// +/// ```markdown +/// |\& +/// |qwe +/// ``` +fn before_not_character_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + _ => tokenizer.attempt(character_escape, |ok| { + Box::new(if ok { + before + } else { + before_not_character_escape + }) + })(tokenizer, code), + } +} + +/// Before string, not at a character reference or character escape. +/// +/// We’re at data. +/// +/// ```markdown +/// |qwe +/// ``` +fn before_not_character_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if let Code::None = code { + (State::Ok, None) + } else { + tokenizer.enter(TokenType::Data); + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } +} + +/// In data. +/// +/// ```markdown +/// q|w|e +/// ``` +fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => { + tokenizer.exit(TokenType::Data); + (State::Ok, None) + } + // To do: somehow get these markers from constructs. + Code::Char('&' | '\\') => { + tokenizer.exit(TokenType::Data); + before(tokenizer, code) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } + } +} |