From 4c06c8554c35887f8f5147783953b2b7e7c2327f Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Wed, 8 Jun 2022 15:52:16 +0200 Subject: . --- src/content/string.rs | 120 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) create mode 100644 src/content/string.rs (limited to 'src/content/string.rs') diff --git a/src/content/string.rs b/src/content/string.rs new file mode 100644 index 0000000..a8a81b2 --- /dev/null +++ b/src/content/string.rs @@ -0,0 +1,120 @@ +//! The string content type. +//! +//! **String** is a limited **text** like content type which only allows +//! character escapes and character references. +//! It exists in things such as identifiers (media references, definitions), +//! titles, URLs, code (fenced) info and meta parts. +//! +//! The constructs found in strin are: +//! +//! * [Character escape][crate::construct::character_escape] +//! * [Character reference][crate::construct::character_reference] + +use crate::construct::{ + character_escape::start as character_escape, character_reference::start as character_reference, +}; +use crate::tokenizer::{Code, Event, State, StateFnResult, TokenType, Tokenizer}; + +/// Turn `codes` as the string content type into events. +// To do: remove this `allow` when all the content types are glued together. +#[allow(dead_code)] +pub fn string(codes: Vec) -> Vec { + let mut tokenizer = Tokenizer::new(); + let (state, remainder) = tokenizer.feed(codes, Box::new(before), true); + + if let Some(ref x) = remainder { + if !x.is_empty() { + unreachable!("expected no final remainder {:?}", x); + } + } + + match state { + State::Ok => {} + _ => unreachable!("expected final state to be `State::Ok`"), + } + + tokenizer.events +} + +/// Before string. +/// +/// First we assume character reference. +/// +/// ```markdown +/// |& +/// |\& +/// |qwe +/// ``` +fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + _ => tokenizer.attempt(character_reference, |ok| { + Box::new(if ok { + before + } else { + before_not_character_reference + }) + })(tokenizer, code), + } +} + +/// Before string, not at a character reference. +/// +/// Assume character escape. +/// +/// ```markdown +/// |\& +/// |qwe +/// ``` +fn before_not_character_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => (State::Ok, None), + _ => tokenizer.attempt(character_escape, |ok| { + Box::new(if ok { + before + } else { + before_not_character_escape + }) + })(tokenizer, code), + } +} + +/// Before string, not at a character reference or character escape. +/// +/// We’re at data. +/// +/// ```markdown +/// |qwe +/// ``` +fn before_not_character_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if let Code::None = code { + (State::Ok, None) + } else { + tokenizer.enter(TokenType::Data); + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } +} + +/// In data. +/// +/// ```markdown +/// q|w|e +/// ``` +fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => { + tokenizer.exit(TokenType::Data); + (State::Ok, None) + } + // To do: somehow get these markers from constructs. + Code::Char('&' | '\\') => { + tokenizer.exit(TokenType::Data); + before(tokenizer, code) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(in_data)), None) + } + } +} -- cgit