From 5133042973f31a3992f216e591d840bb491bfd45 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 10 Jun 2022 16:29:56 +0200 Subject: Add proper support for subtokenization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add “content” content type - Add paragraph - Add skips - Add linked tokens --- src/content/content.rs | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 src/content/content.rs (limited to 'src/content/content.rs') diff --git a/src/content/content.rs b/src/content/content.rs new file mode 100644 index 0000000..7bf692f --- /dev/null +++ b/src/content/content.rs @@ -0,0 +1,84 @@ +//! The `content`, ahum, content type. +//! +//! **Content** is zero or more definitions, and then zero or one paragraph. +//! It’s a weird one, and needed to make certain edge cases around definitions +//! spec compliant. +//! Definitions are unlike other things in markdown, in that they behave like +//! **text** in that they can contain arbitrary line endings, but *have* to end +//! at a line ending. +//! If they end in something else, the whole definition instead is seen as a +//! paragraph. +//! +//! The constructs found in content are: +//! +//! * Definition +//! * Paragraph + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Before content. +/// +/// ```markdown +/// |[x]: y +/// |asd +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + unreachable!("expected non-eol/eof"); + } + _ => paragraph_initial(tokenizer, code) + // To do: definition. + // _ => tokenizer.attempt(definition, |ok| { + // Box::new(if ok { + // a + // } else { + // b + // }) + // })(tokenizer, code), + } +} + +/// Before a paragraph. +/// +/// ```markdown +/// |asd +/// ``` +fn paragraph_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + unreachable!("expected non-eol/eof"); + } + _ => { + tokenizer.enter(TokenType::Paragraph); + tokenizer.enter(TokenType::ChunkText); + data(tokenizer, code) + } + } +} + +/// In a line in a paragraph. +/// +/// ```markdown +/// |\& +/// |qwe +/// ``` +fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => { + tokenizer.exit(TokenType::ChunkText); + tokenizer.exit(TokenType::Paragraph); + (State::Ok, None) + } + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + tokenizer.consume(code); + tokenizer.exit(TokenType::ChunkText); + tokenizer.enter(TokenType::ChunkText); + (State::Fn(Box::new(data)), None) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(data)), None) + } + } +} -- cgit