diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-10 16:29:56 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-10 16:29:56 +0200 |
commit | 5133042973f31a3992f216e591d840bb491bfd45 (patch) | |
tree | 810a44ac1d98f65dd2eedd0d9e8387eac0753e25 /src/content/content.rs | |
parent | 021d5f989ae41ae39a9b937b498141d9dc70d894 (diff) | |
download | markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.tar.gz markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.tar.bz2 markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.zip |
Add proper support for subtokenization
- Add “content” content type
- Add paragraph
- Add skips
- Add linked tokens
Diffstat (limited to '')
-rw-r--r-- | src/content/content.rs | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/src/content/content.rs b/src/content/content.rs new file mode 100644 index 0000000..7bf692f --- /dev/null +++ b/src/content/content.rs @@ -0,0 +1,84 @@ +//! The `content`, ahum, content type. +//! +//! **Content** is zero or more definitions, and then zero or one paragraph. +//! It’s a weird one, and needed to make certain edge cases around definitions +//! spec compliant. +//! Definitions are unlike other things in markdown, in that they behave like +//! **text** in that they can contain arbitrary line endings, but *have* to end +//! at a line ending. +//! If they end in something else, the whole definition instead is seen as a +//! paragraph. +//! +//! The constructs found in content are: +//! +//! * Definition +//! * Paragraph + +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +/// Before content. +/// +/// ```markdown +/// |[x]: y +/// |asd +/// ``` +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + unreachable!("expected non-eol/eof"); + } + _ => paragraph_initial(tokenizer, code) + // To do: definition. + // _ => tokenizer.attempt(definition, |ok| { + // Box::new(if ok { + // a + // } else { + // b + // }) + // })(tokenizer, code), + } +} + +/// Before a paragraph. +/// +/// ```markdown +/// |asd +/// ``` +fn paragraph_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + unreachable!("expected non-eol/eof"); + } + _ => { + tokenizer.enter(TokenType::Paragraph); + tokenizer.enter(TokenType::ChunkText); + data(tokenizer, code) + } + } +} + +/// In a line in a paragraph. +/// +/// ```markdown +/// |\& +/// |qwe +/// ``` +fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::None => { + tokenizer.exit(TokenType::ChunkText); + tokenizer.exit(TokenType::Paragraph); + (State::Ok, None) + } + Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { + tokenizer.consume(code); + tokenizer.exit(TokenType::ChunkText); + tokenizer.enter(TokenType::ChunkText); + (State::Fn(Box::new(data)), None) + } + _ => { + tokenizer.consume(code); + (State::Fn(Box::new(data)), None) + } + } +} |