Add proper support for subtokenization

- Add “content” content type
- Add paragraph
- Add skips
- Add linked tokens
author: Titus Wormer <tituswormer@gmail.com> 2022-06-10 16:29:56 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-10 16:29:56 +0200
commit: 5133042973f31a3992f216e591d840bb491bfd45 (patch)
tree: 810a44ac1d98f65dd2eedd0d9e8387eac0753e25 /src/content/content.rs
parent: 021d5f989ae41ae39a9b937b498141d9dc70d894 (diff)
download: markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.tar.gz
markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.tar.bz2
markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.zip
1 files changed, 84 insertions, 0 deletions
diff --git a/src/content/content.rs b/src/content/content.rs
new file mode 100644
index 0000000..7bf692f
--- /dev/null
+++ b/src/content/content.rs
@@ -0,0 +1,84 @@
+//! The `content`, ahum, content type.
+//!
+//! **Content** is zero or more definitions, and then zero or one paragraph.
+//! It’s a weird one, and needed to make certain edge cases around definitions
+//! spec compliant.
+//! Definitions are unlike other things in markdown: they behave like
+//! **text** in that they can contain arbitrary line endings, but they *have*
+//! to end at a line ending.
+//! If they end in something else, the whole definition is instead seen as a
+//! paragraph.
+//!
+//! The constructs found in content are:
+//!
+//! *   Definition
+//! *   Paragraph
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Before content: panics on an eol or eof, otherwise starts a paragraph.
+///
+/// ```markdown
+/// |[x]: y
+/// |asd
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            unreachable!("expected non-eol/eof");
+        }
+        _ => paragraph_initial(tokenizer, code)
+        // To do: definition.
+        // Until definitions are attempted here, every other code starts a
+        // paragraph (the arm above).
+        // Sketch of the planned arm; `a` and `b` look like placeholders for
+        // the follow-up states (nothing in this file defines them):
+        // _ => tokenizer.attempt(definition, |ok| {
+        //     Box::new(if ok { a } else { b })
+        // })(tokenizer, code),
+    }
+}
+
+/// Before a paragraph: enters `Paragraph` and `ChunkText`, then runs `data`.
+///
+/// ```markdown
+/// |asd
+/// ```
+fn paragraph_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            unreachable!("expected non-eol/eof"); // Panics: eol/eof is not accepted here.
+        }
+        _ => {
+            tokenizer.enter(TokenType::Paragraph);
+            tokenizer.enter(TokenType::ChunkText);
+            data(tokenizer, code) // `code` is passed on as is; nothing is consumed here.
+        }
+    }
+}
+
+/// In a line in a paragraph: consumes every code until eof, then exits the paragraph.
+///
+/// ```markdown
+/// |\&
+/// |qwe
+/// ```
+fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => { // Eof: exit the open chunk and then the paragraph.
+            tokenizer.exit(TokenType::ChunkText);
+            tokenizer.exit(TokenType::Paragraph);
+            (State::Ok, None)
+        }
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.consume(code); // The eol is consumed before the chunk is exited.
+            tokenizer.exit(TokenType::ChunkText);
+            tokenizer.enter(TokenType::ChunkText); // A fresh chunk is entered right away.
+            (State::Fn(Box::new(data)), None)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(data)), None) // Returns `data` itself as the boxed state fn.
+        }
+    }
+}
author	Titus Wormer <tituswormer@gmail.com>	2022-06-10 16:29:56 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-06-10 16:29:56 +0200
commit	5133042973f31a3992f216e591d840bb491bfd45 (patch)
tree	810a44ac1d98f65dd2eedd0d9e8387eac0753e25 /src/content/content.rs
parent	021d5f989ae41ae39a9b937b498141d9dc70d894 (diff)
download	markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.tar.gz markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.tar.bz2 markdown-rs-5133042973f31a3992f216e591d840bb491bfd45.zip