From 5133042973f31a3992f216e591d840bb491bfd45 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Fri, 10 Jun 2022 16:29:56 +0200
Subject: Add proper support for subtokenization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add “content” content type
- Add paragraph
- Add skips
- Add linked tokens
---
 src/compiler.rs | 23 ++++++++---------------
 1 file changed, 8 insertions(+), 15 deletions(-)

(limited to 'src/compiler.rs')
diff --git a/src/compiler.rs b/src/compiler.rs
index 3632d29..05a56e1 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -38,7 +38,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
 
         match event.event_type {
             EventType::Enter => match token_type {
-                TokenType::Content => {
+                TokenType::Paragraph => {
                     buf_tail_mut(buffers).push("<p>".to_string());
                 }
                 TokenType::CodeIndented => {
@@ -62,7 +62,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                         ignore_encode = true;
                     }
                 }
-                TokenType::ContentChunk
+                TokenType::Content
                 | TokenType::AtxHeading
                 | TokenType::AtxHeadingSequence
                 | TokenType::AtxHeadingWhitespace
@@ -79,7 +79,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::HtmlFlowData
                 | TokenType::CodeFencedFence
                 | TokenType::CodeFencedFenceSequence
-                | TokenType::ChunkString
+                | TokenType::ChunkText
                 | TokenType::CodeFencedFenceWhitespace
                 | TokenType::Data
                 | TokenType::CharacterEscape
@@ -97,7 +97,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 }
             },
             EventType::Exit => match token_type {
-                TokenType::ThematicBreakSequence
+                TokenType::Content
+                | TokenType::ThematicBreakSequence
                 | TokenType::ThematicBreakWhitespace
                 | TokenType::CodeIndentedPrefixWhitespace
                 | TokenType::BlankLineEnding
@@ -120,7 +121,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                     // last_was_tag = false;
                     buf_tail_mut(buffers).push(res);
                 }
-                TokenType::Content => {
+                TokenType::Paragraph => {
                     buf_tail_mut(buffers).push("</p>".to_string());
                 }
                 TokenType::CodeIndented | TokenType::CodeFenced => {
@@ -278,17 +279,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
 
                     character_reference_kind = None;
                 }
-                // To do: `ContentPhrasing` should be parsed as phrasing first.
                 // This branch below currently acts as the resulting `data` tokens.
-                // To do: initial and final whitespace should be handled in `text`.
-                TokenType::ContentChunk => {
-                    // last_was_tag = false;
-                    buf_tail_mut(buffers).push(encode(
-                        slice_serialize(codes, &get_span(events, index), false).trim(),
-                    ));
-                }
-                // To do: `ChunkString` does not belong here. Remove it when subtokenization is supported.
-                TokenType::ChunkString | TokenType::Data | TokenType::CharacterEscapeValue => {
+                // To do: `ChunkText` does not belong here. Remove it when subtokenization is supported.
+                TokenType::ChunkText | TokenType::Data | TokenType::CharacterEscapeValue => {
                     // last_was_tag = false;
                     buf_tail_mut(buffers).push(encode(&slice_serialize(
                         codes,
-- 
cgit