From 262aec96cece3e9dd55828397b8ec859e7cff606 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>
Date: Mon, 20 Jun 2022 12:59:06 +0200
Subject: Remove unneeded `content` content type

---
 readme.md                    | 25 +++++++---------
 src/compiler.rs              |  2 --
 src/constant.rs              |  5 ++--
 src/construct/code_fenced.rs |  2 +-
 src/construct/html_text.rs   |  6 ++--
 src/construct/mod.rs         |  1 -
 src/content/content.rs       | 70 --------------------------------------------
 src/content/flow.rs          | 54 +++++++++++++++++-----------------
 src/content/mod.rs           |  1 -
 src/content/string.rs        |  4 ++-
 src/subtokenize.rs           |  9 ++----
 src/tokenizer.rs             |  2 --
 12 files changed, 50 insertions(+), 131 deletions(-)
 delete mode 100644 src/content/content.rs

diff --git a/readme.md b/readme.md
index 0cd5bd2..082dd4c 100644
--- a/readme.md
+++ b/readme.md
@@ -46,9 +46,9 @@ cargo doc --document-private-items
 
 ### Some major obstacles
 
-- [ ] (8) Can content (and to a lesser extent string and text) operate more
-      performantly than checking whether other flow constructs start a line,
-      before exiting and actually attempting flow constructs?
+- [ ] (8) Can paragraphs (and to a lesser extent string data and text data)
+      operate more performantly than checking whether other flow constructs
+      start a line, before exiting and actually attempting flow constructs?
 - [ ] (5) Figure out sharing definition and identifiers, and references before
       definitions
 - [ ] (3) Interrupting: sometimes flow can or cannot start depending on the
@@ -57,8 +57,8 @@ cargo doc --document-private-items
       subtokenization is solved
 - [ ] (3) Concrete constructs: HTML or code (fenced) cannot be “pierced” into by
       containers
-- [ ] (3) Lazy lines, in containers, in flow and content in a paragraph, a line
-      does not need to be indented
+- [ ] (3) Lazy lines: in containers, a line that continues a paragraph in flow
+      does not need to be indented
 - [ ] (5) There’s a lot of rust-related choosing whether to pass (mutable)
       references or whatever around that should be refactored
 - [ ] (5) Figure out extensions
@@ -66,11 +66,9 @@ cargo doc --document-private-items
 
 ### Small things
 
-- [ ] (1) Remove `content` content type, as it is no longer needed
 - [ ] (1) Connect `ChunkString` in label, destination, title
 - [ ] (1) Add support for line endings in `string`
 - [ ] (1) Add docs to subtokenize
-- [ ] (1) Add module docs to content
 - [ ] (1) Add module docs to parser
 - [ ] (1) Add overview docs on how everything works
 - [ ] (1) Move safe protocols to constants
@@ -109,8 +107,7 @@ cargo doc --document-private-items
 - [x] character reference
 - [x] code (fenced)
 - [x] code (indented)
-- [x] (1) code (text)
-- [ ] (3) content
+- [x] code (text)
 - [x] definition
 - [x] hard break (escape)
 - [x] hard break (trailing)
@@ -134,14 +131,12 @@ cargo doc --document-private-items
   - [x] blank line
   - [x] code (fenced)
   - [x] code (indented)
-  - [x] content
   - [x] definition
   - [x] heading (atx)
   - [x] heading (setext)
   - [x] html (flow)
-  - [x] thematic break
-- [x] content
   - [x] paragraph
+  - [x] thematic break
 - [ ] (5) text
   - [ ] attention (strong, emphasis) (text)
   - [x] autolink
@@ -170,10 +165,10 @@ cargo doc --document-private-items
 - [x] (1) Add examples to `CompileOptions` docs
 - [x] (3) Fix deep subtokenization
 - [x] (1) text in heading
-- [x] (1) Setext headings: can they be solved in content, or do they have to be
-      solved in flow somehow
+- [x] (1) Setext headings, solved in flow
 - [x] (1) Add docs to partials
 - [x] (1) Remove all `pub fn`s from constructs, except for start
+- [x] (1) Remove `content` content type, as it is no longer needed
 
 ### Extensions
 
@@ -188,7 +183,7 @@ important.
       — [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter)
 - [ ] (3) autolink literal (GFM) (text)
       — [`micromark-extension-gfm-autolink-literal`](https://github.com/micromark/micromark-extension-gfm-autolink-literal)
-- [ ] (3) footnote (GFM) (content, text)
+- [ ] (3) footnote (GFM) (flow, text)
       — [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
 - [ ] (3) strikethrough (GFM) (text)
       — [`micromark-extension-gfm-strikethrough`](https://github.com/micromark/micromark-extension-gfm-strikethrough)
diff --git a/src/compiler.rs b/src/compiler.rs
index be5d0fe..59fcd22 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -126,7 +126,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::CodeTextData
                 | TokenType::CodeTextLineEnding
                 | TokenType::CodeTextSequence
-                | TokenType::Content
                 | TokenType::Data
                 | TokenType::DefinitionLabel
                 | TokenType::DefinitionLabelMarker
@@ -213,7 +212,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::CodeFencedFenceWhitespace
                 | TokenType::CodeIndentedPrefixWhitespace
                 | TokenType::CodeTextSequence
-                | TokenType::Content
                 | TokenType::DefinitionLabel
                 | TokenType::DefinitionLabelMarker
                 | TokenType::DefinitionLabelData
diff --git a/src/constant.rs b/src/constant.rs
index 1f833c2..e7594b9 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -103,8 +103,9 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;
 /// List of HTML tag names that form the **basic** production of
 /// [HTML (flow)][html_flow].
 ///
-/// The **basic** production allows interleaving HTML and markdown with blank lines
-/// and allows flow (block) elements to interrupt content.
+/// The **basic** production allows interleaving HTML and markdown with blank
+/// lines and allows flow (block) elements to interrupt definitions, paragraphs,
+/// and headings (setext).
 /// Tag name matching must be performed insensitive to case, and thus this list
 /// includes lowercase tag names.
 ///
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 12c8bd6..28ac20b 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -27,7 +27,7 @@
 //! The above grammar does not show how whitespace is handled.
 //! To parse code (fenced), let `X` be the number of whitespace characters
 //! before the opening fence sequence.
-//! Each line of content is then allowed (not required) to be indented with up
+//! Each line of text is then allowed (not required) to be indented with up
 //! to `X` spaces or tabs, which are then ignored as an indent instead of being
 //! considered as part of the code.
 //! This indent does not affect the closing fence.
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index d50a8ce..93b4b62 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -632,7 +632,7 @@ fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 
 /// At an allowed line ending.
 ///
-/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > **Note**: we can’t have blank lines in text, so no need to worry about
 /// > empty tokens.
 ///
 /// ```markdown
@@ -661,7 +661,7 @@ fn at_line_ending(
 
 /// After a line ending.
 ///
-/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > **Note**: we can’t have blank lines in text, so no need to worry about
 /// > empty tokens.
 ///
 /// ```markdown
@@ -681,7 +681,7 @@ fn after_line_ending(
 
 /// After a line ending, after indent.
 ///
-/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > **Note**: we can’t have blank lines in text, so no need to worry about
 /// > empty tokens.
 ///
 /// ```markdown
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index a5e95bc..3195205 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -24,7 +24,6 @@
 //! *   [code (fenced)][code_fenced]
 //! *   [code (indented)][code_indented]
 //! *   [code (text)][code_text]
-//! *   content
 //! *   [definition][]
 //! *   [hard break (escape)][hard_break_escape]
 //! *   [hard break (trailing)][hard_break_trailing]
diff --git a/src/content/content.rs b/src/content/content.rs
deleted file mode 100644
index 86bc290..0000000
--- a/src/content/content.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-//! The `content`, ahum, content type.
-//!
-//! **Content** is zero or more definitions, and then zero or one paragraph.
-//! It’s a weird one, and needed to make certain edge cases around definitions
-//! spec compliant.
-//! Definitions are unlike other things in markdown, in that they behave like
-//! **text** in that they can contain arbitrary line endings, but *have* to end
-//! at a line ending.
-//! If they end in something else, the whole definition instead is seen as a
-//! paragraph.
-//!
-//! The constructs found in content are:
-//!
-//! *   Definition
-//! *   Paragraph
-
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-
-/// Before a paragraph.
-///
-/// ```markdown
-/// |asd
-/// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            unreachable!("expected non-eol/eof");
-        }
-        _ => {
-            tokenizer.enter(TokenType::Paragraph);
-            tokenizer.enter(TokenType::ChunkText);
-            inside(tokenizer, code, tokenizer.events.len() - 1)
-        }
-    }
-}
-
-/// In a line in a paragraph.
-///
-/// ```markdown
-/// |\&
-/// |qwe
-/// ```
-fn inside(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {
-    match code {
-        Code::None => {
-            tokenizer.exit(TokenType::ChunkText);
-            tokenizer.exit(TokenType::Paragraph);
-            (State::Ok, None)
-        }
-        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            tokenizer.consume(code);
-            tokenizer.exit(TokenType::ChunkText);
-            tokenizer.enter(TokenType::ChunkText);
-            let next_index = tokenizer.events.len() - 1;
-            tokenizer.events[previous_index].next = Some(next_index);
-            tokenizer.events[next_index].previous = Some(previous_index);
-            (
-                State::Fn(Box::new(move |t, c| inside(t, c, next_index))),
-                None,
-            )
-        }
-        _ => {
-            tokenizer.consume(code);
-            (
-                State::Fn(Box::new(move |t, c| inside(t, c, previous_index))),
-                None,
-            )
-        }
-    }
-}
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 3fab523..58be61d 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -1,7 +1,7 @@
 //! The flow content type.
 //!
-//! **Flow** represents the sections, such as headings, code, and content, which
-//! is parsed per line.
+//! **Flow** represents the sections, such as headings and code, which are
+//! parsed per line.
 //! An example is HTML, which has a certain starting condition (such as
 //! `<script>` on its own line), then continues for a while, until an end
 //! condition is found (such as `</style>`).
@@ -18,8 +18,6 @@
 //! *   [Heading (setext)][crate::construct::heading_setext]
 //! *   [HTML (flow)][crate::construct::html_flow]
 //! *   [Thematic break][crate::construct::thematic_break]
-//!
-//! <!-- To do: Link to content. -->
 
 use crate::constant::TAB_SIZE;
 use crate::construct::{
@@ -153,45 +151,43 @@ pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResu
         thematic_break,
         definition,
         heading_setext,
-        |ok| Box::new(if ok { after } else { content_before }),
+        |ok| Box::new(if ok { after } else { paragraph_before }),
     )(tokenizer, code)
 }
 
-/// Before content.
+/// Before a paragraph.
 ///
 /// ```markdown
 /// |qwe
 /// ```
-///
-// To do: we don’t need content anymore in `micromark-rs` it seems?
-fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn paragraph_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             after(tokenizer, code)
         }
         _ => {
-            tokenizer.enter(TokenType::Content);
-            tokenizer.enter(TokenType::ChunkContent);
-            content(tokenizer, code, tokenizer.events.len() - 1)
+            tokenizer.enter(TokenType::Paragraph);
+            tokenizer.enter(TokenType::ChunkText);
+            paragraph_inside(tokenizer, code, tokenizer.events.len() - 1)
         }
     }
 }
 
-/// In content.
+/// In a paragraph.
 ///
 /// ```markdown
 /// al|pha
 /// ```
-fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
+fn paragraph_inside(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
     match code {
-        Code::None => content_end(tokenizer, code),
+        Code::None => paragraph_end(tokenizer, code),
         Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.check(continuation_construct, move |ok| {
                 Box::new(move |t, c| {
                     if ok {
-                        content_continue(t, c, previous)
+                        paragraph_continue(t, c, previous)
                     } else {
-                        content_end(t, c)
+                        paragraph_end(t, c)
                     }
                 })
             })(tokenizer, code)
@@ -199,7 +195,7 @@ fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnRes
         _ => {
             tokenizer.consume(code);
             (
-                State::Fn(Box::new(move |t, c| content(t, c, previous))),
+                State::Fn(Box::new(move |t, c| paragraph_inside(t, c, previous))),
                 None,
             )
         }
@@ -248,9 +244,9 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) ->
     }
 
     match code {
-        // Blank lines are not allowed in content.
+        // Blank lines are not allowed in a paragraph.
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
-        // To do: If code is disabled, indented lines are part of the content.
+        // To do: If code is disabled, indented lines are part of the paragraph.
         _ if prefix >= TAB_SIZE => (State::Ok, None),
         // To do: definitions, setext headings, etc?
         _ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
@@ -264,21 +260,25 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) ->
     }
 }
 
-fn content_continue(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {
+fn paragraph_continue(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    previous_index: usize,
+) -> StateFnResult {
     tokenizer.consume(code);
-    tokenizer.exit(TokenType::ChunkContent);
-    tokenizer.enter(TokenType::ChunkContent);
+    tokenizer.exit(TokenType::ChunkText);
+    tokenizer.enter(TokenType::ChunkText);
     let next_index = tokenizer.events.len() - 1;
     tokenizer.events[previous_index].next = Some(next_index);
     tokenizer.events[next_index].previous = Some(previous_index);
     (
-        State::Fn(Box::new(move |t, c| content(t, c, next_index))),
+        State::Fn(Box::new(move |t, c| paragraph_inside(t, c, next_index))),
         None,
     )
 }
 
-fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.exit(TokenType::ChunkContent);
-    tokenizer.exit(TokenType::Content);
+fn paragraph_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::ChunkText);
+    tokenizer.exit(TokenType::Paragraph);
     after(tokenizer, code)
 }
diff --git a/src/content/mod.rs b/src/content/mod.rs
index d13df79..395e41b 100644
--- a/src/content/mod.rs
+++ b/src/content/mod.rs
@@ -1,7 +1,6 @@
 //! Content types found in markdown.
 
 #[allow(clippy::module_inception)]
-pub mod content;
 pub mod flow;
 pub mod string;
 pub mod text;
diff --git a/src/content/string.rs b/src/content/string.rs
index e8134c4..f591cd7 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -1,6 +1,6 @@
 //! The string content type.
 //!
-//! **String** is a limited **text** like content type which only allows
+//! **String** is a limited [text][]-like content type which only allows
 //! character escapes and character references.
 //! It exists in things such as identifiers (media references, definitions),
 //! titles, URLs, code (fenced) info and meta parts.
@@ -9,6 +9,8 @@
 //!
 //! *   [Character escape][crate::construct::character_escape]
 //! *   [Character reference][crate::construct::character_reference]
+//!
+//! [text]: crate::content::text
 
 use crate::construct::{
     character_escape::start as character_escape, character_reference::start as character_reference,
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 71a84e1..4a29a01 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -1,4 +1,4 @@
-use crate::content::{content::start as content, string::start as string, text::start as text};
+use crate::content::{string::start as string, text::start as text};
 use crate::tokenizer::{
     Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer,
 };
@@ -20,8 +20,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
 
         // Find each first opening chunk.
         if (event.token_type == TokenType::ChunkString
-                || event.token_type == TokenType::ChunkText
-                || event.token_type == TokenType::ChunkContent) &&
+                || event.token_type == TokenType::ChunkText) &&
             event.event_type == EventType::Enter &&
             // No need to enter linked events again.
             event.previous == None
@@ -33,9 +32,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
             let mut tokenizer = Tokenizer::new(event.point.clone(), event.index);
             // Substate.
             let mut result: StateFnResult = (
-                State::Fn(Box::new(if event.token_type == TokenType::ChunkContent {
-                    content
-                } else if event.token_type == TokenType::ChunkString {
+                State::Fn(Box::new(if event.token_type == TokenType::ChunkString {
                     string
                 } else {
                     text
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9884986..c1bb61b 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -48,7 +48,6 @@ pub enum TokenType {
     CodeTextSequence,
     CodeTextLineEnding,
     CodeTextData,
-    Content,
     Data,
     Definition,
     DefinitionLabel,
@@ -86,7 +85,6 @@ pub enum TokenType {
     Whitespace,
 
     // Chunks are tokenizer, but unraveled by `subtokenize`.
-    ChunkContent,
     ChunkString,
     ChunkText,
 }
-- 
cgit