author    Titus Wormer <tituswormer@gmail.com>  2022-06-20 12:59:06 +0200
committer Titus Wormer <tituswormer@gmail.com>  2022-06-20 12:59:06 +0200
commit    262aec96cece3e9dd55828397b8ec859e7cff606 (patch)
tree      0024565a50902b67a2cc3088bd5c37c705116096
parent    5bf187fab2df0122e51523d1c731e457ab366121 (diff)
Remove unneeded `content` content type
-rw-r--r--  readme.md                      25
-rw-r--r--  src/compiler.rs                 2
-rw-r--r--  src/constant.rs                 5
-rw-r--r--  src/construct/code_fenced.rs    2
-rw-r--r--  src/construct/html_text.rs      6
-rw-r--r--  src/construct/mod.rs            1
-rw-r--r--  src/content/content.rs         70
-rw-r--r--  src/content/flow.rs            54
-rw-r--r--  src/content/mod.rs              1
-rw-r--r--  src/content/string.rs           4
-rw-r--r--  src/subtokenize.rs              9
-rw-r--r--  src/tokenizer.rs                2
12 files changed, 50 insertions, 131 deletions
diff --git a/readme.md b/readme.md
index 0cd5bd2..082dd4c 100644
--- a/readme.md
+++ b/readme.md
@@ -46,9 +46,9 @@ cargo doc --document-private-items
### Some major obstacles
-- [ ] (8) Can content (and to a lesser extent string and text) operate more
- performantly than checking whether other flow constructs start a line,
- before exiting and actually attempting flow constructs?
+- [ ] (8) Can paragraphs (and to a lesser extent string data and text data)
+ operate more performantly than checking whether other flow constructs
+ start a line, before exiting and actually attempting flow constructs?
- [ ] (5) Figure out sharing definition and identifiers, and references before
definitions
- [ ] (3) Interrupting: sometimes flow can or cannot start depending on the
@@ -57,8 +57,8 @@ cargo doc --document-private-items
subtokenization is solved
- [ ] (3) Concrete constructs: HTML or code (fenced) cannot be “pierced” into by
containers
-- [ ] (3) Lazy lines, in containers, in flow and content in a paragraph, a line
- does not need to be indented
+- [ ] (3) Lazy lines, in containers, in flow in a paragraph, a line does not
+ need to be indented
- [ ] (5) There’s a lot of rust-related choosing whether to pass (mutable)
references or whatever around that should be refactored
- [ ] (5) Figure out extensions
@@ -66,11 +66,9 @@ cargo doc --document-private-items
### Small things
-- [ ] (1) Remove `content` content type, as it is no longer needed
- [ ] (1) Connect `ChunkString` in label, destination, title
- [ ] (1) Add support for line endings in `string`
- [ ] (1) Add docs to subtokenize
-- [ ] (1) Add module docs to content
- [ ] (1) Add module docs to parser
- [ ] (1) Add overview docs on how everything works
- [ ] (1) Move safe protocols to constants
@@ -109,8 +107,7 @@ cargo doc --document-private-items
- [x] character reference
- [x] code (fenced)
- [x] code (indented)
-- [x] (1) code (text)
-- [ ] (3) content
+- [x] code (text)
- [x] definition
- [x] hard break (escape)
- [x] hard break (trailing)
@@ -134,14 +131,12 @@ cargo doc --document-private-items
- [x] blank line
- [x] code (fenced)
- [x] code (indented)
- - [x] content
- [x] definition
- [x] heading (atx)
- [x] heading (setext)
- [x] html (flow)
- - [x] thematic break
-- [x] content
- [x] paragraph
+ - [x] thematic break
- [ ] (5) text
- [ ] attention (strong, emphasis) (text)
- [x] autolink
@@ -170,10 +165,10 @@ cargo doc --document-private-items
- [x] (1) Add examples to `CompileOptions` docs
- [x] (3) Fix deep subtokenization
- [x] (1) text in heading
-- [x] (1) Setext headings: can they be solved in content, or do they have to be
- solved in flow somehow
+- [x] (1) Setext headings, solved in flow
- [x] (1) Add docs to partials
- [x] (1) Remove all `pub fn`s from constructs, except for start
+- [x] (1) Remove `content` content type, as it is no longer needed
### Extensions
@@ -188,7 +183,7 @@ important.
— [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter)
- [ ] (3) autolink literal (GFM) (text)
— [`micromark-extension-gfm-autolink-literal`](https://github.com/micromark/micromark-extension-gfm-autolink-literal)
-- [ ] (3) footnote (GFM) (content, text)
+- [ ] (3) footnote (GFM) (flow, text)
— [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
- [ ] (3) strikethrough (GFM) (text)
— [`micromark-extension-gfm-strikethrough`](https://github.com/micromark/micromark-extension-gfm-strikethrough)
diff --git a/src/compiler.rs b/src/compiler.rs
index be5d0fe..59fcd22 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -126,7 +126,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> String {
| TokenType::CodeTextData
| TokenType::CodeTextLineEnding
| TokenType::CodeTextSequence
- | TokenType::Content
| TokenType::Data
| TokenType::DefinitionLabel
| TokenType::DefinitionLabelMarker
@@ -213,7 +212,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> String {
| TokenType::CodeFencedFenceWhitespace
| TokenType::CodeIndentedPrefixWhitespace
| TokenType::CodeTextSequence
- | TokenType::Content
| TokenType::DefinitionLabel
| TokenType::DefinitionLabelMarker
| TokenType::DefinitionLabelData
diff --git a/src/constant.rs b/src/constant.rs
index 1f833c2..e7594b9 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -103,8 +103,9 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;
/// List of HTML tag names that form the **basic** production of
/// [HTML (flow)][html_flow].
///
-/// The **basic** production allows interleaving HTML and markdown with blank lines
-/// and allows flow (block) elements to interrupt content.
+/// The **basic** production allows interleaving HTML and markdown with blank
+/// lines and allows flow (block) elements to interrupt definitions, paragraphs,
+/// and heading (setext).
/// Tag name matching must be performed insensitive to case, and thus this list
/// includes lowercase tag names.
///
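The case rule in that doc comment reduces to lowercasing a candidate tag name before comparing it against the lowercase list. A minimal sketch of that check; the three-name `HTML_BLOCK_NAMES` list here is a stand-in for illustration, not the crate's actual constant:

```rust
// Illustrative lowercase tag-name list; the real constant is much longer.
const HTML_BLOCK_NAMES: [&str; 3] = ["address", "article", "aside"];

/// Check a tag name against the list, insensitive to case, by lowercasing
/// the candidate first (the list itself is already lowercase).
fn is_basic_tag_name(candidate: &str) -> bool {
    let lower = candidate.to_ascii_lowercase();
    HTML_BLOCK_NAMES.iter().any(|name| *name == lower)
}

fn main() {
    // Matching is case-insensitive, so mixed-case input still matches.
    assert!(is_basic_tag_name("ARTICLE"));
    assert!(!is_basic_tag_name("script"));
}
```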
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 12c8bd6..28ac20b 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -27,7 +27,7 @@
//! The above grammar does not show how whitespace is handled.
//! To parse code (fenced), let `X` be the number of whitespace characters
//! before the opening fence sequence.
-//! Each line of content is then allowed (not required) to be indented with up
+//! Each line of text is then allowed (not required) to be indented with up
//! to `X` spaces or tabs, which are then ignored as an indent instead of being
//! considered as part of the code.
//! This indent does not affect the closing fence.
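The indent rule described in that doc comment can be stated as plain code: strip at most `X` leading spaces or tabs from each line, where `X` is the indent of the opening fence. A standalone sketch of the rule, not the tokenizer's actual state-machine implementation:

```rust
/// Strip up to `fence_indent` leading spaces or tabs from a line inside a
/// fenced code block; the stripped characters are ignored as indent rather
/// than kept as part of the code.
fn strip_fence_indent(line: &str, fence_indent: usize) -> &str {
    let bytes = line.as_bytes();
    let mut index = 0;
    while index < fence_indent
        && index < bytes.len()
        && (bytes[index] == b' ' || bytes[index] == b'\t')
    {
        index += 1;
    }
    &line[index..]
}

fn main() {
    // The opening fence was indented with 2 spaces: up to 2 are removed.
    assert_eq!(strip_fence_indent("    code", 2), "  code");
    // A line with less indent than the fence loses only what it has.
    assert_eq!(strip_fence_indent(" code", 2), "code");
}
```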
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index d50a8ce..93b4b62 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -632,7 +632,7 @@ fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// At an allowed line ending.
///
-/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > **Note**: we can’t have blank lines in text, so no need to worry about
/// > empty tokens.
///
/// ```markdown
@@ -661,7 +661,7 @@ fn at_line_ending(
/// After a line ending.
///
-/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > **Note**: we can’t have blank lines in text, so no need to worry about
/// > empty tokens.
///
/// ```markdown
@@ -681,7 +681,7 @@ fn after_line_ending(
/// After a line ending, after indent.
///
-/// > **Note**: we can’t have blank lines in content, so no need to worry about
+/// > **Note**: we can’t have blank lines in text, so no need to worry about
/// > empty tokens.
///
/// ```markdown
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index a5e95bc..3195205 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -24,7 +24,6 @@
//! * [code (fenced)][code_fenced]
//! * [code (indented)][code_indented]
//! * [code (text)][code_text]
-//! * content
//! * [definition][]
//! * [hard break (escape)][hard_break_escape]
//! * [hard break (trailing)][hard_break_trailing]
diff --git a/src/content/content.rs b/src/content/content.rs
deleted file mode 100644
index 86bc290..0000000
--- a/src/content/content.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-//! The `content`, ahum, content type.
-//!
-//! **Content** is zero or more definitions, and then zero or one paragraph.
-//! It’s a weird one, and needed to make certain edge cases around definitions
-//! spec compliant.
-//! Definitions are unlike other things in markdown, in that they behave like
-//! **text** in that they can contain arbitrary line endings, but *have* to end
-//! at a line ending.
-//! If they end in something else, the whole definition instead is seen as a
-//! paragraph.
-//!
-//! The constructs found in content are:
-//!
-//! * Definition
-//! * Paragraph
-
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-
-/// Before a paragraph.
-///
-/// ```markdown
-/// |asd
-/// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- unreachable!("expected non-eol/eof");
- }
- _ => {
- tokenizer.enter(TokenType::Paragraph);
- tokenizer.enter(TokenType::ChunkText);
- inside(tokenizer, code, tokenizer.events.len() - 1)
- }
- }
-}
-
-/// In a line in a paragraph.
-///
-/// ```markdown
-/// |\&
-/// |qwe
-/// ```
-fn inside(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {
- match code {
- Code::None => {
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.exit(TokenType::Paragraph);
- (State::Ok, None)
- }
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.enter(TokenType::ChunkText);
- let next_index = tokenizer.events.len() - 1;
- tokenizer.events[previous_index].next = Some(next_index);
- tokenizer.events[next_index].previous = Some(previous_index);
- (
- State::Fn(Box::new(move |t, c| inside(t, c, next_index))),
- None,
- )
- }
- _ => {
- tokenizer.consume(code);
- (
- State::Fn(Box::new(move |t, c| inside(t, c, previous_index))),
- None,
- )
- }
- }
-}
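The `previous`/`next` bookkeeping in the deleted `inside` state is the linked-chunk pattern that flow.rs keeps using for paragraphs: each line becomes its own chunk, doubly linked to its neighbors so `subtokenize` can later stitch the chain back into one stream. A reduced sketch of just the linking step, with simplified stand-in types rather than the crate's real `Event`:

```rust
/// Simplified stand-in for the crate's `Event`, for illustration only.
struct Event {
    previous: Option<usize>,
    next: Option<usize>,
}

/// Wire two chunk events together, as the deleted `inside` state (and
/// `paragraph_continue` in flow.rs) does after entering a new chunk.
fn link(events: &mut [Event], previous_index: usize, next_index: usize) {
    events[previous_index].next = Some(next_index);
    events[next_index].previous = Some(previous_index);
}

fn main() {
    let mut events = vec![
        Event { previous: None, next: None }, // enter, first line's chunk
        Event { previous: None, next: None }, // enter, second line's chunk
    ];
    link(&mut events, 0, 1);
    assert_eq!(events[0].next, Some(1));
    assert_eq!(events[1].previous, Some(0));
}
```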
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 3fab523..58be61d 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -1,7 +1,7 @@
//! The flow content type.
//!
-//! **Flow** represents the sections, such as headings, code, and content, which
-//! is parsed per line.
+//! **Flow** represents the sections, such as headings and code, which are
+//! parsed per line.
//! An example is HTML, which has a certain starting condition (such as
//! `<script>` on its own line), then continues for a while, until an end
//! condition is found (such as `</style>`).
@@ -18,8 +18,6 @@
//! * [Heading (setext)][crate::construct::heading_setext]
//! * [HTML (flow)][crate::construct::html_flow]
//! * [Thematic break][crate::construct::thematic_break]
-//!
-//! <!-- To do: Link to content. -->
use crate::constant::TAB_SIZE;
use crate::construct::{
@@ -153,45 +151,43 @@ pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
thematic_break,
definition,
heading_setext,
- |ok| Box::new(if ok { after } else { content_before }),
+ |ok| Box::new(if ok { after } else { paragraph_before }),
)(tokenizer, code)
}
-/// Before content.
+/// Before a paragraph.
///
/// ```markdown
/// |qwe
/// ```
-///
-// To do: we don’t need content anymore in `micromark-rs` it seems?
-fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn paragraph_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
after(tokenizer, code)
}
_ => {
- tokenizer.enter(TokenType::Content);
- tokenizer.enter(TokenType::ChunkContent);
- content(tokenizer, code, tokenizer.events.len() - 1)
+ tokenizer.enter(TokenType::Paragraph);
+ tokenizer.enter(TokenType::ChunkText);
+ paragraph_inside(tokenizer, code, tokenizer.events.len() - 1)
}
}
}
-/// In content.
+/// In a paragraph.
///
/// ```markdown
/// al|pha
/// ```
-fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
+fn paragraph_inside(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
match code {
- Code::None => content_end(tokenizer, code),
+ Code::None => paragraph_end(tokenizer, code),
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.check(continuation_construct, move |ok| {
Box::new(move |t, c| {
if ok {
- content_continue(t, c, previous)
+ paragraph_continue(t, c, previous)
} else {
- content_end(t, c)
+ paragraph_end(t, c)
}
})
})(tokenizer, code)
@@ -199,7 +195,7 @@ fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
_ => {
tokenizer.consume(code);
(
- State::Fn(Box::new(move |t, c| content(t, c, previous))),
+ State::Fn(Box::new(move |t, c| paragraph_inside(t, c, previous))),
None,
)
}
@@ -248,9 +244,9 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
match code {
- // Blank lines are not allowed in content.
+    // Blank lines are not allowed in paragraphs.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
- // To do: If code is disabled, indented lines are part of the content.
+ // To do: If code is disabled, indented lines are part of the paragraph.
_ if prefix >= TAB_SIZE => (State::Ok, None),
// To do: definitions, setext headings, etc?
_ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
@@ -264,21 +260,25 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-fn content_continue(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {
+fn paragraph_continue(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ previous_index: usize,
+) -> StateFnResult {
tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkContent);
- tokenizer.enter(TokenType::ChunkContent);
+ tokenizer.exit(TokenType::ChunkText);
+ tokenizer.enter(TokenType::ChunkText);
let next_index = tokenizer.events.len() - 1;
tokenizer.events[previous_index].next = Some(next_index);
tokenizer.events[next_index].previous = Some(previous_index);
(
- State::Fn(Box::new(move |t, c| content(t, c, next_index))),
+ State::Fn(Box::new(move |t, c| paragraph_inside(t, c, next_index))),
None,
)
}
-fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.exit(TokenType::ChunkContent);
- tokenizer.exit(TokenType::Content);
+fn paragraph_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.exit(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Paragraph);
after(tokenizer, code)
}
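The `continuation_construct` check above boils down to a per-line question: does the next line end the paragraph (a blank line, or the start of an interrupting flow construct) or continue it? A rough, line-based approximation of that decision; the real code works character by character and attempts the actual constructs instead of testing prefixes:

```rust
/// Rough approximation of the continuation check: `true` means the
/// paragraph continues onto `next_line`. Heuristic only.
fn paragraph_continues(next_line: &str) -> bool {
    let trimmed = next_line.trim_start();
    // Blank lines are not allowed in paragraphs.
    if trimmed.is_empty() {
        return false;
    }
    // Mirror the `attempt_2(heading_atx, thematic_break, ...)` call above:
    // either construct starting on the next line interrupts the paragraph.
    let interrupts = trimmed.starts_with('#')
        || trimmed.starts_with("***")
        || trimmed.starts_with("---")
        || trimmed.starts_with("___");
    !interrupts
}

fn main() {
    assert!(paragraph_continues("still the same paragraph"));
    assert!(!paragraph_continues(""));
    assert!(!paragraph_continues("# a heading interrupts"));
    assert!(!paragraph_continues("***"));
}
```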
diff --git a/src/content/mod.rs b/src/content/mod.rs
index d13df79..395e41b 100644
--- a/src/content/mod.rs
+++ b/src/content/mod.rs
@@ -1,7 +1,6 @@
//! Content types found in markdown.
#[allow(clippy::module_inception)]
-pub mod content;
pub mod flow;
pub mod string;
pub mod text;
diff --git a/src/content/string.rs b/src/content/string.rs
index e8134c4..f591cd7 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -1,6 +1,6 @@
//! The string content type.
//!
-//! **String** is a limited **text** like content type which only allows
+//! **String** is a limited [text][] like content type which only allows
//! character escapes and character references.
//! It exists in things such as identifiers (media references, definitions),
//! titles, URLs, code (fenced) info and meta parts.
@@ -9,6 +9,8 @@
//!
//! * [Character escape][crate::construct::character_escape]
//! * [Character reference][crate::construct::character_reference]
+//!
+//! [text]: crate::content::text
use crate::construct::{
character_escape::start as character_escape, character_reference::start as character_reference,
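What `string` accepts is deliberately tiny: a character escape, a character reference, or plain data. A hypothetical classifier showing just those three cases; `classify` is illustrative, not the crate's API, and the real parser attempts the two constructs in turn:

```rust
/// Classify the start of a `string` run into the only three things it can
/// be. Simplified: real character references also need a terminating `;`,
/// and escapes only apply to ASCII punctuation.
fn classify(input: &str) -> &'static str {
    let mut chars = input.chars();
    match chars.next() {
        Some('\\') => {
            if chars.next().map_or(false, |c| c.is_ascii_punctuation()) {
                "character escape"
            } else {
                "data"
            }
        }
        Some('&') => "possible character reference",
        _ => "data",
    }
}

fn main() {
    assert_eq!(classify("\\*"), "character escape");
    assert_eq!(classify("&amp;"), "possible character reference");
    assert_eq!(classify("alpha"), "data");
}
```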
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 71a84e1..4a29a01 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -1,4 +1,4 @@
-use crate::content::{content::start as content, string::start as string, text::start as text};
+use crate::content::{string::start as string, text::start as text};
use crate::tokenizer::{
Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer,
};
@@ -20,8 +20,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
// Find each first opening chunk.
if (event.token_type == TokenType::ChunkString
- || event.token_type == TokenType::ChunkText
- || event.token_type == TokenType::ChunkContent) &&
+ || event.token_type == TokenType::ChunkText) &&
event.event_type == EventType::Enter &&
// No need to enter linked events again.
event.previous == None
@@ -33,9 +32,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
let mut tokenizer = Tokenizer::new(event.point.clone(), event.index);
// Substate.
let mut result: StateFnResult = (
- State::Fn(Box::new(if event.token_type == TokenType::ChunkContent {
- content
- } else if event.token_type == TokenType::ChunkString {
+ State::Fn(Box::new(if event.token_type == TokenType::ChunkString {
string
} else {
text
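The loop above only starts a subtokenizer at the head of each chunk chain: an `Enter` event of a chunk token whose `previous` is `None`. Linked continuations are skipped because they are reached by following `next` from the head. A reduced sketch of that selection with stand-in types rather than the crate's real ones:

```rust
/// Illustrative stand-ins for the crate's token and event types.
#[derive(Clone, Copy, PartialEq)]
enum Kind {
    ChunkString,
    ChunkText,
    Other,
}

struct Event {
    kind: Kind,
    enter: bool,
    previous: Option<usize>,
}

/// Collect the head of each chunk chain, as the `subtokenize` loop does:
/// continuations (with `previous` set) are skipped, since the tokenizer
/// reaches them by following `next` links from the head.
fn chain_heads(events: &[Event]) -> Vec<usize> {
    events
        .iter()
        .enumerate()
        .filter(|(_, event)| {
            (event.kind == Kind::ChunkString || event.kind == Kind::ChunkText)
                && event.enter
                && event.previous.is_none()
        })
        .map(|(index, _)| index)
        .collect()
}

fn main() {
    let events = vec![
        Event { kind: Kind::ChunkText, enter: true, previous: None },
        Event { kind: Kind::Other, enter: true, previous: None },
        Event { kind: Kind::ChunkText, enter: true, previous: Some(0) },
    ];
    // Only the unlinked ChunkText at index 0 heads a chain.
    assert_eq!(chain_heads(&events), vec![0]);
}
```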
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9884986..c1bb61b 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -48,7 +48,6 @@ pub enum TokenType {
CodeTextSequence,
CodeTextLineEnding,
CodeTextData,
- Content,
Data,
Definition,
DefinitionLabel,
@@ -86,7 +85,6 @@ pub enum TokenType {
Whitespace,
    // Chunks are tokenized, but unraveled by `subtokenize`.
- ChunkContent,
ChunkString,
ChunkText,
}