aboutsummaryrefslogtreecommitdiffstats
path: root/src/content
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/content/content.rs70
-rw-r--r--src/content/flow.rs54
-rw-r--r--src/content/mod.rs1
-rw-r--r--src/content/string.rs4
4 files changed, 30 insertions, 99 deletions
diff --git a/src/content/content.rs b/src/content/content.rs
deleted file mode 100644
index 86bc290..0000000
--- a/src/content/content.rs
+++ /dev/null
@@ -1,70 +0,0 @@
-//! The `content`, ahum, content type.
-//!
-//! **Content** is zero or more definitions, and then zero or one paragraph.
-//! It’s a weird one, and needed to make certain edge cases around definitions
-//! spec compliant.
-//! Definitions are unlike other things in markdown, in that they behave like
-//! **text** in that they can contain arbitrary line endings, but *have* to end
-//! at a line ending.
-//! If they end in something else, the whole definition instead is seen as a
-//! paragraph.
-//!
-//! The constructs found in content are:
-//!
-//! * Definition
-//! * Paragraph
-
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-
-/// Before a paragraph.
-///
-/// ```markdown
-/// |asd
-/// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- unreachable!("expected non-eol/eof");
- }
- _ => {
- tokenizer.enter(TokenType::Paragraph);
- tokenizer.enter(TokenType::ChunkText);
- inside(tokenizer, code, tokenizer.events.len() - 1)
- }
- }
-}
-
-/// In a line in a paragraph.
-///
-/// ```markdown
-/// |\&
-/// |qwe
-/// ```
-fn inside(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {
- match code {
- Code::None => {
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.exit(TokenType::Paragraph);
- (State::Ok, None)
- }
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.enter(TokenType::ChunkText);
- let next_index = tokenizer.events.len() - 1;
- tokenizer.events[previous_index].next = Some(next_index);
- tokenizer.events[next_index].previous = Some(previous_index);
- (
- State::Fn(Box::new(move |t, c| inside(t, c, next_index))),
- None,
- )
- }
- _ => {
- tokenizer.consume(code);
- (
- State::Fn(Box::new(move |t, c| inside(t, c, previous_index))),
- None,
- )
- }
- }
-}
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 3fab523..58be61d 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -1,7 +1,7 @@
//! The flow content type.
//!
-//! **Flow** represents the sections, such as headings, code, and content, which
-//! is parsed per line.
+//! **Flow** represents the sections, such as headings and code, which are
+//! parsed per line.
//! An example is HTML, which has a certain starting condition (such as
//! `<script>` on its own line), then continues for a while, until an end
//! condition is found (such as `</style>`).
@@ -18,8 +18,6 @@
//! * [Heading (setext)][crate::construct::heading_setext]
//! * [HTML (flow)][crate::construct::html_flow]
//! * [Thematic break][crate::construct::thematic_break]
-//!
-//! <!-- To do: Link to content. -->
use crate::constant::TAB_SIZE;
use crate::construct::{
@@ -153,45 +151,43 @@ pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResu
thematic_break,
definition,
heading_setext,
- |ok| Box::new(if ok { after } else { content_before }),
+ |ok| Box::new(if ok { after } else { paragraph_before }),
)(tokenizer, code)
}
-/// Before content.
+/// Before a paragraph.
///
/// ```markdown
/// |qwe
/// ```
-///
-// To do: we don’t need content anymore in `micromark-rs` it seems?
-fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn paragraph_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
after(tokenizer, code)
}
_ => {
- tokenizer.enter(TokenType::Content);
- tokenizer.enter(TokenType::ChunkContent);
- content(tokenizer, code, tokenizer.events.len() - 1)
+ tokenizer.enter(TokenType::Paragraph);
+ tokenizer.enter(TokenType::ChunkText);
+ paragraph_inside(tokenizer, code, tokenizer.events.len() - 1)
}
}
}
-/// In content.
+/// In a paragraph.
///
/// ```markdown
/// al|pha
/// ```
-fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
+fn paragraph_inside(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
match code {
- Code::None => content_end(tokenizer, code),
+ Code::None => paragraph_end(tokenizer, code),
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.check(continuation_construct, move |ok| {
Box::new(move |t, c| {
if ok {
- content_continue(t, c, previous)
+ paragraph_continue(t, c, previous)
} else {
- content_end(t, c)
+ paragraph_end(t, c)
}
})
})(tokenizer, code)
@@ -199,7 +195,7 @@ fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnRes
_ => {
tokenizer.consume(code);
(
- State::Fn(Box::new(move |t, c| content(t, c, previous))),
+ State::Fn(Box::new(move |t, c| paragraph_inside(t, c, previous))),
None,
)
}
@@ -248,9 +244,9 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) ->
}
match code {
- // Blank lines are not allowed in content.
+    // Blank lines are not allowed in a paragraph.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
- // To do: If code is disabled, indented lines are part of the content.
+ // To do: If code is disabled, indented lines are part of the paragraph.
_ if prefix >= TAB_SIZE => (State::Ok, None),
// To do: definitions, setext headings, etc?
_ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
@@ -264,21 +260,25 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) ->
}
}
-fn content_continue(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {
+fn paragraph_continue(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ previous_index: usize,
+) -> StateFnResult {
tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkContent);
- tokenizer.enter(TokenType::ChunkContent);
+ tokenizer.exit(TokenType::ChunkText);
+ tokenizer.enter(TokenType::ChunkText);
let next_index = tokenizer.events.len() - 1;
tokenizer.events[previous_index].next = Some(next_index);
tokenizer.events[next_index].previous = Some(previous_index);
(
- State::Fn(Box::new(move |t, c| content(t, c, next_index))),
+ State::Fn(Box::new(move |t, c| paragraph_inside(t, c, next_index))),
None,
)
}
-fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.exit(TokenType::ChunkContent);
- tokenizer.exit(TokenType::Content);
+fn paragraph_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.exit(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Paragraph);
after(tokenizer, code)
}
diff --git a/src/content/mod.rs b/src/content/mod.rs
index d13df79..395e41b 100644
--- a/src/content/mod.rs
+++ b/src/content/mod.rs
@@ -1,7 +1,6 @@
//! Content types found in markdown.
#[allow(clippy::module_inception)]
-pub mod content;
pub mod flow;
pub mod string;
pub mod text;
diff --git a/src/content/string.rs b/src/content/string.rs
index e8134c4..f591cd7 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -1,6 +1,6 @@
//! The string content type.
//!
-//! **String** is a limited **text** like content type which only allows
+//! **String** is a limited [text][] like content type which only allows
//! character escapes and character references.
//! It exists in things such as identifiers (media references, definitions),
//! titles, URLs, code (fenced) info and meta parts.
@@ -9,6 +9,8 @@
//!
//! * [Character escape][crate::construct::character_escape]
//! * [Character reference][crate::construct::character_reference]
+//!
+//! [text]: crate::content::text
use crate::construct::{
character_escape::start as character_escape, character_reference::start as character_reference,