about | summary | refs | log | tree | commit | diff | stats
path: root/src/construct/paragraph.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/construct/paragraph.rs')
-rw-r--r-- src/construct/paragraph.rs | 149
1 file changed, 48 insertions, 101 deletions
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index c1e7311..78fbacb 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -1,4 +1,4 @@
-//! Paragraph occurs in the [flow][] content type.
+//! Paragraph occurs in the [content][] content type.
//!
//! ## Grammar
//!
@@ -11,14 +11,15 @@
//! paragraph ::= 1*line *(eol 1*line)
//! ```
//!
-//! As this construct occurs in flow, like all flow constructs, it must be
-//! followed by an eol (line ending) or eof (end of file).
+//! This construct must be followed by an eol (line ending) or eof (end of
+//! file), like flow constructs.
//!
//! Paragraphs can contain line endings and whitespace, but they are not
//! allowed to contain blank lines, or to be blank themselves.
//!
//! The paragraph is interpreted as the [text][] content type.
-//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed.
+//! That means that [autolinks][autolink], [code (text)][raw_text], etc are
+//! allowed.
//!
//! ## HTML
//!
@@ -34,40 +35,57 @@
//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
//! * [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs)
//!
-//! [flow]: crate::construct::flow
+//! [content]: crate::construct::content
//! [text]: crate::construct::text
//! [autolink]: crate::construct::autolink
//! [raw_text]: crate::construct::raw_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
-use crate::event::{Content, Kind, Link, Name};
-use crate::resolve::Name as ResolveName;
+use crate::event::{Content, Link, Name};
use crate::state::{Name as StateName, State};
+use crate::subtokenize::link;
use crate::tokenizer::Tokenizer;
-use alloc::vec;
-/// Before paragraph.
+/// Paragraph start.
///
/// ```markdown
/// > | abc
/// ^
+/// | def
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- None | Some(b'\n') => unreachable!("unexpected eol/eof"),
- _ => {
- tokenizer.enter(Name::Paragraph);
- tokenizer.enter_link(
- Name::Data,
- Link {
- previous: None,
- next: None,
- content: Content::Text,
- },
- );
- State::Retry(StateName::ParagraphInside)
- }
+ debug_assert!(tokenizer.current.is_some());
+ tokenizer.enter(Name::Paragraph);
+ State::Retry(StateName::ParagraphLineStart)
+}
+
+/// Start of a line in a paragraph.
+///
+/// ```markdown
+/// > | abc
+/// ^
+/// > | def
+/// ^
+/// ```
+pub fn line_start(tokenizer: &mut Tokenizer) -> State {
+ debug_assert!(tokenizer.current.is_some());
+ tokenizer.enter_link(
+ Name::Data,
+ Link {
+ previous: None,
+ next: None,
+ content: Content::Text,
+ },
+ );
+
+ if tokenizer.tokenize_state.connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else {
+ tokenizer.tokenize_state.connect = true;
}
+
+ State::Retry(StateName::ParagraphInside)
}
/// In paragraph.
@@ -78,91 +96,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'\n') => {
+ None => {
+ tokenizer.tokenize_state.connect = false;
tokenizer.exit(Name::Data);
tokenizer.exit(Name::Paragraph);
- tokenizer.register_resolver_before(ResolveName::Paragraph);
- // You’d be interrupting.
- tokenizer.interrupt = true;
State::Ok
}
+ Some(b'\n') => {
+ tokenizer.consume();
+ tokenizer.exit(Name::Data);
+ State::Next(StateName::ParagraphLineStart)
+ }
_ => {
tokenizer.consume();
State::Next(StateName::ParagraphInside)
}
}
}
-
-/// Merge “`Paragraph`”s, which currently span a single line, into actual
-/// `Paragraph`s that span multiple lines.
-pub fn resolve(tokenizer: &mut Tokenizer) {
- let mut index = 0;
-
- while index < tokenizer.events.len() {
- let event = &tokenizer.events[index];
-
- if event.kind == Kind::Enter && event.name == Name::Paragraph {
- // Exit:Paragraph
- let mut exit_index = index + 3;
-
- loop {
- let mut enter_index = exit_index + 1;
-
- if enter_index == tokenizer.events.len()
- || tokenizer.events[enter_index].name != Name::LineEnding
- {
- break;
- }
-
- enter_index += 2;
-
- while enter_index < tokenizer.events.len() {
- let event = &tokenizer.events[enter_index];
-
- if event.name != Name::SpaceOrTab
- && event.name != Name::BlockQuotePrefix
- && event.name != Name::BlockQuoteMarker
- {
- break;
- }
-
- enter_index += 1;
- }
-
- if enter_index == tokenizer.events.len()
- || tokenizer.events[enter_index].name != Name::Paragraph
- {
- break;
- }
-
- // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding.
- tokenizer.map.add(exit_index, 3, vec![]);
-
- // Remove Enter:Paragraph.
- tokenizer.map.add(enter_index, 1, vec![]);
-
- // Add Exit:LineEnding position info to Exit:Data.
- tokenizer.events[exit_index - 1].point =
- tokenizer.events[exit_index + 2].point.clone();
-
- // Link Enter:Data on the previous line to Enter:Data on this line.
- if let Some(link) = &mut tokenizer.events[exit_index - 2].link {
- link.next = Some(enter_index + 1);
- }
- if let Some(link) = &mut tokenizer.events[enter_index + 1].link {
- link.previous = Some(exit_index - 2);
- }
-
- // Potential next start.
- exit_index = enter_index + 3;
- }
-
- // Move to `Exit:Paragraph`.
- index = exit_index;
- }
-
- index += 1;
- }
-
- tokenizer.map.consume(&mut tokenizer.events);
-}