diff options
Diffstat (limited to 'src/construct/paragraph.rs')
-rw-r--r-- | src/construct/paragraph.rs | 149 |
1 files changed, 48 insertions, 101 deletions
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index c1e7311..78fbacb 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -1,4 +1,4 @@ -//! Paragraph occurs in the [flow][] content type. +//! Paragraph occurs in the [content][] content type. //! //! ## Grammar //! @@ -11,14 +11,15 @@ //! paragraph ::= 1*line *(eol 1*line) //! ``` //! -//! As this construct occurs in flow, like all flow constructs, it must be -//! followed by an eol (line ending) or eof (end of file). +//! This construct must be followed by an eol (line ending) or eof (end of +//! file), like flow constructs. //! //! Paragraphs can contain line endings and whitespace, but they are not //! allowed to contain blank lines, or to be blank themselves. //! //! The paragraph is interpreted as the [text][] content type. -//! That means that [autolinks][autolink], [code (text)][raw_text], etc are allowed. +//! That means that [autolinks][autolink], [code (text)][raw_text], etc are +//! allowed. //! //! ## HTML //! @@ -34,40 +35,57 @@ //! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js) //! * [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs) //! -//! [flow]: crate::construct::flow +//! [content]: crate::construct::content //! [text]: crate::construct::text //! [autolink]: crate::construct::autolink //! [raw_text]: crate::construct::raw_text //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element -use crate::event::{Content, Kind, Link, Name}; -use crate::resolve::Name as ResolveName; +use crate::event::{Content, Link, Name}; use crate::state::{Name as StateName, State}; +use crate::subtokenize::link; use crate::tokenizer::Tokenizer; -use alloc::vec; -/// Before paragraph. +/// Paragraph start. /// /// ```markdown /// > | abc /// ^ +/// | def /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { - match tokenizer.current { - None | Some(b'\n') => unreachable!("unexpected eol/eof"), - _ => { - tokenizer.enter(Name::Paragraph); - tokenizer.enter_link( - Name::Data, - Link { - previous: None, - next: None, - content: Content::Text, - }, - ); - State::Retry(StateName::ParagraphInside) - } + debug_assert!(tokenizer.current.is_some()); + tokenizer.enter(Name::Paragraph); + State::Retry(StateName::ParagraphLineStart) +} + +/// Start of a line in a paragraph. +/// +/// ```markdown +/// > | abc +/// ^ +/// > | def +/// ^ +/// ``` +pub fn line_start(tokenizer: &mut Tokenizer) -> State { + debug_assert!(tokenizer.current.is_some()); + tokenizer.enter_link( + Name::Data, + Link { + previous: None, + next: None, + content: Content::Text, + }, + ); + + if tokenizer.tokenize_state.connect { + let index = tokenizer.events.len() - 1; + link(&mut tokenizer.events, index); + } else { + tokenizer.tokenize_state.connect = true; } + + State::Retry(StateName::ParagraphInside) } /// In paragraph. @@ -78,91 +96,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn inside(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'\n') => { + None => { + tokenizer.tokenize_state.connect = false; tokenizer.exit(Name::Data); tokenizer.exit(Name::Paragraph); - tokenizer.register_resolver_before(ResolveName::Paragraph); - // You’d be interrupting. - tokenizer.interrupt = true; State::Ok } + Some(b'\n') => { + tokenizer.consume(); + tokenizer.exit(Name::Data); + State::Next(StateName::ParagraphLineStart) + } _ => { tokenizer.consume(); State::Next(StateName::ParagraphInside) } } } - -/// Merge “`Paragraph`”s, which currently span a single line, into actual -/// `Paragraph`s that span multiple lines. -pub fn resolve(tokenizer: &mut Tokenizer) { - let mut index = 0; - - while index < tokenizer.events.len() { - let event = &tokenizer.events[index]; - - if event.kind == Kind::Enter && event.name == Name::Paragraph { - // Exit:Paragraph - let mut exit_index = index + 3; - - loop { - let mut enter_index = exit_index + 1; - - if enter_index == tokenizer.events.len() - || tokenizer.events[enter_index].name != Name::LineEnding - { - break; - } - - enter_index += 2; - - while enter_index < tokenizer.events.len() { - let event = &tokenizer.events[enter_index]; - - if event.name != Name::SpaceOrTab - && event.name != Name::BlockQuotePrefix - && event.name != Name::BlockQuoteMarker - { - break; - } - - enter_index += 1; - } - - if enter_index == tokenizer.events.len() - || tokenizer.events[enter_index].name != Name::Paragraph - { - break; - } - - // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding. - tokenizer.map.add(exit_index, 3, vec![]); - - // Remove Enter:Paragraph. - tokenizer.map.add(enter_index, 1, vec![]); - - // Add Exit:LineEnding position info to Exit:Data. - tokenizer.events[exit_index - 1].point = - tokenizer.events[exit_index + 2].point.clone(); - - // Link Enter:Data on the previous line to Enter:Data on this line. - if let Some(link) = &mut tokenizer.events[exit_index - 2].link { - link.next = Some(enter_index + 1); - } - if let Some(link) = &mut tokenizer.events[enter_index + 1].link { - link.previous = Some(exit_index - 2); - } - - // Potential next start. - exit_index = enter_index + 3; - } - - // Move to `Exit:Paragraph`. - index = exit_index; - } - - index += 1; - } - - tokenizer.map.consume(&mut tokenizer.events); -} |