//! A paragraph is a construct that occurs in the [flow][] content type.
//! It’s formed with the following BNF:
//! ```bnf
//! ; Restriction: lines cannot start other flow constructs.
//! ; Restriction: lines cannot be blank.
//! paragraph ::= 1*line *( eol 1*line )
//! ```
//! Paragraphs in markdown relate to the `<p>` element in HTML.
//! See [*§ 4.4.1 The `p` element* in the HTML spec][html] for more info.
//! Paragraphs can contain line endings and whitespace, but they are not
//! allowed to contain blank lines, or to be blank themselves.
//! The paragraph is interpreted as the [text][] content type.
//! That means that [autolinks][autolink], [code (text)][code_text], etc., are allowed.
//! ## References
//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
//! * [*§ 4.8 Paragraphs* in `CommonMark`](https://spec.commonmark.org/0.30/#paragraphs)
//! [flow]: crate::content::flow
//! [text]: crate::content::text
//! [autolink]: crate::construct::autolink
//! [code_text]: crate::construct::code_text
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
use crate::constant::TAB_SIZE;
use crate::construct::{
code_fenced::start as code_fenced, heading_atx::start as heading_atx,
html_flow::start as html_flow, partial_whitespace::start as whitespace,
thematic_break::start as thematic_break,
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
use crate::util::span::from_exit_event;
/// Before a paragraph.
///
/// Entry state of the construct: any code that is not an eol or eof is
/// passed straight on to `inside`.
///
/// # Panics
///
/// Panics (through `unreachable!`) when `code` is `Code::None`,
/// `Code::CarriageReturnLineFeed`, or a `\n` / `\r` character: a paragraph
/// is not expected to be started at an eol or eof.
///
/// ```markdown
/// |qwe
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
// Eol or eof: treated as a bug in the caller, not as a recoverable state.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
unreachable!("unexpected eol/eof at start of paragraph")
// Any other code is content: handle it in `inside`.
_ => {
inside(tokenizer, code)
/// In a paragraph.
///
/// Dispatches on the current code:
///
/// * eof (`Code::None`): the paragraph is finished, see `end`
/// * eol: `interrupt` is run through `tokenizer.check`; when it reports ok
///   the paragraph goes on at `at_line_ending`, otherwise it stops at `end`
/// * anything else: stay in this state for the next code
///
/// ```markdown
/// al|pha
/// ```
fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
// Eof: nothing more can follow.
Code::None => end(tokenizer, code),
// Eol: find out whether the next line continues or interrupts the paragraph.
// NOTE(review): presumably `check` runs `interrupt` as a lookahead only — confirm against `Tokenizer::check`.
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer
.check(interrupt, |ok| {
// ok: the next line does not interrupt, so continue past the eol;
// not ok: the paragraph ends before this eol.
Box::new(if ok { at_line_ending } else { end })
})(tokenizer, code),
// Regular content: ask to be called again with the next code.
_ => {
(State::Fn(Box::new(inside)), None)
/// At a line ending, not interrupting.
///
/// Links the last event in `tokenizer.events` with the event two positions
/// before it: the earlier one gets `next` set to the later one, and the later
/// one gets `previous` set to the earlier one.
/// Afterwards the state machine goes back to `inside`.
///
/// NOTE(review): presumably the two linked events are the enter events of
/// consecutive text chunks; the statements creating them are not visible
/// here — confirm.
///
/// ```markdown
/// alpha|
/// bravo.
/// ```
fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// Index of the most recent event.
// Assumes at least three events exist, otherwise the arithmetic and
// indexing below cannot succeed.
let next_index = tokenizer.events.len() - 1;
// Forward link: event two positions back -> most recent event.
tokenizer.events[next_index - 2].next = Some(next_index);
// Backward link: most recent event -> event two positions back.
tokenizer.events[next_index].previous = Some(next_index - 2);
// Carry on with the content of the next line.
(State::Fn(Box::new(inside)), None)
/// At a line ending, done.
///
/// Reached from `inside` at eof, or at an eol when the next line interrupts
/// the paragraph.
/// Returns `State::Ok` together with the current `code`.
///
/// ```markdown
/// alpha|
/// ***
/// ```
fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// NOTE(review): presumably the second tuple field hands `code` back to the
// caller as not consumed by the paragraph — confirm against `StateFnResult`.
(State::Ok, Some(vec![code]))
/// Before a potential interruption.
///
/// First state of the check that `inside` runs at an eol: on an eol it moves
/// on to `interrupt_initial`, which inspects the start of the next line.
///
/// # Panics
///
/// Panics (through `unreachable!`) when `code` is not an eol; `inside` only
/// runs this check from its eol branch.
///
/// ```markdown
/// alpha|
/// ***
/// ```
fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
// The eol itself: continue at the start of the next line.
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
(State::Fn(Box::new(interrupt_initial)), None)
// Any other code means the caller broke the precondition.
_ => unreachable!("expected eol"),
/// After a line ending.
///
/// Tries `code_fenced` and `html_flow` (through `attempt_2`) at the start of
/// the next line.
/// If one of them matches, `State::Nok` is returned, which makes `inside`
/// end the paragraph.
/// Otherwise `whitespace` (tagged as `TokenType::Whitespace`) is run, followed
/// by `interrupt_start`.
///
/// ```markdown
/// alpha|
/// ~~~js
/// ~~~
/// ```
fn interrupt_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// NOTE(review): presumably these two constructs are tried before the
// whitespace step because they deal with their own indentation — confirm.
tokenizer.attempt_2(code_fenced, html_flow, |ok| {
if ok {
// Fenced code or HTML (flow) starts here: the paragraph is interrupted.
Box::new(|_tokenizer, _code| (State::Nok, None))
} else {
// Neither matched: look at the whitespace prefix, then at the remaining
// interrupting constructs.
Box::new(|tokenizer, code| {
// NOTE(review): the call that receives the two closures below is not
// visible in this view; presumably an attempt at optional whitespace — confirm.
|tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
// The outcome (`_ok`) is ignored: `interrupt_start` runs either way.
|_ok| Box::new(interrupt_start),
)(tokenizer, code)
})(tokenizer, code)
/// After a line ending, after optional whitespace.
///
/// Decides whether the next line interrupts the paragraph.
/// `State::Ok` means "not interrupted" (the paragraph continues), `State::Nok`
/// means "interrupted" (the paragraph ends):
///
/// * eol or eof right away: the line is blank, so `State::Nok`
/// * whitespace prefix of at least `TAB_SIZE`: `State::Ok`
/// * otherwise `heading_atx` and `thematic_break` are tried (through
///   `attempt_2`): `State::Nok` if one matches, `State::Ok` if neither does
///
/// ```markdown
/// alpha|
/// # bravo
/// ```
fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// Most recent event, if any.
let tail = tokenizer.events.last();
// Size of the whitespace at the start of the line; stays 0 when the last
// event is not whitespace.
let mut prefix = 0;
if let Some(event) = tail {
if event.token_type == TokenType::Whitespace {
// NOTE(review): presumably the last event is the exit of the whitespace
// token, as `from_exit_event` suggests — confirm.
let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
prefix = span.end_index - span.start_index;
match code {
// Blank lines are not allowed in paragraph.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
// A line indented by `TAB_SIZE` or more does not interrupt the paragraph.
// To do: If code is disabled, indented lines are allowed.
_ if prefix >= TAB_SIZE => (State::Ok, None),
// Less indentation: an ATX heading or thematic break interrupts.
// To do: definitions, setext headings, etc?
_ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
// Inverted on purpose: a matching construct means the paragraph is
// interrupted (`Nok`); no match means it continues (`Ok`).
let result = if ok {
(State::Nok, None)
} else {
(State::Ok, None)
// The continuation ignores its arguments and only yields `result`.
Box::new(|_t, _c| result)
})(tokenizer, code),