Add heading (setext)

author: Titus Wormer <tituswormer@gmail.com> 2022-06-16 19:04:16 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-16 19:04:16 +0200
commit: 60ea2fd3a09f10fa28bf48575736b47afebf3221 (patch)
tree: f7aae5cec9181f7ff5df23e648fe1da22a94209f /src
parent: ef14d6581848ba5052d3389bb61fc96645551eef (diff)
download: markdown-rs-60ea2fd3a09f10fa28bf48575736b47afebf3221.tar.gz
markdown-rs-60ea2fd3a09f10fa28bf48575736b47afebf3221.tar.bz2
markdown-rs-60ea2fd3a09f10fa28bf48575736b47afebf3221.zip
8 files changed, 428 insertions, 91 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 50c06e1..9941fa5 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -5,7 +5,7 @@ use crate::util::{
     decode_character_reference::{decode_named, decode_numeric},
     encode::encode,
     sanitize_uri::sanitize_uri,
-    span::{from_exit_event, serialize},
+    span::{codes as codes_from_span, from_exit_event, serialize},
 };
 
 /// Configuration (optional).
@@ -78,6 +78,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
     let buffers: &mut Vec<Vec<String>> = &mut vec![vec![]];
     let mut atx_opening_sequence_size: Option<usize> = None;
     let mut atx_heading_buffer: Option<String> = None;
+    let mut heading_setext_buffer: Option<String> = None;
     let mut code_flow_seen_data: Option<bool> = None;
     let mut code_fenced_fences_count: Option<usize> = None;
     let mut slurp_one_line_ending = false;
@@ -102,10 +103,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
 
         match event.event_type {
             EventType::Enter => match token_type {
-                TokenType::AtxHeading
-                | TokenType::AtxHeadingSequence
-                | TokenType::AtxHeadingWhitespace
-                | TokenType::Autolink
+                TokenType::Autolink
                 | TokenType::AutolinkEmail
                 | TokenType::AutolinkMarker
                 | TokenType::AutolinkProtocol
@@ -134,6 +132,11 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::HardBreakEscapeMarker
                 | TokenType::HardBreakTrailing
                 | TokenType::HardBreakTrailingSpace
+                | TokenType::HeadingAtx
+                | TokenType::HeadingAtxSequence
+                | TokenType::HeadingAtxWhitespace
+                | TokenType::HeadingSetext
+                | TokenType::HeadingSetextUnderline
                 | TokenType::HtmlFlowData
                 | TokenType::HtmlTextData
                 | TokenType::LineEnding
@@ -143,9 +146,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::Whitespace => {
                     // Ignore.
                 }
-                TokenType::AtxHeadingText
-                | TokenType::CodeFencedFenceInfo
-                | TokenType::CodeFencedFenceMeta => {
+                TokenType::CodeFencedFenceInfo
+                | TokenType::CodeFencedFenceMeta
+                | TokenType::HeadingAtxText
+                | TokenType::HeadingSetextText => {
                     buffer(buffers);
                 }
                 TokenType::CodeIndented => {
@@ -199,6 +203,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::Content
                 | TokenType::HardBreakEscapeMarker
                 | TokenType::HardBreakTrailingSpace
+                | TokenType::HeadingSetext
                 | TokenType::ThematicBreakSequence
                 | TokenType::ThematicBreakWhitespace
                 | TokenType::Whitespace => {
@@ -213,52 +218,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                         false,
                     )));
                 }
-                TokenType::AtxHeading => {
-                    let rank = atx_opening_sequence_size
-                        .expect("`atx_opening_sequence_size` must be set in headings");
-                    buf_tail_mut(buffers).push(format!("</h{}>", rank));
-                    atx_opening_sequence_size = None;
-                    atx_heading_buffer = None;
-                }
-                // `AtxHeadingWhitespace` is ignored after the opening sequence,
-                // before the closing sequence, and after the closing sequence.
-                // But it is used around intermediate sequences.
-                // `atx_heading_buffer` is set to `Some` by the first `AtxHeadingText`.
-                // `AtxHeadingSequence` is ignored as the opening and closing sequence,
-                // but not when intermediate.
-                TokenType::AtxHeadingSequence | TokenType::AtxHeadingWhitespace => {
-                    if let Some(buf) = atx_heading_buffer {
-                        atx_heading_buffer = Some(
-                            buf.to_string()
-                                + &encode(&serialize(
-                                    codes,
-                                    &from_exit_event(events, index),
-                                    false,
-                                )),
-                        );
-                    }
-
-                    // First fence we see.
-                    if None == atx_opening_sequence_size {
-                        let rank = serialize(codes, &from_exit_event(events, index), false).len();
-                        atx_opening_sequence_size = Some(rank);
-                        buf_tail_mut(buffers).push(format!("<h{}>", rank));
-                    }
-                }
-                TokenType::AtxHeadingText => {
-                    let result = resume(buffers);
-
-                    if let Some(ref buf) = atx_heading_buffer {
-                        if !buf.is_empty() {
-                            buf_tail_mut(buffers).push(encode(buf));
-                            atx_heading_buffer = Some("".to_string());
-                        }
-                    } else {
-                        atx_heading_buffer = Some("".to_string());
-                    }
-
-                    buf_tail_mut(buffers).push(encode(&result));
-                }
                 TokenType::AutolinkEmail => {
                     let slice = serialize(codes, &from_exit_event(events, index), false);
                     let buf = buf_tail_mut(buffers);
@@ -394,11 +353,68 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 TokenType::CodeTextLineEnding => {
                     buf_tail_mut(buffers).push(" ".to_string());
                 }
-
                 TokenType::HardBreakEscape | TokenType::HardBreakTrailing => {
                     buf_tail_mut(buffers).push("<br />".to_string());
                 }
+                TokenType::HeadingAtx => {
+                    let rank = atx_opening_sequence_size
+                        .expect("`atx_opening_sequence_size` must be set in headings");
+                    buf_tail_mut(buffers).push(format!("</h{}>", rank));
+                    atx_opening_sequence_size = None;
+                    atx_heading_buffer = None;
+                }
+                // `HeadingAtxWhitespace` is ignored after the opening sequence,
+                // before the closing sequence, and after the closing sequence.
+                // But it is used around intermediate sequences.
+                // `atx_heading_buffer` is set to `Some` by the first `HeadingAtxText`.
+                // `HeadingAtxSequence` is ignored as the opening and closing sequence,
+                // but not when intermediate.
+                TokenType::HeadingAtxSequence | TokenType::HeadingAtxWhitespace => {
+                    if let Some(buf) = atx_heading_buffer {
+                        atx_heading_buffer = Some(
+                            buf.to_string()
+                                + &encode(&serialize(
+                                    codes,
+                                    &from_exit_event(events, index),
+                                    false,
+                                )),
+                        );
+                    }
+
+                    // First fence we see.
+                    if None == atx_opening_sequence_size {
+                        let rank = serialize(codes, &from_exit_event(events, index), false).len();
+                        atx_opening_sequence_size = Some(rank);
+                        buf_tail_mut(buffers).push(format!("<h{}>", rank));
+                    }
+                }
+                TokenType::HeadingAtxText => {
+                    let result = resume(buffers);
 
+                    if let Some(ref buf) = atx_heading_buffer {
+                        if !buf.is_empty() {
+                            buf_tail_mut(buffers).push(encode(buf));
+                            atx_heading_buffer = Some("".to_string());
+                        }
+                    } else {
+                        atx_heading_buffer = Some("".to_string());
+                    }
+
+                    buf_tail_mut(buffers).push(encode(&result));
+                }
+                TokenType::HeadingSetextText => {
+                    heading_setext_buffer = Some(resume(buffers));
+                    slurp_one_line_ending = true;
+                }
+                TokenType::HeadingSetextUnderline => {
+                    let text = heading_setext_buffer
+                        .expect("`atx_opening_sequence_size` must be set in headings");
+                    let head = codes_from_span(codes, &from_exit_event(events, index))[0];
+                    let level: usize = if head == Code::Char('-') { 2 } else { 1 };
+
+                    heading_setext_buffer = None;
+                    buf_tail_mut(buffers).push(format!("<h{}>{}</h{}>", level, text, level));
+                }
                 TokenType::HtmlFlow | TokenType::HtmlText => {
                     ignore_encode = false;
                 }
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 1a9ed03..3ff6fea 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -18,9 +18,11 @@
 //! In older markdown versions, this was not required, and headings would form
 //! without it.
 //!
-//! In markdown, it is also possible to create headings with the setext heading
-//! construct.
-//! The benefit of setext headings is that their text can include line endings.
+//! In markdown, it is also possible to create headings with a
+//! [heading (setext)][heading_setext] construct.
+//! The benefit of setext headings is that their text can include line endings,
+//! and by extension also hard breaks (e.g., with
+//! [hard break (escape)][hard_break_escape]).
 //! However, their limit is that they cannot form `<h3>` through `<h6>`
 //! headings.
 //! Due to this limitation, it is recommended to use atx headings.
@@ -39,11 +41,11 @@
 //! *   [*§ 4.2 ATX headings* in `CommonMark`](https://spec.commonmark.org/0.30/#atx-headings)
 //!
 //! [flow]: crate::content::flow
+//! [heading_setext]: crate::construct::heading_setext
+//! [hard_break_escape]: crate::construct::hard_break_escape
 //! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
 //! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
 //! [atx]: http://www.aaronsw.com/2002/atx/
-//!
-//! <!-- To do: link `setext` -->
 
 use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -55,8 +57,8 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     if Code::Char('#') == code {
-        tokenizer.enter(TokenType::AtxHeading);
-        tokenizer.enter(TokenType::AtxHeadingSequence);
+        tokenizer.enter(TokenType::HeadingAtx);
+        tokenizer.enter(TokenType::HeadingAtxSequence);
         sequence_open(tokenizer, code, 0)
     } else {
         (State::Nok, None)
@@ -76,7 +78,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR
         | Code::Char('\t' | '\n' | '\r' | ' ')
             if rank > 0 =>
         {
-            tokenizer.exit(TokenType::AtxHeadingSequence);
+            tokenizer.exit(TokenType::HeadingAtxSequence);
             at_break(tokenizer, code)
         }
         Code::Char('#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
@@ -104,19 +106,19 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR
 fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            tokenizer.exit(TokenType::AtxHeading);
+            tokenizer.exit(TokenType::HeadingAtx);
             (State::Ok, Some(vec![code]))
         }
         Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            tokenizer.enter(TokenType::AtxHeadingWhitespace);
+            tokenizer.enter(TokenType::HeadingAtxWhitespace);
             whitespace(tokenizer, code)
         }
         Code::Char('#') => {
-            tokenizer.enter(TokenType::AtxHeadingSequence);
+            tokenizer.enter(TokenType::HeadingAtxSequence);
             further_sequence(tokenizer, code)
         }
         Code::Char(_) => {
-            tokenizer.enter(TokenType::AtxHeadingText);
+            tokenizer.enter(TokenType::HeadingAtxText);
             tokenizer.enter(TokenType::ChunkText);
             data(tokenizer, code)
         }
@@ -134,7 +136,7 @@ fn further_sequence(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         tokenizer.consume(code);
         (State::Fn(Box::new(further_sequence)), None)
     } else {
-        tokenizer.exit(TokenType::AtxHeadingSequence);
+        tokenizer.exit(TokenType::HeadingAtxSequence);
         at_break(tokenizer, code)
     }
 }
@@ -151,7 +153,7 @@ fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             (State::Fn(Box::new(whitespace)), None)
         }
         _ => {
-            tokenizer.exit(TokenType::AtxHeadingWhitespace);
+            tokenizer.exit(TokenType::HeadingAtxWhitespace);
             at_break(tokenizer, code)
         }
     }
@@ -167,7 +169,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         // Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\t' | '\n' | '\r' | ' ') => {
             tokenizer.exit(TokenType::ChunkText);
-            tokenizer.exit(TokenType::AtxHeadingText);
+            tokenizer.exit(TokenType::HeadingAtxText);
             at_break(tokenizer, code)
         }
         _ => {
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
new file mode 100644
index 0000000..8cc4f6d
--- /dev/null
+++ b/src/construct/heading_setext.rs
@@ -0,0 +1,301 @@
+//! Heading (setext) is a construct that occurs in the [flow] content type.
+//!
+//! They’re formed with the following BNF:
+//!
+//! ```bnf
+//! heading_setext ::= line *(eol line) eol whitespace_optional (1*'-' | 1*'=') whitespace_optional
+//!
+//! whitespace ::= 1*space_or_tab
+//! whitespace_optional ::= [ whitespace ]
+//! line ::= code - eol
+//! eol ::= '\r' | '\r\n' | '\n'
+//! ```
+//!
+//! Heading (setext) in markdown relates to the `<h1>` and `<h2>` elements in
+//! HTML.
+//! See [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements* in the
+//! HTML spec][html] for more info.
+//!
+//! In markdown, it is also possible to create headings with a
+//! [heading (atx)][heading_atx] construct.
+//! The benefit of setext headings is that their text can include line endings,
+//! and by extension also hard breaks (e.g., with
+//! [hard break (escape)][hard_break_escape]).
+//! However, their limit is that they cannot form `<h3>` through `<h6>`
+//! headings.
+//! Due to this limitation, it is recommended to use atx headings.
+//!
+//! [Thematic breaks][thematic_break] formed with dashes (without whitespace)
+//! can also form heading (setext).
+//!
+//! > 🏛 **Background**: the word *setext* originates from a small markup
+//! > language by Ian Feldman from 1991.
+//! > See [*§ Setext* on Wikipedia][wiki-setext] for more info.
+//! > The word *atx* originates from a tiny markup language by Aaron Swartz
+//! > from 2002.
+//! > See [*§ atx, the true structured text format* on `aaronsw.com`][atx] for
+//! > more info.
+//!
+//! ## References
+//!
+//! *   [`setext-underline.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/setext-underline.js)
+//! *   [*§ 4.3 Setext headings* in `CommonMark`](https://spec.commonmark.org/0.30/#setext-headings)
+//!
+//! [flow]: crate::content::flow
+//! [heading_atx]: crate::construct::heading_atx
+//! [thematic_break]: crate::construct::thematic_break
+//! [hard_break_escape]: crate::construct::hard_break_escape
+//! [html]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
+//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
+//! [atx]: http://www.aaronsw.com/2002/atx/
+
+use crate::constant::TAB_SIZE;
+use crate::construct::partial_whitespace::start as whitespace;
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::span::from_exit_event;
+
+/// Kind of underline.
+#[derive(Debug, Clone, PartialEq)]
+pub enum Kind {
+    /// Dash (`-`) marker; underlines a level 2 heading.
+    Dash,
+    /// Equals to (`=`) marker; underlines a level 1 heading.
+    EqualsTo,
+}
+
+/// Start of a heading (setext).
+///
+/// ```markdown
+/// |alpha
+/// ==
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            unreachable!("expected non-eol/eof");
+        }
+        _ => {
+            tokenizer.enter(TokenType::HeadingSetext);
+            tokenizer.enter(TokenType::HeadingSetextText);
+            tokenizer.enter(TokenType::ChunkText);
+            text_inside(tokenizer, code)
+        }
+    }
+}
+
+/// Inside text.
+///
+/// ```markdown
+/// al|pha
+/// bra|vo
+/// ==
+/// ```
+pub fn text_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Nok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::ChunkText);
+            tokenizer.exit(TokenType::HeadingSetextText);
+            tokenizer.attempt(underline_before, |ok| {
+                Box::new(if ok { after } else { text_continue })
+            })(tokenizer, code)
+        }
+        _ => {
+            tokenizer.consume(code);
+            (State::Fn(Box::new(text_inside)), None)
+        }
+    }
+}
+
+/// At a line ending, not at an underline.
+///
+/// ```markdown
+/// alpha|
+/// bravo
+/// ==
+/// ```
+fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // Needed to connect the text.
+    // To do: does it work?
+    tokenizer.enter(TokenType::HeadingSetextText);
+    tokenizer.events.pop();
+    tokenizer.events.pop();
+
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            let next = tokenizer.events.len();
+            let previous = next - 2;
+
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+
+            tokenizer.events[previous].next = Some(next);
+            tokenizer.events[next].previous = Some(previous);
+
+            (
+                State::Fn(Box::new(tokenizer.attempt(
+                    |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+                    |_ok| Box::new(text_line_start),
+                ))),
+                None,
+            )
+        }
+        _ => unreachable!("expected eol"),
+    }
+}
+
+/// At the start of a line, after optional whitespace, not at an underline.
+///
+/// ```markdown
+/// alpha
+/// |bravo
+/// ==
+/// ```
+fn text_line_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let next = tokenizer.events.len() - 2;
+    let previous = next - 2;
+
+    // Link the whitespace, if it exists.
+    if tokenizer.events[next].token_type == TokenType::Whitespace {
+        tokenizer.events[previous].next = Some(next);
+        tokenizer.events[next].previous = Some(previous);
+    }
+
+    match code {
+        // Blank lines not allowed.
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
+        _ => {
+            let next = tokenizer.events.len();
+            let previous = next - 2;
+
+            tokenizer.enter(TokenType::ChunkText);
+
+            tokenizer.events[previous].next = Some(next);
+            tokenizer.events[next].previous = Some(previous);
+
+            text_inside(tokenizer, code)
+        }
+    }
+}
+
+/// After a heading (setext).
+///
+/// ```markdown
+/// alpha
+/// ==|
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::HeadingSetext);
+    (State::Ok, Some(vec![code]))
+}
+
+/// At a line ending, presumably an underline.
+///
+/// ```markdown
+/// alpha|
+/// ==
+/// ```
+fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (State::Fn(Box::new(underline_start)), None)
+        }
+        _ => unreachable!("expected eol"),
+    }
+}
+
+/// After a line ending, presumably an underline.
+///
+/// ```markdown
+/// alpha
+/// |==
+/// ```
+fn underline_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.attempt(
+        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+        |_ok| Box::new(underline_sequence_start),
+    )(tokenizer, code)
+}
+
+/// After optional whitespace, presumably an underline.
+///
+/// ```markdown
+/// alpha
+/// |==
+/// ```
+fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    let tail = tokenizer.events.last();
+    let mut prefix = 0;
+
+    if let Some(event) = tail {
+        if event.token_type == TokenType::Whitespace {
+            let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
+            prefix = span.end_index - span.start_index;
+        }
+    }
+
+    // To do: 4+ should be okay if code (indented) is turned off!
+    if prefix >= TAB_SIZE {
+        return (State::Nok, None);
+    }
+
+    match code {
+        Code::Char(char) if char == '-' || char == '=' => {
+            let marker = if char == '-' {
+                Kind::Dash
+            } else {
+                Kind::EqualsTo
+            };
+            tokenizer.enter(TokenType::HeadingSetextUnderline);
+            underline_sequence_inside(tokenizer, code, marker)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// In an underline sequence.
+///
+/// ```markdown
+/// alpha
+/// =|=
+/// ```
+fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
+    let marker = if kind == Kind::Dash { '-' } else { '=' };
+
+    match code {
+        Code::Char(char) if char == marker => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    underline_sequence_inside(tokenizer, code, kind)
+                })),
+                None,
+            )
+        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.attempt(
+            |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+            |_ok| Box::new(underline_after),
+        )(tokenizer, code),
+        _ => underline_after(tokenizer, code),
+    }
+}
+
+/// After an underline sequence, after optional whitespace.
+///
+/// ```markdown
+/// alpha
+/// ==|
+/// ```
+fn underline_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::HeadingSetextUnderline);
+            (State::Ok, Some(vec![code]))
+        }
+        _ => (State::Nok, None),
+    }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 880d055..ca1149f 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -29,7 +29,7 @@
 //! *   [hard break (escape)][hard_break_escape]
 //! *   [hard break (trailing)][hard_break_trailing]
 //! *   [heading (atx)][heading_atx]
-//! *   heading (setext)
+//! *   [heading (setext)][heading_setext]
 //! *   [html (flow)][html_flow]
 //! *   [html (text)][html_text]
 //! *   label end
@@ -64,6 +64,7 @@ pub mod code_text;
 pub mod hard_break_escape;
 pub mod hard_break_trailing;
 pub mod heading_atx;
+pub mod heading_setext;
 pub mod html_flow;
 pub mod html_text;
 pub mod partial_whitespace;
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 7a4f71a..bc41991 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -24,7 +24,7 @@
 //! For these reasons, it is recommend to not use spaces or tabs between the
 //! markers.
 //! Thematic breaks formed with dashes (without whitespace) can also form
-//! setext headings.
+//! [heading (setext)][heading_setext].
 //! As dashes and underscores frequently occur in natural language and URLs, it
 //! is recommended to use asterisks for thematic breaks to distinguish from
 //! such use.
@@ -39,9 +39,10 @@
 //! *   [*§ 4.1 Thematic breaks* in `CommonMark`](https://spec.commonmark.org/0.30/#thematic-breaks)
 //!
 //! [flow]: crate::content::flow
+//! [heading_setext]: crate::construct::heading_setext
 //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-hr-element
 //!
-//! <!-- To do: link `lists`, `setext heading` -->
+//! <!-- To do: link `lists` -->
 
 use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
diff --git a/src/content/content.rs b/src/content/content.rs
index 4660fbe..4ca69ee 100644
--- a/src/content/content.rs
+++ b/src/content/content.rs
@@ -27,7 +27,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             unreachable!("expected non-eol/eof");
         }
-        _ => paragraph_initial(tokenizer, code)
+        _ => after_definitions(tokenizer, code)
         // To do: definition.
         // _ => tokenizer.attempt(definition, |ok| {
         //     Box::new(if ok {
@@ -44,10 +44,26 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```markdown
 /// |asd
 /// ```
+fn after_definitions(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::None => (State::Ok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            unreachable!("to do: handle eol after definition");
+        }
+        _ => paragraph_initial(tokenizer, code),
+    }
+}
+
+/// Before a paragraph.
+///
+/// ```markdown
+/// |asd
+/// ```
 fn paragraph_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
-        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            unreachable!("expected non-eol/eof");
+        Code::None => (State::Ok, None),
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            unreachable!("to do: handle eol after definition");
         }
         _ => {
             tokenizer.enter(TokenType::Paragraph);
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 4d2ece1..d7509d7 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -14,17 +14,18 @@
 //! *   [Code (fenced)][crate::construct::code_fenced]
 //! *   [Code (indented)][crate::construct::code_indented]
 //! *   [Heading (atx)][crate::construct::heading_atx]
+//! *   [Heading (setext)][crate::construct::heading_setext]
 //! *   [HTML (flow)][crate::construct::html_flow]
 //! *   [Thematic break][crate::construct::thematic_break]
 //!
-//! <!-- To do: `setext` in content? Link to content. -->
+//! <!-- To do: Link to content. -->
 
 use crate::constant::TAB_SIZE;
 use crate::construct::{
     blank_line::start as blank_line, code_fenced::start as code_fenced,
     code_indented::start as code_indented, heading_atx::start as heading_atx,
-    html_flow::start as html_flow, partial_whitespace::start as whitespace,
-    thematic_break::start as thematic_break,
+    heading_setext::start as heading_setext, html_flow::start as html_flow,
+    partial_whitespace::start as whitespace, thematic_break::start as thematic_break,
 };
 use crate::subtokenize::subtokenize;
 use crate::tokenizer::{Code, Event, Point, State, StateFnResult, TokenType, Tokenizer};
@@ -144,24 +145,20 @@ pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// |***
 /// ```
 pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
+    tokenizer.attempt_3(heading_atx, thematic_break, heading_setext, |ok| {
         Box::new(if ok { after } else { content_before })
     })(tokenizer, code)
 }
 
-/// Before flow, but not before a heading (atx) or thematic break.
-///
-/// At this point, we’re at content (zero or more definitions and zero or one
-/// paragraph/setext heading).
+/// Before content.
 ///
 /// ```markdown
 /// |qwe
 /// ```
-// To do: currently only parses a single line.
+///
 // To do:
 // - Multiline
 // - One or more definitions.
-// - Setext heading.
 fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
@@ -174,12 +171,12 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         }
     }
 }
+
 /// In content.
 ///
 /// ```markdown
 /// al|pha
 /// ```
-// To do: lift limitations as documented above.
 fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult {
     match code {
         Code::None => content_end(tokenizer, code),
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 0aae480..fc9e177 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -24,10 +24,6 @@ pub enum TokenType {
     AutolinkMarker,
     AutolinkProtocol,
     AutolinkEmail,
-    AtxHeading,
-    AtxHeadingSequence,
-    AtxHeadingWhitespace,
-    AtxHeadingText,
     BlankLineEnding,
     BlankLineWhitespace,
     CharacterEscape,
@@ -58,6 +54,13 @@ pub enum TokenType {
     HardBreakEscapeMarker,
     HardBreakTrailing,
     HardBreakTrailingSpace,
+    HeadingAtx,
+    HeadingAtxSequence,
+    HeadingAtxWhitespace,
+    HeadingAtxText,
+    HeadingSetext,
+    HeadingSetextText,
+    HeadingSetextUnderline,
     HtmlFlow,
     HtmlFlowData,
     HtmlText,
author	Titus Wormer <tituswormer@gmail.com>	2022-06-16 19:04:16 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-06-16 19:04:16 +0200
commit	60ea2fd3a09f10fa28bf48575736b47afebf3221 (patch)
tree	f7aae5cec9181f7ff5df23e648fe1da22a94209f /src
parent	ef14d6581848ba5052d3389bb61fc96645551eef (diff)
download	markdown-rs-60ea2fd3a09f10fa28bf48575736b47afebf3221.tar.gz markdown-rs-60ea2fd3a09f10fa28bf48575736b47afebf3221.tar.bz2 markdown-rs-60ea2fd3a09f10fa28bf48575736b47afebf3221.zip