| Field | Value |
|---|---|
| author | 2022-06-16 13:34:05 +0200 |
| committer | 2022-06-16 13:34:05 +0200 |
| commit | ef14d6581848ba5052d3389bb61fc96645551eef (patch) |
| tree | c1b2de69814950c5d2813b6b5798998c45635394 /src |
| parent | 7350acc692a79d9d4cf56afbc53ac3c9f2a6237c (diff) |
| download | markdown-rs-ef14d6581848ba5052d3389bb61fc96645551eef.tar.gz markdown-rs-ef14d6581848ba5052d3389bb61fc96645551eef.tar.bz2 markdown-rs-ef14d6581848ba5052d3389bb61fc96645551eef.zip |
Refactor to reorder thing alphabetically
Diffstat (limited to '')

| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | src/compiler.rs | 335 |
| -rw-r--r-- | src/content/string.rs | 1 |
| -rw-r--r-- | src/content/text.rs | 1 |
| -rw-r--r-- | src/tokenizer.rs | 28 |

4 files changed, 176 insertions, 189 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 9f84a38..50c06e1 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -102,8 +102,51 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
         match event.event_type {
             EventType::Enter => match token_type {
-                TokenType::Paragraph => {
-                    buf_tail_mut(buffers).push("<p>".to_string());
+                TokenType::AtxHeading
+                | TokenType::AtxHeadingSequence
+                | TokenType::AtxHeadingWhitespace
+                | TokenType::Autolink
+                | TokenType::AutolinkEmail
+                | TokenType::AutolinkMarker
+                | TokenType::AutolinkProtocol
+                | TokenType::BlankLineEnding
+                | TokenType::BlankLineWhitespace
+                | TokenType::CharacterEscape
+                | TokenType::CharacterEscapeMarker
+                | TokenType::CharacterEscapeValue
+                | TokenType::CharacterReference
+                | TokenType::CharacterReferenceMarker
+                | TokenType::CharacterReferenceMarkerHexadecimal
+                | TokenType::CharacterReferenceMarkerNumeric
+                | TokenType::CharacterReferenceMarkerSemi
+                | TokenType::CharacterReferenceValue
+                | TokenType::CodeIndentedPrefixWhitespace
+                | TokenType::CodeFencedFence
+                | TokenType::CodeFencedFenceSequence
+                | TokenType::CodeFencedFenceWhitespace
+                | TokenType::CodeFlowChunk
+                | TokenType::CodeTextData
+                | TokenType::CodeTextLineEnding
+                | TokenType::CodeTextSequence
+                | TokenType::Content
+                | TokenType::Data
+                | TokenType::HardBreakEscape
+                | TokenType::HardBreakEscapeMarker
+                | TokenType::HardBreakTrailing
+                | TokenType::HardBreakTrailingSpace
+                | TokenType::HtmlFlowData
+                | TokenType::HtmlTextData
+                | TokenType::LineEnding
+                | TokenType::ThematicBreak
+                | TokenType::ThematicBreakSequence
+                | TokenType::ThematicBreakWhitespace
+                | TokenType::Whitespace => {
+                    // Ignore.
+                }
+                TokenType::AtxHeadingText
+                | TokenType::CodeFencedFenceInfo
+                | TokenType::CodeFencedFenceMeta => {
+                    buffer(buffers);
                 }
                 TokenType::CodeIndented => {
                     code_flow_seen_data = Some(false);
@@ -117,9 +160,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                     buf_tail_mut(buffers).push("<pre><code".to_string());
                     code_fenced_fences_count = Some(0);
                 }
-                TokenType::AtxHeadingText
-                | TokenType::CodeFencedFenceInfo
-                | TokenType::CodeFencedFenceMeta => {
+                TokenType::CodeText => {
+                    buf_tail_mut(buffers).push("<code>".to_string());
                     buffer(buffers);
                 }
                 TokenType::HtmlFlow => {
@@ -133,161 +175,58 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                         ignore_encode = true;
                     }
                 }
-                TokenType::CodeText => {
-                    buf_tail_mut(buffers).push("<code>".to_string());
-                    buffer(buffers);
+                TokenType::Paragraph => {
+                    buf_tail_mut(buffers).push("<p>".to_string());
                 }
-                TokenType::Content
-                | TokenType::AtxHeading
-                | TokenType::AtxHeadingSequence
-                | TokenType::AtxHeadingWhitespace
-                | TokenType::LineEnding
-                | TokenType::ThematicBreak
-                | TokenType::ThematicBreakSequence
-                | TokenType::ThematicBreakWhitespace
-                | TokenType::CodeIndentedPrefixWhitespace
-                | TokenType::CodeFlowChunk
-                | TokenType::BlankLineEnding
-                | TokenType::BlankLineWhitespace
-                | TokenType::Whitespace
-                | TokenType::HardBreakEscape
-                | TokenType::HardBreakEscapeMarker
-                | TokenType::HardBreakTrailing
-                | TokenType::HardBreakTrailingSpace
-                | TokenType::HtmlFlowData
-                | TokenType::HtmlTextData
-                | TokenType::CodeFencedFence
-                | TokenType::CodeFencedFenceSequence
-                | TokenType::CodeFencedFenceWhitespace
-                | TokenType::CodeTextSequence
-                | TokenType::CodeTextData
-                | TokenType::CodeTextLineEnding
-                | TokenType::Data
-                | TokenType::CharacterEscape
-                | TokenType::CharacterEscapeMarker
-                | TokenType::CharacterEscapeValue
-                | TokenType::CharacterReference
-                | TokenType::CharacterReferenceMarker
-                | TokenType::CharacterReferenceMarkerNumeric
-                | TokenType::CharacterReferenceMarkerHexadecimal
-                | TokenType::CharacterReferenceMarkerSemi
-                | TokenType::CharacterReferenceValue
-                | TokenType::Autolink
-                | TokenType::AutolinkMarker
-                | TokenType::AutolinkProtocol
-                | TokenType::AutolinkEmail => {}
                 #[allow(unreachable_patterns)]
                 _ => {
                     unreachable!("unhandled `enter` of TokenType {:?}", token_type)
                 }
             },
             EventType::Exit => match token_type {
-                TokenType::Content
-                | TokenType::ThematicBreakSequence
-                | TokenType::ThematicBreakWhitespace
-                | TokenType::CodeIndentedPrefixWhitespace
+                TokenType::Autolink
+                | TokenType::AutolinkMarker
                 | TokenType::BlankLineEnding
                 | TokenType::BlankLineWhitespace
-                | TokenType::Whitespace
-                | TokenType::CodeFencedFenceSequence
-                | TokenType::CodeFencedFenceWhitespace
-                | TokenType::CodeTextSequence
                 | TokenType::CharacterEscape
                 | TokenType::CharacterEscapeMarker
                 | TokenType::CharacterReference
                 | TokenType::CharacterReferenceMarkerSemi
+                | TokenType::CodeFencedFenceSequence
+                | TokenType::CodeFencedFenceWhitespace
+                | TokenType::CodeIndentedPrefixWhitespace
+                | TokenType::CodeTextSequence
+                | TokenType::Content
                 | TokenType::HardBreakEscapeMarker
                 | TokenType::HardBreakTrailingSpace
-                | TokenType::Autolink
-                | TokenType::AutolinkMarker => {}
-                TokenType::HtmlFlow | TokenType::HtmlText => {
-                    ignore_encode = false;
+                | TokenType::ThematicBreakSequence
+                | TokenType::ThematicBreakWhitespace
+                | TokenType::Whitespace => {
+                    // Ignore.
                 }
-                TokenType::HtmlFlowData | TokenType::HtmlTextData => {
-                    let slice = serialize(codes, &from_exit_event(events, index), false);
-
-                    let res = if ignore_encode { slice } else { encode(&slice) };
-
+                // Just output it.
+                TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
                     // last_was_tag = false;
-                    buf_tail_mut(buffers).push(res);
-                }
-                TokenType::Paragraph => {
-                    buf_tail_mut(buffers).push("</p>".to_string());
-                }
-                TokenType::HardBreakEscape | TokenType::HardBreakTrailing => {
-                    buf_tail_mut(buffers).push("<br />".to_string());
-                }
-                TokenType::CodeIndented | TokenType::CodeFenced => {
-                    let seen_data =
-                        code_flow_seen_data.expect("`code_flow_seen_data` must be defined");
-
-                    // To do: containers.
-                    // One special case is if we are inside a container, and the fenced code was
-                    // not closed (meaning it runs to the end).
-                    // In that case, the following line ending, is considered *outside* the
-                    // fenced code and block quote by micromark, but CM wants to treat that
-                    // ending as part of the code.
-                    // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag {
-                    //     line_ending();
-                    // }
-
-                    // But in most cases, it’s simpler: when we’ve seen some data, emit an extra
-                    // line ending when needed.
-                    if seen_data {
-                        line_ending_if_needed(buffers);
-                    }
-
-                    buf_tail_mut(buffers).push("</code></pre>".to_string());
-
-                    if let Some(count) = code_fenced_fences_count {
-                        if count < 2 {
-                            line_ending_if_needed(buffers);
-                        }
-                    }
-
-                    code_flow_seen_data = None;
-                    code_fenced_fences_count = None;
-                    slurp_one_line_ending = false;
-                }
-                TokenType::CodeFencedFence => {
-                    let count = if let Some(count) = code_fenced_fences_count {
-                        count
-                    } else {
-                        0
-                    };
-
-                    if count == 0 {
-                        buf_tail_mut(buffers).push(">".to_string());
-                        // tag = true;
-                        slurp_one_line_ending = true;
-                    }
-
-                    code_fenced_fences_count = Some(count + 1);
-                }
-                TokenType::CodeFencedFenceInfo => {
-                    let value = resume(buffers);
-                    buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
-                    // tag = true;
-                }
-                TokenType::CodeFencedFenceMeta => {
-                    resume(buffers);
-                }
-                TokenType::CodeFlowChunk => {
-                    code_flow_seen_data = Some(true);
                     buf_tail_mut(buffers).push(encode(&serialize(
                         codes,
                         &from_exit_event(events, index),
                         false,
                     )));
                 }
-
+                TokenType::AtxHeading => {
+                    let rank = atx_opening_sequence_size
+                        .expect("`atx_opening_sequence_size` must be set in headings");
+                    buf_tail_mut(buffers).push(format!("</h{}>", rank));
+                    atx_opening_sequence_size = None;
+                    atx_heading_buffer = None;
+                }
                 // `AtxHeadingWhitespace` is ignored after the opening sequence,
                 // before the closing sequence, and after the closing sequence.
                 // But it is used around intermediate sequences.
                 // `atx_heading_buffer` is set to `Some` by the first `AtxHeadingText`.
                 // `AtxHeadingSequence` is ignored as the opening and closing sequence,
                 // but not when intermediate.
-                TokenType::AtxHeadingWhitespace | TokenType::AtxHeadingSequence => {
+                TokenType::AtxHeadingSequence | TokenType::AtxHeadingWhitespace => {
                     if let Some(buf) = atx_heading_buffer {
                         atx_heading_buffer = Some(
                             buf.to_string()
@@ -320,50 +259,26 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                     buf_tail_mut(buffers).push(encode(&result));
                 }
-                TokenType::AtxHeading => {
-                    let rank = atx_opening_sequence_size
-                        .expect("`atx_opening_sequence_size` must be set in headings");
-                    buf_tail_mut(buffers).push(format!("</h{}>", rank));
-                    atx_opening_sequence_size = None;
-                    atx_heading_buffer = None;
-                }
-                TokenType::AutolinkProtocol => {
+                TokenType::AutolinkEmail => {
                     let slice = serialize(codes, &from_exit_event(events, index), false);
                     let buf = buf_tail_mut(buffers);
                     buf.push(format!(
-                        "<a href=\"{}\">",
+                        "<a href=\"mailto:{}\">",
                         sanitize_uri(slice.as_str(), &protocol_href)
                     ));
                     buf.push(encode(&slice));
                     buf.push("</a>".to_string());
                 }
-                TokenType::AutolinkEmail => {
+                TokenType::AutolinkProtocol => {
                     let slice = serialize(codes, &from_exit_event(events, index), false);
                     let buf = buf_tail_mut(buffers);
                     buf.push(format!(
-                        "<a href=\"mailto:{}\">",
+                        "<a href=\"{}\">",
                         sanitize_uri(slice.as_str(), &protocol_href)
                     ));
                     buf.push(encode(&slice));
                     buf.push("</a>".to_string());
                 }
-                TokenType::ThematicBreak => {
-                    buf_tail_mut(buffers).push("<hr />".to_string());
-                }
-                TokenType::LineEnding => {
-                    // if slurp_all_line_endings {
-                    //     // Empty.
-                    // } else
-                    if slurp_one_line_ending {
-                        slurp_one_line_ending = false;
-                    } else {
-                        buf_tail_mut(buffers).push(encode(&serialize(
-                            codes,
-                            &from_exit_event(events, index),
-                            false,
-                        )));
-                    }
-                }
                 TokenType::CharacterReferenceMarker => {
                     character_reference_kind = Some(CharacterReferenceKind::Named);
                 }
@@ -389,9 +304,71 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                     };
                     buf_tail_mut(buffers).push(encode(&value));
-
                     character_reference_kind = None;
                 }
+                TokenType::CodeFenced | TokenType::CodeIndented => {
+                    let seen_data =
+                        code_flow_seen_data.expect("`code_flow_seen_data` must be defined");
+
+                    // To do: containers.
+                    // One special case is if we are inside a container, and the fenced code was
+                    // not closed (meaning it runs to the end).
+                    // In that case, the following line ending, is considered *outside* the
+                    // fenced code and block quote by micromark, but CM wants to treat that
+                    // ending as part of the code.
+                    // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag {
+                    //     line_ending();
+                    // }
+
+                    // But in most cases, it’s simpler: when we’ve seen some data, emit an extra
+                    // line ending when needed.
+                    if seen_data {
+                        line_ending_if_needed(buffers);
+                    }
+
+                    buf_tail_mut(buffers).push("</code></pre>".to_string());
+
+                    if let Some(count) = code_fenced_fences_count {
+                        if count < 2 {
+                            line_ending_if_needed(buffers);
+                        }
+                    }
+
+                    code_flow_seen_data = None;
+                    code_fenced_fences_count = None;
+                    slurp_one_line_ending = false;
+                }
+                TokenType::CodeFencedFence => {
+                    let count = if let Some(count) = code_fenced_fences_count {
+                        count
+                    } else {
+                        0
+                    };
+
+                    if count == 0 {
+                        buf_tail_mut(buffers).push(">".to_string());
+                        // tag = true;
+                        slurp_one_line_ending = true;
+                    }
+
+                    code_fenced_fences_count = Some(count + 1);
+                }
+                TokenType::CodeFencedFenceInfo => {
+                    let value = resume(buffers);
+                    buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
+                    // tag = true;
+                }
+                TokenType::CodeFencedFenceMeta => {
+                    resume(buffers);
+                }
+                TokenType::CodeFlowChunk => {
+                    code_flow_seen_data = Some(true);
+                    buf_tail_mut(buffers).push(encode(&serialize(
+                        codes,
+                        &from_exit_event(events, index),
+                        false,
+                    )));
+                }
                 TokenType::CodeText => {
                     let result = resume(buffers);
                     let mut chars = result.chars();
@@ -417,14 +394,38 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 TokenType::CodeTextLineEnding => {
                     buf_tail_mut(buffers).push(" ".to_string());
                 }
-                // This branch below currently acts as the resulting `data` tokens.
-                TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
+
+                TokenType::HardBreakEscape | TokenType::HardBreakTrailing => {
+                    buf_tail_mut(buffers).push("<br />".to_string());
+                }
+
+                TokenType::HtmlFlow | TokenType::HtmlText => {
+                    ignore_encode = false;
+                }
+                TokenType::HtmlFlowData | TokenType::HtmlTextData => {
+                    let slice = serialize(codes, &from_exit_event(events, index), false);
                     // last_was_tag = false;
-                    buf_tail_mut(buffers).push(encode(&serialize(
-                        codes,
-                        &from_exit_event(events, index),
-                        false,
-                    )));
+                    buf_tail_mut(buffers).push(if ignore_encode { slice } else { encode(&slice) });
+                }
+                TokenType::LineEnding => {
+                    // if slurp_all_line_endings {
+                    //     // Empty.
+                    // } else
+                    if slurp_one_line_ending {
+                        slurp_one_line_ending = false;
+                    } else {
+                        buf_tail_mut(buffers).push(encode(&serialize(
+                            codes,
+                            &from_exit_event(events, index),
+                            false,
+                        )));
+                    }
+                }
+                TokenType::Paragraph => {
+                    buf_tail_mut(buffers).push("</p>".to_string());
+                }
+                TokenType::ThematicBreak => {
+                    buf_tail_mut(buffers).push("<hr />".to_string());
                 }
                 #[allow(unreachable_patterns)]
                 _ => {
diff --git a/src/content/string.rs b/src/content/string.rs
index 2723785..25d8582 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -55,6 +55,7 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```
 fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
+        // To do: line endings.
         Code::None => {
             tokenizer.exit(TokenType::Data);
             (State::Ok, None)
diff --git a/src/content/text.rs b/src/content/text.rs
index f61b390..6a30d4c 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -11,7 +11,6 @@
 //! *   [Hard break (escape)][crate::construct::hard_break_escape]
 //! *   [Hard break (trailing)][crate::construct::hard_break_trailing]
 //! *   [Code (text)][crate::construct::code_text]
-//! *   Line ending
 //! *   Label start (image)
 //! *   Label start (link)
 //! *   [Character escape][crate::construct::character_escape]
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index da45ee5..0aae480 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -24,66 +24,52 @@ pub enum TokenType {
     AutolinkMarker,
     AutolinkProtocol,
     AutolinkEmail,
-
     AtxHeading,
     AtxHeadingSequence,
     AtxHeadingWhitespace,
     AtxHeadingText,
-
+    BlankLineEnding,
+    BlankLineWhitespace,
     CharacterEscape,
     CharacterEscapeMarker,
     CharacterEscapeValue,
-
     CharacterReference,
     CharacterReferenceMarker,
     CharacterReferenceMarkerNumeric,
     CharacterReferenceMarkerHexadecimal,
     CharacterReferenceMarkerSemi,
     CharacterReferenceValue,
-
     CodeFenced,
     CodeFencedFence,
     CodeFencedFenceSequence,
     CodeFencedFenceWhitespace,
     CodeFencedFenceInfo,
     CodeFencedFenceMeta,
-
+    CodeFlowChunk,
     CodeIndented,
     CodeIndentedPrefixWhitespace,
-
     CodeText,
     CodeTextSequence,
     CodeTextLineEnding,
     CodeTextData,
-
-    CodeFlowChunk,
-
+    Content,
     Data,
-
     HardBreakEscape,
     HardBreakEscapeMarker,
     HardBreakTrailing,
     HardBreakTrailingSpace,
-
     HtmlFlow,
     HtmlFlowData,
-
     HtmlText,
     HtmlTextData,
-
+    LineEnding,
+    Paragraph,
     ThematicBreak,
     ThematicBreakSequence,
     ThematicBreakWhitespace,
-
     Whitespace,
-    LineEnding,
-    BlankLineEnding,
-    BlankLineWhitespace,
-
-    Content,
-
-    Paragraph,
+    // Chunks are tokenizer, but unraveled by `subtokenize`.
     ChunkContent,
     ChunkString,
     ChunkText,
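
The shape the diff converges on is easy to see in isolation: match arms over `TokenType` ordered alphabetically, with every token type that produces no output collapsed into a single `// Ignore.` arm. The snippet below is a minimal, hypothetical sketch of that pattern only; the trimmed-down `TokenType` enum and the `on_enter` helper are invented for illustration and are not the crate's actual API.

```rust
// Minimal sketch (names invented for illustration): alphabetically ordered
// match arms, with "silent" token types grouped into one ignore arm via `|`.
#[allow(dead_code)]
enum TokenType {
    AtxHeadingSequence,
    LineEnding,
    Paragraph,
    ThematicBreak,
}

/// Hypothetical `enter` handler: emit opening HTML for token types that
/// produce output, and do nothing for the rest.
fn on_enter(token_type: &TokenType, buf: &mut Vec<String>) {
    match token_type {
        // Token types that emit nothing on `enter`, listed alphabetically.
        TokenType::AtxHeadingSequence
        | TokenType::LineEnding
        | TokenType::ThematicBreak => {
            // Ignore.
        }
        TokenType::Paragraph => {
            buf.push("<p>".to_string());
        }
    }
}

fn main() {
    let mut buf = Vec::new();
    on_enter(&TokenType::LineEnding, &mut buf); // ignored
    on_enter(&TokenType::Paragraph, &mut buf); // pushes "<p>"
    assert_eq!(buf, vec!["<p>".to_string()]);
}
```

Grouping the silent variants keeps the match exhaustive while leaving the arms that actually emit HTML easy to find, and the alphabetical ordering makes later insertions mechanical rather than a matter of taste.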
