From 9cb9e37c33173c16cbafd345f43e43b5a550537d Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 19 Sep 2022 17:29:59 +0200 Subject: Add structs, enums for `mdast` --- readme.md | 8 +- src/compiler.rs | 1924 ------------------------------------------------------- src/lib.rs | 45 +- src/mdast.rs | 1047 ++++++++++++++++++++++++++++++ src/to_html.rs | 1922 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ src/to_mdast.rs | 40 ++ 6 files changed, 3052 insertions(+), 1934 deletions(-) delete mode 100644 src/compiler.rs create mode 100644 src/mdast.rs create mode 100644 src/to_html.rs create mode 100644 src/to_mdast.rs diff --git a/readme.md b/readme.md index 4236338..0a03e0d 100644 --- a/readme.md +++ b/readme.md @@ -180,12 +180,12 @@ The files in `src/` are as follows: — CommonMark, GFM, and other extension constructs used in micromark - `util/*.rs` — helpers often needed when parsing markdown -- `compiler.rs` - — turns events into a string of HTML - `event.rs` — things with meaning happening somewhere - `lib.rs` — core module +- `mdast.rs` + — syntax tree - `parser.rs` — turn a string of markdown into events - `resolve.rs` @@ -194,6 +194,10 @@ The files in `src/` are as follows: — steps of the state machine - `subtokenize.rs` — handle content in other content +- `to_html.rs` + — turns events into a string of HTML +- `to_mdast.rs` + — turns events into a syntax tree - `tokenizer.rs` — glue the states of the state machine together diff --git a/src/compiler.rs b/src/compiler.rs deleted file mode 100644 index eaa15ee..0000000 --- a/src/compiler.rs +++ /dev/null @@ -1,1924 +0,0 @@ -//! Turn events into a string of HTML. 
-use crate::event::{Event, Kind, Name}; -use crate::util::{ - constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC}, - decode_character_reference::{decode_named, decode_numeric}, - encode::encode, - gfm_tagfilter::gfm_tagfilter, - normalize_identifier::normalize_identifier, - sanitize_uri::{sanitize, sanitize_with_protocols}, - skip, - slice::{Position, Slice}, -}; -use crate::{LineEnding, Options}; -use alloc::{ - format, - string::{String, ToString}, - vec, - vec::Vec, -}; -use core::str; - -/// Link, image, or footnote call. -/// Resource or reference. -/// Reused for temporary definitions as well, in the first pass. -#[derive(Debug)] -struct Media { - /// Whether this represents an image (`true`) or a link or definition - /// (`false`). - image: bool, - /// The text between the brackets (`x` in `![x]()` and `[x]()`). - /// - /// Not interpreted. - label_id: Option<(usize, usize)>, - /// The result of interpreting the text between the brackets - /// (`x` in `![x]()` and `[x]()`). - /// - /// When this is a link, it contains further text content and thus HTML - /// tags. - /// Otherwise, when an image, text content is also allowed, but resulting - /// tags are ignored. - label: Option, - /// The string between the explicit brackets of the reference (`y` in - /// `[x][y]`), as content. - /// - /// Not interpreted. - reference_id: Option<(usize, usize)>, - /// The destination (url). - /// - /// Interpreted string content. - destination: Option, - /// The destination (url). - /// - /// Interpreted string content. - title: Option, -} - -/// Representation of a definition. -#[derive(Debug)] -struct Definition { - /// Identifier. - id: String, - /// The destination (url). - /// - /// Interpreted string content. - destination: Option, - /// The title. - /// - /// Interpreted string content. - title: Option, -} - -/// GFM table: column alignment. -#[derive(Debug, PartialEq, Eq, Copy, Clone)] -enum GfmTableAlign { - /// No alignment. 
- /// - /// ```markdown - /// | | aaa | - /// > | | --- | - /// ^^^ - /// ``` - None, - /// Left alignment. - /// - /// ```markdown - /// | | aaa | - /// > | | :-- | - /// ^^^ - /// ``` - Left, - /// Center alignment. - /// - /// ```markdown - /// | | aaa | - /// > | | :-: | - /// ^^^ - /// ``` - Center, - /// Right alignment. - /// - /// ```markdown - /// | | aaa | - /// > | | --: | - /// ^^^ - /// ``` - Right, -} - -/// Context used to compile markdown. -#[allow(clippy::struct_excessive_bools)] -#[derive(Debug)] -struct CompileContext<'a> { - // Static info. - /// List of events. - pub events: &'a [Event], - /// List of bytes. - pub bytes: &'a [u8], - /// Configuration. - pub options: &'a Options, - // Fields used by handlers to track the things they need to track to - // compile markdown. - /// Rank of heading (atx). - pub heading_atx_rank: Option, - /// Buffer of heading (setext) text. - pub heading_setext_buffer: Option, - /// Whether raw (flow) (code (fenced), math (flow)) or code (indented) contains data. - pub raw_flow_seen_data: Option, - /// Number of raw (flow) fences. - pub raw_flow_fences_count: Option, - /// Whether we are in code (text). - pub raw_text_inside: bool, - /// Whether we are in image text. - pub image_alt_inside: bool, - /// Marker of character reference. - pub character_reference_marker: Option, - /// Whether we are expecting the first list item marker. - pub list_expect_first_marker: Option, - /// Stack of media (link, image). - pub media_stack: Vec, - /// Stack of containers. - pub tight_stack: Vec, - /// List of definitions. - pub definitions: Vec, - /// List of definitions. - pub gfm_footnote_definitions: Vec<(String, String)>, - pub gfm_footnote_definition_calls: Vec<(String, usize)>, - pub gfm_footnote_definition_stack: Vec<(usize, usize)>, - /// Whether we are in a GFM table head. - pub gfm_table_in_head: bool, - /// Current GFM table alignment. - pub gfm_table_align: Option>, - /// Current GFM table column. 
- pub gfm_table_column: usize, - // Fields used to influance the current compilation. - /// Ignore the next line ending. - pub slurp_one_line_ending: bool, - /// Whether to encode HTML. - pub encode_html: bool, - // Configuration - /// Line ending to use. - pub line_ending_default: LineEnding, - // Intermediate results. - /// Stack of buffers. - pub buffers: Vec, - /// Current event index. - pub index: usize, -} - -impl<'a> CompileContext<'a> { - /// Create a new compile context. - pub fn new( - events: &'a [Event], - bytes: &'a [u8], - options: &'a Options, - line_ending: LineEnding, - ) -> CompileContext<'a> { - CompileContext { - events, - bytes, - heading_atx_rank: None, - heading_setext_buffer: None, - raw_flow_seen_data: None, - raw_flow_fences_count: None, - raw_text_inside: false, - character_reference_marker: None, - list_expect_first_marker: None, - media_stack: vec![], - definitions: vec![], - gfm_footnote_definitions: vec![], - gfm_footnote_definition_calls: vec![], - gfm_footnote_definition_stack: vec![], - gfm_table_in_head: false, - gfm_table_align: None, - gfm_table_column: 0, - tight_stack: vec![], - slurp_one_line_ending: false, - image_alt_inside: false, - encode_html: true, - line_ending_default: line_ending, - buffers: vec![String::new()], - index: 0, - options, - } - } - - /// Push a buffer. - pub fn buffer(&mut self) { - self.buffers.push(String::new()); - } - - /// Pop a buffer, returning its value. - pub fn resume(&mut self) -> String { - self.buffers.pop().expect("Cannot resume w/o buffer") - } - - /// Push a str to the last buffer. - pub fn push(&mut self, value: &str) { - self.buffers - .last_mut() - .expect("Cannot push w/o buffer") - .push_str(value); - } - - /// Add a line ending. - pub fn line_ending(&mut self) { - let eol = self.line_ending_default.as_str().to_string(); - self.push(&eol); - } - - /// Add a line ending if needed (as in, there’s no eol/eof already). 
- pub fn line_ending_if_needed(&mut self) { - let tail = self - .buffers - .last() - .expect("at least one buffer should exist") - .as_bytes() - .last(); - - if !matches!(tail, None | Some(b'\n' | b'\r')) { - self.line_ending(); - } - } -} - -/// Turn events and codes into a string of HTML. -pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { - let mut index = 0; - let mut line_ending_inferred = None; - - // First, we figure out what the used line ending style is. - // Stop when we find a line ending. - while index < events.len() { - let event = &events[index]; - - if event.kind == Kind::Exit - && (event.name == Name::BlankLineEnding || event.name == Name::LineEnding) - { - line_ending_inferred = Some(LineEnding::from_str( - Slice::from_position(bytes, &Position::from_exit_event(events, index)).as_str(), - )); - break; - } - - index += 1; - } - - // Figure out which line ending style we’ll use. - let line_ending_default = if let Some(value) = line_ending_inferred { - value - } else { - options.default_line_ending.clone() - }; - - let mut context = CompileContext::new(events, bytes, options, line_ending_default); - let mut definition_indices = vec![]; - let mut index = 0; - let mut definition_inside = false; - - // Handle all definitions first. - // We must do two passes because we need to compile the events in - // definitions which come after references already. - // - // To speed things up, we collect the places we can jump over for the - // second pass. - // - // We don’t need to handle GFM footnote definitions like this, because - // unlike normal definitions, what they produce is not used in calls. - // It would also get very complex, because footnote definitions can be - // nested. - while index < events.len() { - let event = &events[index]; - - if definition_inside { - handle(&mut context, index); - } - - if event.kind == Kind::Enter { - if event.name == Name::Definition { - handle(&mut context, index); // Also handle start. 
- definition_inside = true; - definition_indices.push((index, index)); - } - } else if event.name == Name::Definition { - definition_inside = false; - definition_indices.last_mut().unwrap().1 = index; - } - - index += 1; - } - - index = 0; - let jump_default = (events.len(), events.len()); - let mut definition_index = 0; - let mut jump = definition_indices - .get(definition_index) - .unwrap_or(&jump_default); - - while index < events.len() { - if index == jump.0 { - index = jump.1 + 1; - definition_index += 1; - jump = definition_indices - .get(definition_index) - .unwrap_or(&jump_default); - } else { - handle(&mut context, index); - index += 1; - } - } - - // No section to generate. - if !context.gfm_footnote_definition_calls.is_empty() { - generate_footnote_section(&mut context); - } - - debug_assert_eq!(context.buffers.len(), 1, "expected 1 final buffer"); - context - .buffers - .get(0) - .expect("expected 1 final buffer") - .to_string() -} - -/// Handle the event at `index`. -fn handle(context: &mut CompileContext, index: usize) { - context.index = index; - - if context.events[index].kind == Kind::Enter { - enter(context); - } else { - exit(context); - } -} - -/// Handle [`Enter`][Kind::Enter]. 
-fn enter(context: &mut CompileContext) { - match context.events[context.index].name { - Name::CodeFencedFenceInfo - | Name::CodeFencedFenceMeta - | Name::MathFlowFenceMeta - | Name::DefinitionLabelString - | Name::DefinitionTitleString - | Name::GfmFootnoteDefinitionPrefix - | Name::HeadingAtxText - | Name::HeadingSetextText - | Name::Label - | Name::MdxEsm - | Name::MdxFlowExpression - | Name::MdxTextExpression - | Name::MdxJsxFlowTag - | Name::MdxJsxTextTag - | Name::ReferenceString - | Name::ResourceTitleString => on_enter_buffer(context), - - Name::BlockQuote => on_enter_block_quote(context), - Name::CodeIndented => on_enter_code_indented(context), - Name::CodeFenced | Name::MathFlow => on_enter_raw_flow(context), - Name::CodeText | Name::MathText => on_enter_raw_text(context), - Name::Definition => on_enter_definition(context), - Name::DefinitionDestinationString => on_enter_definition_destination_string(context), - Name::Emphasis => on_enter_emphasis(context), - Name::Frontmatter => on_enter_frontmatter(context), - Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context), - Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context), - Name::GfmStrikethrough => on_enter_gfm_strikethrough(context), - Name::GfmTable => on_enter_gfm_table(context), - Name::GfmTableBody => on_enter_gfm_table_body(context), - Name::GfmTableCell => on_enter_gfm_table_cell(context), - Name::GfmTableHead => on_enter_gfm_table_head(context), - Name::GfmTableRow => on_enter_gfm_table_row(context), - Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context), - Name::HtmlFlow => on_enter_html_flow(context), - Name::HtmlText => on_enter_html_text(context), - Name::Image => on_enter_image(context), - Name::Link => on_enter_link(context), - Name::ListItemMarker => on_enter_list_item_marker(context), - Name::ListOrdered | Name::ListUnordered => on_enter_list(context), - Name::Paragraph => on_enter_paragraph(context), - Name::Resource => 
on_enter_resource(context), - Name::ResourceDestinationString => on_enter_resource_destination_string(context), - Name::Strong => on_enter_strong(context), - _ => {} - } -} - -/// Handle [`Exit`][Kind::Exit]. -fn exit(context: &mut CompileContext) { - match context.events[context.index].name { - Name::CodeFencedFenceMeta - | Name::MathFlowFenceMeta - | Name::MdxJsxTextTag - | Name::MdxTextExpression - | Name::Resource => { - on_exit_drop(context); - } - Name::MdxEsm | Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context), - Name::CharacterEscapeValue | Name::CodeTextData | Name::Data | Name::MathTextData => { - on_exit_data(context); - } - Name::AutolinkEmail => on_exit_autolink_email(context), - Name::AutolinkProtocol => on_exit_autolink_protocol(context), - Name::BlankLineEnding => on_exit_blank_line_ending(context), - Name::BlockQuote => on_exit_block_quote(context), - Name::CharacterReferenceMarker => on_exit_character_reference_marker(context), - Name::CharacterReferenceMarkerNumeric => { - on_exit_character_reference_marker_numeric(context); - } - Name::CharacterReferenceMarkerHexadecimal => { - on_exit_character_reference_marker_hexadecimal(context); - } - Name::CharacterReferenceValue => on_exit_character_reference_value(context), - Name::CodeFenced | Name::CodeIndented | Name::MathFlow => on_exit_raw_flow(context), - Name::CodeFencedFence | Name::MathFlowFence => on_exit_raw_flow_fence(context), - Name::CodeFencedFenceInfo => on_exit_raw_flow_fence_info(context), - Name::CodeFlowChunk | Name::MathFlowChunk => on_exit_raw_flow_chunk(context), - Name::CodeText | Name::MathText => on_exit_raw_text(context), - Name::Definition => on_exit_definition(context), - Name::DefinitionDestinationString => on_exit_definition_destination_string(context), - Name::DefinitionLabelString => on_exit_definition_label_string(context), - Name::DefinitionTitleString => on_exit_definition_title_string(context), - Name::Emphasis => on_exit_emphasis(context), 
- Name::Frontmatter => on_exit_frontmatter(context), - Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context), - Name::GfmAutolinkLiteralMailto => on_exit_gfm_autolink_literal_mailto(context), - Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context), - Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context), - Name::GfmAutolinkLiteralXmpp => on_exit_gfm_autolink_literal_xmpp(context), - Name::GfmFootnoteCall => on_exit_gfm_footnote_call(context), - Name::GfmFootnoteDefinitionLabelString => { - on_exit_gfm_footnote_definition_label_string(context); - } - Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context), - Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context), - Name::GfmStrikethrough => on_exit_gfm_strikethrough(context), - Name::GfmTable => on_exit_gfm_table(context), - Name::GfmTableBody => on_exit_gfm_table_body(context), - Name::GfmTableCell => on_exit_gfm_table_cell(context), - Name::GfmTableHead => on_exit_gfm_table_head(context), - Name::GfmTableRow => on_exit_gfm_table_row(context), - Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context), - Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context), - Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context), - Name::HeadingAtx => on_exit_heading_atx(context), - Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context), - Name::HeadingAtxText => on_exit_heading_atx_text(context), - Name::HeadingSetextText => on_exit_heading_setext_text(context), - Name::HeadingSetextUnderlineSequence => on_exit_heading_setext_underline_sequence(context), - Name::HtmlFlow | Name::HtmlText => on_exit_html(context), - Name::HtmlFlowData | Name::HtmlTextData => on_exit_html_data(context), - Name::Image | Name::Link => on_exit_media(context), - Name::Label => on_exit_label(context), - Name::LabelText => on_exit_label_text(context), - 
Name::LineEnding => on_exit_line_ending(context), - Name::ListOrdered | Name::ListUnordered => on_exit_list(context), - Name::ListItem => on_exit_list_item(context), - Name::ListItemValue => on_exit_list_item_value(context), - Name::Paragraph => on_exit_paragraph(context), - Name::ReferenceString => on_exit_reference_string(context), - Name::ResourceDestinationString => on_exit_resource_destination_string(context), - Name::ResourceTitleString => on_exit_resource_title_string(context), - Name::Strong => on_exit_strong(context), - Name::ThematicBreak => on_exit_thematic_break(context), - _ => {} - } -} - -/// Handle [`Enter`][Kind::Enter]:`*`. -/// -/// Buffers data. -fn on_enter_buffer(context: &mut CompileContext) { - context.buffer(); -} - -/// Handle [`Enter`][Kind::Enter]:[`BlockQuote`][Name::BlockQuote]. -fn on_enter_block_quote(context: &mut CompileContext) { - context.tight_stack.push(false); - context.line_ending_if_needed(); - context.push("
"); -} - -/// Handle [`Enter`][Kind::Enter]:[`CodeIndented`][Name::CodeIndented]. -fn on_enter_code_indented(context: &mut CompileContext) { - context.raw_flow_seen_data = Some(false); - context.line_ending_if_needed(); - context.push("
");
-}
-
-/// Handle [`Enter`][Kind::Enter]:{[`CodeFenced`][Name::CodeFenced],[`MathFlow`][Name::MathFlow]}.
-fn on_enter_raw_flow(context: &mut CompileContext) {
-    context.raw_flow_seen_data = Some(false);
-    context.line_ending_if_needed();
-    // Note that no `>` is used, which is added later (due to info)
-    context.push("
");
-    }
-    context.buffer();
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`Definition`][Name::Definition].
-fn on_enter_definition(context: &mut CompileContext) {
-    context.buffer();
-    context.media_stack.push(Media {
-        image: false,
-        label: None,
-        label_id: None,
-        reference_id: None,
-        destination: None,
-        title: None,
-    });
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`DefinitionDestinationString`][Name::DefinitionDestinationString].
-fn on_enter_definition_destination_string(context: &mut CompileContext) {
-    context.buffer();
-    context.encode_html = false;
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`Emphasis`][Name::Emphasis].
-fn on_enter_emphasis(context: &mut CompileContext) {
-    if !context.image_alt_inside {
-        context.push("");
-    }
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`Frontmatter`][Name::Frontmatter].
-fn on_enter_frontmatter(context: &mut CompileContext) {
-    context.buffer();
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition].
-fn on_enter_gfm_footnote_definition(context: &mut CompileContext) {
-    context.tight_stack.push(false);
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmFootnoteCall`][Name::GfmFootnoteCall].
-fn on_enter_gfm_footnote_call(context: &mut CompileContext) {
-    context.media_stack.push(Media {
-        image: false,
-        label_id: None,
-        label: None,
-        reference_id: None,
-        destination: None,
-        title: None,
-    });
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmStrikethrough`][Name::GfmStrikethrough].
-fn on_enter_gfm_strikethrough(context: &mut CompileContext) {
-    if !context.image_alt_inside {
-        context.push("");
-    }
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmTable`][Name::GfmTable].
-fn on_enter_gfm_table(context: &mut CompileContext) {
-    // Find the alignment.
-    let mut index = context.index;
-    let mut in_delimiter_row = false;
-    let mut align = vec![];
-
-    while index < context.events.len() {
-        let event = &context.events[index];
-
-        if in_delimiter_row {
-            if event.kind == Kind::Enter {
-                // Start of alignment value: set a new column.
-                if event.name == Name::GfmTableDelimiterCellValue {
-                    align.push(
-                        if context.events[index + 1].name == Name::GfmTableDelimiterMarker {
-                            GfmTableAlign::Left
-                        } else {
-                            GfmTableAlign::None
-                        },
-                    );
-                }
-            } else {
-                // End of alignment value: change the column.
-                if event.name == Name::GfmTableDelimiterCellValue {
-                    if context.events[index - 1].name == Name::GfmTableDelimiterMarker {
-                        let align_index = align.len() - 1;
-                        align[align_index] = if align[align_index] == GfmTableAlign::Left {
-                            GfmTableAlign::Center
-                        } else {
-                            GfmTableAlign::Right
-                        }
-                    }
-                }
-                // Done!
-                else if event.name == Name::GfmTableDelimiterRow {
-                    break;
-                }
-            }
-        } else if event.kind == Kind::Enter && event.name == Name::GfmTableDelimiterRow {
-            in_delimiter_row = true;
-        }
-
-        index += 1;
-    }
-
-    // Generate.
-    context.gfm_table_align = Some(align);
-    context.line_ending_if_needed();
-    context.push("");
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmTableBody`][Name::GfmTableBody].
-fn on_enter_gfm_table_body(context: &mut CompileContext) {
-    context.push("");
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmTableCell`][Name::GfmTableCell].
-fn on_enter_gfm_table_cell(context: &mut CompileContext) {
-    let column = context.gfm_table_column;
-    let align = context.gfm_table_align.as_ref().unwrap();
-
-    if column >= align.len() {
-        // Capture cell to ignore it.
-        context.buffer();
-    } else {
-        let value = align[column];
-        context.line_ending_if_needed();
-
-        if context.gfm_table_in_head {
-            context.push(" context.push(" align=\"left\""),
-            GfmTableAlign::Right => context.push(" align=\"right\""),
-            GfmTableAlign::Center => context.push(" align=\"center\""),
-            GfmTableAlign::None => {}
-        }
-
-        context.push(">");
-    }
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmTableHead`][Name::GfmTableHead].
-fn on_enter_gfm_table_head(context: &mut CompileContext) {
-    context.line_ending_if_needed();
-    context.push("");
-    context.gfm_table_in_head = true;
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmTableRow`][Name::GfmTableRow].
-fn on_enter_gfm_table_row(context: &mut CompileContext) {
-    context.line_ending_if_needed();
-    context.push("");
-}
-
-/// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
-fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) {
-    if !context.image_alt_inside {
-        context.push(" | -␊
-                //      ^
-                //   |   a
-                // ```
-                let mut at_prefix = false;
-                // Blank line directly after item, which is just a prefix.
-                //
-                // ```markdown
-                // > | -␊
-                //      ^
-                //   | - a
-                // ```
-                let mut at_empty_list_item = false;
-                // Blank line at block quote prefix:
-                //
-                // ```markdown
-                // > | * >␊
-                //        ^
-                //   | * a
-                // ```
-                let mut at_empty_block_quote = false;
-
-                if balance == 1 {
-                    let mut before = index - 2;
-
-                    if events[before].name == Name::ListItem {
-                        before -= 1;
-
-                        if events[before].name == Name::SpaceOrTab {
-                            before -= 2;
-                        }
-
-                        if events[before].name == Name::BlockQuote
-                            && events[before - 1].name == Name::BlockQuotePrefix
-                        {
-                            at_empty_block_quote = true;
-                        } else if events[before].name == Name::ListItemPrefix {
-                            at_empty_list_item = true;
-                        }
-                    }
-                } else {
-                    let mut before = index - 2;
-
-                    if events[before].name == Name::SpaceOrTab {
-                        before -= 2;
-                    }
-
-                    if events[before].name == Name::ListItemPrefix {
-                        at_prefix = true;
-                    }
-                }
-
-                if !at_prefix && !at_empty_list_item && !at_empty_block_quote {
-                    loose = true;
-                    break;
-                }
-            }
-
-            // Done.
-            if balance == 0 && event.name == *name {
-                break;
-            }
-        }
-
-        index += 1;
-    }
-
-    context.tight_stack.push(!loose);
-    context.line_ending_if_needed();
-    // Note: no `>`.
-    context.push(if *name == Name::ListOrdered {
-        "");
-    }
-
-    context.line_ending_if_needed();
-
-    context.push("
  • "); - context.list_expect_first_marker = Some(false); -} - -/// Handle [`Enter`][Kind::Enter]:[`Paragraph`][Name::Paragraph]. -fn on_enter_paragraph(context: &mut CompileContext) { - let tight = context.tight_stack.last().unwrap_or(&false); - - if !tight { - context.line_ending_if_needed(); - context.push("

    "); - } -} - -/// Handle [`Enter`][Kind::Enter]:[`Resource`][Name::Resource]. -fn on_enter_resource(context: &mut CompileContext) { - context.buffer(); // We can have line endings in the resource, ignore them. - context.media_stack.last_mut().unwrap().destination = Some("".to_string()); -} - -/// Handle [`Enter`][Kind::Enter]:[`ResourceDestinationString`][Name::ResourceDestinationString]. -fn on_enter_resource_destination_string(context: &mut CompileContext) { - context.buffer(); - // Ignore encoding the result, as we’ll first percent encode the url and - // encode manually after. - context.encode_html = false; -} - -/// Handle [`Enter`][Kind::Enter]:[`Strong`][Name::Strong]. -fn on_enter_strong(context: &mut CompileContext) { - if !context.image_alt_inside { - context.push(""); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`AutolinkEmail`][Name::AutolinkEmail]. -fn on_exit_autolink_email(context: &mut CompileContext) { - generate_autolink( - context, - Some("mailto:"), - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - false, - ); -} - -/// Handle [`Exit`][Kind::Exit]:[`AutolinkProtocol`][Name::AutolinkProtocol]. -fn on_exit_autolink_protocol(context: &mut CompileContext) { - generate_autolink( - context, - None, - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - false, - ); -} - -/// Handle [`Exit`][Kind::Exit]:{[`HardBreakEscape`][Name::HardBreakEscape],[`HardBreakTrailing`][Name::HardBreakTrailing]}. -fn on_exit_break(context: &mut CompileContext) { - if !context.image_alt_inside { - context.push("
    "); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`BlankLineEnding`][Name::BlankLineEnding]. -fn on_exit_blank_line_ending(context: &mut CompileContext) { - if context.index == context.events.len() - 1 { - context.line_ending_if_needed(); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`BlockQuote`][Name::BlockQuote]. -fn on_exit_block_quote(context: &mut CompileContext) { - context.tight_stack.pop(); - context.line_ending_if_needed(); - context.slurp_one_line_ending = false; - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceMarker`][Name::CharacterReferenceMarker]. -fn on_exit_character_reference_marker(context: &mut CompileContext) { - context.character_reference_marker = Some(b'&'); -} - -/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceMarkerHexadecimal`][Name::CharacterReferenceMarkerHexadecimal]. -fn on_exit_character_reference_marker_hexadecimal(context: &mut CompileContext) { - context.character_reference_marker = Some(b'x'); -} - -/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceMarkerNumeric`][Name::CharacterReferenceMarkerNumeric]. -fn on_exit_character_reference_marker_numeric(context: &mut CompileContext) { - context.character_reference_marker = Some(b'#'); -} - -/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceValue`][Name::CharacterReferenceValue]. 
-fn on_exit_character_reference_value(context: &mut CompileContext) { - let marker = context - .character_reference_marker - .take() - .expect("expected `character_reference_kind` to be set"); - let slice = Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ); - let value = slice.as_str(); - - let value = match marker { - b'#' => decode_numeric(value, 10), - b'x' => decode_numeric(value, 16), - b'&' => decode_named(value), - _ => panic!("impossible"), - }; - - context.push(&encode(&value, context.encode_html)); -} - -/// Handle [`Exit`][Kind::Exit]:{[`CodeFlowChunk`][Name::CodeFlowChunk],[`MathFlowChunk`][Name::MathFlowChunk]}. -fn on_exit_raw_flow_chunk(context: &mut CompileContext) { - context.raw_flow_seen_data = Some(true); - context.push(&encode( - &Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - // Must serialize to get virtual spaces. - .serialize(), - context.encode_html, - )); -} - -/// Handle [`Exit`][Kind::Exit]:{[`CodeFencedFence`][Name::CodeFencedFence],[`MathFlowFence`][Name::MathFlowFence]}. -fn on_exit_raw_flow_fence(context: &mut CompileContext) { - let count = if let Some(count) = context.raw_flow_fences_count { - count - } else { - 0 - }; - - if count == 0 { - context.push(">"); - context.slurp_one_line_ending = true; - } - - context.raw_flow_fences_count = Some(count + 1); -} - -/// Handle [`Exit`][Kind::Exit]:[`CodeFencedFenceInfo`][Name::CodeFencedFenceInfo]. -/// -/// Note: math (flow) does not support `info`. -fn on_exit_raw_flow_fence_info(context: &mut CompileContext) { - let value = context.resume(); - context.push(" class=\"language-"); - context.push(&value); - context.push("\""); -} - -/// Handle [`Exit`][Kind::Exit]:{[`CodeFenced`][Name::CodeFenced],[`CodeIndented`][Name::CodeIndented],[`MathFlow`][Name::MathFlow]}. 
-fn on_exit_raw_flow(context: &mut CompileContext) { - // One special case is if we are inside a container, and the raw (flow) was - // not closed (meaning it runs to the end). - // In that case, the following line ending, is considered *outside* the - // fenced code and block quote by micromark, but CM wants to treat that - // ending as part of the code. - if let Some(count) = context.raw_flow_fences_count { - // No closing fence. - if count == 1 - // In a container. - && !context.tight_stack.is_empty() - // Empty (as the closing is right at the opening fence) - && !matches!(context.events[context.index - 1].name, Name::CodeFencedFence | Name::MathFlowFence) - { - context.line_ending(); - } - } - - // But in most cases, it’s simpler: when we’ve seen some data, emit an extra - // line ending when needed. - if context - .raw_flow_seen_data - .take() - .expect("`raw_flow_seen_data` must be defined") - { - context.line_ending_if_needed(); - } - - context.push(""); - - if let Some(count) = context.raw_flow_fences_count.take() { - if count < 2 { - context.line_ending_if_needed(); - } - } - - context.slurp_one_line_ending = false; -} - -/// Handle [`Exit`][Kind::Exit]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}. -fn on_exit_raw_text(context: &mut CompileContext) { - let result = context.resume(); - let mut bytes = result.as_bytes().to_vec(); - - // If we are in a GFM table, we need to decode escaped pipes. - // This is a rather weird GFM feature. 
- if context.gfm_table_align.is_some() { - let mut index = 0; - let mut len = bytes.len(); - - while index < len { - if index + 1 < len && bytes[index] == b'\\' && bytes[index + 1] == b'|' { - bytes.remove(index); - len -= 1; - } - - index += 1; - } - } - - let mut trim = false; - let mut index = 0; - let mut end = bytes.len(); - - if end > 2 && bytes[index] == b' ' && bytes[end - 1] == b' ' { - index += 1; - end -= 1; - while index < end && !trim { - if bytes[index] != b' ' { - trim = true; - break; - } - index += 1; - } - } - - if trim { - bytes.remove(0); - bytes.pop(); - } - - context.raw_text_inside = false; - context.push(str::from_utf8(&bytes).unwrap()); - - if !context.image_alt_inside { - context.push(""); - } -} - -/// Handle [`Exit`][Kind::Exit]:*. -/// -/// Resumes, and ignores what was resumed. -fn on_exit_drop(context: &mut CompileContext) { - context.resume(); -} - -/// Handle [`Exit`][Kind::Exit]:*. -/// -/// Resumes, ignores what was resumed, and slurps the following line ending. -fn on_exit_drop_slurp(context: &mut CompileContext) { - context.resume(); - context.slurp_one_line_ending = true; -} - -/// Handle [`Exit`][Kind::Exit]:{[`CodeTextData`][Name::CodeTextData],[`Data`][Name::Data],[`CharacterEscapeValue`][Name::CharacterEscapeValue]}. -fn on_exit_data(context: &mut CompileContext) { - context.push(&encode( - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - context.encode_html, - )); -} - -/// Handle [`Exit`][Kind::Exit]:[`Definition`][Name::Definition]. 
-fn on_exit_definition(context: &mut CompileContext) { - context.resume(); - let media = context.media_stack.pop().unwrap(); - let indices = media.reference_id.unwrap(); - let id = - normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()); - - context.definitions.push(Definition { - id, - destination: media.destination, - title: media.title, - }); -} - -/// Handle [`Exit`][Kind::Exit]:[`DefinitionDestinationString`][Name::DefinitionDestinationString]. -fn on_exit_definition_destination_string(context: &mut CompileContext) { - let buf = context.resume(); - context.media_stack.last_mut().unwrap().destination = Some(buf); - context.encode_html = true; -} - -/// Handle [`Exit`][Kind::Exit]:[`DefinitionLabelString`][Name::DefinitionLabelString]. -fn on_exit_definition_label_string(context: &mut CompileContext) { - // Discard label, use the source content instead. - context.resume(); - context.media_stack.last_mut().unwrap().reference_id = - Some(Position::from_exit_event(context.events, context.index).to_indices()); -} - -/// Handle [`Exit`][Kind::Exit]:[`DefinitionTitleString`][Name::DefinitionTitleString]. -fn on_exit_definition_title_string(context: &mut CompileContext) { - let buf = context.resume(); - context.media_stack.last_mut().unwrap().title = Some(buf); -} - -/// Handle [`Exit`][Kind::Exit]:[`Emphasis`][Name::Emphasis]. -fn on_exit_emphasis(context: &mut CompileContext) { - if !context.image_alt_inside { - context.push(""); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`Frontmatter`][Name::Frontmatter]. -fn on_exit_frontmatter(context: &mut CompileContext) { - context.resume(); - context.slurp_one_line_ending = true; -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail]. 
-fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) { - generate_autolink( - context, - Some("mailto:"), - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - true, - ); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralMailto`][Name::GfmAutolinkLiteralMailto]. -fn on_exit_gfm_autolink_literal_mailto(context: &mut CompileContext) { - generate_autolink( - context, - None, - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - true, - ); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralProtocol`][Name::GfmAutolinkLiteralProtocol]. -fn on_exit_gfm_autolink_literal_protocol(context: &mut CompileContext) { - generate_autolink( - context, - None, - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - true, - ); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralWww`][Name::GfmAutolinkLiteralWww]. -fn on_exit_gfm_autolink_literal_www(context: &mut CompileContext) { - generate_autolink( - context, - Some("http://"), - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - true, - ); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralXmpp`][Name::GfmAutolinkLiteralXmpp]. -fn on_exit_gfm_autolink_literal_xmpp(context: &mut CompileContext) { - generate_autolink( - context, - None, - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - true, - ); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteCall`][Name::GfmFootnoteCall]. 
-fn on_exit_gfm_footnote_call(context: &mut CompileContext) { - let indices = context.media_stack.pop().unwrap().label_id.unwrap(); - let id = - normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()); - let safe_id = sanitize(&id.to_lowercase()); - let mut call_index = 0; - - // See if this has been called before. - while call_index < context.gfm_footnote_definition_calls.len() { - if context.gfm_footnote_definition_calls[call_index].0 == id { - break; - } - call_index += 1; - } - - // New. - if call_index == context.gfm_footnote_definition_calls.len() { - context.gfm_footnote_definition_calls.push((id, 0)); - } - - // Increment. - context.gfm_footnote_definition_calls[call_index].1 += 1; - - // No call is output in an image alt, though the definition and - // backreferences are generated as if it was the case. - if context.image_alt_inside { - return; - } - - context.push(" 1 { - context.push("-"); - context.push( - &context.gfm_footnote_definition_calls[call_index] - .1 - .to_string(), - ); - } - context.push("\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">"); - - context.push(&(call_index + 1).to_string()); - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString]. -fn on_exit_gfm_footnote_definition_label_string(context: &mut CompileContext) { - context - .gfm_footnote_definition_stack - .push(Position::from_exit_event(context.events, context.index).to_indices()); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix]. -fn on_exit_gfm_footnote_definition_prefix(context: &mut CompileContext) { - // Drop the prefix. - context.resume(); - // Capture everything until end of definition. - context.buffer(); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition]. 
-fn on_exit_gfm_footnote_definition(context: &mut CompileContext) { - let value = context.resume(); - let indices = context.gfm_footnote_definition_stack.pop().unwrap(); - context.tight_stack.pop(); - context.gfm_footnote_definitions.push(( - normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()), - value, - )); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmStrikethrough`][Name::GfmStrikethrough]. -fn on_exit_gfm_strikethrough(context: &mut CompileContext) { - if !context.image_alt_inside { - context.push(""); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmTable`][Name::GfmTable]. -fn on_exit_gfm_table(context: &mut CompileContext) { - context.gfm_table_align = None; - context.line_ending_if_needed(); - context.push("

  • "); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmTableBody`][Name::GfmTableBody]. -fn on_exit_gfm_table_body(context: &mut CompileContext) { - context.line_ending_if_needed(); - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmTableCell`][Name::GfmTableCell]. -fn on_exit_gfm_table_cell(context: &mut CompileContext) { - let align = context.gfm_table_align.as_ref().unwrap(); - - if context.gfm_table_column < align.len() { - if context.gfm_table_in_head { - context.push(""); - } else { - context.push(""); - } - } else { - // Stop capturing. - context.resume(); - } - - context.gfm_table_column += 1; -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmTableHead`][Name::GfmTableHead]. -fn on_exit_gfm_table_head(context: &mut CompileContext) { - context.gfm_table_in_head = false; - context.line_ending_if_needed(); - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmTableRow`][Name::GfmTableRow]. -fn on_exit_gfm_table_row(context: &mut CompileContext) { - let mut column = context.gfm_table_column; - let len = context.gfm_table_align.as_ref().unwrap().len(); - - // Add “phantom” cells, for body rows that are shorter than the delimiter - // row (which is equal to the head row). - while column < len { - on_enter_gfm_table_cell(context); - on_exit_gfm_table_cell(context); - column += 1; - } - - context.gfm_table_column = 0; - context.line_ending_if_needed(); - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. -fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) { - if !context.image_alt_inside { - context.push("/>"); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked]. -fn on_exit_gfm_task_list_item_value_checked(context: &mut CompileContext) { - if !context.image_alt_inside { - context.push("checked=\"\" "); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx]. 
-fn on_exit_heading_atx(context: &mut CompileContext) { - let rank = context - .heading_atx_rank - .take() - .expect("`heading_atx_rank` must be set in headings"); - - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:[`HeadingAtxSequence`][Name::HeadingAtxSequence]. -fn on_exit_heading_atx_sequence(context: &mut CompileContext) { - // First fence we see. - if context.heading_atx_rank.is_none() { - let rank = Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .len(); - context.line_ending_if_needed(); - context.heading_atx_rank = Some(rank); - context.push(""); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`HeadingAtxText`][Name::HeadingAtxText]. -fn on_exit_heading_atx_text(context: &mut CompileContext) { - let value = context.resume(); - context.push(&value); -} - -/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextText`][Name::HeadingSetextText]. -fn on_exit_heading_setext_text(context: &mut CompileContext) { - let buf = context.resume(); - context.heading_setext_buffer = Some(buf); - context.slurp_one_line_ending = true; -} - -/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence]. -fn on_exit_heading_setext_underline_sequence(context: &mut CompileContext) { - let text = context - .heading_setext_buffer - .take() - .expect("`heading_atx_rank` must be set in headings"); - let head = Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .head(); - let rank = if head == Some(b'-') { "2" } else { "1" }; - - context.line_ending_if_needed(); - context.push(""); - context.push(&text); - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlow`][Name::HtmlFlow],[`HtmlText`][Name::HtmlText]}. -fn on_exit_html(context: &mut CompileContext) { - context.encode_html = true; -} - -/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlowData`][Name::HtmlFlowData],[`HtmlTextData`][Name::HtmlTextData]}. 
-fn on_exit_html_data(context: &mut CompileContext) { - let slice = Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ); - let value = slice.as_str(); - - let encoded = if context.options.gfm_tagfilter && context.options.allow_dangerous_html { - encode(&gfm_tagfilter(value), context.encode_html) - } else { - encode(value, context.encode_html) - }; - - context.push(&encoded); -} - -/// Handle [`Exit`][Kind::Exit]:[`Label`][Name::Label]. -fn on_exit_label(context: &mut CompileContext) { - let buf = context.resume(); - context.media_stack.last_mut().unwrap().label = Some(buf); -} - -/// Handle [`Exit`][Kind::Exit]:[`LabelText`][Name::LabelText]. -fn on_exit_label_text(context: &mut CompileContext) { - context.media_stack.last_mut().unwrap().label_id = - Some(Position::from_exit_event(context.events, context.index).to_indices()); -} - -/// Handle [`Exit`][Kind::Exit]:[`LineEnding`][Name::LineEnding]. -fn on_exit_line_ending(context: &mut CompileContext) { - if context.raw_text_inside { - context.push(" "); - } else if context.slurp_one_line_ending - // Ignore line endings after definitions. - || (context.index > 1 - && (context.events[context.index - 2].name == Name::Definition - || context.events[context.index - 2].name == Name::GfmFootnoteDefinition)) - { - context.slurp_one_line_ending = false; - } else { - context.push(&encode( - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .as_str(), - context.encode_html, - )); - } -} - -/// Handle [`Exit`][Kind::Exit]:{[`ListOrdered`][Name::ListOrdered],[`ListUnordered`][Name::ListUnordered]}. -fn on_exit_list(context: &mut CompileContext) { - context.tight_stack.pop(); - context.line_ending(); - context.push(if context.events[context.index].name == Name::ListOrdered { - "" - } else { - "" - }); -} - -/// Handle [`Exit`][Kind::Exit]:[`ListItem`][Name::ListItem]. 
-fn on_exit_list_item(context: &mut CompileContext) { - let tight = context.tight_stack.last().unwrap_or(&false); - let before_item = skip::opt_back( - context.events, - context.index - 1, - &[ - Name::BlankLineEnding, - Name::BlockQuotePrefix, - Name::LineEnding, - Name::SpaceOrTab, - // Also ignore things that don’t contribute to the document. - Name::Definition, - Name::GfmFootnoteDefinition, - ], - ); - let previous = &context.events[before_item]; - let tight_paragraph = *tight && previous.name == Name::Paragraph; - let empty_item = previous.name == Name::ListItemPrefix; - - context.slurp_one_line_ending = false; - - if !tight_paragraph && !empty_item { - context.line_ending_if_needed(); - } - - context.push(""); -} - -/// Handle [`Exit`][Kind::Exit]:[`ListItemValue`][Name::ListItemValue]. -fn on_exit_list_item_value(context: &mut CompileContext) { - if context.list_expect_first_marker.unwrap() { - let slice = Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ); - let value = slice.as_str().parse::().ok().unwrap(); - - if value != 1 { - context.push(" start=\""); - context.push(&value.to_string()); - context.push("\""); - } - } -} - -/// Handle [`Exit`][Kind::Exit]:{[`Image`][Name::Image],[`Link`][Name::Link]}. -fn on_exit_media(context: &mut CompileContext) { - let mut is_in_image = false; - let mut index = 0; - - // Skip current. 
- let end = context.media_stack.len() - 1; - while index < end { - if context.media_stack[index].image { - is_in_image = true; - break; - } - index += 1; - } - - context.image_alt_inside = is_in_image; - - let media = context.media_stack.pop().unwrap(); - let label = media.label.unwrap(); - let id = media.reference_id.or(media.label_id).map(|indices| { - normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()) - }); - - let definition_index = if media.destination.is_none() { - id.and_then(|id| { - let mut index = 0; - - while index < context.definitions.len() { - if context.definitions[index].id == id { - return Some(index); - } - - index += 1; - } - - None - }) - } else { - None - }; - - if !is_in_image { - if media.image { - context.push("\"");"); - } - - if !media.image { - context.push(&label); - - if !is_in_image { - context.push(""); - } - } -} - -/// Handle [`Exit`][Kind::Exit]:[`Paragraph`][Name::Paragraph]. -fn on_exit_paragraph(context: &mut CompileContext) { - let tight = context.tight_stack.last().unwrap_or(&false); - - if *tight { - context.slurp_one_line_ending = true; - } else { - context.push("

    "); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`ReferenceString`][Name::ReferenceString]. -fn on_exit_reference_string(context: &mut CompileContext) { - // Drop stuff. - context.resume(); - - context.media_stack.last_mut().unwrap().reference_id = - Some(Position::from_exit_event(context.events, context.index).to_indices()); -} - -/// Handle [`Exit`][Kind::Exit]:[`ResourceDestinationString`][Name::ResourceDestinationString]. -fn on_exit_resource_destination_string(context: &mut CompileContext) { - let buf = context.resume(); - context.media_stack.last_mut().unwrap().destination = Some(buf); - context.encode_html = true; -} - -/// Handle [`Exit`][Kind::Exit]:[`ResourceTitleString`][Name::ResourceTitleString]. -fn on_exit_resource_title_string(context: &mut CompileContext) { - let buf = context.resume(); - context.media_stack.last_mut().unwrap().title = Some(buf); -} - -/// Handle [`Exit`][Kind::Exit]:[`Strong`][Name::Strong]. -fn on_exit_strong(context: &mut CompileContext) { - if !context.image_alt_inside { - context.push("
    "); - } -} - -/// Handle [`Exit`][Kind::Exit]:[`ThematicBreak`][Name::ThematicBreak]. -fn on_exit_thematic_break(context: &mut CompileContext) { - context.line_ending_if_needed(); - context.push("
    "); -} - -/// Generate a footnote section. -fn generate_footnote_section(context: &mut CompileContext) { - context.line_ending_if_needed(); - context.push("
    <"); - if let Some(ref value) = context.options.gfm_footnote_label_tag_name { - context.push(&encode(value, context.encode_html)); - } else { - context.push("h2"); - } - context.push(" id=\"footnote-label\" "); - if let Some(ref value) = context.options.gfm_footnote_label_attributes { - context.push(value); - } else { - context.push("class=\"sr-only\""); - } - context.push(">"); - if let Some(ref value) = context.options.gfm_footnote_label { - context.push(&encode(value, context.encode_html)); - } else { - context.push("Footnotes"); - } - context.push(""); - context.line_ending(); - context.push("
      "); - - let mut index = 0; - while index < context.gfm_footnote_definition_calls.len() { - generate_footnote_item(context, index); - index += 1; - } - - context.line_ending(); - context.push("
    "); - context.line_ending(); - context.push("
    "); - context.line_ending(); -} - -/// Generate a footnote item from a call. -fn generate_footnote_item(context: &mut CompileContext, index: usize) { - let id = &context.gfm_footnote_definition_calls[index].0; - let safe_id = sanitize(&id.to_lowercase()); - - // Find definition: we’ll always find it. - let mut definition_index = 0; - while definition_index < context.gfm_footnote_definitions.len() { - if &context.gfm_footnote_definitions[definition_index].0 == id { - break; - } - definition_index += 1; - } - - debug_assert_ne!( - definition_index, - context.gfm_footnote_definitions.len(), - "expected definition" - ); - - context.line_ending(); - context.push("
  • "); - context.line_ending(); - - // Create one or more backreferences. - let mut reference_index = 0; - let mut backreferences = String::new(); - while reference_index < context.gfm_footnote_definition_calls[index].1 { - if reference_index != 0 { - backreferences.push(' '); - } - backreferences.push_str("↩"); - if reference_index != 0 { - backreferences.push_str(""); - backreferences.push_str(&(reference_index + 1).to_string()); - backreferences.push_str(""); - } - backreferences.push_str(""); - - reference_index += 1; - } - - let value = context.gfm_footnote_definitions[definition_index].1.clone(); - let bytes = value.as_bytes(); - let mut byte_index = bytes.len(); - // Move back past EOL. - while byte_index > 0 && matches!(bytes[byte_index - 1], b'\n' | b'\r') { - byte_index -= 1; - } - // Check if it ends in `

    `. - // This is a bit funky if someone wrote a safe paragraph by hand in - // there. - // But in all other cases, `<` and `>` would be encoded, so we can be - // sure that this is generated by our compiler. - if byte_index > 3 - && bytes[byte_index - 4] == b'<' - && bytes[byte_index - 3] == b'/' - && bytes[byte_index - 2] == b'p' - && bytes[byte_index - 1] == b'>' - { - let (before, after) = bytes.split_at(byte_index - 4); - let mut result = String::new(); - result.push_str(str::from_utf8(before).unwrap()); - result.push(' '); - result.push_str(&backreferences); - result.push_str(str::from_utf8(after).unwrap()); - context.push(&result); - } else { - context.push(&value); - context.line_ending_if_needed(); - context.push(&backreferences); - } - context.line_ending_if_needed(); - context.push("
  • "); -} - -/// Generate an autolink (used by unicode autolinks and GFM autolink literals). -fn generate_autolink( - context: &mut CompileContext, - protocol: Option<&str>, - value: &str, - is_gfm_literal: bool, -) { - let mut is_in_link = false; - let mut index = 0; - - while index < context.media_stack.len() { - if !context.media_stack[index].image { - is_in_link = true; - break; - } - index += 1; - } - - if !context.image_alt_inside && (!is_in_link || !is_gfm_literal) { - context.push(""); - } - - context.push(&encode(value, context.encode_html)); - - if !context.image_alt_inside && (!is_in_link || !is_gfm_literal) { - context.push(""); - } -} diff --git a/src/lib.rs b/src/lib.rs index 0b1a571..669660b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,26 +1,37 @@ //! Public API of micromark. //! -//! This module exposes [`micromark`][] (and [`micromark_with_options`][]). -//! `micromark` is a safe way to transform (untrusted?) markdown into HTML. -//! `micromark_with_options` allows you to configure how markdown is turned into -//! HTML, such as by allowing dangerous HTML when you trust it. +//! This module exposes primarily [`micromark`][]. +//! It also exposes [`micromark_with_options`][] and [`micromark_to_mdast`][]. +//! +//! * [`micromark`][] +//! — safe way to transform (untrusted?) markdown into HTML +//! * [`micromark_with_options`][] +//! — like `micromark` but lets you configure how markdown is turned into +//! HTML, such as allowing dangerous HTML or turning on/off +//! different constructs (GFM, MDX, and the like) +//! * [`micromark_to_mdast`][] +//! 
— like `micromark_with_options` but compiles to a syntax tree #![no_std] extern crate alloc; -mod compiler; mod construct; mod event; +pub mod mdast; mod parser; mod resolve; mod state; mod subtokenize; +mod to_html; +mod to_mdast; mod tokenizer; mod util; -use crate::compiler::compile; -use crate::parser::parse; use alloc::{boxed::Box, fmt, string::String}; +use mdast::Root; +use parser::parse; +use to_html::compile as to_html; +use to_mdast::compile as to_mdast; /// Type of line endings in markdown. #[derive(Clone, Debug, Default, Eq, PartialEq)] @@ -1154,5 +1165,23 @@ pub fn micromark(value: &str) -> String { /// ``` pub fn micromark_with_options(value: &str, options: &Options) -> Result { let (events, bytes) = parse(value, options)?; - Ok(compile(&events, bytes, options)) + Ok(to_html(&events, bytes, options)) +} + +/// Turn markdown into a syntax tree. +/// +/// ## Errors +/// +/// `to_mdast` never errors with normal markdown because markdown does not have +/// syntax errors, so feel free to `unwrap()`. +/// However, MDX does have syntax errors. +/// When MDX is turned on, there are several errors that can occur with how +/// JSX, expressions, or ESM are written. +/// +/// ## Examples +/// +/// To do. +pub fn micromark_to_mdast(value: &str, options: &Options) -> Result { + let (events, bytes) = parse(value, options)?; + Ok(to_mdast(&events, bytes, options)) } diff --git a/src/mdast.rs b/src/mdast.rs new file mode 100644 index 0000000..b60e891 --- /dev/null +++ b/src/mdast.rs @@ -0,0 +1,1047 @@ +//! [mdast][] syntax tree. +//! +//! [mdast]: https://github.com/syntax-tree/mdast + +// To do: example. +// To do: math. + +use alloc::{string::String, vec::Vec}; + +/// One place in a source file. +#[derive(Clone, Debug)] +pub struct Point { + /// 1-indexed integer representing a line in a source file. + pub line: usize, + /// 1-indexed integer representing a column in a source file. 
+ pub column: usize, + /// 0-indexed integer representing a character in a source file. + pub offset: usize, +} + +/// Location of a node in a source file. +#[derive(Clone, Debug)] +pub struct Position { + /// Represents the place of the first character of the parsed source region. + pub start: Point, + /// Represents the place of the first character after the parsed source + /// region, whether it exists or not. + pub end: Point, +} + +/// Explicitness of a reference. +#[derive(Clone, Debug)] +pub enum ReferenceKind { + /// The reference is implicit, its identifier inferred from its content. + Shortcut, + /// The reference is explicit, its identifier inferred from its content. + Collapsed, + /// The reference is explicit, its identifier explicitly set. + Full, +} + +/// Represents how phrasing content is aligned. +#[derive(Clone, Debug)] +pub enum AlignKind { + /// See the `left` value of the `text-align` CSS property. + Left, + /// See the `right` value of the `text-align` CSS property. + Right, + /// See the `center` value of the `text-align` CSS property. + Center, + /// Phrasing content is aligned as defined by the host environment. + None, +} + +/// Node type. +#[derive(Clone, Debug)] +pub enum Kind { + /// Root node. + Root, + /// Paragraph node. + Paragraph, + /// Heading node. + Heading, + /// Thematic break node. + ThematicBreak, + /// Block quote node. + BlockQuote, + /// List node. + List, + /// List item node. + ListItem, + /// Html node. + Html, + /// Code node. + Code, + /// Definition node. + Definition, + /// Text node. + Text, + /// Emphasis node. + Emphasis, + /// Strong node. + Strong, + /// Code (inline) node. + InlineCode, + /// Break node. + Break, + /// Link node. + Link, + /// Image node. + Image, + /// Link reference node. + LinkReference, + /// Image reference node. + ImageReference, + /// Footnote definition node. + FootnoteDefinition, + /// Footnote reference node. + FootnoteReference, + /// Table node. + Table, + /// Table row node. 
+ TableRow, + /// Table cell node. + TableCell, + /// Delete node. + Delete, + /// Yaml node. + Yaml, + /// Toml node. + Toml, + /// MDX: ESM node. + MdxjsEsm, + /// MDX: expression (flow). + MdxFlowExpression, + /// MDX: expression (phrasing). + MdxTextExpression, + /// MDX: JSX element (flow). + MdxJsxFlowElement, + /// MDX: JSX element (phrasing). + MdxJsxTextElement, + /// MDX: JSX attribute expression. + MdxJsxExpressionAttribute, + /// MDX: JSX attribute. + MdxJsxAttribute, + /// MDX: JSX attribute value expression. + MdxJsxAttributeValueExpression, +} + +/// Document content. +#[derive(Clone, Debug)] +pub enum DocumentContent { + /// Container content. + Container(ContainerContent), + /// Frontmatter content. + Frontmatter(FrontmatterContent), +} + +/// Container content. +#[derive(Clone, Debug)] +pub enum ContainerContent { + /// Block quote. + BlockQuote(BlockQuote), + /// Flow content. + Flow(FlowContent), + /// Footnote definition. + FootnoteDefinition(FootnoteDefinition), + /// MDX: JSX element (container). + JsxElement(MdxJsxFlowElement), + /// List. + List(List), +} + +/// Frontmatter content. +#[derive(Clone, Debug)] +pub enum FrontmatterContent { + /// MDX.js ESM. + Esm(MdxjsEsm), + /// Toml. + Toml(Toml), + /// Yaml. + Yaml(Yaml), +} + +/// Phrasing content. +#[derive(Clone, Debug)] +pub enum PhrasingContent { + /// Break. + Break(Break), + /// Code (phrasing). + Code(InlineCode), + /// Delete. + Delete(Delete), + /// Emphasis. + Emphasis(Emphasis), + /// MDX: expression (text). + Expression(MdxTextExpression), + /// Footnote reference. + FootnoteReference(FootnoteReference), + /// Html (phrasing). + Html(Html), + /// Image. + Image(Image), + /// Image reference. + ImageReference(ImageReference), + /// MDX: JSX element (text). + JsxElement(MdxJsxTextElement), + /// Link. + Link(Link), + /// Link reference. + LinkReference(LinkReference), + /// Strong. + Strong(Strong), + /// Text. + Text(Text), +} + +/// Flow content. 
+#[derive(Clone, Debug)] +pub enum FlowContent { + /// Code (flow). + Code(Code), + /// Content. + Content(ContentContent), + // MDX: expression (flow). + Expression(MdxFlowExpression), + /// Heading. + Heading(Heading), + /// Html (flow). + Html(Html), + /// Table. + Table(Table), + /// Thematic break. + ThematicBreak(ThematicBreak), +} + +/// Table content. +#[derive(Clone, Debug)] +pub enum TableContent { + /// Table row. + Row(TableRow), +} + +/// Row content. +#[derive(Clone, Debug)] +pub enum RowContent { + /// Table cell. + Cell(TableCell), +} + +/// List content. +#[derive(Clone, Debug)] +pub enum ListContent { + /// List item. + Item(ListItem), +} + +/// Content. +#[derive(Clone, Debug)] +pub enum ContentContent { + /// Definition. + Definition(Definition), + /// Paragraph. + Paragraph(Paragraph), +} + +/// MDX: attribute content. +#[derive(Clone, Debug)] +pub enum AttributeContent { + /// MDX: JSX attribute expression. + Expression(MdxJsxExpressionAttribute), + /// MDX: JSX attribute. + Property(MdxJsxAttribute), +} + +/// MDX: attribute value. +#[derive(Clone, Debug)] +pub enum AttributeValue { + /// Expression value. + Expression(MdxJsxAttributeValueExpression), + /// Static value. + Literal(String), +} + +/// Document. +/// +/// ```markdown +/// > | a +/// ^ +/// ``` +#[derive(Clone, Debug)] +pub struct Root { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::Root`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Paragraph. +/// +/// ```markdown +/// > | a +/// ^ +/// ``` +#[derive(Clone, Debug)] +pub struct Paragraph { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::Paragraph`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Heading. +/// +/// ```markdown +/// > | # a +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Heading { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::Heading`. + /// Content model. 
+ pub children: Vec, + /// Positional info. + pub position: Option, + // Extra. + /// Rank (between `1` and `6`, both including). + pub depth: u8, +} + +/// Thematic break. +/// +/// ```markdown +/// > | *** +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct ThematicBreak { + // Void. + /// Node type. + pub kind: Kind, // `Kind::ThematicBreak`. + /// Positional info. + pub position: Option, +} + +/// Block quote. +/// +/// ```markdown +/// > | > a +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct BlockQuote { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::BlockQuote`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// List. +/// +/// ```markdown +/// > | * a +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct List { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::List`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // Extra. + /// Ordered (`true`) or unordered (`false`). + pub ordered: bool, + /// Starting number of the list. + /// `None` when unordered. + pub start: Option, + /// One or more of its children are separated with a blank line from its + /// siblings (when `true`), or not (when `false`). + pub spread: bool, +} + +/// List item. +/// +/// ```markdown +/// > | * a +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct ListItem { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::ListItem`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // Extra. + /// The item contains two or more children separated by a blank line + /// (when `true`), or not (when `false`). + pub spread: bool, + /// GFM: whether the item is done (when `true`), not done (when `false`), + /// or indeterminate or not applicable (`None`). + pub checked: Option, +} + +/// Html (flow or phrasing). +/// +/// ```markdown +/// > | +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Html { + // Text. + /// Node type. 
+ pub kind: Kind, // `Kind::Html`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// Code (flow). +/// +/// ```markdown +/// > | ~~~ +/// ^^^ +/// > | a +/// ^^^ +/// > | ~~~ +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Code { + // Text. + /// Node type. + pub kind: Kind, // `Kind::Code`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, + // Extra. + /// The language of computer code being marked up. + pub lang: Option, + /// Custom info relating to the node. + pub meta: Option, +} + +/// Definition. +/// +/// ```markdown +/// > | [a]: b +/// ^^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Definition { + // Void. + /// Node type. + pub kind: Kind, // `Kind::Definition`. + /// Positional info. + pub position: Option, + // Resource. + /// URL to the referenced resource. + pub url: String, + /// Advisory info for the resource, such as something that would be + /// appropriate for a tooltip. + pub title: Option, + // Association. + /// Value that can match another node. + /// `identifier` is a source value: character escapes and character references + /// are *not* parsed. + /// Its value must be normalized. + pub identifier: String, + /// `label` is a string value: it works just like `title` on a link or a + /// `lang` on code: character escapes and character references are parsed. + /// + /// To normalize a value, collapse markdown whitespace (`[\t\n\r ]+`) to a + /// space, trim the optional initial and/or final space, and perform + /// case-folding. + pub label: Option, +} + +/// Text. +/// +/// ```markdown +/// > | a +/// ^ +/// ``` +#[derive(Clone, Debug)] +pub struct Text { + // Text. + /// Node type. + pub kind: Kind, // `Kind::Text`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// Emphasis. +/// +/// ```markdown +/// > | *a* +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Emphasis { + // Parent. 
+ /// Node type. + pub kind: Kind, // `Kind::Emphasis`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Strong. +/// +/// ```markdown +/// > | **a** +/// ^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Strong { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::Strong`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Code (phrasing). +/// +/// ```markdown +/// > | `a` +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct InlineCode { + // Text. + /// Node type. + pub kind: Kind, // `Kind::InlineCode`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// Break. +/// +/// ```markdown +/// > | a\ +/// ^ +/// | b +/// ``` +#[derive(Clone, Debug)] +pub struct Break { + // Void. + /// Node type. + pub kind: Kind, // `Kind::Break`. + /// Positional info. + pub position: Option, +} + +/// Link. +/// +/// ```markdown +/// > | [a](b) +/// ^^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Link { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::Link`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // Resource. + /// URL to the referenced resource. + pub url: String, + /// Advisory info for the resource, such as something that would be + /// appropriate for a tooltip. + pub title: Option, +} + +/// Image. +/// +/// ```markdown +/// > | ![a](b) +/// ^^^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Image { + // Void. + /// Node type. + pub kind: Kind, // `Kind::Image`. + /// Positional info. + pub position: Option, + // Alternative. + /// Equivalent content for environments that cannot represent the node as + /// intended. + pub alt: String, + // Resource. + /// URL to the referenced resource. + pub url: String, + /// Advisory info for the resource, such as something that would be + /// appropriate for a tooltip. + pub title: Option, +} + +/// Link reference. 
+/// +/// ```markdown +/// > | [a] +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct LinkReference { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::LinkReference`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // Reference. + /// Explicitness of a reference. + pub reference_kind: ReferenceKind, + // Association. + /// Value that can match another node. + /// `identifier` is a source value: character escapes and character references + /// are *not* parsed. + /// Its value must be normalized. + pub identifier: String, + /// `label` is a string value: it works just like `title` on a link or a + /// `lang` on code: character escapes and character references are parsed. + /// + /// To normalize a value, collapse markdown whitespace (`[\t\n\r ]+`) to a + /// space, trim the optional initial and/or final space, and perform + /// case-folding. + pub label: Option, +} + +/// Image reference. +/// +/// ```markdown +/// > | ![a] +/// ^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct ImageReference { + // Void. + /// Node type. + pub kind: Kind, // `Kind::ImageReference`. + /// Positional info. + pub position: Option, + // Alternative. + /// Equivalent content for environments that cannot represent the node as + /// intended. + pub alt: String, + // Reference. + /// Explicitness of a reference. + pub reference_kind: ReferenceKind, + // Association. + /// Value that can match another node. + /// `identifier` is a source value: character escapes and character references + /// are *not* parsed. + /// Its value must be normalized. + pub identifier: String, + /// `label` is a string value: it works just like `title` on a link or a + /// `lang` on code: character escapes and character references are parsed. + /// + /// To normalize a value, collapse markdown whitespace (`[\t\n\r ]+`) to a + /// space, trim the optional initial and/or final space, and perform + /// case-folding. 
+ pub label: Option, +} + +/// Footnote definition (GFM). +/// +/// ```markdown +/// > | [^a]: b +/// ^^^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct FootnoteDefinition { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::FootnoteDefinition`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // Association. + /// Value that can match another node. + /// `identifier` is a source value: character escapes and character references + /// are *not* parsed. + /// Its value must be normalized. + pub identifier: String, + /// `label` is a string value: it works just like `title` on a link or a + /// `lang` on code: character escapes and character references are parsed. + /// + /// To normalize a value, collapse markdown whitespace (`[\t\n\r ]+`) to a + /// space, trim the optional initial and/or final space, and perform + /// case-folding. + pub label: Option, +} + +/// Footnote reference (GFM). +/// +/// ```markdown +/// > | [^a] +/// ^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct FootnoteReference { + // Void. + /// Node type. + pub kind: Kind, // `Kind::FootnoteReference`. + /// Positional info. + pub position: Option, + // Association. + /// Value that can match another node. + /// `identifier` is a source value: character escapes and character references + /// are *not* parsed. + /// Its value must be normalized. + pub identifier: String, + /// `label` is a string value: it works just like `title` on a link or a + /// `lang` on code: character escapes and character references are parsed. + /// + /// To normalize a value, collapse markdown whitespace (`[\t\n\r ]+`) to a + /// space, trim the optional initial and/or final space, and perform + /// case-folding. + pub label: Option, +} + +/// Table (GFM). +/// +/// ```markdown +/// > | | a | +/// ^^^^^ +/// > | | - | +/// ^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Table { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::Table`. + /// Content model. 
+ pub children: Vec, + /// Positional info. + pub position: Option, + // Extra. + /// Represents how cells in columns are aligned. + pub align: Vec, +} + +/// Table row (GFM). +/// +/// ```markdown +/// > | | a | +/// ^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct TableRow { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::TableRow`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Table cell (GFM). +/// +/// ```markdown +/// > | | a | +/// ^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct TableCell { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::TableCell`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Delete (GFM). +/// +/// ```markdown +/// > | ~~a~~ +/// ^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Delete { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::Delete`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, +} + +/// Yaml (frontmatter). +/// +/// ```markdown +/// > | --- +/// ^^^ +/// > | a: b +/// ^^^^ +/// > | --- +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Yaml { + // Void. + /// Node type. + pub kind: Kind, // `Kind::Yaml`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// Toml (frontmatter). +/// +/// ```markdown +/// > | +++ +/// ^^^ +/// > | a: b +/// ^^^^ +/// > | +++ +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct Toml { + // Void. + /// Node type. + pub kind: Kind, // `Kind::Toml`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// MDX: ESM. +/// +/// ```markdown +/// > | import a from 'b' +/// ^^^^^^^^^^^^^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxjsEsm { + // Literal. + /// Node type. + pub kind: Kind, // `Kind::MdxjsEsm`. + /// Content model. + pub value: String, + /// Positional info. 
+ pub position: Option, +} + +/// MDX: expression (flow). +/// +/// ```markdown +/// > | {a} +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxFlowExpression { + // Literal. + /// Node type. + pub kind: Kind, // `Kind::MdxFlowExpression`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// MDX: expression (text). +/// +/// ```markdown +/// > | a {b} +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxTextExpression { + // Literal. + /// Node type. + pub kind: Kind, // `Kind::MdxTextExpression`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// MDX: JSX element (container). +/// +/// ```markdown +/// > | +/// ^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxJsxFlowElement { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::MdxJsxFlowElement`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // JSX element. + /// Name. + /// + /// Fragments have no name. + pub name: Option, + /// Attributes. + pub attributes: Vec, +} + +/// MDX: JSX element (text). +/// +/// ```markdown +/// > | . +/// ^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxJsxTextElement { + // Parent. + /// Node type. + pub kind: Kind, // `Kind::MdxJsxTextElement`. + /// Content model. + pub children: Vec, + /// Positional info. + pub position: Option, + // JSX element. + /// Name. + /// + /// Fragments have no name. + pub name: Option, + /// Attributes. + pub attributes: Vec, +} + +/// MDX: JSX attribute expression. +/// +/// ```markdown +/// > | +/// ^^^^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxJsxExpressionAttribute { + // Literal. + /// Node type. + pub kind: Kind, // `Kind::MdxJsxExpressionAttribute`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +/// MDX: JSX attribute. 
+/// +/// ```markdown +/// > | +/// ^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxJsxAttribute { + // Void. + /// Node type. + pub kind: Kind, // `Kind::MdxJsxAttribute`. + /// Positional info. + pub position: Option, + /// Key. + pub name: String, + /// Value. + pub value: Option, +} + +/// MDX: JSX attribute value expression. +/// +/// ```markdown +/// > | +/// ^^^ +/// ``` +#[derive(Clone, Debug)] +pub struct MdxJsxAttributeValueExpression { + // Literal. + /// Node type. + pub kind: Kind, // `Kind::MdxJsxAttributeValueExpression`. + /// Content model. + pub value: String, + /// Positional info. + pub position: Option, +} + +#[cfg(test)] +mod tests { + use super::*; + use alloc::{string::ToString, vec}; + + #[test] + fn test() { + let text = Text { + kind: Kind::Text, + value: "a".to_string(), + position: Some(Position { + start: Point { + line: 1, + column: 1, + offset: 0, + }, + end: Point { + line: 1, + column: 2, + offset: 1, + }, + }), + }; + + let paragraph = Paragraph { + kind: Kind::Paragraph, + children: vec![PhrasingContent::Text(text)], + position: Some(Position { + start: Point { + line: 1, + column: 1, + offset: 0, + }, + end: Point { + line: 1, + column: 2, + offset: 1, + }, + }), + }; + + assert_eq!(paragraph.children.len(), 1); + assert!(matches!(¶graph.children[0], PhrasingContent::Text(_))); + } +} diff --git a/src/to_html.rs b/src/to_html.rs new file mode 100644 index 0000000..43be6a7 --- /dev/null +++ b/src/to_html.rs @@ -0,0 +1,1922 @@ +//! Turn events into a string of HTML. 
+use crate::event::{Event, Kind, Name}; +use crate::util::{ + constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC}, + decode_character_reference::{decode_named, decode_numeric}, + encode::encode, + gfm_tagfilter::gfm_tagfilter, + normalize_identifier::normalize_identifier, + sanitize_uri::{sanitize, sanitize_with_protocols}, + skip, + slice::{Position, Slice}, +}; +use crate::{LineEnding, Options}; +use alloc::{ + format, + string::{String, ToString}, + vec, + vec::Vec, +}; +use core::str; + +/// Link, image, or footnote call. +/// Resource or reference. +/// Reused for temporary definitions as well, in the first pass. +#[derive(Debug)] +struct Media { + /// Whether this represents an image (`true`) or a link or definition + /// (`false`). + image: bool, + /// The text between the brackets (`x` in `![x]()` and `[x]()`). + /// + /// Not interpreted. + label_id: Option<(usize, usize)>, + /// The result of interpreting the text between the brackets + /// (`x` in `![x]()` and `[x]()`). + /// + /// When this is a link, it contains further text content and thus HTML + /// tags. + /// Otherwise, when an image, text content is also allowed, but resulting + /// tags are ignored. + label: Option, + /// The string between the explicit brackets of the reference (`y` in + /// `[x][y]`), as content. + /// + /// Not interpreted. + reference_id: Option<(usize, usize)>, + /// The destination (url). + /// + /// Interpreted string content. + destination: Option, + /// The destination (url). + /// + /// Interpreted string content. + title: Option, +} + +/// Representation of a definition. +#[derive(Debug)] +struct Definition { + /// Identifier. + id: String, + /// The destination (url). + /// + /// Interpreted string content. + destination: Option, + /// The title. + /// + /// Interpreted string content. + title: Option, +} + +/// GFM table: column alignment. +// To do: share with `mdast`. +#[derive(Debug, PartialEq, Eq, Copy, Clone)] +enum GfmTableAlign { + /// No alignment. 
+ /// + /// ```markdown + /// | | aaa | + /// > | | --- | + /// ^^^ + /// ``` + None, + /// Left alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | :-- | + /// ^^^ + /// ``` + Left, + /// Center alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | :-: | + /// ^^^ + /// ``` + Center, + /// Right alignment. + /// + /// ```markdown + /// | | aaa | + /// > | | --: | + /// ^^^ + /// ``` + Right, +} + +/// Context used to compile markdown. +#[allow(clippy::struct_excessive_bools)] +#[derive(Debug)] +struct CompileContext<'a> { + // Static info. + /// List of events. + pub events: &'a [Event], + /// List of bytes. + pub bytes: &'a [u8], + /// Configuration. + pub options: &'a Options, + // Fields used by handlers to track the things they need to track to + // compile markdown. + /// Rank of heading (atx). + pub heading_atx_rank: Option, + /// Buffer of heading (setext) text. + pub heading_setext_buffer: Option, + /// Whether raw (flow) (code (fenced), math (flow)) or code (indented) contains data. + pub raw_flow_seen_data: Option, + /// Number of raw (flow) fences. + pub raw_flow_fences_count: Option, + /// Whether we are in code (text). + pub raw_text_inside: bool, + /// Whether we are in image text. + pub image_alt_inside: bool, + /// Marker of character reference. + pub character_reference_marker: Option, + /// Whether we are expecting the first list item marker. + pub list_expect_first_marker: Option, + /// Stack of media (link, image). + pub media_stack: Vec, + /// Stack of containers. + pub tight_stack: Vec, + /// List of definitions. + pub definitions: Vec, + /// List of definitions. + pub gfm_footnote_definitions: Vec<(String, String)>, + pub gfm_footnote_definition_calls: Vec<(String, usize)>, + pub gfm_footnote_definition_stack: Vec<(usize, usize)>, + /// Whether we are in a GFM table head. + pub gfm_table_in_head: bool, + /// Current GFM table alignment. + pub gfm_table_align: Option>, + /// Current GFM table column. 
+ pub gfm_table_column: usize, + // Fields used to influance the current compilation. + /// Ignore the next line ending. + pub slurp_one_line_ending: bool, + /// Whether to encode HTML. + pub encode_html: bool, + // Configuration + /// Line ending to use. + pub line_ending_default: LineEnding, + // Intermediate results. + /// Stack of buffers. + pub buffers: Vec, + /// Current event index. + pub index: usize, +} + +impl<'a> CompileContext<'a> { + /// Create a new compile context. + pub fn new( + events: &'a [Event], + bytes: &'a [u8], + options: &'a Options, + line_ending: LineEnding, + ) -> CompileContext<'a> { + CompileContext { + events, + bytes, + heading_atx_rank: None, + heading_setext_buffer: None, + raw_flow_seen_data: None, + raw_flow_fences_count: None, + raw_text_inside: false, + character_reference_marker: None, + list_expect_first_marker: None, + media_stack: vec![], + definitions: vec![], + gfm_footnote_definitions: vec![], + gfm_footnote_definition_calls: vec![], + gfm_footnote_definition_stack: vec![], + gfm_table_in_head: false, + gfm_table_align: None, + gfm_table_column: 0, + tight_stack: vec![], + slurp_one_line_ending: false, + image_alt_inside: false, + encode_html: true, + line_ending_default: line_ending, + buffers: vec![String::new()], + index: 0, + options, + } + } + + /// Push a buffer. + pub fn buffer(&mut self) { + self.buffers.push(String::new()); + } + + /// Pop a buffer, returning its value. + pub fn resume(&mut self) -> String { + self.buffers.pop().expect("Cannot resume w/o buffer") + } + + /// Push a str to the last buffer. + pub fn push(&mut self, value: &str) { + self.buffers + .last_mut() + .expect("Cannot push w/o buffer") + .push_str(value); + } + + /// Add a line ending. + pub fn line_ending(&mut self) { + let eol = self.line_ending_default.as_str().to_string(); + self.push(&eol); + } + + /// Add a line ending if needed (as in, there’s no eol/eof already). 
+ pub fn line_ending_if_needed(&mut self) { + let tail = self + .buffers + .last() + .expect("at least one buffer should exist") + .as_bytes() + .last(); + + if !matches!(tail, None | Some(b'\n' | b'\r')) { + self.line_ending(); + } + } +} + +/// Turn events and bytes into a string of HTML. +pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { + let mut index = 0; + let mut line_ending_inferred = None; + + // First, we figure out what the used line ending style is. + // Stop when we find a line ending. + while index < events.len() { + let event = &events[index]; + + if event.kind == Kind::Exit + && (event.name == Name::BlankLineEnding || event.name == Name::LineEnding) + { + line_ending_inferred = Some(LineEnding::from_str( + Slice::from_position(bytes, &Position::from_exit_event(events, index)).as_str(), + )); + break; + } + + index += 1; + } + + // Figure out which line ending style we’ll use. + let line_ending_default = + line_ending_inferred.unwrap_or_else(|| options.default_line_ending.clone()); + + let mut context = CompileContext::new(events, bytes, options, line_ending_default); + let mut definition_indices = vec![]; + let mut index = 0; + let mut definition_inside = false; + + // Handle all definitions first. + // We must do two passes because we need to compile the events in + // definitions which come after references already. + // + // To speed things up, we collect the places we can jump over for the + // second pass. + // + // We don’t need to handle GFM footnote definitions like this, because + // unlike normal definitions, what they produce is not used in calls. + // It would also get very complex, because footnote definitions can be + // nested. + while index < events.len() { + let event = &events[index]; + + if definition_inside { + handle(&mut context, index); + } + + if event.kind == Kind::Enter { + if event.name == Name::Definition { + handle(&mut context, index); // Also handle start. 
+ definition_inside = true; + definition_indices.push((index, index)); + } + } else if event.name == Name::Definition { + definition_inside = false; + definition_indices.last_mut().unwrap().1 = index; + } + + index += 1; + } + + index = 0; + let jump_default = (events.len(), events.len()); + let mut definition_index = 0; + let mut jump = definition_indices + .get(definition_index) + .unwrap_or(&jump_default); + + while index < events.len() { + if index == jump.0 { + index = jump.1 + 1; + definition_index += 1; + jump = definition_indices + .get(definition_index) + .unwrap_or(&jump_default); + } else { + handle(&mut context, index); + index += 1; + } + } + + // No section to generate. + if !context.gfm_footnote_definition_calls.is_empty() { + generate_footnote_section(&mut context); + } + + debug_assert_eq!(context.buffers.len(), 1, "expected 1 final buffer"); + context + .buffers + .get(0) + .expect("expected 1 final buffer") + .to_string() +} + +/// Handle the event at `index`. +fn handle(context: &mut CompileContext, index: usize) { + context.index = index; + + if context.events[index].kind == Kind::Enter { + enter(context); + } else { + exit(context); + } +} + +/// Handle [`Enter`][Kind::Enter]. 
+fn enter(context: &mut CompileContext) { + match context.events[context.index].name { + Name::CodeFencedFenceInfo + | Name::CodeFencedFenceMeta + | Name::MathFlowFenceMeta + | Name::DefinitionLabelString + | Name::DefinitionTitleString + | Name::GfmFootnoteDefinitionPrefix + | Name::HeadingAtxText + | Name::HeadingSetextText + | Name::Label + | Name::MdxEsm + | Name::MdxFlowExpression + | Name::MdxTextExpression + | Name::MdxJsxFlowTag + | Name::MdxJsxTextTag + | Name::ReferenceString + | Name::ResourceTitleString => on_enter_buffer(context), + + Name::BlockQuote => on_enter_block_quote(context), + Name::CodeIndented => on_enter_code_indented(context), + Name::CodeFenced | Name::MathFlow => on_enter_raw_flow(context), + Name::CodeText | Name::MathText => on_enter_raw_text(context), + Name::Definition => on_enter_definition(context), + Name::DefinitionDestinationString => on_enter_definition_destination_string(context), + Name::Emphasis => on_enter_emphasis(context), + Name::Frontmatter => on_enter_frontmatter(context), + Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context), + Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context), + Name::GfmStrikethrough => on_enter_gfm_strikethrough(context), + Name::GfmTable => on_enter_gfm_table(context), + Name::GfmTableBody => on_enter_gfm_table_body(context), + Name::GfmTableCell => on_enter_gfm_table_cell(context), + Name::GfmTableHead => on_enter_gfm_table_head(context), + Name::GfmTableRow => on_enter_gfm_table_row(context), + Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context), + Name::HtmlFlow => on_enter_html_flow(context), + Name::HtmlText => on_enter_html_text(context), + Name::Image => on_enter_image(context), + Name::Link => on_enter_link(context), + Name::ListItemMarker => on_enter_list_item_marker(context), + Name::ListOrdered | Name::ListUnordered => on_enter_list(context), + Name::Paragraph => on_enter_paragraph(context), + Name::Resource => 
on_enter_resource(context), + Name::ResourceDestinationString => on_enter_resource_destination_string(context), + Name::Strong => on_enter_strong(context), + _ => {} + } +} + +/// Handle [`Exit`][Kind::Exit]. +fn exit(context: &mut CompileContext) { + match context.events[context.index].name { + Name::CodeFencedFenceMeta + | Name::MathFlowFenceMeta + | Name::MdxJsxTextTag + | Name::MdxTextExpression + | Name::Resource => { + on_exit_drop(context); + } + Name::MdxEsm | Name::MdxFlowExpression | Name::MdxJsxFlowTag => on_exit_drop_slurp(context), + Name::CharacterEscapeValue | Name::CodeTextData | Name::Data | Name::MathTextData => { + on_exit_data(context); + } + Name::AutolinkEmail => on_exit_autolink_email(context), + Name::AutolinkProtocol => on_exit_autolink_protocol(context), + Name::BlankLineEnding => on_exit_blank_line_ending(context), + Name::BlockQuote => on_exit_block_quote(context), + Name::CharacterReferenceMarker => on_exit_character_reference_marker(context), + Name::CharacterReferenceMarkerNumeric => { + on_exit_character_reference_marker_numeric(context); + } + Name::CharacterReferenceMarkerHexadecimal => { + on_exit_character_reference_marker_hexadecimal(context); + } + Name::CharacterReferenceValue => on_exit_character_reference_value(context), + Name::CodeFenced | Name::CodeIndented | Name::MathFlow => on_exit_raw_flow(context), + Name::CodeFencedFence | Name::MathFlowFence => on_exit_raw_flow_fence(context), + Name::CodeFencedFenceInfo => on_exit_raw_flow_fence_info(context), + Name::CodeFlowChunk | Name::MathFlowChunk => on_exit_raw_flow_chunk(context), + Name::CodeText | Name::MathText => on_exit_raw_text(context), + Name::Definition => on_exit_definition(context), + Name::DefinitionDestinationString => on_exit_definition_destination_string(context), + Name::DefinitionLabelString => on_exit_definition_label_string(context), + Name::DefinitionTitleString => on_exit_definition_title_string(context), + Name::Emphasis => on_exit_emphasis(context), 
+ Name::Frontmatter => on_exit_frontmatter(context), + Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context), + Name::GfmAutolinkLiteralMailto => on_exit_gfm_autolink_literal_mailto(context), + Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context), + Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context), + Name::GfmAutolinkLiteralXmpp => on_exit_gfm_autolink_literal_xmpp(context), + Name::GfmFootnoteCall => on_exit_gfm_footnote_call(context), + Name::GfmFootnoteDefinitionLabelString => { + on_exit_gfm_footnote_definition_label_string(context); + } + Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context), + Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context), + Name::GfmStrikethrough => on_exit_gfm_strikethrough(context), + Name::GfmTable => on_exit_gfm_table(context), + Name::GfmTableBody => on_exit_gfm_table_body(context), + Name::GfmTableCell => on_exit_gfm_table_cell(context), + Name::GfmTableHead => on_exit_gfm_table_head(context), + Name::GfmTableRow => on_exit_gfm_table_row(context), + Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context), + Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context), + Name::HardBreakEscape | Name::HardBreakTrailing => on_exit_break(context), + Name::HeadingAtx => on_exit_heading_atx(context), + Name::HeadingAtxSequence => on_exit_heading_atx_sequence(context), + Name::HeadingAtxText => on_exit_heading_atx_text(context), + Name::HeadingSetextText => on_exit_heading_setext_text(context), + Name::HeadingSetextUnderlineSequence => on_exit_heading_setext_underline_sequence(context), + Name::HtmlFlow | Name::HtmlText => on_exit_html(context), + Name::HtmlFlowData | Name::HtmlTextData => on_exit_html_data(context), + Name::Image | Name::Link => on_exit_media(context), + Name::Label => on_exit_label(context), + Name::LabelText => on_exit_label_text(context), + 
Name::LineEnding => on_exit_line_ending(context), + Name::ListOrdered | Name::ListUnordered => on_exit_list(context), + Name::ListItem => on_exit_list_item(context), + Name::ListItemValue => on_exit_list_item_value(context), + Name::Paragraph => on_exit_paragraph(context), + Name::ReferenceString => on_exit_reference_string(context), + Name::ResourceDestinationString => on_exit_resource_destination_string(context), + Name::ResourceTitleString => on_exit_resource_title_string(context), + Name::Strong => on_exit_strong(context), + Name::ThematicBreak => on_exit_thematic_break(context), + _ => {} + } +} + +/// Handle [`Enter`][Kind::Enter]:`*`. +/// +/// Buffers data. +fn on_enter_buffer(context: &mut CompileContext) { + context.buffer(); +} + +/// Handle [`Enter`][Kind::Enter]:[`BlockQuote`][Name::BlockQuote]. +fn on_enter_block_quote(context: &mut CompileContext) { + context.tight_stack.push(false); + context.line_ending_if_needed(); + context.push("
    "); +} + +/// Handle [`Enter`][Kind::Enter]:[`CodeIndented`][Name::CodeIndented]. +fn on_enter_code_indented(context: &mut CompileContext) { + context.raw_flow_seen_data = Some(false); + context.line_ending_if_needed(); + context.push("
    ");
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:{[`CodeFenced`][Name::CodeFenced],[`MathFlow`][Name::MathFlow]}.
    +fn on_enter_raw_flow(context: &mut CompileContext) {
    +    context.raw_flow_seen_data = Some(false);
    +    context.line_ending_if_needed();
    +    // Note that no `>` is used, which is added later (due to info)
    +    context.push("
    ");
    +    }
    +    context.buffer();
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`Definition`][Name::Definition].
    +///
    +/// Tracks an empty, non-image media entry for the definition on the media
    +/// stack and opens a fresh buffer.
    +fn on_enter_definition(context: &mut CompileContext) {
    +    // Nothing is known about the definition yet: every optional part is unset.
    +    let placeholder = Media {
    +        image: false,
    +        label_id: None,
    +        label: None,
    +        reference_id: None,
    +        destination: None,
    +        title: None,
    +    };
    +
    +    // The media stack and the buffer stack are independent of each other.
    +    context.media_stack.push(placeholder);
    +    context.buffer();
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`DefinitionDestinationString`][Name::DefinitionDestinationString].
    +///
    +/// Switches HTML encoding off and opens a buffer to collect the destination.
    +fn on_enter_definition_destination_string(context: &mut CompileContext) {
    +    // Opening the buffer does not read the flag, so the order is free.
    +    context.encode_html = false;
    +    context.buffer();
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`Emphasis`][Name::Emphasis].
    +///
    +/// Opens an `<em>` element.
    +/// Nothing is written inside image alt text, where tags are ignored.
    +fn on_enter_emphasis(context: &mut CompileContext) {
    +    if !context.image_alt_inside {
    +        // The opening tag had been stripped to an empty string, which made
    +        // this handler a no-op.
    +        context.push("<em>");
    +    }
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`Frontmatter`][Name::Frontmatter].
    +///
    +/// Opens a new, empty buffer on top of the buffer stack.
    +fn on_enter_frontmatter(context: &mut CompileContext) {
    +    // Same effect as `context.buffer()`, written out.
    +    context.buffers.push(String::new());
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition].
    +///
    +/// Registers the footnote definition on the stack of containers, with
    +/// `false` as its entry.
    +fn on_enter_gfm_footnote_definition(context: &mut CompileContext) {
    +    let entry = false;
    +    context.tight_stack.push(entry);
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmFootnoteCall`][Name::GfmFootnoteCall].
    +///
    +/// Tracks the call as an empty, non-image media entry on the media stack.
    +fn on_enter_gfm_footnote_call(context: &mut CompileContext) {
    +    // All optional parts start out unset.
    +    let call = Media {
    +        image: false,
    +        label: None,
    +        label_id: None,
    +        reference_id: None,
    +        title: None,
    +        destination: None,
    +    };
    +
    +    context.media_stack.push(call);
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmStrikethrough`][Name::GfmStrikethrough].
    +///
    +/// Opens a `<del>` element.
    +/// Nothing is written inside image alt text, where tags are ignored.
    +fn on_enter_gfm_strikethrough(context: &mut CompileContext) {
    +    if !context.image_alt_inside {
    +        // The opening tag had been stripped to an empty string, which made
    +        // this handler a no-op.
    +        context.push("<del>");
    +    }
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmTable`][Name::GfmTable]: compute the per-column alignment, store it in `gfm_table_align`, and open `<table>`.
    +fn on_enter_gfm_table(context: &mut CompileContext) {
    +    // Find the alignment: scan ahead from the current event until the delimiter row is exited.
    +    let mut index = context.index;
    +    let mut in_delimiter_row = false;
    +    let mut align = vec![];
    +
    +    while index < context.events.len() {
    +        let event = &context.events[index];
    +
    +        if in_delimiter_row {
    +            if event.kind == Kind::Enter {
    +                // Start of alignment value: new column; a marker right after the enter means left (for now).
    +                if event.name == Name::GfmTableDelimiterCellValue {
    +                    align.push(
    +                        if context.events[index + 1].name == Name::GfmTableDelimiterMarker {
    +                            GfmTableAlign::Left
    +                        } else {
    +                            GfmTableAlign::None
    +                        },
    +                    );
    +                }
    +            } else {
    +                // End of alignment value: a marker right before the exit turns left into center, none into right.
    +                if event.name == Name::GfmTableDelimiterCellValue {
    +                    if context.events[index - 1].name == Name::GfmTableDelimiterMarker {
    +                        let align_index = align.len() - 1;
    +                        align[align_index] = if align[align_index] == GfmTableAlign::Left {
    +                            GfmTableAlign::Center
    +                        } else {
    +                            GfmTableAlign::Right
    +                        }
    +                    }
    +                }
    +                // Done: the delimiter row is exited, every column has been seen.
    +                else if event.name == Name::GfmTableDelimiterRow {
    +                    break;
    +                }
    +            }
    +        } else if event.kind == Kind::Enter && event.name == Name::GfmTableDelimiterRow {
    +            in_delimiter_row = true;
    +        }
    +
    +        index += 1;
    +    }
    +
    +    // Generate; `gfm_table_align` is read by the cell and row handlers and cleared in `on_exit_gfm_table`.
    +    context.gfm_table_align = Some(align);
    +    context.line_ending_if_needed();
    +    context.push("<table>");
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmTableBody`][Name::GfmTableBody]: open `<tbody>`.
    +fn on_enter_gfm_table_body(context: &mut CompileContext) {
    +    context.push("<tbody>");
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmTableCell`][Name::GfmTableCell].
    +fn on_enter_gfm_table_cell(context: &mut CompileContext) {
    +    let column = context.gfm_table_column;
    +    let align = context.gfm_table_align.as_ref().unwrap();
    +
    +    if column >= align.len() {
    +        // Capture cell to ignore it.
    +        context.buffer();
    +    } else {
    +        let value = align[column];
    +        context.line_ending_if_needed();
    +
    +        if context.gfm_table_in_head {
    +            context.push(" context.push(" align=\"left\""),
    +            GfmTableAlign::Right => context.push(" align=\"right\""),
    +            GfmTableAlign::Center => context.push(" align=\"center\""),
    +            GfmTableAlign::None => {}
    +        }
    +
    +        context.push(">");
    +    }
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmTableHead`][Name::GfmTableHead]: open `<thead>` and mark that cells are header cells.
    +fn on_enter_gfm_table_head(context: &mut CompileContext) {
    +    context.line_ending_if_needed();
    +    context.push("<thead>");
    +    context.gfm_table_in_head = true; // Reset to `false` in `on_exit_gfm_table_head`.
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmTableRow`][Name::GfmTableRow]: open `<tr>` on its own line.
    +fn on_enter_gfm_table_row(context: &mut CompileContext) {
    +    context.line_ending_if_needed();
    +    context.push("<tr>");
    +}
    +
    +/// Handle [`Enter`][Kind::Enter]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck].
    +fn on_enter_gfm_task_list_item_check(context: &mut CompileContext) {
    +    if !context.image_alt_inside {
    +        context.push(" | -␊
    +                //      ^
    +                //   |   a
    +                // ```
    +                let mut at_prefix = false;
    +                // Blank line directly after item, which is just a prefix.
    +                //
    +                // ```markdown
    +                // > | -␊
    +                //      ^
    +                //   | - a
    +                // ```
    +                let mut at_empty_list_item = false;
    +                // Blank line at block quote prefix:
    +                //
    +                // ```markdown
    +                // > | * >␊
    +                //        ^
    +                //   | * a
    +                // ```
    +                let mut at_empty_block_quote = false;
    +
    +                if balance == 1 {
    +                    let mut before = index - 2;
    +
    +                    if events[before].name == Name::ListItem {
    +                        before -= 1;
    +
    +                        if events[before].name == Name::SpaceOrTab {
    +                            before -= 2;
    +                        }
    +
    +                        if events[before].name == Name::BlockQuote
    +                            && events[before - 1].name == Name::BlockQuotePrefix
    +                        {
    +                            at_empty_block_quote = true;
    +                        } else if events[before].name == Name::ListItemPrefix {
    +                            at_empty_list_item = true;
    +                        }
    +                    }
    +                } else {
    +                    let mut before = index - 2;
    +
    +                    if events[before].name == Name::SpaceOrTab {
    +                        before -= 2;
    +                    }
    +
    +                    if events[before].name == Name::ListItemPrefix {
    +                        at_prefix = true;
    +                    }
    +                }
    +
    +                if !at_prefix && !at_empty_list_item && !at_empty_block_quote {
    +                    loose = true;
    +                    break;
    +                }
    +            }
    +
    +            // Done.
    +            if balance == 0 && event.name == *name {
    +                break;
    +            }
    +        }
    +
    +        index += 1;
    +    }
    +
    +    context.tight_stack.push(!loose);
    +    context.line_ending_if_needed();
    +    // Note: no `>`.
    +    context.push(if *name == Name::ListOrdered {
    +        "");
    +    }
    +
    +    context.line_ending_if_needed();
    +
    +    context.push("
  • "); + context.list_expect_first_marker = Some(false); +} + +/// Handle [`Enter`][Kind::Enter]:[`Paragraph`][Name::Paragraph]. +fn on_enter_paragraph(context: &mut CompileContext) { + let tight = context.tight_stack.last().unwrap_or(&false); + + if !tight { + context.line_ending_if_needed(); + context.push("

    "); + } +} + +/// Handle [`Enter`][Kind::Enter]:[`Resource`][Name::Resource]. +fn on_enter_resource(context: &mut CompileContext) { + context.buffer(); // We can have line endings in the resource, ignore them. + context.media_stack.last_mut().unwrap().destination = Some("".to_string()); +} + +/// Handle [`Enter`][Kind::Enter]:[`ResourceDestinationString`][Name::ResourceDestinationString]. +fn on_enter_resource_destination_string(context: &mut CompileContext) { + context.buffer(); + // Ignore encoding the result, as we’ll first percent encode the url and + // encode manually after. + context.encode_html = false; +} + +/// Handle [`Enter`][Kind::Enter]:[`Strong`][Name::Strong]. +fn on_enter_strong(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push(""); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`AutolinkEmail`][Name::AutolinkEmail]. +fn on_exit_autolink_email(context: &mut CompileContext) { + generate_autolink( + context, + Some("mailto:"), + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + false, + ); +} + +/// Handle [`Exit`][Kind::Exit]:[`AutolinkProtocol`][Name::AutolinkProtocol]. +fn on_exit_autolink_protocol(context: &mut CompileContext) { + generate_autolink( + context, + None, + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + false, + ); +} + +/// Handle [`Exit`][Kind::Exit]:{[`HardBreakEscape`][Name::HardBreakEscape],[`HardBreakTrailing`][Name::HardBreakTrailing]}. +fn on_exit_break(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push("
    "); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`BlankLineEnding`][Name::BlankLineEnding]. +fn on_exit_blank_line_ending(context: &mut CompileContext) { + if context.index == context.events.len() - 1 { + context.line_ending_if_needed(); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`BlockQuote`][Name::BlockQuote]. +fn on_exit_block_quote(context: &mut CompileContext) { + context.tight_stack.pop(); + context.line_ending_if_needed(); + context.slurp_one_line_ending = false; + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceMarker`][Name::CharacterReferenceMarker]. +fn on_exit_character_reference_marker(context: &mut CompileContext) { + context.character_reference_marker = Some(b'&'); +} + +/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceMarkerHexadecimal`][Name::CharacterReferenceMarkerHexadecimal]. +fn on_exit_character_reference_marker_hexadecimal(context: &mut CompileContext) { + context.character_reference_marker = Some(b'x'); +} + +/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceMarkerNumeric`][Name::CharacterReferenceMarkerNumeric]. +fn on_exit_character_reference_marker_numeric(context: &mut CompileContext) { + context.character_reference_marker = Some(b'#'); +} + +/// Handle [`Exit`][Kind::Exit]:[`CharacterReferenceValue`][Name::CharacterReferenceValue]. 
+fn on_exit_character_reference_value(context: &mut CompileContext) {
+    // The kind of reference (`&`, `#`, or `x`) was recorded by one of the
+    // `on_exit_character_reference_marker*` handlers; take it so it does not
+    // leak into the next reference.
+    let marker = context
+        .character_reference_marker
+        .take()
+        .expect("expected `character_reference_marker` to be set");
+    let slice = Slice::from_position(
+        context.bytes,
+        &Position::from_exit_event(context.events, context.index),
+    );
+    let value = slice.as_str();
+
+    let value = match marker {
+        b'#' => decode_numeric(value, 10),
+        b'x' => decode_numeric(value, 16),
+        b'&' => decode_named(value),
+        // The marker handlers only ever store one of the three bytes above.
+        _ => unreachable!("character reference marker must be `&`, `#`, or `x`"),
+    };
+
+    context.push(&encode(&value, context.encode_html));
+}
+
+/// Handle [`Exit`][Kind::Exit]:{[`CodeFlowChunk`][Name::CodeFlowChunk],[`MathFlowChunk`][Name::MathFlowChunk]}.
+///
+/// Records that the raw (flow) construct contains data and pushes the chunk.
+fn on_exit_raw_flow_chunk(context: &mut CompileContext) {
+    context.raw_flow_seen_data = Some(true);
+    context.push(&encode(
+        &Slice::from_position(
+            context.bytes,
+            &Position::from_exit_event(context.events, context.index),
+        )
+        // Must serialize to get virtual spaces.
+        .serialize(),
+        context.encode_html,
+    ));
+}
+
+/// Handle [`Exit`][Kind::Exit]:{[`CodeFencedFence`][Name::CodeFencedFence],[`MathFlowFence`][Name::MathFlowFence]}.
+///
+/// On the first fence, closes the tag that is still open with `>` and slurps
+/// the line ending after it.
+/// Every fence increments `raw_flow_fences_count`.
+fn on_exit_raw_flow_fence(context: &mut CompileContext) {
+    let count = context.raw_flow_fences_count.unwrap_or(0);
+
+    if count == 0 {
+        context.push(">");
+        context.slurp_one_line_ending = true;
+    }
+
+    context.raw_flow_fences_count = Some(count + 1);
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`CodeFencedFenceInfo`][Name::CodeFencedFenceInfo].
+///
+/// Resumes the buffered info and pushes it as a `language-*` class.
+///
+/// Note: math (flow) does not support `info`.
+fn on_exit_raw_flow_fence_info(context: &mut CompileContext) {
+    let value = context.resume();
+    context.push(" class=\"language-");
+    context.push(&value);
+    context.push("\"");
+}
+
+/// Handle [`Exit`][Kind::Exit]:{[`CodeFenced`][Name::CodeFenced],[`CodeIndented`][Name::CodeIndented],[`MathFlow`][Name::MathFlow]}.
+fn on_exit_raw_flow(context: &mut CompileContext) { + // One special case is if we are inside a container, and the raw (flow) was + // not closed (meaning it runs to the end). + // In that case, the following line ending, is considered *outside* the + // fenced code and block quote by micromark, but CM wants to treat that + // ending as part of the code. + if let Some(count) = context.raw_flow_fences_count { + // No closing fence. + if count == 1 + // In a container. + && !context.tight_stack.is_empty() + // Empty (as the closing is right at the opening fence) + && !matches!(context.events[context.index - 1].name, Name::CodeFencedFence | Name::MathFlowFence) + { + context.line_ending(); + } + } + + // But in most cases, it’s simpler: when we’ve seen some data, emit an extra + // line ending when needed. + if context + .raw_flow_seen_data + .take() + .expect("`raw_flow_seen_data` must be defined") + { + context.line_ending_if_needed(); + } + + context.push(""); + + if let Some(count) = context.raw_flow_fences_count.take() { + if count < 2 { + context.line_ending_if_needed(); + } + } + + context.slurp_one_line_ending = false; +} + +/// Handle [`Exit`][Kind::Exit]:{[`CodeText`][Name::CodeText],[`MathText`][Name::MathText]}. +fn on_exit_raw_text(context: &mut CompileContext) { + let result = context.resume(); + let mut bytes = result.as_bytes().to_vec(); + + // If we are in a GFM table, we need to decode escaped pipes. + // This is a rather weird GFM feature. 
+ if context.gfm_table_align.is_some() { + let mut index = 0; + let mut len = bytes.len(); + + while index < len { + if index + 1 < len && bytes[index] == b'\\' && bytes[index + 1] == b'|' { + bytes.remove(index); + len -= 1; + } + + index += 1; + } + } + + let mut trim = false; + let mut index = 0; + let mut end = bytes.len(); + + if end > 2 && bytes[index] == b' ' && bytes[end - 1] == b' ' { + index += 1; + end -= 1; + while index < end && !trim { + if bytes[index] != b' ' { + trim = true; + break; + } + index += 1; + } + } + + if trim { + bytes.remove(0); + bytes.pop(); + } + + context.raw_text_inside = false; + context.push(str::from_utf8(&bytes).unwrap()); + + if !context.image_alt_inside { + context.push(""); + } +} + +/// Handle [`Exit`][Kind::Exit]:*. +/// +/// Resumes, and ignores what was resumed. +fn on_exit_drop(context: &mut CompileContext) { + context.resume(); +} + +/// Handle [`Exit`][Kind::Exit]:*. +/// +/// Resumes, ignores what was resumed, and slurps the following line ending. +fn on_exit_drop_slurp(context: &mut CompileContext) { + context.resume(); + context.slurp_one_line_ending = true; +} + +/// Handle [`Exit`][Kind::Exit]:{[`CodeTextData`][Name::CodeTextData],[`Data`][Name::Data],[`CharacterEscapeValue`][Name::CharacterEscapeValue]}. +fn on_exit_data(context: &mut CompileContext) { + context.push(&encode( + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + context.encode_html, + )); +} + +/// Handle [`Exit`][Kind::Exit]:[`Definition`][Name::Definition]. 
+fn on_exit_definition(context: &mut CompileContext) { + context.resume(); + let media = context.media_stack.pop().unwrap(); + let indices = media.reference_id.unwrap(); + let id = + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()); + + context.definitions.push(Definition { + id, + destination: media.destination, + title: media.title, + }); +} + +/// Handle [`Exit`][Kind::Exit]:[`DefinitionDestinationString`][Name::DefinitionDestinationString]. +fn on_exit_definition_destination_string(context: &mut CompileContext) { + let buf = context.resume(); + context.media_stack.last_mut().unwrap().destination = Some(buf); + context.encode_html = true; +} + +/// Handle [`Exit`][Kind::Exit]:[`DefinitionLabelString`][Name::DefinitionLabelString]. +fn on_exit_definition_label_string(context: &mut CompileContext) { + // Discard label, use the source content instead. + context.resume(); + context.media_stack.last_mut().unwrap().reference_id = + Some(Position::from_exit_event(context.events, context.index).to_indices()); +} + +/// Handle [`Exit`][Kind::Exit]:[`DefinitionTitleString`][Name::DefinitionTitleString]. +fn on_exit_definition_title_string(context: &mut CompileContext) { + let buf = context.resume(); + context.media_stack.last_mut().unwrap().title = Some(buf); +} + +/// Handle [`Exit`][Kind::Exit]:[`Emphasis`][Name::Emphasis]. +fn on_exit_emphasis(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push(""); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`Frontmatter`][Name::Frontmatter]. +fn on_exit_frontmatter(context: &mut CompileContext) { + context.resume(); + context.slurp_one_line_ending = true; +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralEmail`][Name::GfmAutolinkLiteralEmail]. 
+fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) { + generate_autolink( + context, + Some("mailto:"), + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + true, + ); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralMailto`][Name::GfmAutolinkLiteralMailto]. +fn on_exit_gfm_autolink_literal_mailto(context: &mut CompileContext) { + generate_autolink( + context, + None, + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + true, + ); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralProtocol`][Name::GfmAutolinkLiteralProtocol]. +fn on_exit_gfm_autolink_literal_protocol(context: &mut CompileContext) { + generate_autolink( + context, + None, + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + true, + ); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralWww`][Name::GfmAutolinkLiteralWww]. +fn on_exit_gfm_autolink_literal_www(context: &mut CompileContext) { + generate_autolink( + context, + Some("http://"), + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + true, + ); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmAutolinkLiteralXmpp`][Name::GfmAutolinkLiteralXmpp]. +fn on_exit_gfm_autolink_literal_xmpp(context: &mut CompileContext) { + generate_autolink( + context, + None, + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + true, + ); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteCall`][Name::GfmFootnoteCall]. 
+fn on_exit_gfm_footnote_call(context: &mut CompileContext) { + let indices = context.media_stack.pop().unwrap().label_id.unwrap(); + let id = + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()); + let safe_id = sanitize(&id.to_lowercase()); + let mut call_index = 0; + + // See if this has been called before. + while call_index < context.gfm_footnote_definition_calls.len() { + if context.gfm_footnote_definition_calls[call_index].0 == id { + break; + } + call_index += 1; + } + + // New. + if call_index == context.gfm_footnote_definition_calls.len() { + context.gfm_footnote_definition_calls.push((id, 0)); + } + + // Increment. + context.gfm_footnote_definition_calls[call_index].1 += 1; + + // No call is output in an image alt, though the definition and + // backreferences are generated as if it was the case. + if context.image_alt_inside { + return; + } + + context.push(" 1 { + context.push("-"); + context.push( + &context.gfm_footnote_definition_calls[call_index] + .1 + .to_string(), + ); + } + context.push("\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">"); + + context.push(&(call_index + 1).to_string()); + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString]. +fn on_exit_gfm_footnote_definition_label_string(context: &mut CompileContext) { + context + .gfm_footnote_definition_stack + .push(Position::from_exit_event(context.events, context.index).to_indices()); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix]. +fn on_exit_gfm_footnote_definition_prefix(context: &mut CompileContext) { + // Drop the prefix. + context.resume(); + // Capture everything until end of definition. + context.buffer(); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition]. 
+fn on_exit_gfm_footnote_definition(context: &mut CompileContext) { + let value = context.resume(); + let indices = context.gfm_footnote_definition_stack.pop().unwrap(); + context.tight_stack.pop(); + context.gfm_footnote_definitions.push(( + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()), + value, + )); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmStrikethrough`][Name::GfmStrikethrough]. +fn on_exit_gfm_strikethrough(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push(""); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTable`][Name::GfmTable]. +fn on_exit_gfm_table(context: &mut CompileContext) { + context.gfm_table_align = None; + context.line_ending_if_needed(); + context.push("

  • "); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableBody`][Name::GfmTableBody]. +fn on_exit_gfm_table_body(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableCell`][Name::GfmTableCell]. +fn on_exit_gfm_table_cell(context: &mut CompileContext) { + let align = context.gfm_table_align.as_ref().unwrap(); + + if context.gfm_table_column < align.len() { + if context.gfm_table_in_head { + context.push(""); + } else { + context.push(""); + } + } else { + // Stop capturing. + context.resume(); + } + + context.gfm_table_column += 1; +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableHead`][Name::GfmTableHead]. +fn on_exit_gfm_table_head(context: &mut CompileContext) { + context.gfm_table_in_head = false; + context.line_ending_if_needed(); + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTableRow`][Name::GfmTableRow]. +fn on_exit_gfm_table_row(context: &mut CompileContext) { + let mut column = context.gfm_table_column; + let len = context.gfm_table_align.as_ref().unwrap().len(); + + // Add “phantom” cells, for body rows that are shorter than the delimiter + // row (which is equal to the head row). + while column < len { + on_enter_gfm_table_cell(context); + on_exit_gfm_table_cell(context); + column += 1; + } + + context.gfm_table_column = 0; + context.line_ending_if_needed(); + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemCheck`][Name::GfmTaskListItemCheck]. +fn on_exit_gfm_task_list_item_check(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push("/>"); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmTaskListItemValueChecked`][Name::GfmTaskListItemValueChecked]. +fn on_exit_gfm_task_list_item_value_checked(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push("checked=\"\" "); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`HeadingAtx`][Name::HeadingAtx]. 
+fn on_exit_heading_atx(context: &mut CompileContext) { + let rank = context + .heading_atx_rank + .take() + .expect("`heading_atx_rank` must be set in headings"); + + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:[`HeadingAtxSequence`][Name::HeadingAtxSequence]. +fn on_exit_heading_atx_sequence(context: &mut CompileContext) { + // First fence we see. + if context.heading_atx_rank.is_none() { + let rank = Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .len(); + context.line_ending_if_needed(); + context.heading_atx_rank = Some(rank); + context.push(""); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`HeadingAtxText`][Name::HeadingAtxText]. +fn on_exit_heading_atx_text(context: &mut CompileContext) { + let value = context.resume(); + context.push(&value); +} + +/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextText`][Name::HeadingSetextText]. +fn on_exit_heading_setext_text(context: &mut CompileContext) { + let buf = context.resume(); + context.heading_setext_buffer = Some(buf); + context.slurp_one_line_ending = true; +} + +/// Handle [`Exit`][Kind::Exit]:[`HeadingSetextUnderlineSequence`][Name::HeadingSetextUnderlineSequence]. +fn on_exit_heading_setext_underline_sequence(context: &mut CompileContext) { + let text = context + .heading_setext_buffer + .take() + .expect("`heading_atx_rank` must be set in headings"); + let head = Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .head(); + let rank = if head == Some(b'-') { "2" } else { "1" }; + + context.line_ending_if_needed(); + context.push(""); + context.push(&text); + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlow`][Name::HtmlFlow],[`HtmlText`][Name::HtmlText]}. +fn on_exit_html(context: &mut CompileContext) { + context.encode_html = true; +} + +/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlowData`][Name::HtmlFlowData],[`HtmlTextData`][Name::HtmlTextData]}. 
+fn on_exit_html_data(context: &mut CompileContext) { + let slice = Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ); + let value = slice.as_str(); + + let encoded = if context.options.gfm_tagfilter && context.options.allow_dangerous_html { + encode(&gfm_tagfilter(value), context.encode_html) + } else { + encode(value, context.encode_html) + }; + + context.push(&encoded); +} + +/// Handle [`Exit`][Kind::Exit]:[`Label`][Name::Label]. +fn on_exit_label(context: &mut CompileContext) { + let buf = context.resume(); + context.media_stack.last_mut().unwrap().label = Some(buf); +} + +/// Handle [`Exit`][Kind::Exit]:[`LabelText`][Name::LabelText]. +fn on_exit_label_text(context: &mut CompileContext) { + context.media_stack.last_mut().unwrap().label_id = + Some(Position::from_exit_event(context.events, context.index).to_indices()); +} + +/// Handle [`Exit`][Kind::Exit]:[`LineEnding`][Name::LineEnding]. +fn on_exit_line_ending(context: &mut CompileContext) { + if context.raw_text_inside { + context.push(" "); + } else if context.slurp_one_line_ending + // Ignore line endings after definitions. + || (context.index > 1 + && (context.events[context.index - 2].name == Name::Definition + || context.events[context.index - 2].name == Name::GfmFootnoteDefinition)) + { + context.slurp_one_line_ending = false; + } else { + context.push(&encode( + Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ) + .as_str(), + context.encode_html, + )); + } +} + +/// Handle [`Exit`][Kind::Exit]:{[`ListOrdered`][Name::ListOrdered],[`ListUnordered`][Name::ListUnordered]}. +fn on_exit_list(context: &mut CompileContext) { + context.tight_stack.pop(); + context.line_ending(); + context.push(if context.events[context.index].name == Name::ListOrdered { + "" + } else { + "" + }); +} + +/// Handle [`Exit`][Kind::Exit]:[`ListItem`][Name::ListItem]. 
+fn on_exit_list_item(context: &mut CompileContext) { + let tight = context.tight_stack.last().unwrap_or(&false); + let before_item = skip::opt_back( + context.events, + context.index - 1, + &[ + Name::BlankLineEnding, + Name::BlockQuotePrefix, + Name::LineEnding, + Name::SpaceOrTab, + // Also ignore things that don’t contribute to the document. + Name::Definition, + Name::GfmFootnoteDefinition, + ], + ); + let previous = &context.events[before_item]; + let tight_paragraph = *tight && previous.name == Name::Paragraph; + let empty_item = previous.name == Name::ListItemPrefix; + + context.slurp_one_line_ending = false; + + if !tight_paragraph && !empty_item { + context.line_ending_if_needed(); + } + + context.push(""); +} + +/// Handle [`Exit`][Kind::Exit]:[`ListItemValue`][Name::ListItemValue]. +fn on_exit_list_item_value(context: &mut CompileContext) { + if context.list_expect_first_marker.unwrap() { + let slice = Slice::from_position( + context.bytes, + &Position::from_exit_event(context.events, context.index), + ); + let value = slice.as_str().parse::().ok().unwrap(); + + if value != 1 { + context.push(" start=\""); + context.push(&value.to_string()); + context.push("\""); + } + } +} + +/// Handle [`Exit`][Kind::Exit]:{[`Image`][Name::Image],[`Link`][Name::Link]}. +fn on_exit_media(context: &mut CompileContext) { + let mut is_in_image = false; + let mut index = 0; + + // Skip current. 
+ let end = context.media_stack.len() - 1; + while index < end { + if context.media_stack[index].image { + is_in_image = true; + break; + } + index += 1; + } + + context.image_alt_inside = is_in_image; + + let media = context.media_stack.pop().unwrap(); + let label = media.label.unwrap(); + let id = media.reference_id.or(media.label_id).map(|indices| { + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()) + }); + + let definition_index = if media.destination.is_none() { + id.and_then(|id| { + let mut index = 0; + + while index < context.definitions.len() { + if context.definitions[index].id == id { + return Some(index); + } + + index += 1; + } + + None + }) + } else { + None + }; + + if !is_in_image { + if media.image { + context.push("\"");"); + } + + if !media.image { + context.push(&label); + + if !is_in_image { + context.push(""); + } + } +} + +/// Handle [`Exit`][Kind::Exit]:[`Paragraph`][Name::Paragraph]. +fn on_exit_paragraph(context: &mut CompileContext) { + let tight = context.tight_stack.last().unwrap_or(&false); + + if *tight { + context.slurp_one_line_ending = true; + } else { + context.push("

    "); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`ReferenceString`][Name::ReferenceString]. +fn on_exit_reference_string(context: &mut CompileContext) { + // Drop stuff. + context.resume(); + + context.media_stack.last_mut().unwrap().reference_id = + Some(Position::from_exit_event(context.events, context.index).to_indices()); +} + +/// Handle [`Exit`][Kind::Exit]:[`ResourceDestinationString`][Name::ResourceDestinationString]. +fn on_exit_resource_destination_string(context: &mut CompileContext) { + let buf = context.resume(); + context.media_stack.last_mut().unwrap().destination = Some(buf); + context.encode_html = true; +} + +/// Handle [`Exit`][Kind::Exit]:[`ResourceTitleString`][Name::ResourceTitleString]. +fn on_exit_resource_title_string(context: &mut CompileContext) { + let buf = context.resume(); + context.media_stack.last_mut().unwrap().title = Some(buf); +} + +/// Handle [`Exit`][Kind::Exit]:[`Strong`][Name::Strong]. +fn on_exit_strong(context: &mut CompileContext) { + if !context.image_alt_inside { + context.push("
    "); + } +} + +/// Handle [`Exit`][Kind::Exit]:[`ThematicBreak`][Name::ThematicBreak]. +fn on_exit_thematic_break(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("
    "); +} + +/// Generate a footnote section. +fn generate_footnote_section(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("
    <"); + if let Some(ref value) = context.options.gfm_footnote_label_tag_name { + context.push(&encode(value, context.encode_html)); + } else { + context.push("h2"); + } + context.push(" id=\"footnote-label\" "); + if let Some(ref value) = context.options.gfm_footnote_label_attributes { + context.push(value); + } else { + context.push("class=\"sr-only\""); + } + context.push(">"); + if let Some(ref value) = context.options.gfm_footnote_label { + context.push(&encode(value, context.encode_html)); + } else { + context.push("Footnotes"); + } + context.push(""); + context.line_ending(); + context.push("
      "); + + let mut index = 0; + while index < context.gfm_footnote_definition_calls.len() { + generate_footnote_item(context, index); + index += 1; + } + + context.line_ending(); + context.push("
    "); + context.line_ending(); + context.push("
    "); + context.line_ending(); +} + +/// Generate a footnote item from a call. +fn generate_footnote_item(context: &mut CompileContext, index: usize) { + let id = &context.gfm_footnote_definition_calls[index].0; + let safe_id = sanitize(&id.to_lowercase()); + + // Find definition: we’ll always find it. + let mut definition_index = 0; + while definition_index < context.gfm_footnote_definitions.len() { + if &context.gfm_footnote_definitions[definition_index].0 == id { + break; + } + definition_index += 1; + } + + debug_assert_ne!( + definition_index, + context.gfm_footnote_definitions.len(), + "expected definition" + ); + + context.line_ending(); + context.push("
  • "); + context.line_ending(); + + // Create one or more backreferences. + let mut reference_index = 0; + let mut backreferences = String::new(); + while reference_index < context.gfm_footnote_definition_calls[index].1 { + if reference_index != 0 { + backreferences.push(' '); + } + backreferences.push_str("↩"); + if reference_index != 0 { + backreferences.push_str(""); + backreferences.push_str(&(reference_index + 1).to_string()); + backreferences.push_str(""); + } + backreferences.push_str(""); + + reference_index += 1; + } + + let value = context.gfm_footnote_definitions[definition_index].1.clone(); + let bytes = value.as_bytes(); + let mut byte_index = bytes.len(); + // Move back past EOL. + while byte_index > 0 && matches!(bytes[byte_index - 1], b'\n' | b'\r') { + byte_index -= 1; + } + // Check if it ends in `

    `. + // This is a bit funky if someone wrote a safe paragraph by hand in + // there. + // But in all other cases, `<` and `>` would be encoded, so we can be + // sure that this is generated by our compiler. + if byte_index > 3 + && bytes[byte_index - 4] == b'<' + && bytes[byte_index - 3] == b'/' + && bytes[byte_index - 2] == b'p' + && bytes[byte_index - 1] == b'>' + { + let (before, after) = bytes.split_at(byte_index - 4); + let mut result = String::new(); + result.push_str(str::from_utf8(before).unwrap()); + result.push(' '); + result.push_str(&backreferences); + result.push_str(str::from_utf8(after).unwrap()); + context.push(&result); + } else { + context.push(&value); + context.line_ending_if_needed(); + context.push(&backreferences); + } + context.line_ending_if_needed(); + context.push("
  • "); +} + +/// Generate an autolink (used by unicode autolinks and GFM autolink literals). +fn generate_autolink( + context: &mut CompileContext, + protocol: Option<&str>, + value: &str, + is_gfm_literal: bool, +) { + let mut is_in_link = false; + let mut index = 0; + + while index < context.media_stack.len() { + if !context.media_stack[index].image { + is_in_link = true; + break; + } + index += 1; + } + + if !context.image_alt_inside && (!is_in_link || !is_gfm_literal) { + context.push(""); + } + + context.push(&encode(value, context.encode_html)); + + if !context.image_alt_inside && (!is_in_link || !is_gfm_literal) { + context.push(""); + } +} diff --git a/src/to_mdast.rs b/src/to_mdast.rs new file mode 100644 index 0000000..d56134a --- /dev/null +++ b/src/to_mdast.rs @@ -0,0 +1,40 @@ +//! Turn events into a syntax tree. + +// To do: example. + +use crate::event::Event; +use crate::mdast; +use crate::Options; +use alloc::vec; + +/// Turn events and bytes into a syntax tree. +pub fn compile(events: &[Event], _bytes: &[u8], _options: &Options) -> mdast::Root { + mdast::Root { + kind: mdast::Kind::Root, + children: vec![], + position: Some(mdast::Position { + start: if events.is_empty() { + create_point(1, 1, 0) + } else { + point_from_event(&events[0]) + }, + end: if events.is_empty() { + create_point(1, 1, 0) + } else { + point_from_event(&events[events.len() - 1]) + }, + }), + } +} + +fn point_from_event(event: &Event) -> mdast::Point { + create_point(event.point.line, event.point.column, event.point.index) +} + +fn create_point(line: usize, column: usize, offset: usize) -> mdast::Point { + mdast::Point { + line, + column, + offset, + } +} -- cgit