diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-25 13:16:45 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-25 13:16:45 +0200 |
commit | 1e4c95079cb97b2b02440b21945c6d12741a7d19 (patch) | |
tree | 4f6a4a179e72630c1cdd058f84498e32b9a433e0 /src | |
parent | 49b6a4e72516e8b2a8768e761a60a4f461802d69 (diff) | |
download | markdown-rs-1e4c95079cb97b2b02440b21945c6d12741a7d19.tar.gz markdown-rs-1e4c95079cb97b2b02440b21945c6d12741a7d19.tar.bz2 markdown-rs-1e4c95079cb97b2b02440b21945c6d12741a7d19.zip |
Add support for GFM footnotes
Diffstat (limited to '')
-rw-r--r-- | src/compiler.rs | 316 | ||||
-rw-r--r-- | src/construct/definition.rs | 12 | ||||
-rw-r--r-- | src/construct/document.rs | 39 | ||||
-rw-r--r-- | src/construct/gfm_footnote_definition.rs | 345 | ||||
-rw-r--r-- | src/construct/gfm_label_start_footnote.rs | 91 | ||||
-rw-r--r-- | src/construct/label_end.rs | 218 | ||||
-rw-r--r-- | src/construct/label_start_image.rs | 54 | ||||
-rw-r--r-- | src/construct/label_start_link.rs | 3 | ||||
-rw-r--r-- | src/construct/list_item.rs | 2 | ||||
-rw-r--r-- | src/construct/mod.rs | 5 | ||||
-rw-r--r-- | src/construct/partial_label.rs | 28 | ||||
-rw-r--r-- | src/construct/text.rs | 24 | ||||
-rw-r--r-- | src/event.rs | 187 | ||||
-rw-r--r-- | src/lib.rs | 254 | ||||
-rw-r--r-- | src/parser.rs | 10 | ||||
-rw-r--r-- | src/state.rs | 34 | ||||
-rw-r--r-- | src/tokenizer.rs | 52 |
17 files changed, 1527 insertions, 147 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index f1003fd..6ad34b2 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -18,7 +18,8 @@ use alloc::{ }; use core::str; -/// Link or image, resource or reference. +/// Link, image, or footnote call. +/// Resource or reference. /// Reused for temporary definitions as well, in the first pass. #[derive(Debug)] struct Media { @@ -76,6 +77,8 @@ struct CompileContext<'a> { pub events: &'a [Event], /// List of bytes. pub bytes: &'a [u8], + /// Configuration. + pub options: &'a Options, // Fields used by handlers to track the things they need to track to // compile markdown. /// Rank of heading (atx). @@ -100,6 +103,10 @@ struct CompileContext<'a> { pub tight_stack: Vec<bool>, /// List of definitions. pub definitions: Vec<Definition>, + /// List of definitions. + pub gfm_footnote_definitions: Vec<(String, String)>, + pub gfm_footnote_definition_calls: Vec<(String, usize)>, + pub gfm_footnote_definition_stack: Vec<(usize, usize)>, // Fields used to influance the current compilation. /// Ignore the next line ending. pub slurp_one_line_ending: bool, @@ -128,7 +135,7 @@ impl<'a> CompileContext<'a> { pub fn new( events: &'a [Event], bytes: &'a [u8], - options: &Options, + options: &'a Options, line_ending: LineEnding, ) -> CompileContext<'a> { CompileContext { @@ -143,6 +150,9 @@ impl<'a> CompileContext<'a> { list_expect_first_marker: None, media_stack: vec![], definitions: vec![], + gfm_footnote_definitions: vec![], + gfm_footnote_definition_calls: vec![], + gfm_footnote_definition_stack: vec![], tight_stack: vec![], slurp_one_line_ending: false, image_alt_inside: false, @@ -161,6 +171,7 @@ impl<'a> CompileContext<'a> { allow_dangerous_html: options.allow_dangerous_html, buffers: vec![String::new()], index: 0, + options, } } @@ -243,6 +254,11 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { // // To speed things up, we collect the places we can jump over for the // second pass. 
+ // + // We don’t need to handle GFM footnote definitions like this, because + // unlike normal definitions, what they produce is not used in calls. + // It would also get very complex, because footnote definitions can be + // nested. while index < events.len() { let event = &events[index]; @@ -250,15 +266,15 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { handle(&mut context, index); } - if event.name == Name::Definition { - if event.kind == Kind::Enter { + if event.kind == Kind::Enter { + if event.name == Name::Definition { handle(&mut context, index); // Also handle start. definition_inside = true; definition_indices.push((index, index)); - } else { - definition_inside = false; - definition_indices.last_mut().unwrap().1 = index; } + } else if event.name == Name::Definition { + definition_inside = false; + definition_indices.last_mut().unwrap().1 = index; } index += 1; @@ -278,14 +294,17 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { jump = definition_indices .get(definition_index) .unwrap_or(&jump_default); - // Ignore line endings after definitions. - context.slurp_one_line_ending = true; } else { handle(&mut context, index); index += 1; } } + // Generate the footnote section when at least one footnote was called. 
+ if !context.gfm_footnote_definition_calls.is_empty() { + generate_footnote_section(&mut context); + } + assert_eq!(context.buffers.len(), 1, "expected 1 final buffer"); context .buffers @@ -312,6 +331,7 @@ fn enter(context: &mut CompileContext) { | Name::CodeFencedFenceMeta | Name::DefinitionLabelString | Name::DefinitionTitleString + | Name::GfmFootnoteDefinitionPrefix | Name::HeadingAtxText | Name::HeadingSetextText | Name::Label @@ -326,6 +346,8 @@ fn enter(context: &mut CompileContext) { Name::DefinitionDestinationString => on_enter_definition_destination_string(context), Name::Emphasis => on_enter_emphasis(context), Name::Frontmatter => on_enter_frontmatter(context), + Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context), + Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context), Name::GfmStrikethrough => on_enter_gfm_strikethrough(context), Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context), Name::HtmlFlow => on_enter_html_flow(context), @@ -374,6 +396,12 @@ fn exit(context: &mut CompileContext) { Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context), Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context), Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context), + Name::GfmFootnoteCall => on_exit_gfm_footnote_call(context), + Name::GfmFootnoteDefinitionLabelString => { + on_exit_gfm_footnote_definition_label_string(context); + } + Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context), + Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context), Name::GfmStrikethrough => on_exit_gfm_strikethrough(context), Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context), Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context), @@ -472,6 +500,23 @@ fn on_enter_frontmatter(context: &mut CompileContext) { context.buffer(); } +/// Handle 
[`Enter`][Kind::Enter]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition]. +fn on_enter_gfm_footnote_definition(context: &mut CompileContext) { + context.tight_stack.push(false); +} + +/// Handle [`Enter`][Kind::Enter]:[`GfmFootnoteCall`][Name::GfmFootnoteCall]. +fn on_enter_gfm_footnote_call(context: &mut CompileContext) { + context.media_stack.push(Media { + image: false, + label_id: None, + label: None, + reference_id: None, + destination: None, + title: None, + }); +} + /// Handle [`Enter`][Kind::Enter]:[`GfmStrikethrough`][Name::GfmStrikethrough]. fn on_enter_gfm_strikethrough(context: &mut CompileContext) { if !context.image_alt_inside { @@ -961,6 +1006,92 @@ fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) { on_exit_autolink_email(context); } +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteCall`][Name::GfmFootnoteCall]. +fn on_exit_gfm_footnote_call(context: &mut CompileContext) { + let indices = context.media_stack.pop().unwrap().label_id.unwrap(); + let id = + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()); + let safe_id = sanitize_uri(&id.to_lowercase(), &None); + let mut call_index = 0; + + // See if this has been called before. + while call_index < context.gfm_footnote_definition_calls.len() { + if context.gfm_footnote_definition_calls[call_index].0 == id { + break; + } + call_index += 1; + } + + // New. + if call_index == context.gfm_footnote_definition_calls.len() { + context.gfm_footnote_definition_calls.push((id, 0)); + } + + // Increment. + context.gfm_footnote_definition_calls[call_index].1 += 1; + + // No call is output in an image alt, though the definition and + // backreferences are generated as if it was the case. 
+ if context.image_alt_inside { + return; + } + + context.push("<sup><a href=\"#"); + if let Some(ref value) = context.options.gfm_footnote_clobber_prefix { + context.push(&encode(value, context.encode_html)); + } else { + context.push("user-content-"); + } + context.push("fn-"); + context.push(&safe_id); + context.push("\" id=\""); + if let Some(ref value) = context.options.gfm_footnote_clobber_prefix { + context.push(&encode(value, context.encode_html)); + } else { + context.push("user-content-"); + } + context.push("fnref-"); + context.push(&safe_id); + if context.gfm_footnote_definition_calls[call_index].1 > 1 { + context.push("-"); + context.push( + &context.gfm_footnote_definition_calls[call_index] + .1 + .to_string(), + ); + } + context.push("\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">"); + + context.push(&(call_index + 1).to_string()); + context.push("</a></sup>"); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString]. +fn on_exit_gfm_footnote_definition_label_string(context: &mut CompileContext) { + context + .gfm_footnote_definition_stack + .push(Position::from_exit_event(context.events, context.index).to_indices()); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix]. +fn on_exit_gfm_footnote_definition_prefix(context: &mut CompileContext) { + // Drop the prefix. + context.resume(); + // Capture everything until end of definition. + context.buffer(); +} + +/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition]. 
+fn on_exit_gfm_footnote_definition(context: &mut CompileContext) { + let value = context.resume(); + let indices = context.gfm_footnote_definition_stack.pop().unwrap(); + context.tight_stack.pop(); + context.gfm_footnote_definitions.push(( + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()), + value, + )); +} + /// Handle [`Exit`][Kind::Exit]:[`GfmStrikethrough`][Name::GfmStrikethrough]. fn on_exit_gfm_strikethrough(context: &mut CompileContext) { if !context.image_alt_inside { @@ -1080,7 +1211,12 @@ fn on_exit_label_text(context: &mut CompileContext) { fn on_exit_line_ending(context: &mut CompileContext) { if context.code_text_inside { context.push(" "); - } else if context.slurp_one_line_ending { + } else if context.slurp_one_line_ending + // Ignore line endings after definitions. + || (context.index > 1 + && (context.events[context.index - 2].name == Name::Definition + || context.events[context.index - 2].name == Name::GfmFootnoteDefinition)) + { context.slurp_one_line_ending = false; } else { context.push(&encode( @@ -1113,9 +1249,12 @@ fn on_exit_list_item(context: &mut CompileContext) { context.index - 1, &[ Name::BlankLineEnding, + Name::BlockQuotePrefix, Name::LineEnding, Name::SpaceOrTab, - Name::BlockQuotePrefix, + // Also ignore things that don’t contribute to the document. 
+ Name::Definition, + Name::GfmFootnoteDefinition, ], ); let previous = &context.events[before_item]; @@ -1167,7 +1306,6 @@ fn on_exit_media(context: &mut CompileContext) { let media = context.media_stack.pop().unwrap(); let label = media.label.unwrap(); - let image_alt_inside = context.image_alt_inside; let id = media.reference_id.or(media.label_id).map(|indices| { normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()) }); @@ -1190,7 +1328,7 @@ fn on_exit_media(context: &mut CompileContext) { None }; - if !image_alt_inside { + if !is_in_image { if media.image { context.push("<img src=\""); } else { @@ -1223,7 +1361,7 @@ fn on_exit_media(context: &mut CompileContext) { context.push(&label); } - if !image_alt_inside { + if !is_in_image { context.push("\""); let title = if let Some(index) = definition_index { @@ -1248,7 +1386,7 @@ fn on_exit_media(context: &mut CompileContext) { if !media.image { context.push(&label); - if !image_alt_inside { + if !is_in_image { context.push("</a>"); } } @@ -1300,6 +1438,154 @@ fn on_exit_thematic_break(context: &mut CompileContext) { context.push("<hr />"); } +/// Generate a footnote section. 
+fn generate_footnote_section(context: &mut CompileContext) { + context.line_ending_if_needed(); + context.push("<section data-footnotes=\"\" class=\"footnotes\"><"); + if let Some(ref value) = context.options.gfm_footnote_label_tag_name { + context.push(&encode(value, context.encode_html)); + } else { + context.push("h2"); + } + context.push(" id=\"footnote-label\" "); + if let Some(ref value) = context.options.gfm_footnote_label_attributes { + context.push(value); + } else { + context.push("class=\"sr-only\""); + } + context.push(">"); + if let Some(ref value) = context.options.gfm_footnote_label { + context.push(&encode(value, context.encode_html)); + } else { + context.push("Footnotes"); + } + context.push("</"); + if let Some(ref value) = context.options.gfm_footnote_label_tag_name { + context.push(&encode(value, context.encode_html)); + } else { + context.push("h2"); + } + context.push(">"); + context.line_ending(); + context.push("<ol>"); + + let mut index = 0; + while index < context.gfm_footnote_definition_calls.len() { + generate_footnote_item(context, index); + index += 1; + } + + context.line_ending(); + context.push("</ol>"); + context.line_ending(); + context.push("</section>"); + context.line_ending(); +} + +/// Generate a footnote item from a call. +fn generate_footnote_item(context: &mut CompileContext, index: usize) { + let id = &context.gfm_footnote_definition_calls[index].0; + let safe_id = sanitize_uri(&id.to_lowercase(), &None); + + // Find definition: we’ll always find it. 
+ let mut definition_index = 0; + while definition_index < context.gfm_footnote_definitions.len() { + if &context.gfm_footnote_definitions[definition_index].0 == id { + break; + } + definition_index += 1; + } + + debug_assert_ne!( + definition_index, + context.gfm_footnote_definitions.len(), + "expected definition" + ); + + context.line_ending(); + context.push("<li id=\""); + if let Some(ref value) = context.options.gfm_footnote_clobber_prefix { + context.push(&encode(value, context.encode_html)); + } else { + context.push("user-content-"); + } + context.push("fn-"); + context.push(&safe_id); + context.push("\">"); + context.line_ending(); + + // Create one or more backreferences. + let mut reference_index = 0; + let mut backreferences = String::new(); + while reference_index < context.gfm_footnote_definition_calls[index].1 { + if reference_index != 0 { + backreferences.push(' '); + } + backreferences.push_str("<a href=\"#"); + if let Some(ref value) = context.options.gfm_footnote_clobber_prefix { + backreferences.push_str(&encode(value, context.encode_html)); + } else { + backreferences.push_str("user-content-"); + } + backreferences.push_str("fnref-"); + backreferences.push_str(&safe_id); + if reference_index != 0 { + backreferences.push('-'); + backreferences.push_str(&(reference_index + 1).to_string()); + } + backreferences.push_str( + "\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"", + ); + if let Some(ref value) = context.options.gfm_footnote_back_label { + backreferences.push_str(&encode(value, context.encode_html)); + } else { + backreferences.push_str("Back to content"); + } + backreferences.push_str("\">↩"); + if reference_index != 0 { + backreferences.push_str("<sup>"); + backreferences.push_str(&(reference_index + 1).to_string()); + backreferences.push_str("</sup>"); + } + backreferences.push_str("</a>"); + + reference_index += 1; + } + + let value = context.gfm_footnote_definitions[definition_index].1.clone(); + let bytes 
= value.as_bytes(); + let mut byte_index = bytes.len(); + // Move back past EOL. + while byte_index > 0 && matches!(bytes[byte_index - 1], b'\n' | b'\r') { + byte_index -= 1; + } + // Check if it ends in `</p>`. + // This is a bit funky if someone wrote a safe paragraph by hand in + // there. + // But in all other cases, `<` and `>` would be encoded, so we can be + // sure that this is generated by our compiler. + if byte_index > 3 + && bytes[byte_index - 4] == b'<' + && bytes[byte_index - 3] == b'/' + && bytes[byte_index - 2] == b'p' + && bytes[byte_index - 1] == b'>' + { + let (before, after) = bytes.split_at(byte_index - 4); + let mut result = String::new(); + result.push_str(str::from_utf8(before).unwrap()); + result.push(' '); + result.push_str(&backreferences); + result.push_str(str::from_utf8(after).unwrap()); + context.push(&result); + } else { + context.push(&value); + context.line_ending_if_needed(); + context.push(&backreferences); + } + context.line_ending_if_needed(); + context.push("</li>"); +} + /// Generate an autolink (used by unicode autolinks and GFM autolink literals). 
fn generate_autolink(context: &mut CompileContext, protocol: Option<&str>, value: &str) { if !context.image_alt_inside { diff --git a/src/construct/definition.rs b/src/construct/definition.rs index e65d979..1d67635 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -175,14 +175,14 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.token_2 = Name::Data; tokenizer.tokenize_state.token_3 = Name::Data; - tokenizer.tokenize_state.end = skip::to_back( - &tokenizer.events, - tokenizer.events.len() - 1, - &[Name::DefinitionLabelString], - ); - match tokenizer.current { Some(b':') => { + tokenizer.tokenize_state.end = skip::to_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Name::DefinitionLabelString], + ); + tokenizer.enter(Name::DefinitionMarker); tokenizer.consume(); tokenizer.exit(Name::DefinitionMarker); diff --git a/src/construct/document.rs b/src/construct/document.rs index b438808..9c76e46 100644 --- a/src/construct/document.rs +++ b/src/construct/document.rs @@ -1,12 +1,13 @@ //! The document content type. //! -//! **Document** represents the containers, such as block quotes and lists, -//! which structure the document and contain other sections. +//! **Document** represents the containers, such as block quotes, list items, +//! or GFM footnotes, which structure the document and contain other sections. //! //! The constructs found in flow are: //! //! * [Block quote][crate::construct::block_quote] //! * [List item][crate::construct::list_item] +//! 
* [GFM: Footnote definition][crate::construct::gfm_footnote_definition] use crate::event::{Content, Event, Kind, Link, Name}; use crate::state::{Name as StateName, State}; @@ -99,6 +100,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State { let name = match container.kind { Container::BlockQuote => StateName::BlockQuoteContStart, + Container::GfmFootnoteDefinition => StateName::GfmFootnoteDefinitionContStart, Container::ListItem => StateName::ListItemContStart, }; @@ -185,7 +187,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State { // List item? - // We replace the empty block quote container for this new list one. + // We replace the empty block quote container for this new list item one. tokenizer.tokenize_state.document_container_stack [tokenizer.tokenize_state.document_continued] = ContainerState { kind: Container::ListItem, @@ -200,14 +202,38 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State State::Retry(StateName::ListItemStart) } -/// At new container, but not a list (or block quote). +/// At new container, but not a block quote or list item. // /// ```markdown /// > | a /// ^ /// ``` pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { - // It wasn’t a new block quote or a list. + // Footnote definition? + // We replace the empty list item container for this new footnote + // definition one. 
+ tokenizer.tokenize_state.document_container_stack + [tokenizer.tokenize_state.document_continued] = ContainerState { + kind: Container::GfmFootnoteDefinition, + blank_initial: false, + size: 0, + }; + + tokenizer.attempt( + State::Next(StateName::DocumentContainerNewAfter), + State::Next(StateName::DocumentContainerNewBeforeNotGfmFootnoteDefinition), + ); + State::Retry(StateName::GfmFootnoteDefinitionStart) +} + +/// At new container, but not a block quote, list item, or footnote definition. +// +/// ```markdown +/// > | a +/// ^ +/// ``` +pub fn container_new_before_not_footnote_definition(tokenizer: &mut Tokenizer) -> State { + // It wasn’t a new block quote, list item, or footnote definition. // Swap the new container (in the middle) with the existing one (at the end). // Drop what was in the middle. tokenizer @@ -227,7 +253,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn container_new_after(tokenizer: &mut Tokenizer) -> State { - // It was a new block quote or a list. + // It was a new block quote, list item, or footnote definition. // Swap the new container (in the middle) with the existing one (at the end). // Take the new container. let container = tokenizer @@ -453,6 +479,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) { let container = stack_close.pop().unwrap(); let name = match container.kind { Container::BlockQuote => Name::BlockQuote, + Container::GfmFootnoteDefinition => Name::GfmFootnoteDefinition, Container::ListItem => Name::ListItem, }; diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs new file mode 100644 index 0000000..3715044 --- /dev/null +++ b/src/construct/gfm_footnote_definition.rs @@ -0,0 +1,345 @@ +//! GFM: Footnote definition occurs in the [document][] content type. +//! +//! ## Grammar +//! +//! Footnote definitions form with the following BNF +//! 
(<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! ; Restriction: `label` must start with `^` (and not be empty after it). +//! ; See the `label` construct for the BNF of that part. +//! gfm_footnote_definition_start ::= label ':' *space_or_tab +//! +//! ; Restriction: blank line allowed. +//! gfm_footnote_definition_cont ::= 4(space_or_tab) +//! ``` +//! +//! Further lines that are not prefixed with `gfm_footnote_definition_cont` +//! cause the footnote definition to be exited, except when those lines are +//! lazy continuation or blank. +//! Like so many things in markdown, footnote definitions, too, are complex. +//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark_block] for +//! more on parsing details. +//! +//! See [`label`][label] for grammar, notes, and recommendations on that part. +//! +//! The `label` part is interpreted as the [string][] content type. +//! That means that [character escapes][character_escape] and +//! [character references][character_reference] are allowed. +//! +//! Definitions match to calls through identifiers. +//! To match, both labels must be equal after normalizing with +//! [`normalize_identifier`][normalize_identifier]. +//! One definition can match to multiple calls. +//! Multiple definitions with the same, normalized, identifier are ignored: the +//! first definition is preferred. +//! To illustrate, the definition with the content of `x` wins: +//! +//! ```markdown +//! [^a]: x +//! [^a]: y +//! +//! [^a] +//! ``` +//! +//! Importantly, while labels *can* include [string][] content (character +//! escapes and character references), these are not considered when matching. +//! To illustrate, neither definition matches the call: +//! +//! ```markdown +//! [^a&amp;b]: x +//! [^a\&b]: y +//! +//! [^a&b] +//! ``` +//! +//! Because footnote definitions are containers (like block quotes and list +//! items), they can contain more footnote definitions, and they can include +//! 
calls to themselves. +//! +//! ## HTML +//! +//! GFM footnote definitions do not, on their own, relate to anything in HTML. +//! When matched with a [label end][label_end], which in turn matches a +//! [GFM label start (footnote)][gfm_label_start_footnote], the definition +//! relates to several elements in HTML. +//! +//! When one or more definitions are called, a footnote section is generated +//! at the end of the document, using `<section>`, `<h2>`, and `<ol>` elements: +//! +//! ```html +//! <section data-footnotes="" class="footnotes"><h2 id="footnote-label" class="sr-only">Footnotes</h2> +//! <ol>…</ol> +//! </section> +//! ``` +//! +//! Each definition is generated as a `<li>` in the `<ol>`, in the order they +//! were first called: +//! +//! ```html +//! <li id="user-content-fn-1">…</li> +//! ``` +//! +//! Backreferences are injected at the end of the final paragraph when the +//! definition ends with one, or, otherwise, at the end of the definition. +//! When a definition is called multiple times, multiple backreferences are +//! generated. +//! Further backreferences use an extra counter in the `href` attribute and +//! visually in a `<sup>` after `↩`. +//! +//! ```html +//! <a href="#user-content-fnref-1" data-footnote-backref="" class="data-footnote-backref" aria-label="Back to content">↩</a> <a href="#user-content-fnref-1-2" data-footnote-backref="" class="data-footnote-backref" aria-label="Back to content">↩<sup>2</sup></a> +//! ``` +//! +//! See +//! [*§ 4.5.1 The `a` element*][html_a], +//! [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements*][html_h], +//! [*§ 4.4.8 The `li` element*][html_li], +//! [*§ 4.4.5 The `ol` element*][html_ol], +//! [*§ 4.4.1 The `p` element*][html_p], +//! [*§ 4.3.3 The `section` element*][html_section], and +//! [*§ 4.5.19 The `sub` and `sup` elements*][html_sup] +//! in the HTML spec for more info. +//! +//! ## Recommendation +//! +//! When authoring markdown with footnotes, it’s recommended to use words +//! 
instead of numbers (or letters or anything with an order) as calls. +//! That makes it easier to reuse and reorder footnotes. +//! +//! It’s recommended to place footnote definitions at the bottom of the document. +//! +//! ## Bugs +//! +//! GitHub’s own algorithm to parse footnote definitions contains several bugs. +//! These are not present in this project. +//! The issues relating to footnote definitions are: +//! +//! * [Footnote reference call identifiers are trimmed, but definition identifiers aren’t](https://github.com/github/cmark-gfm/issues/237)\ +//! — initial and final whitespace in labels cause them not to match +//! * [Footnotes are matched case-insensitive, but links keep their casing, breaking them](https://github.com/github/cmark-gfm/issues/239)\ +//! — using uppercase (or any character that will be percent encoded) in identifiers breaks links +//! * [Colons in footnotes generate links w/o `href`](https://github.com/github/cmark-gfm/issues/250)\ +//! — colons in identifiers generate broken links +//! * [Character escape of `]` does not work in footnote identifiers](https://github.com/github/cmark-gfm/issues/240)\ +//! — some character escapes don’t work +//! * [Footnotes in links are broken](https://github.com/github/cmark-gfm/issues/249)\ +//! — while `CommonMark` prevents links in links, GitHub does not prevent footnotes (which turn into links) in links +//! * [Footnote-like brackets around image, break that image](https://github.com/github/cmark-gfm/issues/275)\ +//! — images can’t be used in what looks like a footnote call +//! +//! ## Tokens +//! +//! * [`DefinitionMarker`][Name::DefinitionMarker] +//! * [`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition] +//! * [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel] +//! * [`GfmFootnoteDefinitionLabelMarker`][Name::GfmFootnoteDefinitionLabelMarker] +//! * [`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString] +//! 
* [`GfmFootnoteDefinitionMarker`][Name::GfmFootnoteDefinitionMarker] +//! * [`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix] +//! * [`SpaceOrTab`][Name::SpaceOrTab] +//! +//! ## References +//! +//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote) +//! +//! > 👉 **Note**: Footnotes are not specified in GFM yet. +//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270) +//! > for the related issue. +//! +//! [document]: crate::construct::document +//! [string]: crate::construct::string +//! [character_reference]: crate::construct::character_reference +//! [character_escape]: crate::construct::character_escape +//! [label]: crate::construct::partial_label +//! [label_end]: crate::construct::label_end +//! [gfm_label_start_footnote]: crate::construct::gfm_label_start_footnote +//! [commonmark_block]: https://spec.commonmark.org/0.30/#phase-1-block-structure +//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html_h]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements +//! [html_li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element +//! [html_ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element +//! [html_p]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element +//! [html_section]: https://html.spec.whatwg.org/multipage/sections.html#the-section-element +//! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements + +use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::event::Name; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::Tokenizer; +use crate::util::{ + constant::TAB_SIZE, + normalize_identifier::normalize_identifier, + skip, + slice::{Position, Slice}, +}; + +/// Start of GFM footnote definition. 
+/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer + .parse_state + .options + .constructs + .gfm_footnote_definition + { + tokenizer.enter(Name::GfmFootnoteDefinition); + + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + tokenizer.attempt( + State::Next(StateName::GfmFootnoteDefinitionLabelBefore), + State::Nok, + ); + State::Retry(space_or_tab_min_max( + tokenizer, + 1, + if tokenizer.parse_state.options.constructs.code_indented { + TAB_SIZE - 1 + } else { + usize::MAX + }, + )) + } else { + State::Retry(StateName::GfmFootnoteDefinitionLabelBefore) + } + } else { + State::Nok + } +} + +/// Before definition label (after optional whitespace). +/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_before(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'[') => { + tokenizer.tokenize_state.token_1 = Name::GfmFootnoteDefinitionLabel; + tokenizer.tokenize_state.token_2 = Name::GfmFootnoteDefinitionLabelMarker; + tokenizer.tokenize_state.token_3 = Name::GfmFootnoteDefinitionLabelString; + tokenizer.tokenize_state.token_4 = Name::GfmFootnoteDefinitionMarker; + tokenizer.tokenize_state.marker = b'^'; + tokenizer.enter(Name::GfmFootnoteDefinitionPrefix); + tokenizer.attempt( + State::Next(StateName::GfmFootnoteDefinitionLabelAfter), + State::Nok, + ); + State::Retry(StateName::LabelStart) + } + _ => State::Nok, + } +} + +/// After definition label. 
+/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn label_after(tokenizer: &mut Tokenizer) -> State { + tokenizer.tokenize_state.token_1 = Name::Data; + tokenizer.tokenize_state.token_2 = Name::Data; + tokenizer.tokenize_state.token_3 = Name::Data; + tokenizer.tokenize_state.token_4 = Name::Data; + tokenizer.tokenize_state.marker = 0; + + match tokenizer.current { + Some(b':') => { + let end = skip::to_back( + &tokenizer.events, + tokenizer.events.len() - 1, + &[Name::GfmFootnoteDefinitionLabelString], + ); + + // Note: we don’t care about virtual spaces, so `as_str` is fine. + let id = normalize_identifier( + Slice::from_position( + tokenizer.parse_state.bytes, + &Position::from_exit_event(&tokenizer.events, end), + ) + .as_str(), + ); + + // Note: we don’t care about uniqueness. + // It’s likely that that doesn’t happen very frequently. + // It is more likely that it wastes precious time. + tokenizer.tokenize_state.gfm_footnote_definitions.push(id); + + tokenizer.enter(Name::DefinitionMarker); + tokenizer.consume(); + tokenizer.exit(Name::DefinitionMarker); + tokenizer.attempt( + State::Next(StateName::GfmFootnoteDefinitionWhitespaceAfter), + State::Nok, + ); + // Any whitespace after the marker is eaten, forming indented code + // is not possible. + // No space is also fine, just like a block quote marker. + State::Next(space_or_tab_min_max(tokenizer, 0, usize::MAX)) + } + _ => State::Nok, + } +} + +/// After definition prefix. +/// +/// ```markdown +/// > | [^a]: b +/// ^ +/// ``` +pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State { + tokenizer.exit(Name::GfmFootnoteDefinitionPrefix); + State::Ok +} + +/// Start of footnote definition continuation. 
+/// +/// ```markdown +/// | [^a]: b +/// > | c +/// ^ +/// ``` +pub fn cont_start(tokenizer: &mut Tokenizer) -> State { + tokenizer.check( + State::Next(StateName::GfmFootnoteDefinitionContBlank), + State::Next(StateName::GfmFootnoteDefinitionContFilled), + ); + State::Retry(StateName::BlankLineStart) +} + +/// Start of footnote definition continuation, at a blank line. +/// +/// ```markdown +/// | [^a]: b +/// > | ␠␠␊ +/// ^ +/// ``` +pub fn cont_blank(tokenizer: &mut Tokenizer) -> State { + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + State::Retry(space_or_tab_min_max(tokenizer, 0, TAB_SIZE)) + } else { + State::Ok + } +} + +/// Start of footnote definition continuation, at a filled line. +/// +/// ```markdown +/// | [^a]: b +/// > | c +/// ^ +/// ``` +pub fn cont_filled(tokenizer: &mut Tokenizer) -> State { + if matches!(tokenizer.current, Some(b'\t' | b' ')) { + // Consume exactly `TAB_SIZE`. + State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE)) + } else { + State::Nok + } +} diff --git a/src/construct/gfm_label_start_footnote.rs b/src/construct/gfm_label_start_footnote.rs new file mode 100644 index 0000000..a3a0df6 --- /dev/null +++ b/src/construct/gfm_label_start_footnote.rs @@ -0,0 +1,91 @@ +//! Label start (footnote) occurs in the [text][] content type. +//! +//! ## Grammar +//! +//! Label start (footnote) forms with the following BNF +//! (<small>see [construct][crate::construct] for character groups</small>): +//! +//! ```bnf +//! gfm_label_start_footnote ::= '[' '^' +//! ``` +//! +//! ## HTML +//! +//! Label start (footnote) does not, on its own, relate to anything in HTML. +//! When matched with a [label end][label_end], they together relate to `<sup>` +//! and `<a>` elements in HTML. +//! See [*§ 4.5.19 The `sub` and `sup` elements*][html_sup] and +//! [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info. +//! Without an end, the characters (`[^`) are output. +//! +//! ## Tokens +//! +//! 
* [`GfmFootnoteCallLabel`][Name::GfmFootnoteCallLabel] +//! * [`GfmFootnoteCallMarker`][Name::GfmFootnoteCallMarker] +//! * [`LabelMarker`][Name::LabelMarker] +//! +//! ## References +//! +//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote) +//! +//! > 👉 **Note**: Footnotes are not specified in GFM yet. +//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270) +//! > for the related issue. +//! +//! [text]: crate::construct::text +//! [label_end]: crate::construct::label_end +//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element +//! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements + +use crate::event::Name; +use crate::resolve::Name as ResolveName; +use crate::state::{Name as StateName, State}; +use crate::tokenizer::{LabelKind, LabelStart, Tokenizer}; + +/// Start of label (footnote) start. +/// +/// ```markdown +/// > | a [^b] c +/// ^ +/// ``` +pub fn start(tokenizer: &mut Tokenizer) -> State { + if tokenizer + .parse_state + .options + .constructs + .gfm_label_start_footnote + && tokenizer.current == Some(b'[') + { + tokenizer.enter(Name::GfmFootnoteCallLabel); + tokenizer.enter(Name::LabelMarker); + tokenizer.consume(); + tokenizer.exit(Name::LabelMarker); + State::Next(StateName::GfmLabelStartFootnoteOpen) + } else { + State::Nok + } +} + +/// After `[`, at `^`. 
+/// +/// ```markdown +/// > | a [^b] c +/// ^ +/// ``` +pub fn open(tokenizer: &mut Tokenizer) -> State { + match tokenizer.current { + Some(b'^') => { + tokenizer.enter(Name::GfmFootnoteCallMarker); + tokenizer.consume(); + tokenizer.exit(Name::GfmFootnoteCallMarker); + tokenizer.exit(Name::GfmFootnoteCallLabel); + tokenizer.tokenize_state.label_starts.push(LabelStart { + kind: LabelKind::GfmFootnote, + start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1), + inactive: false, + }); + tokenizer.register_resolver_before(ResolveName::Label); + State::Ok + } + _ => State::Nok, + } +} diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 0ea745f..b5a6013 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -46,6 +46,8 @@ //! attribute in case of a [label start (link)][label_start_link], and an //! `src` attribute in case of a [label start (image)][label_start_image]. //! The title is formed, optionally, on either `<a>` or `<img>`. +//! When matched with a [gfm label start (footnote)][gfm_label_start_footnote], +//! no reference or resource can follow the label end. //! //! For info on how to encode characters in URLs, see //! [`destination`][destination]. @@ -53,11 +55,13 @@ //! `<img>` when compiling, see //! [`sanitize_uri`][sanitize_uri]. //! +//! In case of a matched [gfm label start (footnote)][gfm_label_start_footnote], +//! a counter is injected. //! In case of a matched [label start (link)][label_start_link], the interpreted //! content between it and the label end, is placed between the opening and //! closing tags. -//! Otherwise, the text is also interpreted, but used *without* the resulting -//! tags: +//! In case of a matched [label start (image)][label_start_image], the text is +//! also interpreted, but used *without* the resulting tags: //! //! ```markdown //! [a *b* c](#) @@ -75,8 +79,9 @@ //! It is possible to use images in links. //! 
It’s somewhat possible to have links in images (the text will be used, not //! the HTML, see above). -//! But it’s not possible to use links in links. -//! The “deepest” link wins. +//! But it’s not possible to use links (or footnotes, which result in links) +//! in links. +//! The “deepest” link (or footnote) wins. //! To illustrate: //! //! ```markdown @@ -104,17 +109,26 @@ //! It can also match with [label start (image)][label_start_image], in which //! case they form an `<img>` element. //! See [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info. +//! It can also match with [gfm label start (footnote)][gfm_label_start_footnote], +//! in which case they form `<sup>` and `<a>` elements in HTML. +//! See [*§ 4.5.19 The `sub` and `sup` elements*][html_sup] and +//! [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info. //! //! ## Recommendation //! -//! It is recommended to use labels instead of [autolinks][autolink]. +//! It is recommended to use labels for links instead of [autolinks][autolink]. //! Labels allow more characters in URLs, and allow relative URLs and `www.` //! URLs. //! They also allow for descriptive text to explain the URL in prose. //! +//! In footnotes, it’s recommended to use words instead of numbers (or letters +//! or anything with an order) as calls. +//! That makes it easier to reuse and reorder footnotes. +//! //! ## Tokens //! //! * [`Data`][Name::Data] +//! * [`GfmFootnoteCall`][Name::GfmFootnoteCall] //! * [`Image`][Name::Image] //! * [`Label`][Name::Label] //! * [`LabelEnd`][Name::LabelEnd] @@ -140,10 +154,15 @@ //! ## References //! //! * [`label-end.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-end.js) +//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote) //! 
* [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions) //! * [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links) //! * [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images) //! +//! > 👉 **Note**: Footnotes are not specified in GFM yet. +//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270) +//! > for the related issue. +//! //! [string]: crate::construct::string //! [text]: crate::construct::text //! [destination]: crate::construct::partial_destination @@ -151,25 +170,28 @@ //! [label]: crate::construct::partial_label //! [label_start_image]: crate::construct::label_start_image //! [label_start_link]: crate::construct::label_start_link +//! [gfm_label_start_footnote]: crate::construct::gfm_label_start_footnote //! [definition]: crate::construct::definition //! [autolink]: crate::construct::autolink //! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri //! [normalize_identifier]: crate::util::normalize_identifier::normalize_identifier //! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element //! [html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element +//! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements use crate::construct::partial_space_or_tab_eol::space_or_tab_eol; use crate::event::{Event, Kind, Name}; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; -use crate::tokenizer::{Label, LabelStart, Tokenizer}; +use crate::tokenizer::{Label, LabelKind, LabelStart, Tokenizer}; use crate::util::{ constant::RESOURCE_DESTINATION_BALANCE_MAX, normalize_identifier::normalize_identifier, skip, slice::{Position, Slice}, }; -use alloc::vec; +use alloc::{string::String, vec}; +extern crate std; /// Start of label end. 
/// @@ -190,7 +212,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.tokenize_state.end = tokenizer.events.len(); - // Mark as balanced if the info is inactive. + // If the corresponding label (link) start is marked as inactive, + // it means we’d be wrapping a link, like this: + // + // ```markdown + // > | a [b [c](d) e](f) g. + // ^ + // ``` + // + // We can’t have that, so it’s just balanced brackets. if label_start.inactive { return State::Retry(StateName::LabelEndNok); } @@ -220,19 +250,34 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// ^ /// ``` pub fn after(tokenizer: &mut Tokenizer) -> State { - let start = tokenizer.tokenize_state.label_starts.last().unwrap(); - let defined = tokenizer - .parse_state - .definitions - .contains(&normalize_identifier( - // We don’t care about virtual spaces, so `indices` and `as_str` are fine. - Slice::from_indices( - tokenizer.parse_state.bytes, - tokenizer.events[start.start.1].point.index, - tokenizer.events[tokenizer.tokenize_state.end].point.index, - ) - .as_str(), - )); + let start_index = tokenizer.tokenize_state.label_starts.len() - 1; + let start = &tokenizer.tokenize_state.label_starts[start_index]; + + let indices = ( + tokenizer.events[start.start.1].point.index, + tokenizer.events[tokenizer.tokenize_state.end].point.index, + ); + + // We don’t care about virtual spaces, so `indices` and `as_str` are fine. + let mut id = normalize_identifier( + Slice::from_indices(tokenizer.parse_state.bytes, indices.0, indices.1).as_str(), + ); + + // See if this matches a footnote definition. + if start.kind == LabelKind::GfmFootnote { + if tokenizer.parse_state.gfm_footnote_definitions.contains(&id) { + return State::Retry(StateName::LabelEndOk); + } + + // Nope, this might be a normal link? 
+ tokenizer.tokenize_state.label_starts[start_index].kind = LabelKind::GfmUndefinedFootnote; + let mut new_id = String::new(); + new_id.push('^'); + new_id.push_str(&id); + id = new_id; + } + + let defined = tokenizer.parse_state.definitions.contains(&id); match tokenizer.current { // Resource (`[asd](fgh)`)? @@ -302,17 +347,15 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { // Remove the start. let label_start = tokenizer.tokenize_state.label_starts.pop().unwrap(); - let is_link = tokenizer.events[label_start.start.0].name == Name::LabelLink; - - // If this is a link, we need to mark earlier link starts as no longer - // viable for use (as they would otherwise contain a link). + // If this is a link or footnote, we need to mark earlier link starts as no + // longer viable for use (as they would otherwise contain a link). // These link starts are still looking for balanced closing brackets, so - // we can’t remove them. - if is_link { + // we can’t remove them, but we can mark them. + if label_start.kind != LabelKind::Image { let mut index = 0; while index < tokenizer.tokenize_state.label_starts.len() { let label_start = &mut tokenizer.tokenize_state.label_starts[index]; - if tokenizer.events[label_start.start.0].name == Name::LabelLink { + if label_start.kind != LabelKind::Image { label_start.inactive = true; } index += 1; @@ -320,6 +363,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { } tokenizer.tokenize_state.labels.push(Label { + kind: label_start.kind, start: label_start.start, end: (tokenizer.tokenize_state.end, tokenizer.events.len() - 1), }); @@ -342,9 +386,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State { /// ``` pub fn nok(tokenizer: &mut Tokenizer) -> State { let start = tokenizer.tokenize_state.label_starts.pop().unwrap(); - tokenizer.tokenize_state.label_starts_loose.push(start); - tokenizer.tokenize_state.end = 0; State::Nok } @@ -615,120 +657,142 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State { } } -/// Resolve 
media. +/// Resolve images, links, and footnotes. /// -/// This turns matching label start (image, link) and label ends into links and -/// images, and turns unmatched label starts back into data. +/// This turns matching label starts and label ends into links, images, and +/// footnotes, and turns unmatched label starts back into data. pub fn resolve(tokenizer: &mut Tokenizer) { - let list = tokenizer.tokenize_state.label_starts.split_off(0); - mark_as_data(tokenizer, &list); - let list = tokenizer.tokenize_state.label_starts_loose.split_off(0); - mark_as_data(tokenizer, &list); + // Inject labels. + let labels = tokenizer.tokenize_state.labels.split_off(0); + inject_labels(tokenizer, &labels); + // Handle loose starts. + let starts = tokenizer.tokenize_state.label_starts.split_off(0); + mark_as_data(tokenizer, &starts); + let starts = tokenizer.tokenize_state.label_starts_loose.split_off(0); + mark_as_data(tokenizer, &starts); - let media = tokenizer.tokenize_state.labels.split_off(0); + tokenizer.map.consume(&mut tokenizer.events); +} +/// Inject links/images/footnotes. +fn inject_labels(tokenizer: &mut Tokenizer, labels: &[Label]) { // Add grouping events. let mut index = 0; - while index < media.len() { - let media = &media[index]; - // LabelLink:Enter or LabelImage:Enter. - let group_enter_index = media.start.0; - let group_enter_event = &tokenizer.events[group_enter_index]; - // LabelLink:Exit or LabelImage:Exit. - let text_enter_index = media.start.0 - + (if group_enter_event.name == Name::LabelLink { - 4 - } else { - 6 - }); - // LabelEnd:Enter. - let text_exit_index = media.end.0; - // LabelEnd:Exit. - let label_exit_index = media.end.0 + 3; - // Resource:Exit, etc. 
- let group_end_index = media.end.1; - - let group_name = if group_enter_event.name == Name::LabelLink { - Name::Link - } else { + while index < labels.len() { + let label = &labels[index]; + let group_name = if label.kind == LabelKind::GfmFootnote { + Name::GfmFootnoteCall + } else if label.kind == LabelKind::Image { Name::Image + } else { + Name::Link }; + // If this is a fine link, which starts with a footnote start that did + // not match, we need to inject the caret as data. + let mut caret = vec![]; + + if label.kind == LabelKind::GfmUndefinedFootnote { + // Add caret. + caret.push(Event { + kind: Kind::Enter, + name: Name::Data, + // Enter:GfmFootnoteCallMarker. + point: tokenizer.events[label.start.1 - 2].point.clone().clone(), + link: None, + }); + caret.push(Event { + kind: Kind::Exit, + name: Name::Data, + // Exit:GfmFootnoteCallMarker. + point: tokenizer.events[label.start.1 - 1].point.clone(), + link: None, + }); + // Change and move label end. + tokenizer.events[label.start.0].name = Name::LabelLink; + tokenizer.events[label.start.1].name = Name::LabelLink; + tokenizer.events[label.start.1].point = caret[0].point.clone(); + // Remove the caret. + // Enter:GfmFootnoteCallMarker, Exit:GfmFootnoteCallMarker. + tokenizer.map.add(label.start.1 - 2, 2, vec![]); + } + // Insert a group enter and label enter. tokenizer.map.add( - group_enter_index, + label.start.0, 0, vec![ Event { kind: Kind::Enter, name: group_name.clone(), - point: group_enter_event.point.clone(), + point: tokenizer.events[label.start.0].point.clone(), link: None, }, Event { kind: Kind::Enter, name: Name::Label, - point: group_enter_event.point.clone(), + point: tokenizer.events[label.start.0].point.clone(), link: None, }, ], ); // Empty events not allowed. - if text_enter_index != text_exit_index { - // Insert a text enter. + // Though: if this was what looked like a footnote, but didn’t match, + // it’s a link instead, and we need to inject the `^`. 
+ if label.start.1 != label.end.0 || !caret.is_empty() { tokenizer.map.add( - text_enter_index, + label.start.1 + 1, 0, vec![Event { kind: Kind::Enter, name: Name::LabelText, - point: tokenizer.events[text_enter_index].point.clone(), + point: tokenizer.events[label.start.1].point.clone(), link: None, }], ); - - // Insert a text exit. tokenizer.map.add( - text_exit_index, + label.end.0, 0, vec![Event { kind: Kind::Exit, name: Name::LabelText, - point: tokenizer.events[text_exit_index].point.clone(), + point: tokenizer.events[label.end.0].point.clone(), link: None, }], ); } + if !caret.is_empty() { + tokenizer.map.add(label.start.1 + 1, 0, caret); + } + // Insert a label exit. tokenizer.map.add( - label_exit_index + 1, + label.end.0 + 4, 0, vec![Event { kind: Kind::Exit, name: Name::Label, - point: tokenizer.events[label_exit_index].point.clone(), + point: tokenizer.events[label.end.0 + 3].point.clone(), link: None, }], ); // Insert a group exit. tokenizer.map.add( - group_end_index + 1, + label.end.1 + 1, 0, vec![Event { kind: Kind::Exit, name: group_name, - point: tokenizer.events[group_end_index].point.clone(), + point: tokenizer.events[label.end.1].point.clone(), link: None, }], ); index += 1; } - - tokenizer.map.consume(&mut tokenizer.events); } /// Remove loose label starts. diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index a8c9ac3..4511794 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -35,7 +35,7 @@ use crate::event::Name; use crate::resolve::Name as ResolveName; use crate::state::{Name as StateName, State}; -use crate::tokenizer::{LabelStart, Tokenizer}; +use crate::tokenizer::{LabelKind, LabelStart, Tokenizer}; /// Start of label (image) start. 
/// @@ -68,14 +68,52 @@ pub fn open(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(Name::LabelMarker); tokenizer.consume(); tokenizer.exit(Name::LabelMarker); - tokenizer.exit(Name::LabelImage); - tokenizer.tokenize_state.label_starts.push(LabelStart { - start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1), - inactive: false, - }); - tokenizer.register_resolver_before(ResolveName::Label); - State::Ok + State::Next(StateName::LabelStartImageAfter) } _ => State::Nok, } } + +/// After `![`. +/// +/// ```markdown +/// > | a ![b] c +/// ^ +/// ``` +/// +/// This is needed because, when GFM footnotes are enabled, images never +/// form when started with a `^`. +/// Instead, links form: +/// +/// ```markdown +/// ![^a](b) +/// +/// ![^a][b] +/// +/// [b]: c +/// ``` +/// +/// ```html +/// <p>!<a href="b">^a</a></p> +/// <p>!<a href="c">^a</a></p> +/// ``` +pub fn after(tokenizer: &mut Tokenizer) -> State { + if tokenizer + .parse_state + .options + .constructs + .gfm_label_start_footnote + && tokenizer.current == Some(b'^') + { + State::Nok + } else { + tokenizer.exit(Name::LabelImage); + tokenizer.tokenize_state.label_starts.push(LabelStart { + kind: LabelKind::Image, + start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1), + inactive: false, + }); + tokenizer.register_resolver_before(ResolveName::Label); + State::Ok + } +} diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index 3aeb68b..3454724 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -34,7 +34,7 @@ use crate::event::Name; use crate::resolve::Name as ResolveName; use crate::state::State; -use crate::tokenizer::{LabelStart, Tokenizer}; +use crate::tokenizer::{LabelKind, LabelStart, Tokenizer}; /// Start of label (link) start. 
/// @@ -52,6 +52,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Name::LabelMarker); tokenizer.exit(Name::LabelLink); tokenizer.tokenize_state.label_starts.push(LabelStart { + kind: LabelKind::Link, start: (start, tokenizer.events.len() - 1), inactive: false, }); diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs index 39b5d13..658c2c7 100644 --- a/src/construct/list_item.rs +++ b/src/construct/list_item.rs @@ -17,7 +17,7 @@ //! ``` //! //! Further lines that are not prefixed with `list_item_cont` cause the list -//! item to be exited, except when those lines are lazy continuation. +//! item to be exited, except when those lines are lazy continuation or blank. //! Like so many things in markdown, list items too, are complex. //! See [*§ Phase 1: block structure* in `CommonMark`][commonmark_block] for //! more on parsing details. diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 7ac3899..c5002bb 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -59,6 +59,9 @@ //! //! * [frontmatter][] //! * [gfm autolink literal][gfm_autolink_literal] +//! * [gfm footnote definition][gfm_footnote_definition] +//! * [gfm task list item check][gfm_task_list_item_check] +//! * [gfm label start footnote][gfm_label_start_footnote] //! //! There are also several small subroutines typically used in different places: //! 
@@ -146,6 +149,8 @@ pub mod document; pub mod flow; pub mod frontmatter; pub mod gfm_autolink_literal; +pub mod gfm_footnote_definition; +pub mod gfm_label_start_footnote; pub mod gfm_task_list_item_check; pub mod hard_break_escape; pub mod heading_atx; diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs index 47ffd90..ab436b2 100644 --- a/src/construct/partial_label.rs +++ b/src/construct/partial_label.rs @@ -81,13 +81,37 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.enter(tokenizer.tokenize_state.token_2.clone()); tokenizer.consume(); tokenizer.exit(tokenizer.tokenize_state.token_2.clone()); - tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); - State::Next(StateName::LabelAtBreak) + State::Next(StateName::LabelAtMarker) } _ => State::Nok, } } +/// At an optional extra marker. +/// +/// Used for footnotes. +/// +/// ```markdown +/// > | [^a] +/// ^ +/// ``` +pub fn at_marker(tokenizer: &mut Tokenizer) -> State { + // For footnotes (and potentially other custom things in the future), + // We need to make sure there is a certain marker after `[`. + if tokenizer.tokenize_state.marker == 0 { + tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); + State::Retry(StateName::LabelAtBreak) + } else if tokenizer.current == Some(tokenizer.tokenize_state.marker) { + tokenizer.enter(tokenizer.tokenize_state.token_4.clone()); + tokenizer.consume(); + tokenizer.exit(tokenizer.tokenize_state.token_4.clone()); + tokenizer.enter(tokenizer.tokenize_state.token_3.clone()); + State::Next(StateName::LabelAtBreak) + } else { + State::Nok + } +} + /// In label, at something, before something else. /// /// ```markdown diff --git a/src/construct/text.rs b/src/construct/text.rs index 65f55d4..5535e3f 100644 --- a/src/construct/text.rs +++ b/src/construct/text.rs @@ -11,6 +11,8 @@ //! * [Character escape][crate::construct::character_escape] //! * [Character reference][crate::construct::character_reference] //! 
* [Code (text)][crate::construct::code_text] +//! * [GFM: Label start (footnote)][crate::construct::gfm_label_start_footnote] +//! * [GFM: Task list item check][crate::construct::gfm_task_list_item_check] //! * [Hard break (escape)][crate::construct::hard_break_escape] //! * [HTML (text)][crate::construct::html_text] //! * [Label start (image)][crate::construct::label_start_image] @@ -34,7 +36,7 @@ const MARKERS: [u8; 10] = [ b'<', // `autolink`, `html_text` b'[', // `label_start_link` b'\\', // `character_escape`, `hard_break_escape` - b']', // `label_end` + b']', // `label_end`, `gfm_label_start_footnote` b'_', // `attention` b'`', // `code_text` b'~', // `attention` (w/ `gfm_strikethrough`) @@ -104,9 +106,9 @@ pub fn before(tokenizer: &mut Tokenizer) -> State { Some(b'[') => { tokenizer.attempt( State::Next(StateName::TextBefore), - State::Next(StateName::TextBeforeData), + State::Next(StateName::TextBeforeLabelStartLink), ); - State::Retry(StateName::LabelStartLinkStart) + State::Retry(StateName::GfmLabelStartFootnoteStart) } Some(b'\\') => { tokenizer.attempt( @@ -165,6 +167,22 @@ pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State { State::Retry(StateName::HardBreakEscapeStart) } +/// Before label start (link). +/// +/// At `[`, which wasn’t a GFM label start (footnote). +/// +/// ```markdown +/// > | [a](b) +/// ^ +/// ``` +pub fn before_label_start_link(tokenizer: &mut Tokenizer) -> State { + tokenizer.attempt( + State::Next(StateName::TextBefore), + State::Next(StateName::TextBeforeData), + ); + State::Retry(StateName::LabelStartLinkStart) +} + /// Before data. 
/// /// ```markdown diff --git a/src/event.rs b/src/event.rs index f20c599..3b805e5 100644 --- a/src/event.rs +++ b/src/event.rs @@ -753,7 +753,8 @@ pub enum Name { /// ## Info /// /// * **Context**: - /// [`Definition`][Name::Definition] + /// [`Definition`][Name::Definition], + /// [`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition] /// * **Content model**: /// void /// * **Construct**: @@ -1019,7 +1020,172 @@ pub enum Name { /// ^^^^^^^^^^^^^^^ /// ``` GfmAutolinkLiteralWww, - /// GFM: Strikethrough. + /// GFM extension: whole footnote call. + /// + /// ## Info + /// + /// * **Context**: + /// [text content][crate::construct::text] + /// * **Content model**: + /// [`Label`][Name::Label] + /// * **Construct**: + /// [`label_end`][crate::construct::label_end] + /// + /// ## Example + /// + /// ```markdown + /// > | a [^b] c + /// ^^^^ + /// ``` + GfmFootnoteCall, + /// GFM extension: label start (footnote). + /// + /// ## Info + /// + /// * **Context**: + /// [`Label`][Name::Label] + /// * **Content model**: + /// [`GfmFootnoteCallMarker`][Name::GfmFootnoteCallMarker], + /// [`LabelMarker`][Name::LabelMarker] + /// * **Construct**: + /// [`gfm_label_start_footnote`][crate::construct::gfm_label_start_footnote] + /// + /// ## Example + /// + /// ```markdown + /// > | a [^b] c + /// ^^ + /// ``` + GfmFootnoteCallLabel, + /// GFM extension: label start (footnote) marker. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmFootnoteCallLabel`][Name::GfmFootnoteCallLabel] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_label_start_footnote`][crate::construct::gfm_label_start_footnote] + /// + /// ## Example + /// + /// ```markdown + /// > | a [^b] c + /// ^ + /// ``` + GfmFootnoteCallMarker, + /// GFM extension: whole footnote definition. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [document content][crate::construct::document] + /// * **Content model**: + /// [`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix], + /// [document content][crate::construct::flow] + /// * **Construct**: + /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition] + /// + /// ## Example + /// + /// ```markdown + /// > | [^a]: b + /// ^^^^^^^ + /// ``` + GfmFootnoteDefinition, + /// GFM extension: footnote definition prefix. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition] + /// * **Content model**: + /// [`DefinitionMarker`][Name::DefinitionMarker], + /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel], + /// [`SpaceOrTab`][Name::SpaceOrTab] + /// * **Construct**: + /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition] + /// + /// ## Example + /// + /// ```markdown + /// > | [^a]: b + /// ^^^^^^ + /// ``` + GfmFootnoteDefinitionPrefix, + /// GFM extension: footnote definition label. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix] + /// * **Content model**: + /// [`GfmFootnoteDefinitionLabelMarker`][Name::GfmFootnoteDefinitionLabelMarker], + /// [`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString], + /// [`GfmFootnoteDefinitionMarker`][Name::GfmFootnoteDefinitionMarker] + /// * **Construct**: + /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition] + /// + /// ## Example + /// + /// ```markdown + /// > | [^a]: b + /// ^^^^ + /// ``` + GfmFootnoteDefinitionLabel, + /// GFM extension: footnote definition label marker. 
+ /// + /// ## Info + /// + /// * **Context**: + /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition] + /// + /// ## Example + /// + /// ```markdown + /// > | [^a]: b + /// ^ ^ + /// ``` + GfmFootnoteDefinitionLabelMarker, + /// GFM extension: footnote definition label string. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel] + /// * **Content model**: + /// [string content][crate::construct::string] + /// * **Construct**: + /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition] + /// + /// ## Example + /// + /// ```markdown + /// > | [^a]: b + /// ^ + /// ``` + GfmFootnoteDefinitionLabelString, + /// GFM extension: footnote definition marker. + /// + /// ## Info + /// + /// * **Context**: + /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel] + /// * **Content model**: + /// void + /// * **Construct**: + /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition] + /// + /// ## Example + /// + /// ```markdown + /// > | [^a]: b + /// ^ + /// ``` + GfmFootnoteDefinitionMarker, + /// GFM extension: Strikethrough. /// /// ## Info /// @@ -1038,7 +1204,7 @@ pub enum Name { /// ^^^ /// ``` GfmStrikethrough, - /// Gfm: Strikethrough sequence. + /// GFM extension: Strikethrough sequence. /// /// ## Info /// @@ -1056,7 +1222,7 @@ pub enum Name { /// ^ ^ /// ``` GfmStrikethroughSequence, - /// Gfm: Strikethrough text. + /// GFM extension: Strikethrough text. /// /// ## Info /// @@ -1074,7 +1240,7 @@ pub enum Name { /// ^ /// ``` GfmStrikethroughText, - /// GFM: Task list item check. + /// GFM extension: task list item check. /// /// ## Info /// @@ -1094,7 +1260,7 @@ pub enum Name { /// ^^^ /// ``` GfmTaskListItemCheck, - /// GFM: Task list item check marker. + /// GFM extension: task list item check marker. 
/// /// ## Info /// @@ -1112,7 +1278,7 @@ pub enum Name { /// ^ ^ /// ``` GfmTaskListItemMarker, - /// GFM: Task list item value: checked. + /// GFM extension: task list item value: checked. /// /// ## Info /// @@ -1130,7 +1296,7 @@ pub enum Name { /// ^ /// ``` GfmTaskListItemValueChecked, - /// GFM: Task list item value: unchecked. + /// GFM extension: task list item value: unchecked. /// /// ## Info /// @@ -2105,7 +2271,7 @@ pub enum Name { } /// List of void events, used to make sure everything is working well. -pub const VOID_EVENTS: [Name; 50] = [ +pub const VOID_EVENTS: [Name; 53] = [ Name::AttentionSequence, Name::AutolinkEmail, Name::AutolinkMarker, @@ -2134,6 +2300,9 @@ pub const VOID_EVENTS: [Name; 50] = [ Name::GfmAutolinkLiteralEmail, Name::GfmAutolinkLiteralProtocol, Name::GfmAutolinkLiteralWww, + Name::GfmFootnoteCallMarker, + Name::GfmFootnoteDefinitionLabelMarker, + Name::GfmFootnoteDefinitionMarker, Name::GfmStrikethroughSequence, Name::GfmTaskListItemMarker, Name::GfmTaskListItemValueChecked, @@ -171,7 +171,20 @@ pub struct Constructs { /// ^^^^^^^^^^^^^^^^^^^ /// ``` pub gfm_autolink_literal: bool, - /// GFM: strikethrough. + /// GFM: footnote definition. + /// + /// ```markdown + /// > | [^a]: b + /// ^^^^^^^ + /// ``` + pub gfm_footnote_definition: bool, + /// GFM: footnote label start. + /// + /// ```markdown + /// > | a[^b] + /// ^^ + /// ``` + pub gfm_label_start_footnote: bool, + /// GFM: strikethrough. /// /// ```markdown /// > | a ~b~ c. 
@@ -283,6 +296,8 @@ impl Default for Constructs { definition: true, frontmatter: false, gfm_autolink_literal: false, + gfm_label_start_footnote: false, + gfm_footnote_definition: false, gfm_strikethrough: false, gfm_task_list_item: false, hard_break_escape: true, @@ -308,6 +323,8 @@ impl Constructs { pub fn gfm() -> Self { Self { gfm_autolink_literal: true, + gfm_footnote_definition: true, + gfm_label_start_footnote: true, gfm_strikethrough: true, gfm_task_list_item: true, ..Self::default() @@ -376,6 +393,206 @@ pub struct Options { /// ``` pub allow_dangerous_protocol: bool, + /// Label to use for the footnotes section. + /// + /// Change it when the markdown is not in English. + /// Typically affects screen readers (change `gfm_footnote_label_attributes` + /// to make it visible). + /// + /// ## Examples + /// + /// ``` + /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// + /// // `"Footnotes"` is used by default: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// + /// // Pass `gfm_footnote_label` to use something else: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// gfm_footnote_label: Some("Notes de bas de page".to_string()), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" 
data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Notes de bas de page</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// ``` + pub gfm_footnote_label: Option<String>, + + /// HTML tag to use for the footnote label. + /// + /// Change it to match your document structure and play well with your CSS. + /// + /// ## Examples + /// + /// ``` + /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// + /// // `"h2"` is used by default: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// + /// // Pass `gfm_footnote_label_tag_name` to use something else: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// gfm_footnote_label_tag_name: Some("h1".to_string()), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h1 id=\"footnote-label\" class=\"sr-only\">Footnotes</h1>\n<ol>\n<li 
id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// ``` + pub gfm_footnote_label_tag_name: Option<String>, + + /// Attributes to use on the footnote label. + /// + /// > 👉 **Note**: `id="footnote-label"` is always added, because footnote + /// > calls use it with `aria-describedby` to provide an accessible label. + /// + /// A `class="sr-only"` is added by default to hide the label from sighted + /// users. + /// Change it to make the label visible, or add other classes or other + /// attributes. + /// + /// ## Examples + /// + /// ``` + /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// + /// // `"class=\"sr-only\""` is used by default: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// + /// // Pass `gfm_footnote_label_attributes` to use something else: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// gfm_footnote_label_attributes: Some("class=\"footnote-heading\"".to_string()), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" 
class=\"footnotes\"><h2 id=\"footnote-label\" class=\"footnote-heading\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// ``` + pub gfm_footnote_label_attributes: Option<String>, + + /// Label to use from backreferences back to their footnote call. + /// + /// Change it when the markdown is not in English. + /// Affects screen readers. + /// + /// ## Examples + /// + /// ``` + /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// + /// // `"Back to content"` is used by default: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// + /// // Pass `gfm_footnote_back_label` to use something else: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// gfm_footnote_back_label: Some("Arrière".to_string()), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" 
data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Arrière\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// ``` + pub gfm_footnote_back_label: Option<String>, + + /// Prefix to use before the `id` attribute on footnotes to prevent them + /// from *clobbering*. + /// + /// DOM clobbering is this: + /// + /// ```html + /// <p id=x></p> + /// <script>alert(x) // `x` now refers to the DOM `p#x` element</script> + /// ``` + /// + /// The above example shows that elements are made available by browsers, + /// by their ID, on the `window` object, which is a security risk because + /// you might be expecting some other variable at that place. + /// Using a prefix solves this problem. + /// + /// ## Examples + /// + /// ``` + /// use micromark::{micromark, micromark_with_options, Options, Constructs}; + /// + /// // `"user-content-"` is used by default: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// + /// // Pass `gfm_footnote_clobber_prefix` to use something else: + /// assert_eq!( + /// micromark_with_options( + /// "[^a]\n\n[^a]: b", + /// &Options { + /// constructs: Constructs::gfm(), + /// gfm_footnote_clobber_prefix: Some("".to_string()), + /// ..Options::default() + /// } + /// ), + /// "<p><sup><a href=\"#fn-a\" id=\"fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" 
class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"fn-a\">\n<p>b <a href=\"#fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n" + /// ); + /// ``` + pub gfm_footnote_clobber_prefix: Option<String>, + /// Whether to support GFM strikethrough (if enabled in `constructs`) with /// a single tilde (default: true). /// @@ -389,26 +606,26 @@ pub struct Options { /// // micromark supports single tildes by default: /// assert_eq!( /// micromark_with_options( - /// "~a~", - /// &Options { - /// constructs: Constructs::gfm(), - /// ..Options::default() - /// } - /// ), - /// "<p><del>a</del></p>" + /// "~a~", + /// &Options { + /// constructs: Constructs::gfm(), + /// ..Options::default() + /// } + /// ), + /// "<p><del>a</del></p>" /// ); /// /// // Pass `gfm_strikethrough_single_tilde: false` to turn that off: /// assert_eq!( /// micromark_with_options( - /// "~a~", - /// &Options { - /// constructs: Constructs::gfm(), - /// gfm_strikethrough_single_tilde: false, - /// ..Options::default() - /// } - /// ), - /// "<p>~a~</p>" + /// "~a~", + /// &Options { + /// constructs: Constructs::gfm(), + /// gfm_strikethrough_single_tilde: false, + /// ..Options::default() + /// } + /// ), + /// "<p>~a~</p>" /// ); /// ``` pub gfm_strikethrough_single_tilde: bool, @@ -488,6 +705,11 @@ impl Default for Options { Self { allow_dangerous_html: false, allow_dangerous_protocol: false, + gfm_footnote_label: None, + gfm_footnote_label_tag_name: None, + gfm_footnote_label_attributes: None, + gfm_footnote_back_label: None, + gfm_footnote_clobber_prefix: None, gfm_strikethrough_single_tilde: true, default_line_ending: LineEnding::default(), constructs: Constructs::default(), diff --git a/src/parser.rs b/src/parser.rs index afa08ac..62b3e03 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -17,8 +17,10 @@ pub struct ParseState<'a> { pub options: &'a Options, /// List 
of chars. pub bytes: &'a [u8], - /// Set of defined identifiers. + /// Set of defined definition identifiers. pub definitions: Vec<String>, + /// Set of defined GFM footnote definition identifiers. + pub gfm_footnote_definitions: Vec<String>, } /// Turn a string of markdown into events. @@ -29,6 +31,7 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) options, bytes: value.as_bytes(), definitions: vec![], + gfm_footnote_definitions: vec![], }; let mut tokenizer = Tokenizer::new( @@ -50,7 +53,10 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8]) let mut events = tokenizer.events; - parse_state.definitions = tokenizer.tokenize_state.definitions; + let footnote = tokenizer.tokenize_state.gfm_footnote_definitions; + let normal = tokenizer.tokenize_state.definitions; + parse_state.gfm_footnote_definitions = footnote; + parse_state.definitions = normal; while !subtokenize(&mut events, &parse_state) {} diff --git a/src/state.rs b/src/state.rs index 65ffbeb..6c3f563 100644 --- a/src/state.rs +++ b/src/state.rs @@ -116,6 +116,7 @@ pub enum Name { DocumentContainerNewBefore, DocumentContainerNewBeforeNotBlockQuote, DocumentContainerNewBeforeNotList, + DocumentContainerNewBeforeNotGfmFootnoteDefinition, DocumentContainerNewAfter, DocumentContainersAfter, DocumentFlowInside, @@ -145,6 +146,17 @@ pub enum Name { FrontmatterCloseSequence, FrontmatterCloseAfter, + GfmFootnoteDefinitionStart, + GfmFootnoteDefinitionLabelBefore, + GfmFootnoteDefinitionLabelAfter, + GfmFootnoteDefinitionWhitespaceAfter, + GfmFootnoteDefinitionContStart, + GfmFootnoteDefinitionContBlank, + GfmFootnoteDefinitionContFilled, + + GfmLabelStartFootnoteStart, + GfmLabelStartFootnoteOpen, + GfmTaskListItemCheckStart, GfmTaskListItemCheckInside, GfmTaskListItemCheckClose, @@ -230,6 +242,7 @@ pub enum Name { HtmlTextLineEndingAfterPrefix, LabelStart, + LabelAtMarker, LabelAtBreak, LabelEolAfter, LabelAtBlankLine, @@ -256,6 +269,7 @@ pub enum 
Name { LabelStartImageStart, LabelStartImageOpen, + LabelStartImageAfter, LabelStartLinkStart, @@ -299,6 +313,7 @@ pub enum Name { TextBefore, TextBeforeHtml, TextBeforeHardBreakEscape, + TextBeforeLabelStartLink, TextBeforeData, ThematicBreakStart, @@ -421,6 +436,9 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::DocumentContainerNewBeforeNotList => { construct::document::container_new_before_not_list } + Name::DocumentContainerNewBeforeNotGfmFootnoteDefinition => { + construct::document::container_new_before_not_footnote_definition + } Name::DocumentContainerNewAfter => construct::document::container_new_after, Name::DocumentContainersAfter => construct::document::containers_after, Name::DocumentFlowEnd => construct::document::flow_end, @@ -450,6 +468,19 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::FrontmatterCloseSequence => construct::frontmatter::close_sequence, Name::FrontmatterCloseAfter => construct::frontmatter::close_after, + Name::GfmFootnoteDefinitionStart => construct::gfm_footnote_definition::start, + Name::GfmFootnoteDefinitionLabelBefore => construct::gfm_footnote_definition::label_before, + Name::GfmFootnoteDefinitionLabelAfter => construct::gfm_footnote_definition::label_after, + Name::GfmFootnoteDefinitionWhitespaceAfter => { + construct::gfm_footnote_definition::whitespace_after + } + Name::GfmFootnoteDefinitionContStart => construct::gfm_footnote_definition::cont_start, + Name::GfmFootnoteDefinitionContBlank => construct::gfm_footnote_definition::cont_blank, + Name::GfmFootnoteDefinitionContFilled => construct::gfm_footnote_definition::cont_filled, + + Name::GfmLabelStartFootnoteStart => construct::gfm_label_start_footnote::start, + Name::GfmLabelStartFootnoteOpen => construct::gfm_label_start_footnote::open, + Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start, Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside, 
Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close, @@ -563,6 +594,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::HtmlTextLineEndingAfterPrefix => construct::html_text::line_ending_after_prefix, Name::LabelStart => construct::partial_label::start, + Name::LabelAtMarker => construct::partial_label::at_marker, Name::LabelAtBreak => construct::partial_label::at_break, Name::LabelEolAfter => construct::partial_label::eol_after, Name::LabelAtBlankLine => construct::partial_label::at_blank_line, @@ -591,6 +623,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::LabelStartImageStart => construct::label_start_image::start, Name::LabelStartImageOpen => construct::label_start_image::open, + Name::LabelStartImageAfter => construct::label_start_image::after, Name::LabelStartLinkStart => construct::label_start_link::start, Name::ListItemStart => construct::list_item::start, @@ -633,6 +666,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State { Name::TextBefore => construct::text::before, Name::TextBeforeHtml => construct::text::before_html, Name::TextBeforeHardBreakEscape => construct::text::before_hard_break_escape, + Name::TextBeforeLabelStartLink => construct::text::before_label_start_link, Name::TextBeforeData => construct::text::before_data, Name::ThematicBreakStart => construct::thematic_break::start, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 83514cb..c6a209b 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -28,6 +28,8 @@ pub enum Container { BlockQuote, /// [List item][crate::construct::list_item]. ListItem, + /// [GFM: Footnote definition][crate::construct::gfm_footnote_definition]. + GfmFootnoteDefinition, } /// Info used to tokenize a container. @@ -56,9 +58,53 @@ enum ByteAction { Insert(u8), } +/// Label start kind. +#[derive(Debug, PartialEq, Eq)] +pub enum LabelKind { + /// Label (image) start. 
+ /// + /// ```markdown + /// > | a ![b] c + /// ^^ + /// ``` + /// + /// Construct: [Label start (image)][crate::construct::label_start_image]. + Image, + /// Label (link) start. + /// + /// ```markdown + /// > | a [b] c + /// ^ + /// ``` + /// + /// Construct: [Label start (link)][crate::construct::label_start_link]. + Link, + /// GFM: Label (footnote) start. + /// + /// ```markdown + /// > | a [^b] c + /// ^^ + /// ``` + /// + /// Construct: [GFM: Label start (footnote)][crate::construct::gfm_label_start_footnote]. + GfmFootnote, + /// GFM: Label (footnote) start, not matching a footnote definition, so + /// handled as a label (link) start. + /// + /// ```markdown + /// > | a [^b](c) d + /// ^^ + /// ``` + /// + /// Construct: [Label end][crate::construct::label_end]. + GfmUndefinedFootnote, +} + /// Label start, looking for an end. #[derive(Debug)] pub struct LabelStart { + /// Kind of start. + pub kind: LabelKind, /// Indices of where the label starts and ends in `events`. pub start: (usize, usize), /// A boolean used internally to figure out if a (link) label start can’t @@ -71,6 +117,7 @@ pub struct LabelStart { /// Valid label. #[derive(Debug)] pub struct Label { + pub kind: LabelKind, /// Indices of label start. pub start: (usize, usize), /// Indices of label end. @@ -174,8 +221,10 @@ pub struct TokenizeState<'a> { /// Used when tokenizing [text content][crate::construct::text]. pub labels: Vec<Label>, - /// List of defined identifiers. + /// List of defined definition identifiers. pub definitions: Vec<String>, + /// List of defined GFM footnote definition identifiers. + pub gfm_footnote_definitions: Vec<String>, /// Whether to connect events. pub connect: bool, @@ -288,6 +337,7 @@ impl<'a> Tokenizer<'a> { document_child: None, document_at_first_paragraph_of_list_item: false, definitions: vec![], + gfm_footnote_definitions: vec![], end: 0, label_starts: vec![], label_starts_loose: vec![], |