aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-25 13:16:45 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-08-25 13:16:45 +0200
commit1e4c95079cb97b2b02440b21945c6d12741a7d19 (patch)
tree4f6a4a179e72630c1cdd058f84498e32b9a433e0 /src
parent49b6a4e72516e8b2a8768e761a60a4f461802d69 (diff)
downloadmarkdown-rs-1e4c95079cb97b2b02440b21945c6d12741a7d19.tar.gz
markdown-rs-1e4c95079cb97b2b02440b21945c6d12741a7d19.tar.bz2
markdown-rs-1e4c95079cb97b2b02440b21945c6d12741a7d19.zip
Add support for GFM footnotes
Diffstat (limited to 'src')
-rw-r--r--src/compiler.rs316
-rw-r--r--src/construct/definition.rs12
-rw-r--r--src/construct/document.rs39
-rw-r--r--src/construct/gfm_footnote_definition.rs345
-rw-r--r--src/construct/gfm_label_start_footnote.rs91
-rw-r--r--src/construct/label_end.rs218
-rw-r--r--src/construct/label_start_image.rs54
-rw-r--r--src/construct/label_start_link.rs3
-rw-r--r--src/construct/list_item.rs2
-rw-r--r--src/construct/mod.rs5
-rw-r--r--src/construct/partial_label.rs28
-rw-r--r--src/construct/text.rs24
-rw-r--r--src/event.rs187
-rw-r--r--src/lib.rs254
-rw-r--r--src/parser.rs10
-rw-r--r--src/state.rs34
-rw-r--r--src/tokenizer.rs52
17 files changed, 1527 insertions, 147 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index f1003fd..6ad34b2 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -18,7 +18,8 @@ use alloc::{
};
use core::str;
-/// Link or image, resource or reference.
+/// Link, image, or footnote call.
+/// Resource or reference.
/// Reused for temporary definitions as well, in the first pass.
#[derive(Debug)]
struct Media {
@@ -76,6 +77,8 @@ struct CompileContext<'a> {
pub events: &'a [Event],
/// List of bytes.
pub bytes: &'a [u8],
+ /// Configuration.
+ pub options: &'a Options,
// Fields used by handlers to track the things they need to track to
// compile markdown.
/// Rank of heading (atx).
@@ -100,6 +103,10 @@ struct CompileContext<'a> {
pub tight_stack: Vec<bool>,
/// List of definitions.
pub definitions: Vec<Definition>,
+ /// List of GFM footnote definitions (normalized identifier and captured output).
+ pub gfm_footnote_definitions: Vec<(String, String)>,
+ pub gfm_footnote_definition_calls: Vec<(String, usize)>,
+ pub gfm_footnote_definition_stack: Vec<(usize, usize)>,
// Fields used to influance the current compilation.
/// Ignore the next line ending.
pub slurp_one_line_ending: bool,
@@ -128,7 +135,7 @@ impl<'a> CompileContext<'a> {
pub fn new(
events: &'a [Event],
bytes: &'a [u8],
- options: &Options,
+ options: &'a Options,
line_ending: LineEnding,
) -> CompileContext<'a> {
CompileContext {
@@ -143,6 +150,9 @@ impl<'a> CompileContext<'a> {
list_expect_first_marker: None,
media_stack: vec![],
definitions: vec![],
+ gfm_footnote_definitions: vec![],
+ gfm_footnote_definition_calls: vec![],
+ gfm_footnote_definition_stack: vec![],
tight_stack: vec![],
slurp_one_line_ending: false,
image_alt_inside: false,
@@ -161,6 +171,7 @@ impl<'a> CompileContext<'a> {
allow_dangerous_html: options.allow_dangerous_html,
buffers: vec![String::new()],
index: 0,
+ options,
}
}
@@ -243,6 +254,11 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String {
//
// To speed things up, we collect the places we can jump over for the
// second pass.
+ //
+ // We don’t need to handle GFM footnote definitions like this, because
+ // unlike normal definitions, what they produce is not used in calls.
+ // It would also get very complex, because footnote definitions can be
+ // nested.
while index < events.len() {
let event = &events[index];
@@ -250,15 +266,15 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String {
handle(&mut context, index);
}
- if event.name == Name::Definition {
- if event.kind == Kind::Enter {
+ if event.kind == Kind::Enter {
+ if event.name == Name::Definition {
handle(&mut context, index); // Also handle start.
definition_inside = true;
definition_indices.push((index, index));
- } else {
- definition_inside = false;
- definition_indices.last_mut().unwrap().1 = index;
}
+ } else if event.name == Name::Definition {
+ definition_inside = false;
+ definition_indices.last_mut().unwrap().1 = index;
}
index += 1;
@@ -278,14 +294,17 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String {
jump = definition_indices
.get(definition_index)
.unwrap_or(&jump_default);
- // Ignore line endings after definitions.
- context.slurp_one_line_ending = true;
} else {
handle(&mut context, index);
index += 1;
}
}
+ // Generate a footnote section, but only if there were footnote calls.
+ if !context.gfm_footnote_definition_calls.is_empty() {
+ generate_footnote_section(&mut context);
+ }
+
assert_eq!(context.buffers.len(), 1, "expected 1 final buffer");
context
.buffers
@@ -312,6 +331,7 @@ fn enter(context: &mut CompileContext) {
| Name::CodeFencedFenceMeta
| Name::DefinitionLabelString
| Name::DefinitionTitleString
+ | Name::GfmFootnoteDefinitionPrefix
| Name::HeadingAtxText
| Name::HeadingSetextText
| Name::Label
@@ -326,6 +346,8 @@ fn enter(context: &mut CompileContext) {
Name::DefinitionDestinationString => on_enter_definition_destination_string(context),
Name::Emphasis => on_enter_emphasis(context),
Name::Frontmatter => on_enter_frontmatter(context),
+ Name::GfmFootnoteDefinition => on_enter_gfm_footnote_definition(context),
+ Name::GfmFootnoteCall => on_enter_gfm_footnote_call(context),
Name::GfmStrikethrough => on_enter_gfm_strikethrough(context),
Name::GfmTaskListItemCheck => on_enter_gfm_task_list_item_check(context),
Name::HtmlFlow => on_enter_html_flow(context),
@@ -374,6 +396,12 @@ fn exit(context: &mut CompileContext) {
Name::GfmAutolinkLiteralProtocol => on_exit_gfm_autolink_literal_protocol(context),
Name::GfmAutolinkLiteralWww => on_exit_gfm_autolink_literal_www(context),
Name::GfmAutolinkLiteralEmail => on_exit_gfm_autolink_literal_email(context),
+ Name::GfmFootnoteCall => on_exit_gfm_footnote_call(context),
+ Name::GfmFootnoteDefinitionLabelString => {
+ on_exit_gfm_footnote_definition_label_string(context);
+ }
+ Name::GfmFootnoteDefinitionPrefix => on_exit_gfm_footnote_definition_prefix(context),
+ Name::GfmFootnoteDefinition => on_exit_gfm_footnote_definition(context),
Name::GfmStrikethrough => on_exit_gfm_strikethrough(context),
Name::GfmTaskListItemCheck => on_exit_gfm_task_list_item_check(context),
Name::GfmTaskListItemValueChecked => on_exit_gfm_task_list_item_value_checked(context),
@@ -472,6 +500,23 @@ fn on_enter_frontmatter(context: &mut CompileContext) {
context.buffer();
}
+/// Handle [`Enter`][Kind::Enter]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition].
+fn on_enter_gfm_footnote_definition(context: &mut CompileContext) {
+ context.tight_stack.push(false);
+}
+
+/// Handle [`Enter`][Kind::Enter]:[`GfmFootnoteCall`][Name::GfmFootnoteCall].
+fn on_enter_gfm_footnote_call(context: &mut CompileContext) {
+ context.media_stack.push(Media {
+ image: false,
+ label_id: None,
+ label: None,
+ reference_id: None,
+ destination: None,
+ title: None,
+ });
+}
+
/// Handle [`Enter`][Kind::Enter]:[`GfmStrikethrough`][Name::GfmStrikethrough].
fn on_enter_gfm_strikethrough(context: &mut CompileContext) {
if !context.image_alt_inside {
@@ -961,6 +1006,92 @@ fn on_exit_gfm_autolink_literal_email(context: &mut CompileContext) {
on_exit_autolink_email(context);
}
+/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteCall`][Name::GfmFootnoteCall].
+fn on_exit_gfm_footnote_call(context: &mut CompileContext) {
+ let indices = context.media_stack.pop().unwrap().label_id.unwrap();
+ let id =
+ normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str());
+ let safe_id = sanitize_uri(&id.to_lowercase(), &None);
+ let mut call_index = 0;
+
+ // See if this has been called before.
+ while call_index < context.gfm_footnote_definition_calls.len() {
+ if context.gfm_footnote_definition_calls[call_index].0 == id {
+ break;
+ }
+ call_index += 1;
+ }
+
+ // New.
+ if call_index == context.gfm_footnote_definition_calls.len() {
+ context.gfm_footnote_definition_calls.push((id, 0));
+ }
+
+ // Increment.
+ context.gfm_footnote_definition_calls[call_index].1 += 1;
+
+ // No call is output in an image alt, though the definition and
+ // backreferences are generated as if it was the case.
+ if context.image_alt_inside {
+ return;
+ }
+
+ context.push("<sup><a href=\"#");
+ if let Some(ref value) = context.options.gfm_footnote_clobber_prefix {
+ context.push(&encode(value, context.encode_html));
+ } else {
+ context.push("user-content-");
+ }
+ context.push("fn-");
+ context.push(&safe_id);
+ context.push("\" id=\"");
+ if let Some(ref value) = context.options.gfm_footnote_clobber_prefix {
+ context.push(&encode(value, context.encode_html));
+ } else {
+ context.push("user-content-");
+ }
+ context.push("fnref-");
+ context.push(&safe_id);
+ if context.gfm_footnote_definition_calls[call_index].1 > 1 {
+ context.push("-");
+ context.push(
+ &context.gfm_footnote_definition_calls[call_index]
+ .1
+ .to_string(),
+ );
+ }
+ context.push("\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">");
+
+ context.push(&(call_index + 1).to_string());
+ context.push("</a></sup>");
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString].
+fn on_exit_gfm_footnote_definition_label_string(context: &mut CompileContext) {
+ context
+ .gfm_footnote_definition_stack
+ .push(Position::from_exit_event(context.events, context.index).to_indices());
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix].
+fn on_exit_gfm_footnote_definition_prefix(context: &mut CompileContext) {
+ // Drop the prefix.
+ context.resume();
+ // Capture everything until end of definition.
+ context.buffer();
+}
+
+/// Handle [`Exit`][Kind::Exit]:[`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition].
+fn on_exit_gfm_footnote_definition(context: &mut CompileContext) {
+ let value = context.resume();
+ let indices = context.gfm_footnote_definition_stack.pop().unwrap();
+ context.tight_stack.pop();
+ context.gfm_footnote_definitions.push((
+ normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()),
+ value,
+ ));
+}
+
/// Handle [`Exit`][Kind::Exit]:[`GfmStrikethrough`][Name::GfmStrikethrough].
fn on_exit_gfm_strikethrough(context: &mut CompileContext) {
if !context.image_alt_inside {
@@ -1080,7 +1211,12 @@ fn on_exit_label_text(context: &mut CompileContext) {
fn on_exit_line_ending(context: &mut CompileContext) {
if context.code_text_inside {
context.push(" ");
- } else if context.slurp_one_line_ending {
+ } else if context.slurp_one_line_ending
+ // Ignore line endings after definitions.
+ || (context.index > 1
+ && (context.events[context.index - 2].name == Name::Definition
+ || context.events[context.index - 2].name == Name::GfmFootnoteDefinition))
+ {
context.slurp_one_line_ending = false;
} else {
context.push(&encode(
@@ -1113,9 +1249,12 @@ fn on_exit_list_item(context: &mut CompileContext) {
context.index - 1,
&[
Name::BlankLineEnding,
+ Name::BlockQuotePrefix,
Name::LineEnding,
Name::SpaceOrTab,
- Name::BlockQuotePrefix,
+ // Also ignore things that don’t contribute to the document.
+ Name::Definition,
+ Name::GfmFootnoteDefinition,
],
);
let previous = &context.events[before_item];
@@ -1167,7 +1306,6 @@ fn on_exit_media(context: &mut CompileContext) {
let media = context.media_stack.pop().unwrap();
let label = media.label.unwrap();
- let image_alt_inside = context.image_alt_inside;
let id = media.reference_id.or(media.label_id).map(|indices| {
normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str())
});
@@ -1190,7 +1328,7 @@ fn on_exit_media(context: &mut CompileContext) {
None
};
- if !image_alt_inside {
+ if !is_in_image {
if media.image {
context.push("<img src=\"");
} else {
@@ -1223,7 +1361,7 @@ fn on_exit_media(context: &mut CompileContext) {
context.push(&label);
}
- if !image_alt_inside {
+ if !is_in_image {
context.push("\"");
let title = if let Some(index) = definition_index {
@@ -1248,7 +1386,7 @@ fn on_exit_media(context: &mut CompileContext) {
if !media.image {
context.push(&label);
- if !image_alt_inside {
+ if !is_in_image {
context.push("</a>");
}
}
@@ -1300,6 +1438,154 @@ fn on_exit_thematic_break(context: &mut CompileContext) {
context.push("<hr />");
}
+/// Generate a footnote section.
+fn generate_footnote_section(context: &mut CompileContext) {
+ context.line_ending_if_needed();
+ context.push("<section data-footnotes=\"\" class=\"footnotes\"><");
+ if let Some(ref value) = context.options.gfm_footnote_label_tag_name {
+ context.push(&encode(value, context.encode_html));
+ } else {
+ context.push("h2");
+ }
+ context.push(" id=\"footnote-label\" ");
+ if let Some(ref value) = context.options.gfm_footnote_label_attributes {
+ context.push(value);
+ } else {
+ context.push("class=\"sr-only\"");
+ }
+ context.push(">");
+ if let Some(ref value) = context.options.gfm_footnote_label {
+ context.push(&encode(value, context.encode_html));
+ } else {
+ context.push("Footnotes");
+ }
+ context.push("</");
+ if let Some(ref value) = context.options.gfm_footnote_label_tag_name {
+ context.push(&encode(value, context.encode_html));
+ } else {
+ context.push("h2");
+ }
+ context.push(">");
+ context.line_ending();
+ context.push("<ol>");
+
+ let mut index = 0;
+ while index < context.gfm_footnote_definition_calls.len() {
+ generate_footnote_item(context, index);
+ index += 1;
+ }
+
+ context.line_ending();
+ context.push("</ol>");
+ context.line_ending();
+ context.push("</section>");
+ context.line_ending();
+}
+
+/// Generate a footnote item from a call.
+fn generate_footnote_item(context: &mut CompileContext, index: usize) {
+ let id = &context.gfm_footnote_definition_calls[index].0;
+ let safe_id = sanitize_uri(&id.to_lowercase(), &None);
+
+ // Find definition: we’ll always find it.
+ let mut definition_index = 0;
+ while definition_index < context.gfm_footnote_definitions.len() {
+ if &context.gfm_footnote_definitions[definition_index].0 == id {
+ break;
+ }
+ definition_index += 1;
+ }
+
+ debug_assert_ne!(
+ definition_index,
+ context.gfm_footnote_definitions.len(),
+ "expected definition"
+ );
+
+ context.line_ending();
+ context.push("<li id=\"");
+ if let Some(ref value) = context.options.gfm_footnote_clobber_prefix {
+ context.push(&encode(value, context.encode_html));
+ } else {
+ context.push("user-content-");
+ }
+ context.push("fn-");
+ context.push(&safe_id);
+ context.push("\">");
+ context.line_ending();
+
+ // Create one or more backreferences.
+ let mut reference_index = 0;
+ let mut backreferences = String::new();
+ while reference_index < context.gfm_footnote_definition_calls[index].1 {
+ if reference_index != 0 {
+ backreferences.push(' ');
+ }
+ backreferences.push_str("<a href=\"#");
+ if let Some(ref value) = context.options.gfm_footnote_clobber_prefix {
+ backreferences.push_str(&encode(value, context.encode_html));
+ } else {
+ backreferences.push_str("user-content-");
+ }
+ backreferences.push_str("fnref-");
+ backreferences.push_str(&safe_id);
+ if reference_index != 0 {
+ backreferences.push('-');
+ backreferences.push_str(&(reference_index + 1).to_string());
+ }
+ backreferences.push_str(
+ "\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"",
+ );
+ if let Some(ref value) = context.options.gfm_footnote_back_label {
+ backreferences.push_str(&encode(value, context.encode_html));
+ } else {
+ backreferences.push_str("Back to content");
+ }
+ backreferences.push_str("\">↩");
+ if reference_index != 0 {
+ backreferences.push_str("<sup>");
+ backreferences.push_str(&(reference_index + 1).to_string());
+ backreferences.push_str("</sup>");
+ }
+ backreferences.push_str("</a>");
+
+ reference_index += 1;
+ }
+
+ let value = context.gfm_footnote_definitions[definition_index].1.clone();
+ let bytes = value.as_bytes();
+ let mut byte_index = bytes.len();
+ // Move back past EOL.
+ while byte_index > 0 && matches!(bytes[byte_index - 1], b'\n' | b'\r') {
+ byte_index -= 1;
+ }
+ // Check if it ends in `</p>`.
+ // This is a bit funky if someone wrote a safe paragraph by hand in
+ // there.
+ // But in all other cases, `<` and `>` would be encoded, so we can be
+ // sure that this is generated by our compiler.
+ if byte_index > 3
+ && bytes[byte_index - 4] == b'<'
+ && bytes[byte_index - 3] == b'/'
+ && bytes[byte_index - 2] == b'p'
+ && bytes[byte_index - 1] == b'>'
+ {
+ let (before, after) = bytes.split_at(byte_index - 4);
+ let mut result = String::new();
+ result.push_str(str::from_utf8(before).unwrap());
+ result.push(' ');
+ result.push_str(&backreferences);
+ result.push_str(str::from_utf8(after).unwrap());
+ context.push(&result);
+ } else {
+ context.push(&value);
+ context.line_ending_if_needed();
+ context.push(&backreferences);
+ }
+ context.line_ending_if_needed();
+ context.push("</li>");
+}
+
/// Generate an autolink (used by unicode autolinks and GFM autolink literals).
fn generate_autolink(context: &mut CompileContext, protocol: Option<&str>, value: &str) {
if !context.image_alt_inside {
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index e65d979..1d67635 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -175,14 +175,14 @@ pub fn label_after(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_2 = Name::Data;
tokenizer.tokenize_state.token_3 = Name::Data;
- tokenizer.tokenize_state.end = skip::to_back(
- &tokenizer.events,
- tokenizer.events.len() - 1,
- &[Name::DefinitionLabelString],
- );
-
match tokenizer.current {
Some(b':') => {
+ tokenizer.tokenize_state.end = skip::to_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Name::DefinitionLabelString],
+ );
+
tokenizer.enter(Name::DefinitionMarker);
tokenizer.consume();
tokenizer.exit(Name::DefinitionMarker);
diff --git a/src/construct/document.rs b/src/construct/document.rs
index b438808..9c76e46 100644
--- a/src/construct/document.rs
+++ b/src/construct/document.rs
@@ -1,12 +1,13 @@
//! The document content type.
//!
-//! **Document** represents the containers, such as block quotes and lists,
-//! which structure the document and contain other sections.
+//! **Document** represents the containers, such as block quotes, list items,
+//! or GFM footnotes, which structure the document and contain other sections.
//!
//! The constructs found in flow are:
//!
//! * [Block quote][crate::construct::block_quote]
//! * [List item][crate::construct::list_item]
+//! * [GFM: Footnote definition][crate::construct::gfm_footnote_definition]
use crate::event::{Content, Event, Kind, Link, Name};
use crate::state::{Name as StateName, State};
@@ -99,6 +100,7 @@ pub fn container_existing_before(tokenizer: &mut Tokenizer) -> State {
let name = match container.kind {
Container::BlockQuote => StateName::BlockQuoteContStart,
+ Container::GfmFootnoteDefinition => StateName::GfmFootnoteDefinitionContStart,
Container::ListItem => StateName::ListItemContStart,
};
@@ -185,7 +187,7 @@ pub fn container_new_before(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State {
// List item?
- // We replace the empty block quote container for this new list one.
+ // We replace the empty block quote container for this new list item one.
tokenizer.tokenize_state.document_container_stack
[tokenizer.tokenize_state.document_continued] = ContainerState {
kind: Container::ListItem,
@@ -200,14 +202,38 @@ pub fn container_new_before_not_block_quote(tokenizer: &mut Tokenizer) -> State
State::Retry(StateName::ListItemStart)
}
-/// At new container, but not a list (or block quote).
+/// At new container, but not a block quote or list item.
//
/// ```markdown
/// > | a
/// ^
/// ```
pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
- // It wasn’t a new block quote or a list.
+ // Footnote definition?
+ // We replace the empty list item container for this new footnote
+ // definition one.
+ tokenizer.tokenize_state.document_container_stack
+ [tokenizer.tokenize_state.document_continued] = ContainerState {
+ kind: Container::GfmFootnoteDefinition,
+ blank_initial: false,
+ size: 0,
+ };
+
+ tokenizer.attempt(
+ State::Next(StateName::DocumentContainerNewAfter),
+ State::Next(StateName::DocumentContainerNewBeforeNotGfmFootnoteDefinition),
+ );
+ State::Retry(StateName::GfmFootnoteDefinitionStart)
+}
+
+/// At new container, but not a block quote, list item, or footnote definition.
+///
+/// ```markdown
+/// > | a
+/// ^
+/// ```
+pub fn container_new_before_not_footnote_definition(tokenizer: &mut Tokenizer) -> State {
+ // It wasn’t a new block quote, list item, or footnote definition.
// Swap the new container (in the middle) with the existing one (at the end).
// Drop what was in the middle.
tokenizer
@@ -227,7 +253,7 @@ pub fn container_new_before_not_list(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn container_new_after(tokenizer: &mut Tokenizer) -> State {
- // It was a new block quote or a list.
+ // It was a new block quote, list item, or footnote definition.
// Swap the new container (in the middle) with the existing one (at the end).
// Take the new container.
let container = tokenizer
@@ -453,6 +479,7 @@ fn exit_containers(tokenizer: &mut Tokenizer, phase: &Phase) {
let container = stack_close.pop().unwrap();
let name = match container.kind {
Container::BlockQuote => Name::BlockQuote,
+ Container::GfmFootnoteDefinition => Name::GfmFootnoteDefinition,
Container::ListItem => Name::ListItem,
};
diff --git a/src/construct/gfm_footnote_definition.rs b/src/construct/gfm_footnote_definition.rs
new file mode 100644
index 0000000..3715044
--- /dev/null
+++ b/src/construct/gfm_footnote_definition.rs
@@ -0,0 +1,345 @@
+//! GFM: Footnote definition occurs in the [document][] content type.
+//!
+//! ## Grammar
+//!
+//! Footnote definitions form with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! ; Restriction: `label` must start with `^` (and not be empty after it).
+//! ; See the `label` construct for the BNF of that part.
+//! gfm_footnote_definition_start ::= label ':' *space_or_tab
+//!
+//! ; Restriction: blank line allowed.
+//! gfm_footnote_definition_cont ::= 4(space_or_tab)
+//! ```
+//!
+//! Further lines that are not prefixed with `gfm_footnote_definition_cont`
+//! cause the footnote definition to be exited, except when those lines are
+//! lazy continuation or blank.
+//! Like so many things in markdown, footnote definitions, too, are complex.
+//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark_block] for
+//! more on parsing details.
+//!
+//! See [`label`][label] for grammar, notes, and recommendations on that part.
+//!
+//! The `label` part is interpreted as the [string][] content type.
+//! That means that [character escapes][character_escape] and
+//! [character references][character_reference] are allowed.
+//!
+//! Definitions match to calls through identifiers.
+//! To match, both labels must be equal after normalizing with
+//! [`normalize_identifier`][normalize_identifier].
+//! One definition can match to multiple calls.
+//! Multiple definitions with the same, normalized, identifier are ignored: the
+//! first definition is preferred.
+//! To illustrate, the definition with the content of `x` wins:
+//!
+//! ```markdown
+//! [^a]: x
+//! [^a]: y
+//!
+//! [^a]
+//! ```
+//!
+//! Importantly, while labels *can* include [string][] content (character
+//! escapes and character references), these are not considered when matching.
+//! To illustrate, neither definition matches the call:
+//!
+//! ```markdown
+//! [^a&amp;b]: x
+//! [^a\&b]: y
+//!
+//! [^a&b]
+//! ```
+//!
+//! Because footnote definitions are containers (like block quotes and list
+//! items), they can contain more footnote definitions, and they can include
+//! calls to themselves.
+//!
+//! ## HTML
+//!
+//! GFM footnote definitions do not, on their own, relate to anything in HTML.
+//! When matched with a [label end][label_end], which in turn matches to a
+//! [GFM label start (footnote)][gfm_label_start_footnote], the definition
+//! relates to several elements in HTML.
+//!
+//! When one or more definitions are called, a footnote section is generated
+//! at the end of the document, using `<section>`, `<h2>`, and `<ol>` elements:
+//!
+//! ```html
+//! <section data-footnotes="" class="footnotes"><h2 id="footnote-label" class="sr-only">Footnotes</h2>
+//! <ol>…</ol>
+//! </section>
+//! ```
+//!
+//! Each definition is generated as a `<li>` in the `<ol>`, in the order they
+//! were first called:
+//!
+//! ```html
+//! <li id="user-content-fn-1">…</li>
+//! ```
+//!
+//! Backreferences are injected at the end of the first paragraph, or, when
+//! there is no paragraph, at the end of the definition.
+//! When a definition is called multiple times, multiple backreferences are
+//! generated.
+//! Further backreferences use an extra counter in the `href` attribute and
+//! visually in a `<sup>` after `↩`.
+//!
+//! ```html
+//! <a href="#user-content-fnref-1" data-footnote-backref="" class="data-footnote-backref" aria-label="Back to content">↩</a> <a href="#user-content-fnref-1-2" data-footnote-backref="" class="data-footnote-backref" aria-label="Back to content">↩<sup>2</sup></a>
+//! ```
+//!
+//! See
+//! [*§ 4.5.1 The `a` element*][html_a],
+//! [*§ 4.3.6 The `h1`, `h2`, `h3`, `h4`, `h5`, and `h6` elements*][html_h],
+//! [*§ 4.4.8 The `li` element*][html_li],
+//! [*§ 4.4.5 The `ol` element*][html_ol],
+//! [*§ 4.4.1 The `p` element*][html_p],
+//! [*§ 4.3.3 The `section` element*][html_section], and
+//! [*§ 4.5.19 The `sub` and `sup` elements*][html_sup]
+//! in the HTML spec for more info.
+//!
+//! ## Recommendation
+//!
+//! When authoring markdown with footnotes, it’s recommended to use words
+//! instead of numbers (or letters or anything with an order) as calls.
+//! That makes it easier to reuse and reorder footnotes.
+//!
+//! It’s recommended to place footnote definitions at the bottom of the document.
+//!
+//! ## Bugs
+//!
+//! GitHub’s own algorithm to parse footnote definitions contains several bugs.
+//! These are not present in this project.
+//! The issues relating to footnote definitions are:
+//!
+//! * [Footnote reference call identifiers are trimmed, but definition identifiers aren’t](https://github.com/github/cmark-gfm/issues/237)\
+//! — initial and final whitespace in labels causes them not to match
+//! * [Footnotes are matched case-insensitive, but links keep their casing, breaking them](https://github.com/github/cmark-gfm/issues/239)\
+//! — using uppercase (or any character that will be percent encoded) in identifiers breaks links
+//! * [Colons in footnotes generate links w/o `href`](https://github.com/github/cmark-gfm/issues/250)\
+//! — colons in identifiers generate broken links
+//! * [Character escape of `]` does not work in footnote identifiers](https://github.com/github/cmark-gfm/issues/240)\
+//! — some character escapes don’t work
+//! * [Footnotes in links are broken](https://github.com/github/cmark-gfm/issues/249)\
+//! — while `CommonMark` prevents links in links, GitHub does not prevent footnotes (which turn into links) in links
+//! * [Footnote-like brackets around image, break that image](https://github.com/github/cmark-gfm/issues/275)\
+//! — images can’t be used in what looks like a footnote call
+//!
+//! ## Tokens
+//!
+//! * [`DefinitionMarker`][Name::DefinitionMarker]
+//! * [`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition]
+//! * [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel]
+//! * [`GfmFootnoteDefinitionLabelMarker`][Name::GfmFootnoteDefinitionLabelMarker]
+//! * [`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString]
+//! * [`GfmFootnoteDefinitionMarker`][Name::GfmFootnoteDefinitionMarker]
+//! * [`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix]
+//! * [`SpaceOrTab`][Name::SpaceOrTab]
+//!
+//! ## References
+//!
+//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
+//!
+//! > 👉 **Note**: Footnotes are not specified in GFM yet.
+//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270)
+//! > for the related issue.
+//!
+//! [document]: crate::construct::document
+//! [string]: crate::construct::string
+//! [character_reference]: crate::construct::character_reference
+//! [character_escape]: crate::construct::character_escape
+//! [label]: crate::construct::partial_label
+//! [label_end]: crate::construct::label_end
+//! [gfm_label_start_footnote]: crate::construct::gfm_label_start_footnote
+//! [commonmark_block]: https://spec.commonmark.org/0.30/#phase-1-block-structure
+//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
+//! [html_h]: https://html.spec.whatwg.org/multipage/sections.html#the-h1,-h2,-h3,-h4,-h5,-and-h6-elements
+//! [html_li]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-li-element
+//! [html_ol]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-ol-element
+//! [html_p]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
+//! [html_section]: https://html.spec.whatwg.org/multipage/sections.html#the-section-element
+//! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements
+
+use crate::construct::partial_space_or_tab::space_or_tab_min_max;
+use crate::event::Name;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::Tokenizer;
+use crate::util::{
+ constant::TAB_SIZE,
+ normalize_identifier::normalize_identifier,
+ skip,
+ slice::{Position, Slice},
+};
+
+/// Start of GFM footnote definition.
+///
+/// ```markdown
+/// > | [^a]: b
+/// ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer
+ .parse_state
+ .options
+ .constructs
+ .gfm_footnote_definition
+ {
+ tokenizer.enter(Name::GfmFootnoteDefinition);
+
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ tokenizer.attempt(
+ State::Next(StateName::GfmFootnoteDefinitionLabelBefore),
+ State::Nok,
+ );
+ State::Retry(space_or_tab_min_max(
+ tokenizer,
+ 1,
+ if tokenizer.parse_state.options.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ))
+ } else {
+ State::Retry(StateName::GfmFootnoteDefinitionLabelBefore)
+ }
+ } else {
+ State::Nok
+ }
+}
+
+/// Before definition label (after optional whitespace).
+///
+/// ```markdown
+/// > | [^a]: b
+/// ^
+/// ```
+pub fn label_before(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'[') => {
+ tokenizer.tokenize_state.token_1 = Name::GfmFootnoteDefinitionLabel;
+ tokenizer.tokenize_state.token_2 = Name::GfmFootnoteDefinitionLabelMarker;
+ tokenizer.tokenize_state.token_3 = Name::GfmFootnoteDefinitionLabelString;
+ tokenizer.tokenize_state.token_4 = Name::GfmFootnoteDefinitionMarker;
+ tokenizer.tokenize_state.marker = b'^';
+ tokenizer.enter(Name::GfmFootnoteDefinitionPrefix);
+ tokenizer.attempt(
+ State::Next(StateName::GfmFootnoteDefinitionLabelAfter),
+ State::Nok,
+ );
+ State::Retry(StateName::LabelStart)
+ }
+ _ => State::Nok,
+ }
+}
+
+/// After definition label.
+///
+/// ```markdown
+/// > | [^a]: b
+/// ^
+/// ```
+pub fn label_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ tokenizer.tokenize_state.token_4 = Name::Data;
+ tokenizer.tokenize_state.marker = 0;
+
+ match tokenizer.current {
+ Some(b':') => {
+ let end = skip::to_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Name::GfmFootnoteDefinitionLabelString],
+ );
+
+ // Note: we don’t care about virtual spaces, so `as_str` is fine.
+ let id = normalize_identifier(
+ Slice::from_position(
+ tokenizer.parse_state.bytes,
+ &Position::from_exit_event(&tokenizer.events, end),
+ )
+ .as_str(),
+ );
+
+ // Note: we don’t care about uniqueness.
+ // Duplicate identifiers likely don’t occur very frequently.
+ // It is more likely that checking for them wastes precious time.
+ tokenizer.tokenize_state.gfm_footnote_definitions.push(id);
+
+ tokenizer.enter(Name::DefinitionMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::DefinitionMarker);
+ tokenizer.attempt(
+ State::Next(StateName::GfmFootnoteDefinitionWhitespaceAfter),
+ State::Nok,
+ );
+ // Any whitespace after the marker is eaten, so forming indented code
+ // is not possible.
+ // No space is also fine, just like a block quote marker.
+ State::Next(space_or_tab_min_max(tokenizer, 0, usize::MAX))
+ }
+ _ => State::Nok,
+ }
+}
+
+/// After definition prefix.
+///
+/// ```markdown
+/// > | [^a]: b
+/// ^
+/// ```
+pub fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.exit(Name::GfmFootnoteDefinitionPrefix);
+ State::Ok
+}
+
+/// Start of footnote definition continuation.
+///
+/// ```markdown
+/// | [^a]: b
+/// > | c
+/// ^
+/// ```
+pub fn cont_start(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.check(
+ State::Next(StateName::GfmFootnoteDefinitionContBlank),
+ State::Next(StateName::GfmFootnoteDefinitionContFilled),
+ );
+ State::Retry(StateName::BlankLineStart)
+}
+
+/// Start of footnote definition continuation, at a blank line.
+///
+/// ```markdown
+/// | [^a]: b
+/// > | ␠␠␊
+/// ^
+/// ```
+pub fn cont_blank(tokenizer: &mut Tokenizer) -> State {
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ State::Retry(space_or_tab_min_max(tokenizer, 0, TAB_SIZE))
+ } else {
+ State::Ok
+ }
+}
+
+/// Start of footnote definition continuation, at a filled line.
+///
+/// ```markdown
+/// | [^a]: b
+/// > | c
+/// ^
+/// ```
+pub fn cont_filled(tokenizer: &mut Tokenizer) -> State {
+ if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ // Consume exactly `TAB_SIZE`.
+ State::Retry(space_or_tab_min_max(tokenizer, TAB_SIZE, TAB_SIZE))
+ } else {
+ State::Nok
+ }
+}
diff --git a/src/construct/gfm_label_start_footnote.rs b/src/construct/gfm_label_start_footnote.rs
new file mode 100644
index 0000000..a3a0df6
--- /dev/null
+++ b/src/construct/gfm_label_start_footnote.rs
@@ -0,0 +1,91 @@
+//! Label start (footnote) occurs in the [text][] content type.
+//!
+//! ## Grammar
+//!
+//! Label start (footnote) forms with the following BNF
+//! (<small>see [construct][crate::construct] for character groups</small>):
+//!
+//! ```bnf
+//! gfm_label_start_footnote ::= '[' '^'
+//! ```
+//!
+//! ## HTML
+//!
+//! Label start (footnote) does not, on its own, relate to anything in HTML.
+//! When matched with a [label end][label_end], they together relate to `<sup>`
+//! and `<a>` elements in HTML.
+//! See [*§ 4.5.19 The `sub` and `sup` elements*][html_sup] and
+//! [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info.
+//! Without an end, the characters (`[^`) are output.
+//!
+//! ## Tokens
+//!
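+//! * [`GfmFootnoteCallLabel`][Name::GfmFootnoteCallLabel]
+//! * [`GfmFootnoteCallMarker`][Name::GfmFootnoteCallMarker]
+//! * [`LabelMarker`][Name::LabelMarker]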
+//!
+//! ## References
+//!
+//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
+//!
+//! > 👉 **Note**: Footnotes are not specified in GFM yet.
+//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270)
+//! > for the related issue.
+//!
+//! [text]: crate::construct::text
+//! [label_end]: crate::construct::label_end
+//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
+//! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements
+
+use crate::event::Name;
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::tokenizer::{LabelKind, LabelStart, Tokenizer};
+
+/// Start of label (footnote) start.
+///
+/// Fails with `State::Nok` unless the `gfm_label_start_footnote` construct is
+/// enabled and the current byte is `[`.
+///
+/// ```markdown
+/// > | a [^b] c
+/// ^
+/// ```
+pub fn start(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer
+ .parse_state
+ .options
+ .constructs
+ .gfm_label_start_footnote
+ && tokenizer.current == Some(b'[')
+ {
+ // `GfmFootnoteCallLabel` is opened here and closed in `open`, after
+ // the `^`.
+ tokenizer.enter(Name::GfmFootnoteCallLabel);
+ tokenizer.enter(Name::LabelMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::LabelMarker);
+ State::Next(StateName::GfmLabelStartFootnoteOpen)
+ } else {
+ State::Nok
+ }
+}
+
+/// After `[`, at `^`.
+///
+/// On `^`, finishes the label start, records it in `label_starts` as a
+/// `LabelKind::GfmFootnote`, and registers the label resolver.
+/// Anything else fails with `State::Nok`.
+///
+/// ```markdown
+/// > | a [^b] c
+/// ^
+/// ```
+pub fn open(tokenizer: &mut Tokenizer) -> State {
+ match tokenizer.current {
+ Some(b'^') => {
+ tokenizer.enter(Name::GfmFootnoteCallMarker);
+ tokenizer.consume();
+ tokenizer.exit(Name::GfmFootnoteCallMarker);
+ tokenizer.exit(Name::GfmFootnoteCallLabel);
+ // `start` and this function made six enter/exit calls in total
+ // (`GfmFootnoteCallLabel`, `LabelMarker`, `GfmFootnoteCallMarker`),
+ // so, assuming each call appends one event, `len() - 6` is the
+ // enter of the label start and `len() - 1` its exit.
+ tokenizer.tokenize_state.label_starts.push(LabelStart {
+ kind: LabelKind::GfmFootnote,
+ start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1),
+ inactive: false,
+ });
+ tokenizer.register_resolver_before(ResolveName::Label);
+ State::Ok
+ }
+ _ => State::Nok,
+ }
+}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 0ea745f..b5a6013 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -46,6 +46,8 @@
//! attribute in case of a [label start (link)][label_start_link], and an
//! `src` attribute in case of a [label start (image)][label_start_image].
//! The title is formed, optionally, on either `<a>` or `<img>`.
+//! When matched with a [gfm label start (footnote)][gfm_label_start_footnote],
+//! no reference or resource can follow the label end.
//!
//! For info on how to encode characters in URLs, see
//! [`destination`][destination].
@@ -53,11 +55,13 @@
//! `<img>` when compiling, see
//! [`sanitize_uri`][sanitize_uri].
//!
+//! In case of a matched [gfm label start (footnote)][gfm_label_start_footnote],
+//! a counter is injected.
//! In case of a matched [label start (link)][label_start_link], the interpreted
//! content between it and the label end, is placed between the opening and
//! closing tags.
-//! Otherwise, the text is also interpreted, but used *without* the resulting
-//! tags:
+//! In case of a matched [label start (image)][label_start_image], the text is
+//! also interpreted, but used *without* the resulting tags:
//!
//! ```markdown
//! [a *b* c](#)
@@ -75,8 +79,9 @@
//! It is possible to use images in links.
//! It’s somewhat possible to have links in images (the text will be used, not
//! the HTML, see above).
-//! But it’s not possible to use links in links.
-//! The “deepest” link wins.
+//! But it’s not possible to use links (or footnotes, which result in links)
+//! in links.
+//! The “deepest” link (or footnote) wins.
//! To illustrate:
//!
//! ```markdown
@@ -104,17 +109,26 @@
//! It can also match with [label start (image)][label_start_image], in which
//! case they form an `<img>` element.
//! See [*§ 4.8.3 The `img` element*][html_img] in the HTML spec for more info.
+//! It can also match with [gfm label start (footnote)][gfm_label_start_footnote],
+//! in which case they form `<sup>` and `<a>` elements in HTML.
+//! See [*§ 4.5.19 The `sub` and `sup` elements*][html_sup] and
+//! [*§ 4.5.1 The `a` element*][html_a] in the HTML spec for more info.
//!
//! ## Recommendation
//!
-//! It is recommended to use labels instead of [autolinks][autolink].
+//! It is recommended to use labels for links instead of [autolinks][autolink].
//! Labels allow more characters in URLs, and allow relative URLs and `www.`
//! URLs.
//! They also allow for descriptive text to explain the URL in prose.
//!
+//! In footnotes, it’s recommended to use words instead of numbers (or letters
+//! or anything with an order) as calls.
+//! That makes it easier to reuse and reorder footnotes.
+//!
//! ## Tokens
//!
//! * [`Data`][Name::Data]
+//! * [`GfmFootnoteCall`][Name::GfmFootnoteCall]
//! * [`Image`][Name::Image]
//! * [`Label`][Name::Label]
//! * [`LabelEnd`][Name::LabelEnd]
@@ -140,10 +154,15 @@
//! ## References
//!
//! * [`label-end.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/label-end.js)
+//! * [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote)
//! * [*§ 4.7 Link reference definitions* in `CommonMark`](https://spec.commonmark.org/0.30/#link-reference-definitions)
//! * [*§ 6.3 Links* in `CommonMark`](https://spec.commonmark.org/0.30/#links)
//! * [*§ 6.4 Images* in `CommonMark`](https://spec.commonmark.org/0.30/#images)
//!
+//! > 👉 **Note**: Footnotes are not specified in GFM yet.
+//! > See [`github/cmark-gfm#270`](https://github.com/github/cmark-gfm/issues/270)
+//! > for the related issue.
+//!
//! [string]: crate::construct::string
//! [text]: crate::construct::text
//! [destination]: crate::construct::partial_destination
@@ -151,25 +170,28 @@
//! [label]: crate::construct::partial_label
//! [label_start_image]: crate::construct::label_start_image
//! [label_start_link]: crate::construct::label_start_link
+//! [gfm_label_start_footnote]: crate::construct::gfm_label_start_footnote
//! [definition]: crate::construct::definition
//! [autolink]: crate::construct::autolink
//! [sanitize_uri]: crate::util::sanitize_uri::sanitize_uri
//! [normalize_identifier]: crate::util::normalize_identifier::normalize_identifier
//! [html_a]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-a-element
//! [html_img]: https://html.spec.whatwg.org/multipage/embedded-content.html#the-img-element
+//! [html_sup]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-sub-and-sup-elements
use crate::construct::partial_space_or_tab_eol::space_or_tab_eol;
use crate::event::{Event, Kind, Name};
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
-use crate::tokenizer::{Label, LabelStart, Tokenizer};
+use crate::tokenizer::{Label, LabelKind, LabelStart, Tokenizer};
use crate::util::{
constant::RESOURCE_DESTINATION_BALANCE_MAX,
normalize_identifier::normalize_identifier,
skip,
slice::{Position, Slice},
};
-use alloc::vec;
+use alloc::{string::String, vec};
+extern crate std;
/// Start of label end.
///
@@ -190,7 +212,15 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.end = tokenizer.events.len();
- // Mark as balanced if the info is inactive.
+ // If the corresponding label (link) start is marked as inactive,
+ // it means we’d be wrapping a link, like this:
+ //
+ // ```markdown
+ // > | a [b [c](d) e](f) g.
+ // ^
+ // ```
+ //
+ // We can’t have that, so it’s just balanced brackets.
if label_start.inactive {
return State::Retry(StateName::LabelEndNok);
}
@@ -220,19 +250,34 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
pub fn after(tokenizer: &mut Tokenizer) -> State {
- let start = tokenizer.tokenize_state.label_starts.last().unwrap();
- let defined = tokenizer
- .parse_state
- .definitions
- .contains(&normalize_identifier(
- // We don’t care about virtual spaces, so `indices` and `as_str` are fine.
- Slice::from_indices(
- tokenizer.parse_state.bytes,
- tokenizer.events[start.start.1].point.index,
- tokenizer.events[tokenizer.tokenize_state.end].point.index,
- )
- .as_str(),
- ));
+ let start_index = tokenizer.tokenize_state.label_starts.len() - 1;
+ let start = &tokenizer.tokenize_state.label_starts[start_index];
+
+ let indices = (
+ tokenizer.events[start.start.1].point.index,
+ tokenizer.events[tokenizer.tokenize_state.end].point.index,
+ );
+
+ // We don’t care about virtual spaces, so `indices` and `as_str` are fine.
+ let mut id = normalize_identifier(
+ Slice::from_indices(tokenizer.parse_state.bytes, indices.0, indices.1).as_str(),
+ );
+
+ // See if this matches a footnote definition.
+ if start.kind == LabelKind::GfmFootnote {
+ if tokenizer.parse_state.gfm_footnote_definitions.contains(&id) {
+ return State::Retry(StateName::LabelEndOk);
+ }
+
+ // Nope, this might be a normal link?
+ tokenizer.tokenize_state.label_starts[start_index].kind = LabelKind::GfmUndefinedFootnote;
+ let mut new_id = String::new();
+ new_id.push('^');
+ new_id.push_str(&id);
+ id = new_id;
+ }
+
+ let defined = tokenizer.parse_state.definitions.contains(&id);
match tokenizer.current {
// Resource (`[asd](fgh)`)?
@@ -302,17 +347,15 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State {
// Remove the start.
let label_start = tokenizer.tokenize_state.label_starts.pop().unwrap();
- let is_link = tokenizer.events[label_start.start.0].name == Name::LabelLink;
-
- // If this is a link, we need to mark earlier link starts as no longer
- // viable for use (as they would otherwise contain a link).
+ // If this is a link or footnote, we need to mark earlier link starts as no
+ // longer viable for use (as they would otherwise contain a link).
// These link starts are still looking for balanced closing brackets, so
- // we can’t remove them.
- if is_link {
+ // we can’t remove them, but we can mark them.
+ if label_start.kind != LabelKind::Image {
let mut index = 0;
while index < tokenizer.tokenize_state.label_starts.len() {
let label_start = &mut tokenizer.tokenize_state.label_starts[index];
- if tokenizer.events[label_start.start.0].name == Name::LabelLink {
+ if label_start.kind != LabelKind::Image {
label_start.inactive = true;
}
index += 1;
@@ -320,6 +363,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State {
}
tokenizer.tokenize_state.labels.push(Label {
+ kind: label_start.kind,
start: label_start.start,
end: (tokenizer.tokenize_state.end, tokenizer.events.len() - 1),
});
@@ -342,9 +386,7 @@ pub fn ok(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn nok(tokenizer: &mut Tokenizer) -> State {
let start = tokenizer.tokenize_state.label_starts.pop().unwrap();
-
tokenizer.tokenize_state.label_starts_loose.push(start);
-
tokenizer.tokenize_state.end = 0;
State::Nok
}
@@ -615,120 +657,142 @@ pub fn reference_collapsed_open(tokenizer: &mut Tokenizer) -> State {
}
}
-/// Resolve media.
+/// Resolve images, links, and footnotes.
 ///
-/// This turns matching label start (image, link) and label ends into links and
-/// images, and turns unmatched label starts back into data.
+/// This turns matching label starts and label ends into links, images, and
+/// footnotes, and turns unmatched label starts back into data.
 pub fn resolve(tokenizer: &mut Tokenizer) {
- let list = tokenizer.tokenize_state.label_starts.split_off(0);
- mark_as_data(tokenizer, &list);
- let list = tokenizer.tokenize_state.label_starts_loose.split_off(0);
- mark_as_data(tokenizer, &list);
+ // Inject labels.
+ let labels = tokenizer.tokenize_state.labels.split_off(0);
+ inject_labels(tokenizer, &labels);
+ // Handle loose starts.
+ // Both lists hold starts that never matched: `label_starts` those still
+ // open, `label_starts_loose` those moved there by `nok`.
+ let starts = tokenizer.tokenize_state.label_starts.split_off(0);
+ mark_as_data(tokenizer, &starts);
+ let starts = tokenizer.tokenize_state.label_starts_loose.split_off(0);
+ mark_as_data(tokenizer, &starts);
- let media = tokenizer.tokenize_state.labels.split_off(0);
+ // NOTE(review): presumably applies every edit registered through
+ // `tokenizer.map.add` above to the event list — confirm in the edit map.
+ tokenizer.map.consume(&mut tokenizer.events);
+}
+/// Inject links/images/footnotes.
+///
+/// For every matched label this registers, through `tokenizer.map.add`, the
+/// enter/exit events of the group (`Link`, `Image`, or `GfmFootnoteCall`), of
+/// `Label`, and of `LabelText`.
+/// A label of kind `LabelKind::GfmUndefinedFootnote` (it started with `[^` but
+/// was accepted as a regular link) has its start renamed to `LabelLink` and
+/// its `^` marker replaced by `Data` inside the label text.
+///
+/// The edits are only registered here; the caller is expected to apply them.
+fn inject_labels(tokenizer: &mut Tokenizer, labels: &[Label]) {
 // Add grouping events.
 let mut index = 0;
- while index < media.len() {
- let media = &media[index];
- // LabelLink:Enter or LabelImage:Enter.
- let group_enter_index = media.start.0;
- let group_enter_event = &tokenizer.events[group_enter_index];
- // LabelLink:Exit or LabelImage:Exit.
- let text_enter_index = media.start.0
- + (if group_enter_event.name == Name::LabelLink {
- 4
- } else {
- 6
- });
- // LabelEnd:Enter.
- let text_exit_index = media.end.0;
- // LabelEnd:Exit.
- let label_exit_index = media.end.0 + 3;
- // Resource:Exit, etc.
- let group_end_index = media.end.1;
-
- let group_name = if group_enter_event.name == Name::LabelLink {
- Name::Link
- } else {
+ while index < labels.len() {
+ let label = &labels[index];
+ let group_name = if label.kind == LabelKind::GfmFootnote {
+ Name::GfmFootnoteCall
+ } else if label.kind == LabelKind::Image {
 Name::Image
+ } else {
+ Name::Link
 };
+ // If this is a fine link, which starts with a footnote start that did
+ // not match, we need to inject the caret as data.
+ let mut caret = vec![];
+
+ if label.kind == LabelKind::GfmUndefinedFootnote {
+ // Add caret.
+ caret.push(Event {
+ kind: Kind::Enter,
+ name: Name::Data,
+ // Enter:GfmFootnoteCallMarker.
+ point: tokenizer.events[label.start.1 - 2].point.clone(),
+ link: None,
+ });
+ caret.push(Event {
+ kind: Kind::Exit,
+ name: Name::Data,
+ // Exit:GfmFootnoteCallMarker.
+ point: tokenizer.events[label.start.1 - 1].point.clone(),
+ link: None,
+ });
+ // Change and move label end.
+ tokenizer.events[label.start.0].name = Name::LabelLink;
+ tokenizer.events[label.start.1].name = Name::LabelLink;
+ tokenizer.events[label.start.1].point = caret[0].point.clone();
+ // Remove the caret.
+ // Enter:GfmFootnoteCallMarker, Exit:GfmFootnoteCallMarker.
+ tokenizer.map.add(label.start.1 - 2, 2, vec![]);
+ }
+
 // Insert a group enter and label enter.
 tokenizer.map.add(
- group_enter_index,
+ label.start.0,
 0,
 vec![
 Event {
 kind: Kind::Enter,
 name: group_name.clone(),
- point: group_enter_event.point.clone(),
+ point: tokenizer.events[label.start.0].point.clone(),
 link: None,
 },
 Event {
 kind: Kind::Enter,
 name: Name::Label,
- point: group_enter_event.point.clone(),
+ point: tokenizer.events[label.start.0].point.clone(),
 link: None,
 },
 ],
 );
 // Empty events not allowed.
- if text_enter_index != text_exit_index {
- // Insert a text enter.
+ // Though: if this was what looked like a footnote, but didn’t match,
+ // it’s a link instead, and we need to inject the `^`.
+ // NOTE(review): the replaced code compared `start.0 + 4`/`+ 6` (the index
+ // right after the label start, i.e. `label.start.1 + 1`) with `end.0`;
+ // `label.start.1` is one less, so confirm that an empty label (`[]`)
+ // does not end up with an empty `LabelText`.
+ if label.start.1 != label.end.0 || !caret.is_empty() {
 tokenizer.map.add(
- text_enter_index,
+ label.start.1 + 1,
 0,
 vec![Event {
 kind: Kind::Enter,
 name: Name::LabelText,
- point: tokenizer.events[text_enter_index].point.clone(),
+ point: tokenizer.events[label.start.1].point.clone(),
 link: None,
 }],
 );
-
- // Insert a text exit.
 tokenizer.map.add(
- text_exit_index,
+ label.end.0,
 0,
 vec![Event {
 kind: Kind::Exit,
 name: Name::LabelText,
- point: tokenizer.events[text_exit_index].point.clone(),
+ point: tokenizer.events[label.end.0].point.clone(),
 link: None,
 }],
 );
 }
+ if !caret.is_empty() {
+ tokenizer.map.add(label.start.1 + 1, 0, caret);
+ }
+
 // Insert a label exit.
 tokenizer.map.add(
- label_exit_index + 1,
+ label.end.0 + 4,
 0,
 vec![Event {
 kind: Kind::Exit,
 name: Name::Label,
- point: tokenizer.events[label_exit_index].point.clone(),
+ point: tokenizer.events[label.end.0 + 3].point.clone(),
 link: None,
 }],
 );
 // Insert a group exit.
 tokenizer.map.add(
- group_end_index + 1,
+ label.end.1 + 1,
 0,
 vec![Event {
 kind: Kind::Exit,
 name: group_name,
- point: tokenizer.events[group_end_index].point.clone(),
+ point: tokenizer.events[label.end.1].point.clone(),
 link: None,
 }],
 );
 index += 1;
 }
-
- tokenizer.map.consume(&mut tokenizer.events);
 }
/// Remove loose label starts.
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index a8c9ac3..4511794 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -35,7 +35,7 @@
use crate::event::Name;
use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
-use crate::tokenizer::{LabelStart, Tokenizer};
+use crate::tokenizer::{LabelKind, LabelStart, Tokenizer};
/// Start of label (image) start.
///
@@ -68,14 +68,52 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Name::LabelMarker);
tokenizer.consume();
tokenizer.exit(Name::LabelMarker);
- tokenizer.exit(Name::LabelImage);
- tokenizer.tokenize_state.label_starts.push(LabelStart {
- start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1),
- inactive: false,
- });
- tokenizer.register_resolver_before(ResolveName::Label);
- State::Ok
+ State::Next(StateName::LabelStartImageAfter)
}
_ => State::Nok,
}
}
+
+/// After `![`.
+///
+/// ```markdown
+/// > | a ![b] c
+/// ^
+/// ```
+///
+/// This is needed because, when GFM footnotes are enabled, images never
+/// form when started with a `^`.
+/// Instead, links form:
+///
+/// ```markdown
+/// ![^a](b)
+///
+/// ![^a][b]
+///
+/// [b]: c
+/// ```
+///
+/// ```html
+/// <p>!<a href="b">^a</a></p>
+/// <p>!<a href="c">^a</a></p>
+/// ```
+pub fn after(tokenizer: &mut Tokenizer) -> State {
+ if tokenizer
+ .parse_state
+ .options
+ .constructs
+ .gfm_label_start_footnote
+ && tokenizer.current == Some(b'^')
+ {
+ // Reject the image start so that, as in the example above, the `[^`
+ // is not claimed by an image.
+ State::Nok
+ } else {
+ tokenizer.exit(Name::LabelImage);
+ tokenizer.tokenize_state.label_starts.push(LabelStart {
+ kind: LabelKind::Image,
+ start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1),
+ inactive: false,
+ });
+ tokenizer.register_resolver_before(ResolveName::Label);
+ State::Ok
+ }
+}
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 3aeb68b..3454724 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -34,7 +34,7 @@
use crate::event::Name;
use crate::resolve::Name as ResolveName;
use crate::state::State;
-use crate::tokenizer::{LabelStart, Tokenizer};
+use crate::tokenizer::{LabelKind, LabelStart, Tokenizer};
/// Start of label (link) start.
///
@@ -52,6 +52,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Name::LabelMarker);
tokenizer.exit(Name::LabelLink);
tokenizer.tokenize_state.label_starts.push(LabelStart {
+ kind: LabelKind::Link,
start: (start, tokenizer.events.len() - 1),
inactive: false,
});
diff --git a/src/construct/list_item.rs b/src/construct/list_item.rs
index 39b5d13..658c2c7 100644
--- a/src/construct/list_item.rs
+++ b/src/construct/list_item.rs
@@ -17,7 +17,7 @@
//! ```
//!
//! Further lines that are not prefixed with `list_item_cont` cause the list
-//! item to be exited, except when those lines are lazy continuation.
+//! item to be exited, except when those lines are lazy continuation or blank.
//! Like so many things in markdown, list items too, are complex.
//! See [*§ Phase 1: block structure* in `CommonMark`][commonmark_block] for
//! more on parsing details.
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 7ac3899..c5002bb 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -59,6 +59,9 @@
//!
//! * [frontmatter][]
//! * [gfm autolink literal][gfm_autolink_literal]
+//! * [gfm footnote definition][gfm_footnote_definition]
+//! * [gfm task list item check][gfm_task_list_item_check]
+//! * [gfm label start footnote][gfm_label_start_footnote]
//!
//! There are also several small subroutines typically used in different places:
//!
@@ -146,6 +149,8 @@ pub mod document;
pub mod flow;
pub mod frontmatter;
pub mod gfm_autolink_literal;
+pub mod gfm_footnote_definition;
+pub mod gfm_label_start_footnote;
pub mod gfm_task_list_item_check;
pub mod hard_break_escape;
pub mod heading_atx;
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 47ffd90..ab436b2 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -81,13 +81,37 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(tokenizer.tokenize_state.token_2.clone());
tokenizer.consume();
tokenizer.exit(tokenizer.tokenize_state.token_2.clone());
- tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
- State::Next(StateName::LabelAtBreak)
+ State::Next(StateName::LabelAtMarker)
}
_ => State::Nok,
}
}
+/// At an optional extra marker.
+///
+/// Used for footnotes.
+///
+/// When `tokenize_state.marker` is `0`, no marker is required and the label
+/// content starts right away.
+/// Otherwise the current byte must equal the marker, which is wrapped in
+/// `token_4`; any other byte fails with `State::Nok`.
+///
+/// ```markdown
+/// > | [^a]
+/// ^
+/// ```
+pub fn at_marker(tokenizer: &mut Tokenizer) -> State {
+ // For footnotes (and potentially other custom things in the future),
+ // we need to make sure there is a certain marker after `[`.
+ if tokenizer.tokenize_state.marker == 0 {
+ // No marker configured: open the content token (`token_3`) and handle
+ // the current byte again as label content.
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ State::Retry(StateName::LabelAtBreak)
+ } else if tokenizer.current == Some(tokenizer.tokenize_state.marker) {
+ // The marker gets its own token; the content token opens after it.
+ tokenizer.enter(tokenizer.tokenize_state.token_4.clone());
+ tokenizer.consume();
+ tokenizer.exit(tokenizer.tokenize_state.token_4.clone());
+ tokenizer.enter(tokenizer.tokenize_state.token_3.clone());
+ State::Next(StateName::LabelAtBreak)
+ } else {
+ State::Nok
+ }
+}
+
/// In label, at something, before something else.
///
/// ```markdown
diff --git a/src/construct/text.rs b/src/construct/text.rs
index 65f55d4..5535e3f 100644
--- a/src/construct/text.rs
+++ b/src/construct/text.rs
@@ -11,6 +11,8 @@
//! * [Character escape][crate::construct::character_escape]
//! * [Character reference][crate::construct::character_reference]
//! * [Code (text)][crate::construct::code_text]
+//! * [GFM: Label start (footnote)][crate::construct::gfm_label_start_footnote]
+//! * [GFM: Task list item check][crate::construct::gfm_task_list_item_check]
//! * [Hard break (escape)][crate::construct::hard_break_escape]
//! * [HTML (text)][crate::construct::html_text]
//! * [Label start (image)][crate::construct::label_start_image]
@@ -34,7 +36,7 @@ const MARKERS: [u8; 10] = [
b'<', // `autolink`, `html_text`
- b'[', // `label_start_link`
+ b'[', // `gfm_label_start_footnote`, `label_start_link`
 b'\\', // `character_escape`, `hard_break_escape`
 b']', // `label_end`
b'_', // `attention`
b'`', // `code_text`
b'~', // `attention` (w/ `gfm_strikethrough`)
@@ -104,9 +106,9 @@ pub fn before(tokenizer: &mut Tokenizer) -> State {
Some(b'[') => {
tokenizer.attempt(
State::Next(StateName::TextBefore),
- State::Next(StateName::TextBeforeData),
+ State::Next(StateName::TextBeforeLabelStartLink),
);
- State::Retry(StateName::LabelStartLinkStart)
+ State::Retry(StateName::GfmLabelStartFootnoteStart)
}
Some(b'\\') => {
tokenizer.attempt(
@@ -165,6 +167,22 @@ pub fn before_hard_break_escape(tokenizer: &mut Tokenizer) -> State {
State::Retry(StateName::HardBreakEscapeStart)
}
+/// Before label start (link).
+///
+/// At `[`, which wasn’t a GFM label start (footnote).
+///
+/// Tries a label start (link); if that fails too, continues in
+/// `TextBeforeData`.
+///
+/// ```markdown
+/// > | [a](b)
+/// ^
+/// ```
+pub fn before_label_start_link(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.attempt(
+ State::Next(StateName::TextBefore),
+ State::Next(StateName::TextBeforeData),
+ );
+ State::Retry(StateName::LabelStartLinkStart)
+}
+
/// Before data.
///
/// ```markdown
diff --git a/src/event.rs b/src/event.rs
index f20c599..3b805e5 100644
--- a/src/event.rs
+++ b/src/event.rs
@@ -753,7 +753,8 @@ pub enum Name {
/// ## Info
///
/// * **Context**:
- /// [`Definition`][Name::Definition]
+ /// [`Definition`][Name::Definition],
+ /// [`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition]
/// * **Content model**:
/// void
/// * **Construct**:
@@ -1019,7 +1020,172 @@ pub enum Name {
/// ^^^^^^^^^^^^^^^
/// ```
GfmAutolinkLiteralWww,
- /// GFM: Strikethrough.
+ /// GFM extension: whole footnote call.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [text content][crate::construct::text]
+ /// * **Content model**:
+ /// [`Label`][Name::Label]
+ /// * **Construct**:
+ /// [`label_end`][crate::construct::label_end]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | a [^b] c
+ /// ^^^^
+ /// ```
+ GfmFootnoteCall,
+ /// GFM extension: label start (footnote).
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`Label`][Name::Label]
+ /// * **Content model**:
+ /// [`GfmFootnoteCallMarker`][Name::GfmFootnoteCallMarker],
+ /// [`LabelMarker`][Name::LabelMarker]
+ /// * **Construct**:
+ /// [`gfm_label_start_footnote`][crate::construct::gfm_label_start_footnote]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | a [^b] c
+ /// ^^
+ /// ```
+ GfmFootnoteCallLabel,
+ /// GFM extension: label start (footnote) marker.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmFootnoteCallLabel`][Name::GfmFootnoteCallLabel]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_label_start_footnote`][crate::construct::gfm_label_start_footnote]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | a [^b] c
+ /// ^
+ /// ```
+ GfmFootnoteCallMarker,
+ /// GFM extension: whole footnote definition.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [document content][crate::construct::document]
+ /// * **Content model**:
+ /// [`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix],
+ /// [document content][crate::construct::flow]
+ /// * **Construct**:
+ /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | [^a]: b
+ /// ^^^^^^^
+ /// ```
+ GfmFootnoteDefinition,
+ /// GFM extension: footnote definition prefix.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmFootnoteDefinition`][Name::GfmFootnoteDefinition]
+ /// * **Content model**:
+ /// [`DefinitionMarker`][Name::DefinitionMarker],
+ /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel],
+ /// [`SpaceOrTab`][Name::SpaceOrTab]
+ /// * **Construct**:
+ /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | [^a]: b
+ /// ^^^^^^
+ /// ```
+ GfmFootnoteDefinitionPrefix,
+ /// GFM extension: footnote definition label.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmFootnoteDefinitionPrefix`][Name::GfmFootnoteDefinitionPrefix]
+ /// * **Content model**:
+ /// [`GfmFootnoteDefinitionLabelMarker`][Name::GfmFootnoteDefinitionLabelMarker],
+ /// [`GfmFootnoteDefinitionLabelString`][Name::GfmFootnoteDefinitionLabelString],
+ /// [`GfmFootnoteDefinitionMarker`][Name::GfmFootnoteDefinitionMarker]
+ /// * **Construct**:
+ /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | [^a]: b
+ /// ^^^^
+ /// ```
+ GfmFootnoteDefinitionLabel,
+ /// GFM extension: footnote definition label marker.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | [^a]: b
+ /// ^ ^
+ /// ```
+ GfmFootnoteDefinitionLabelMarker,
+ /// GFM extension: footnote definition label string.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel]
+ /// * **Content model**:
+ /// [string content][crate::construct::string]
+ /// * **Construct**:
+ /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | [^a]: b
+ /// ^
+ /// ```
+ GfmFootnoteDefinitionLabelString,
+ /// GFM extension: footnote definition marker.
+ ///
+ /// ## Info
+ ///
+ /// * **Context**:
+ /// [`GfmFootnoteDefinitionLabel`][Name::GfmFootnoteDefinitionLabel]
+ /// * **Content model**:
+ /// void
+ /// * **Construct**:
+ /// [`gfm_footnote_definition`][crate::construct::gfm_footnote_definition]
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// > | [^a]: b
+ /// ^
+ /// ```
+ GfmFootnoteDefinitionMarker,
+ /// GFM extension: Strikethrough.
///
/// ## Info
///
@@ -1038,7 +1204,7 @@ pub enum Name {
/// ^^^
/// ```
GfmStrikethrough,
- /// Gfm: Strikethrough sequence.
+ /// GFM extension: Strikethrough sequence.
///
/// ## Info
///
@@ -1056,7 +1222,7 @@ pub enum Name {
/// ^ ^
/// ```
GfmStrikethroughSequence,
- /// Gfm: Strikethrough text.
+ /// GFM extension: Strikethrough text.
///
/// ## Info
///
@@ -1074,7 +1240,7 @@ pub enum Name {
/// ^
/// ```
GfmStrikethroughText,
- /// GFM: Task list item check.
+ /// GFM extension: task list item check.
///
/// ## Info
///
@@ -1094,7 +1260,7 @@ pub enum Name {
/// ^^^
/// ```
GfmTaskListItemCheck,
- /// GFM: Task list item check marker.
+ /// GFM extension: task list item check marker.
///
/// ## Info
///
@@ -1112,7 +1278,7 @@ pub enum Name {
/// ^ ^
/// ```
GfmTaskListItemMarker,
- /// GFM: Task list item value: checked.
+ /// GFM extension: task list item value: checked.
///
/// ## Info
///
@@ -1130,7 +1296,7 @@ pub enum Name {
/// ^
/// ```
GfmTaskListItemValueChecked,
- /// GFM: Task list item value: unchecked.
+ /// GFM extension: task list item value: unchecked.
///
/// ## Info
///
@@ -2105,7 +2271,7 @@ pub enum Name {
}
/// List of void events, used to make sure everything is working well.
-pub const VOID_EVENTS: [Name; 50] = [
+pub const VOID_EVENTS: [Name; 53] = [
Name::AttentionSequence,
Name::AutolinkEmail,
Name::AutolinkMarker,
@@ -2134,6 +2300,9 @@ pub const VOID_EVENTS: [Name; 50] = [
Name::GfmAutolinkLiteralEmail,
Name::GfmAutolinkLiteralProtocol,
Name::GfmAutolinkLiteralWww,
+ Name::GfmFootnoteCallMarker,
+ Name::GfmFootnoteDefinitionLabelMarker,
+ Name::GfmFootnoteDefinitionMarker,
Name::GfmStrikethroughSequence,
Name::GfmTaskListItemMarker,
Name::GfmTaskListItemValueChecked,
diff --git a/src/lib.rs b/src/lib.rs
index 5b7836c..fd5e500 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -171,7 +171,20 @@ pub struct Constructs {
/// ^^^^^^^^^^^^^^^^^^^
/// ```
pub gfm_autolink_literal: bool,
- /// GFM: strikethrough.
+ /// GFM: footnote definition.
+ ///
+ /// ```markdown
+ /// > | [^a]: b
+ /// ^^^^^^^
+ /// ```
+ pub gfm_footnote_definition: bool,
+ /// GFM: footnote label start.
+ ///
+ /// ```markdown
+ /// > | a[^b]
+ /// ^^
+ /// ```
+ pub gfm_label_start_footnote: bool,
+ /// GFM: strikethrough.
 ///
/// ```markdown
/// > | a ~b~ c.
@@ -283,6 +296,8 @@ impl Default for Constructs {
definition: true,
frontmatter: false,
gfm_autolink_literal: false,
+ gfm_label_start_footnote: false,
+ gfm_footnote_definition: false,
gfm_strikethrough: false,
gfm_task_list_item: false,
hard_break_escape: true,
@@ -308,6 +323,8 @@ impl Constructs {
pub fn gfm() -> Self {
Self {
gfm_autolink_literal: true,
+ gfm_footnote_definition: true,
+ gfm_label_start_footnote: true,
gfm_strikethrough: true,
gfm_task_list_item: true,
..Self::default()
@@ -376,6 +393,206 @@ pub struct Options {
/// ```
pub allow_dangerous_protocol: bool,
+ /// Label to use for the footnotes section.
+ ///
+ /// Change it when the markdown is not in English.
+ /// Typically affects screen readers (change `gfm_footnote_label_attributes`
+ /// to make it visible).
+ ///
+ /// ## Examples
+ ///
+ /// ```
+ /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+ ///
+ /// // `"Footnotes"` is used by default:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ ///
+ /// // Pass `gfm_footnote_label` to use something else:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// gfm_footnote_label: Some("Notes de bas de page".to_string()),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Notes de bas de page</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ /// ```
+ pub gfm_footnote_label: Option<String>,
+
+ /// HTML tag to use for the footnote label.
+ ///
+ /// Change it to match your document structure and play well with your CSS.
+ ///
+ /// ## Examples
+ ///
+ /// ```
+ /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+ ///
+ /// // `"h2"` is used by default:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ ///
+ /// // Pass `gfm_footnote_label_tag_name` to use something else:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// gfm_footnote_label_tag_name: Some("h1".to_string()),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h1 id=\"footnote-label\" class=\"sr-only\">Footnotes</h1>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ /// ```
+ pub gfm_footnote_label_tag_name: Option<String>,
+
+ /// Attributes to use on the footnote label.
+ ///
+ /// > 👉 **Note**: `id="footnote-label"` is always added, because footnote
+ /// > calls use it with `aria-describedby` to provide an accessible label.
+ ///
+ /// A `class="sr-only"` is added by default to hide the label from sighted
+ /// users.
+ /// Change it to make the label visible, or add other classes or other
+ /// attributes.
+ ///
+ /// ## Examples
+ ///
+ /// ```
+ /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+ ///
+ /// // `"class=\"sr-only\""` is used by default:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ ///
+ /// // Pass `gfm_footnote_label_attributes` to use something else:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// gfm_footnote_label_attributes: Some("class=\"footnote-heading\"".to_string()),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"footnote-heading\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ /// ```
+ pub gfm_footnote_label_attributes: Option<String>,
+
+ /// Label to use from backreferences back to their footnote call.
+ ///
+ /// Change it when the markdown is not in English.
+ /// Affects screen readers.
+ ///
+ /// ## Examples
+ ///
+ /// ```
+ /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+ ///
+ /// // `"Back to content"` is used by default:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ ///
+ /// // Pass `gfm_footnote_back_label` to use something else:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// gfm_footnote_back_label: Some("Arrière".to_string()),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Arrière\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ /// ```
+ pub gfm_footnote_back_label: Option<String>,
+
+ /// Prefix to use before the `id` attribute on footnotes to prevent them
+ /// from *clobbering*.
+ ///
+ /// DOM clobbering is this:
+ ///
+ /// ```html
+ /// <p id=x></p>
+ /// <script>alert(x) // `x` now refers to the DOM `p#x` element</script>
+ /// ```
+ ///
+ /// The above example shows that elements are made available by browsers,
+ /// by their ID, on the `window` object, which is a security risk because
+ /// you might be expecting some other variable at that place.
+ /// Using a prefix solves this problem.
+ ///
+ /// ## Examples
+ ///
+ /// ```
+ /// use micromark::{micromark, micromark_with_options, Options, Constructs};
+ ///
+ /// // `"user-content-"` is used by default:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#user-content-fn-a\" id=\"user-content-fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"user-content-fn-a\">\n<p>b <a href=\"#user-content-fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ ///
+ /// // Pass `gfm_footnote_clobber_prefix` to use something else:
+ /// assert_eq!(
+ /// micromark_with_options(
+ /// "[^a]\n\n[^a]: b",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// gfm_footnote_clobber_prefix: Some("".to_string()),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><sup><a href=\"#fn-a\" id=\"fnref-a\" data-footnote-ref=\"\" aria-describedby=\"footnote-label\">1</a></sup></p>\n<section data-footnotes=\"\" class=\"footnotes\"><h2 id=\"footnote-label\" class=\"sr-only\">Footnotes</h2>\n<ol>\n<li id=\"fn-a\">\n<p>b <a href=\"#fnref-a\" data-footnote-backref=\"\" class=\"data-footnote-backref\" aria-label=\"Back to content\">↩</a></p>\n</li>\n</ol>\n</section>\n"
+ /// );
+ /// ```
+ pub gfm_footnote_clobber_prefix: Option<String>,
+
/// Whether to support GFM strikethrough (if enabled in `constructs`) with
/// a single tilde (default: true).
///
@@ -389,26 +606,26 @@ pub struct Options {
/// // micromark supports single tildes by default:
/// assert_eq!(
/// micromark_with_options(
- /// "~a~",
- /// &Options {
- /// constructs: Constructs::gfm(),
- /// ..Options::default()
- /// }
- /// ),
- /// "<p><del>a</del></p>"
+ /// "~a~",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p><del>a</del></p>"
/// );
///
/// // Pass `gfm_strikethrough_single_tilde: false` to turn that off:
/// assert_eq!(
/// micromark_with_options(
- /// "~a~",
- /// &Options {
- /// constructs: Constructs::gfm(),
- /// gfm_strikethrough_single_tilde: false,
- /// ..Options::default()
- /// }
- /// ),
- /// "<p>~a~</p>"
+ /// "~a~",
+ /// &Options {
+ /// constructs: Constructs::gfm(),
+ /// gfm_strikethrough_single_tilde: false,
+ /// ..Options::default()
+ /// }
+ /// ),
+ /// "<p>~a~</p>"
/// );
/// ```
pub gfm_strikethrough_single_tilde: bool,
@@ -488,6 +705,11 @@ impl Default for Options {
Self {
allow_dangerous_html: false,
allow_dangerous_protocol: false,
+ gfm_footnote_label: None,
+ gfm_footnote_label_tag_name: None,
+ gfm_footnote_label_attributes: None,
+ gfm_footnote_back_label: None,
+ gfm_footnote_clobber_prefix: None,
gfm_strikethrough_single_tilde: true,
default_line_ending: LineEnding::default(),
constructs: Constructs::default(),
diff --git a/src/parser.rs b/src/parser.rs
index afa08ac..62b3e03 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -17,8 +17,10 @@ pub struct ParseState<'a> {
pub options: &'a Options,
/// List of chars.
pub bytes: &'a [u8],
- /// Set of defined identifiers.
+ /// Set of defined definition identifiers.
pub definitions: Vec<String>,
+ /// Set of defined GFM footnote definition identifiers.
+ pub gfm_footnote_definitions: Vec<String>,
}
/// Turn a string of markdown into events.
@@ -29,6 +31,7 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8])
options,
bytes: value.as_bytes(),
definitions: vec![],
+ gfm_footnote_definitions: vec![],
};
let mut tokenizer = Tokenizer::new(
@@ -50,7 +53,10 @@ pub fn parse<'a>(value: &'a str, options: &'a Options) -> (Vec<Event>, &'a [u8])
let mut events = tokenizer.events;
- parse_state.definitions = tokenizer.tokenize_state.definitions;
+ let footnote = tokenizer.tokenize_state.gfm_footnote_definitions;
+ let normal = tokenizer.tokenize_state.definitions;
+ parse_state.gfm_footnote_definitions = footnote;
+ parse_state.definitions = normal;
while !subtokenize(&mut events, &parse_state) {}
diff --git a/src/state.rs b/src/state.rs
index 65ffbeb..6c3f563 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -116,6 +116,7 @@ pub enum Name {
DocumentContainerNewBefore,
DocumentContainerNewBeforeNotBlockQuote,
DocumentContainerNewBeforeNotList,
+ DocumentContainerNewBeforeNotGfmFootnoteDefinition,
DocumentContainerNewAfter,
DocumentContainersAfter,
DocumentFlowInside,
@@ -145,6 +146,17 @@ pub enum Name {
FrontmatterCloseSequence,
FrontmatterCloseAfter,
+ GfmFootnoteDefinitionStart,
+ GfmFootnoteDefinitionLabelBefore,
+ GfmFootnoteDefinitionLabelAfter,
+ GfmFootnoteDefinitionWhitespaceAfter,
+ GfmFootnoteDefinitionContStart,
+ GfmFootnoteDefinitionContBlank,
+ GfmFootnoteDefinitionContFilled,
+
+ GfmLabelStartFootnoteStart,
+ GfmLabelStartFootnoteOpen,
+
GfmTaskListItemCheckStart,
GfmTaskListItemCheckInside,
GfmTaskListItemCheckClose,
@@ -230,6 +242,7 @@ pub enum Name {
HtmlTextLineEndingAfterPrefix,
LabelStart,
+ LabelAtMarker,
LabelAtBreak,
LabelEolAfter,
LabelAtBlankLine,
@@ -256,6 +269,7 @@ pub enum Name {
LabelStartImageStart,
LabelStartImageOpen,
+ LabelStartImageAfter,
LabelStartLinkStart,
@@ -299,6 +313,7 @@ pub enum Name {
TextBefore,
TextBeforeHtml,
TextBeforeHardBreakEscape,
+ TextBeforeLabelStartLink,
TextBeforeData,
ThematicBreakStart,
@@ -421,6 +436,9 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::DocumentContainerNewBeforeNotList => {
construct::document::container_new_before_not_list
}
+ Name::DocumentContainerNewBeforeNotGfmFootnoteDefinition => {
+ construct::document::container_new_before_not_footnote_definition
+ }
Name::DocumentContainerNewAfter => construct::document::container_new_after,
Name::DocumentContainersAfter => construct::document::containers_after,
Name::DocumentFlowEnd => construct::document::flow_end,
@@ -450,6 +468,19 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::FrontmatterCloseSequence => construct::frontmatter::close_sequence,
Name::FrontmatterCloseAfter => construct::frontmatter::close_after,
+ Name::GfmFootnoteDefinitionStart => construct::gfm_footnote_definition::start,
+ Name::GfmFootnoteDefinitionLabelBefore => construct::gfm_footnote_definition::label_before,
+ Name::GfmFootnoteDefinitionLabelAfter => construct::gfm_footnote_definition::label_after,
+ Name::GfmFootnoteDefinitionWhitespaceAfter => {
+ construct::gfm_footnote_definition::whitespace_after
+ }
+ Name::GfmFootnoteDefinitionContStart => construct::gfm_footnote_definition::cont_start,
+ Name::GfmFootnoteDefinitionContBlank => construct::gfm_footnote_definition::cont_blank,
+ Name::GfmFootnoteDefinitionContFilled => construct::gfm_footnote_definition::cont_filled,
+
+ Name::GfmLabelStartFootnoteStart => construct::gfm_label_start_footnote::start,
+ Name::GfmLabelStartFootnoteOpen => construct::gfm_label_start_footnote::open,
+
Name::GfmTaskListItemCheckStart => construct::gfm_task_list_item_check::start,
Name::GfmTaskListItemCheckInside => construct::gfm_task_list_item_check::inside,
Name::GfmTaskListItemCheckClose => construct::gfm_task_list_item_check::close,
@@ -563,6 +594,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::HtmlTextLineEndingAfterPrefix => construct::html_text::line_ending_after_prefix,
Name::LabelStart => construct::partial_label::start,
+ Name::LabelAtMarker => construct::partial_label::at_marker,
Name::LabelAtBreak => construct::partial_label::at_break,
Name::LabelEolAfter => construct::partial_label::eol_after,
Name::LabelAtBlankLine => construct::partial_label::at_blank_line,
@@ -591,6 +623,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::LabelStartImageStart => construct::label_start_image::start,
Name::LabelStartImageOpen => construct::label_start_image::open,
+ Name::LabelStartImageAfter => construct::label_start_image::after,
Name::LabelStartLinkStart => construct::label_start_link::start,
Name::ListItemStart => construct::list_item::start,
@@ -633,6 +666,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::TextBefore => construct::text::before,
Name::TextBeforeHtml => construct::text::before_html,
Name::TextBeforeHardBreakEscape => construct::text::before_hard_break_escape,
+ Name::TextBeforeLabelStartLink => construct::text::before_label_start_link,
Name::TextBeforeData => construct::text::before_data,
Name::ThematicBreakStart => construct::thematic_break::start,
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 83514cb..c6a209b 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -28,6 +28,8 @@ pub enum Container {
BlockQuote,
/// [List item][crate::construct::list_item].
ListItem,
+ /// [GFM: Footnote definition][crate::construct::gfm_footnote_definition].
+ GfmFootnoteDefinition,
}
/// Info used to tokenize a container.
@@ -56,9 +58,53 @@ enum ByteAction {
Insert(u8),
}
+/// Label start kind.
+#[derive(Debug, PartialEq, Eq)]
+pub enum LabelKind {
+ /// Label (image) start.
+ ///
+ /// ```markdown
+ /// > | a ![b] c
+ /// ^^
+ /// ```
+ ///
+ /// Construct: [Label start (image)][crate::construct::label_start_image].
+ Image,
+ /// Label (link) start.
+ ///
+ /// ```markdown
+ /// > | a [b] c
+ /// ^
+ /// ```
+ ///
+ /// Construct: [Label start (link)][crate::construct::label_start_link].
+ Link,
+ /// GFM: Label (footnote) start.
+ ///
+ /// ```markdown
+ /// > | a [^b] c
+ /// ^^
+ /// ```
+ ///
+ /// Construct: [GFM: Label start (footnote)][crate::construct::gfm_label_start_footnote].
+ GfmFootnote,
+ /// GFM: Label (footnote) start, not matching a footnote definition, so
+ /// handled as a label (link) start.
+ ///
+ /// ```markdown
+ /// > | a [^b](c) d
+ /// ^^
+ /// ```
+ ///
+ /// Construct: [Label end][crate::construct::label_end].
+ GfmUndefinedFootnote,
+}
+
/// Label start, looking for an end.
#[derive(Debug)]
pub struct LabelStart {
+ /// Kind of start.
+ pub kind: LabelKind,
/// Indices of where the label starts and ends in `events`.
pub start: (usize, usize),
/// A boolean used internally to figure out if a (link) label start can’t
@@ -71,6 +117,7 @@ pub struct LabelStart {
/// Valid label.
#[derive(Debug)]
pub struct Label {
+ pub kind: LabelKind,
/// Indices of label start.
pub start: (usize, usize),
/// Indices of label end.
@@ -174,8 +221,10 @@ pub struct TokenizeState<'a> {
/// Used when tokenizing [text content][crate::construct::text].
pub labels: Vec<Label>,
- /// List of defined identifiers.
+ /// List of defined definition identifiers.
pub definitions: Vec<String>,
+ /// List of defined GFM footnote definition identifiers.
+ pub gfm_footnote_definitions: Vec<String>,
/// Whether to connect events.
pub connect: bool,
@@ -288,6 +337,7 @@ impl<'a> Tokenizer<'a> {
document_child: None,
document_at_first_paragraph_of_list_item: false,
definitions: vec![],
+ gfm_footnote_definitions: vec![],
end: 0,
label_starts: vec![],
label_starts_loose: vec![],