//! Turn events into a string of HTML.
use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC};
use crate::token::Token;
use crate::tokenizer::{Event, EventType};
use crate::util::normalize_identifier::normalize_identifier;
use crate::util::{
decode_character_reference::{decode_named, decode_numeric},
encode::encode,
sanitize_uri::sanitize_uri,
skip,
slice::{Position, Slice},
};
use crate::{LineEnding, Options};
use std::str;
/// Representation of a link or image, resource or reference.
/// Reused for temporary definitions as well, in the first pass.
#[derive(Debug)]
struct Media {
/// Whether this represents an image (`true`) or a link or definition
/// (`false`).
image: bool,
/// The text between the brackets (`x` in `![x]()` and `[x]()`), as an
/// identifier, meaning that the original source characters are used
/// instead of interpreting them.
/// Not interpreted.
label_id: Option<(usize, usize)>,
/// The text between the brackets (`x` in `![x]()` and `[x]()`), as
/// interpreted content.
/// When this is a link, it can contain further text content and thus HTML
/// tags.
/// Otherwise, when an image, text content is also allowed, but resulting
/// tags are ignored.
label: Option ");
}
}
/// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource].
fn on_enter_resource(context: &mut CompileContext) {
context.buffer(); // We can have line endings in the resource, ignore them.
context.media_stack.last_mut().unwrap().destination = Some("".to_string());
}
/// Handle [`Enter`][EventType::Enter]:[`ResourceDestinationString`][Token::ResourceDestinationString].
fn on_enter_resource_destination_string(context: &mut CompileContext) {
context.buffer();
// Ignore encoding the result, as we’ll first percent encode the url and
// encode manually after.
context.encode_html = false;
}
/// Handle [`Enter`][EventType::Enter]:[`Strong`][Token::Strong].
fn on_enter_strong(context: &mut CompileContext) {
if !context.in_image_alt {
context.push("");
}
}
/// Handle [`Exit`][EventType::Exit]:[`AutolinkEmail`][Token::AutolinkEmail].
fn on_exit_autolink_email(context: &mut CompileContext) {
let slice = Slice::from_position(
context.bytes,
&Position::from_exit_event(context.events, context.index),
);
let value = slice.as_str();
if !context.in_image_alt {
context.push("");
}
context.push(&encode(value, context.encode_html));
if !context.in_image_alt {
context.push("");
}
}
/// Handle [`Exit`][EventType::Exit]:[`AutolinkProtocol`][Token::AutolinkProtocol].
fn on_exit_autolink_protocol(context: &mut CompileContext) {
let slice = Slice::from_position(
context.bytes,
&Position::from_exit_event(context.events, context.index),
);
let value = slice.as_str();
if !context.in_image_alt {
context.push("");
}
context.push(&encode(value, context.encode_html));
if !context.in_image_alt {
context.push("");
}
}
/// Handle [`Exit`][EventType::Exit]:{[`HardBreakEscape`][Token::HardBreakEscape],[`HardBreakTrailing`][Token::HardBreakTrailing]}.
fn on_exit_break(context: &mut CompileContext) {
if !context.in_image_alt {
context.push("");
}
/// Handle [`Enter`][EventType::Enter]:[`CodeIndented`][Token::CodeIndented].
fn on_enter_code_indented(context: &mut CompileContext) {
context.code_flow_seen_data = Some(false);
context.line_ending_if_needed();
context.push("
");
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker].
fn on_exit_character_reference_marker(context: &mut CompileContext) {
context.character_reference_marker = Some(b'&');
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarkerHexadecimal`][Token::CharacterReferenceMarkerHexadecimal].
fn on_exit_character_reference_marker_hexadecimal(context: &mut CompileContext) {
context.character_reference_marker = Some(b'x');
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarkerNumeric`][Token::CharacterReferenceMarkerNumeric].
fn on_exit_character_reference_marker_numeric(context: &mut CompileContext) {
context.character_reference_marker = Some(b'#');
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceValue`][Token::CharacterReferenceValue].
fn on_exit_character_reference_value(context: &mut CompileContext) {
let marker = context
.character_reference_marker
.take()
.expect("expected `character_reference_kind` to be set");
let slice = Slice::from_position(
context.bytes,
&Position::from_exit_event(context.events, context.index),
);
let value = slice.as_str();
let value = match marker {
b'#' => decode_numeric(value, 10),
b'x' => decode_numeric(value, 16),
b'&' => decode_named(value),
_ => panic!("impossible"),
};
context.push(&encode(&value, context.encode_html));
}
/// Handle [`Exit`][EventType::Exit]:[`CodeFlowChunk`][Token::CodeFlowChunk].
fn on_exit_code_flow_chunk(context: &mut CompileContext) {
context.code_flow_seen_data = Some(true);
context.push(&encode(
&Slice::from_position(
context.bytes,
&Position::from_exit_event(context.events, context.index),
)
// Must serialize to get virtual spaces.
.serialize(),
context.encode_html,
));
}
/// Handle [`Exit`][EventType::Exit]:[`CodeFencedFence`][Token::CodeFencedFence].
fn on_exit_code_fenced_fence(context: &mut CompileContext) {
let count = if let Some(count) = context.code_fenced_fences_count {
count
} else {
0
};
if count == 0 {
context.push(">");
context.slurp_one_line_ending = true;
}
context.code_fenced_fences_count = Some(count + 1);
}
/// Handle [`Exit`][EventType::Exit]:[`CodeFencedFenceInfo`][Token::CodeFencedFenceInfo].
fn on_exit_code_fenced_fence_info(context: &mut CompileContext) {
let value = context.resume();
context.push(" class=\"language-");
context.push(&value);
context.push("\"");
}
/// Handle [`Exit`][EventType::Exit]:{[`CodeFenced`][Token::CodeFenced],[`CodeIndented`][Token::CodeIndented]}.
fn on_exit_code_flow(context: &mut CompileContext) {
let seen_data = context
.code_flow_seen_data
.take()
.expect("`code_flow_seen_data` must be defined");
// One special case is if we are inside a container, and the fenced code was
// not closed (meaning it runs to the end).
// In that case, the following line ending, is considered *outside* the
// fenced code and block quote by micromark, but CM wants to treat that
// ending as part of the code.
if let Some(count) = context.code_fenced_fences_count {
// No closing fence.
if count == 1
// In a container.
&& !context.tight_stack.is_empty()
// Empty (as the closing is right at the opening fence)
&& context.events[context.index - 1].token_type != Token::CodeFencedFence
{
context.line_ending();
}
}
// But in most cases, it’s simpler: when we’ve seen some data, emit an extra
// line ending when needed.
if seen_data {
context.line_ending_if_needed();
}
context.push("");
if let Some(count) = context.code_fenced_fences_count.take() {
if count < 2 {
context.line_ending_if_needed();
}
}
context.slurp_one_line_ending = false;
}
/// Handle [`Exit`][EventType::Exit]:[`CodeText`][Token::CodeText].
fn on_exit_code_text(context: &mut CompileContext) {
let result = context.resume();
let mut bytes = result.as_bytes();
let mut trim = false;
let mut index = 0;
let mut end = bytes.len();
if end > 2 && bytes[index] == b' ' && bytes[end - 1] == b' ' {
index += 1;
end -= 1;
while index < end && !trim {
if bytes[index] != b' ' {
trim = true;
break;
}
index += 1;
}
}
if trim {
bytes = &bytes[1..end];
}
context.code_text_inside = false;
context.push(str::from_utf8(bytes).unwrap());
if !context.in_image_alt {
context.push("");
}
}
/// Handle [`Exit`][EventType::Exit]:*.
///
/// Resumes, and ignores what was resumed.
fn on_exit_drop(context: &mut CompileContext) {
context.resume();
}
/// Handle [`Exit`][EventType::Exit]:{[`CodeTextData`][Token::CodeTextData],[`Data`][Token::Data],[`CharacterEscapeValue`][Token::CharacterEscapeValue]}.
fn on_exit_data(context: &mut CompileContext) {
context.push(&encode(
Slice::from_position(
context.bytes,
&Position::from_exit_event(context.events, context.index),
)
.as_str(),
context.encode_html,
));
}
/// Handle [`Exit`][EventType::Exit]:[`Definition`][Token::Definition].
fn on_exit_definition(context: &mut CompileContext) {
context.resume();
let media = context.media_stack.pop().unwrap();
let indices = media.reference_id.unwrap();
let id =
normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str());
context.definitions.push((
id,
Definition {
destination: media.destination,
title: media.title,
},
));
}
/// Handle [`Exit`][EventType::Exit]:[`DefinitionDestinationString`][Token::DefinitionDestinationString].
fn on_exit_definition_destination_string(context: &mut CompileContext) {
let buf = context.resume();
context.media_stack.last_mut().unwrap().destination = Some(buf);
context.encode_html = true;
}
/// Handle [`Exit`][EventType::Exit]:[`DefinitionLabelString`][Token::DefinitionLabelString].
fn on_exit_definition_label_string(context: &mut CompileContext) {
// Discard label, use the source content instead.
context.resume();
context.media_stack.last_mut().unwrap().reference_id =
Some(Position::from_exit_event(context.events, context.index).to_indices());
}
/// Handle [`Exit`][EventType::Exit]:[`DefinitionTitleString`][Token::DefinitionTitleString].
fn on_exit_definition_title_string(context: &mut CompileContext) {
let buf = context.resume();
context.media_stack.last_mut().unwrap().title = Some(buf);
}
/// Handle [`Exit`][EventType::Exit]:[`Strong`][Token::Emphasis].
fn on_exit_emphasis(context: &mut CompileContext) {
if !context.in_image_alt {
context.push("");
}
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtx`][Token::HeadingAtx].
fn on_exit_heading_atx(context: &mut CompileContext) {
let rank = context
.atx_opening_sequence_size
.take()
.expect("`atx_opening_sequence_size` must be set in headings");
context.push("");
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtxSequence`][Token::HeadingAtxSequence].
fn on_exit_heading_atx_sequence(context: &mut CompileContext) {
// First fence we see.
if context.atx_opening_sequence_size.is_none() {
let rank = Slice::from_position(
context.bytes,
&Position::from_exit_event(context.events, context.index),
)
.len();
context.line_ending_if_needed();
context.atx_opening_sequence_size = Some(rank);
context.push("");
}
/// Handle [`Enter`][EventType::Enter]:[`CodeFenced`][Token::CodeFenced].
fn on_enter_code_fenced(context: &mut CompileContext) {
context.code_flow_seen_data = Some(false);
context.line_ending_if_needed();
// Note that no `>` is used, which is added later.
context.push("
");
}
context.buffer();
}
/// Handle [`Enter`][EventType::Enter]:[`Definition`][Token::Definition].
fn on_enter_definition(context: &mut CompileContext) {
context.buffer();
context.media_stack.push(Media {
image: false,
label: None,
label_id: None,
reference_id: None,
destination: None,
title: None,
});
}
/// Handle [`Enter`][EventType::Enter]:[`DefinitionDestinationString`][Token::DefinitionDestinationString].
fn on_enter_definition_destination_string(context: &mut CompileContext) {
context.buffer();
context.encode_html = false;
}
/// Handle [`Enter`][EventType::Enter]:[`Emphasis`][Token::Emphasis].
fn on_enter_emphasis(context: &mut CompileContext) {
if !context.in_image_alt {
context.push("");
}
}
/// Handle [`Enter`][EventType::Enter]:[`HtmlFlow`][Token::HtmlFlow].
fn on_enter_html_flow(context: &mut CompileContext) {
context.line_ending_if_needed();
if context.allow_dangerous_html {
context.encode_html = false;
}
}
/// Handle [`Enter`][EventType::Enter]:[`HtmlText`][Token::HtmlText].
fn on_enter_html_text(context: &mut CompileContext) {
if context.allow_dangerous_html {
context.encode_html = false;
}
}
/// Handle [`Enter`][EventType::Enter]:[`Image`][Token::Image].
fn on_enter_image(context: &mut CompileContext) {
context.media_stack.push(Media {
image: true,
label_id: None,
label: None,
reference_id: None,
destination: None,
title: None,
});
context.in_image_alt = true; // Disallow tags.
}
/// Handle [`Enter`][EventType::Enter]:[`Link`][Token::Link].
fn on_enter_link(context: &mut CompileContext) {
context.media_stack.push(Media {
image: false,
label_id: None,
label: None,
reference_id: None,
destination: None,
title: None,
});
}
/// Handle [`Enter`][EventType::Enter]:{[`ListOrdered`][Token::ListOrdered],[`ListUnordered`][Token::ListUnordered]}.
fn on_enter_list(context: &mut CompileContext) {
let events = &context.events;
let mut index = context.index;
let mut balance = 0;
let mut loose = false;
let token_type = &events[index].token_type;
while index < events.len() {
let event = &events[index];
if event.event_type == EventType::Enter {
balance += 1;
} else {
balance -= 1;
// Blank line directly in list or directly in list item,
// but not a blank line after an empty list item.
if balance < 3 && event.token_type == Token::BlankLineEnding {
let mut at_marker = false;
if balance == 2 {
let mut before = index - 2;
if events[before].token_type == Token::SpaceOrTab {
before -= 2;
}
if events[before].token_type == Token::ListItemPrefix {
at_marker = true;
}
}
let mut at_empty_list_item = false;
let mut at_empty_block_quote = false;
if balance == 1 {
let mut before = index - 2;
if events[before].token_type == Token::SpaceOrTab {
before -= 2;
}
if events[before].token_type == Token::ListItem
&& events[before - 1].token_type == Token::ListItemPrefix
{
at_empty_list_item = true;
}
if events[before].token_type == Token::ListItem
&& events[before - 1].token_type == Token::BlockQuote
&& events[before - 2].token_type == Token::BlockQuotePrefix
{
at_empty_block_quote = true;
}
}
if !at_marker && !at_empty_list_item && !at_empty_block_quote {
loose = true;
break;
}
}
// Done.
if balance == 0 && event.token_type == *token_type {
break;
}
}
index += 1;
}
context.tight_stack.push(!loose);
context.line_ending_if_needed();
// Note: no `>`.
context.push(if *token_type == Token::ListOrdered {
"
");
}
context.line_ending_if_needed();
context.push("
");
}
}
/// Handle [`Exit`][EventType::Exit]:[`BlankLineEnding`][Token::BlankLineEnding].
fn on_exit_blank_line_ending(context: &mut CompileContext) {
if context.index == context.events.len() - 1 {
context.line_ending_if_needed();
}
}
/// Handle [`Exit`][EventType::Exit]:[`BlockQuote`][Token::BlockQuote].
fn on_exit_block_quote(context: &mut CompileContext) {
context.tight_stack.pop();
context.line_ending_if_needed();
context.slurp_one_line_ending = false;
context.push("