//! Turn events into a string of HTML.
use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC};
use crate::construct::character_reference::Kind as CharacterReferenceKind;
use crate::tokenizer::{Code, Event, EventType, TokenType};
use crate::util::{
decode_character_reference::{decode_named, decode_numeric},
encode::encode,
sanitize_uri::sanitize_uri,
span::{codes as codes_from_span, from_exit_event, serialize},
};
/// Type of line endings in markdown.
///
/// Each variant stands for one of the three character sequences that can
/// terminate a line: `\r\n`, `\r`, or `\n`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LineEnding {
    /// Both a carriage return (`\r`) and a line feed (`\n`).
    ///
    /// ## Example
    ///
    /// ```markdown
    /// a␍␊
    /// b
    /// ```
    CarriageReturnLineFeed,
    /// Sole carriage return (`\r`).
    ///
    /// ## Example
    ///
    /// ```markdown
    /// a␍
    /// b
    /// ```
    CarriageReturn,
    /// Sole line feed (`\n`).
    ///
    /// ## Example
    ///
    /// ```markdown
    /// a␊
    /// b
    /// ```
    LineFeed,
}
impl LineEnding {
    /// Turn the line ending into a [str].
    ///
    /// Every arm yields a string literal, so the result is `'static` and is
    /// not tied to the borrow of `self`.
    fn as_str(&self) -> &'static str {
        match self {
            LineEnding::CarriageReturnLineFeed => "\r\n",
            LineEnding::CarriageReturn => "\r",
            LineEnding::LineFeed => "\n",
        }
    }

    /// Turn a [Code] into a line ending.
    ///
    /// ## Panics
    ///
    /// Panics if `code` is not `\r\n`, `\r`, or `\n`.
    fn from_code(code: Code) -> LineEnding {
        match code {
            Code::CarriageReturnLineFeed => LineEnding::CarriageReturnLineFeed,
            Code::Char('\r') => LineEnding::CarriageReturn,
            Code::Char('\n') => LineEnding::LineFeed,
            // Any other code is a caller bug: only line endings may be passed.
            _ => unreachable!("invalid code"),
        }
    }
}
/// Representation of a link or image, resource or reference.
#[derive(Debug)]
struct Media {
/// Whether this represents an image (`true`) or a link (`false`).
image: bool,
/// The text between the brackets (`x` in `![x]()` and `[x]()`), as an
/// identifier, meaning that the original source characters are used
/// instead of interpreting them.
label_id: Option Hi, <i>venus</i>! Hi, venus! a a > a a > a ".to_string());
}
#[allow(unreachable_patterns)]
_ => {
unreachable!("unhandled `enter` of TokenType {:?}", token_type)
}
},
EventType::Exit => match token_type {
TokenType::Autolink
| TokenType::AutolinkMarker
| TokenType::BlankLineEnding
| TokenType::CharacterEscape
| TokenType::CharacterEscapeMarker
| TokenType::CharacterReference
| TokenType::CharacterReferenceMarkerSemi
| TokenType::CodeFencedFenceSequence
| TokenType::CodeTextSequence
| TokenType::DefinitionLabel
| TokenType::DefinitionLabelMarker
| TokenType::DefinitionLabelString
| TokenType::DefinitionMarker
| TokenType::DefinitionDestination
| TokenType::DefinitionDestinationLiteral
| TokenType::DefinitionDestinationLiteralMarker
| TokenType::DefinitionDestinationRaw
| TokenType::DefinitionDestinationString
| TokenType::DefinitionTitle
| TokenType::DefinitionTitleMarker
| TokenType::DefinitionTitleString
| TokenType::HardBreakEscapeMarker
| TokenType::HardBreakTrailingSpace
| TokenType::HeadingSetext
| TokenType::ThematicBreakSequence
| TokenType::SpaceOrTab => {
// Ignore.
}
TokenType::LabelImage
| TokenType::LabelImageMarker
| TokenType::LabelLink
| TokenType::LabelMarker
| TokenType::LabelEnd
| TokenType::ResourceMarker
| TokenType::ResourceDestination
| TokenType::ResourceDestinationLiteral
| TokenType::ResourceDestinationLiteralMarker
| TokenType::ResourceDestinationRaw
| TokenType::ResourceTitle
| TokenType::ResourceTitleMarker
| TokenType::Reference
| TokenType::ReferenceMarker
| TokenType::ReferenceString => {
println!("ignore labels for now");
}
TokenType::Label => {
let media = media_stack.last_mut().unwrap();
media.label = Some(resume(buffers));
}
TokenType::LabelText => {
let media = media_stack.last_mut().unwrap();
media.label_id = Some(serialize(codes, &from_exit_event(events, index), false));
}
TokenType::ResourceDestinationString => {
let media = media_stack.last_mut().unwrap();
media.destination = Some(resume(buffers));
ignore_encode = false;
}
TokenType::ResourceTitleString => {
let media = media_stack.last_mut().unwrap();
media.title = Some(resume(buffers));
}
TokenType::Image | TokenType::Link => {
// let mut is_in_image = false;
// let mut index = 0;
// Skip current.
// while index < (media_stack.len() - 1) {
// if media_stack[index].image {
// is_in_image = true;
// break;
// }
// index += 1;
// }
// tags = is_in_image;
let media = media_stack.pop().unwrap();
println!("media: {:?}", media);
let label = media.label.unwrap();
let buf = buf_tail_mut(buffers);
// To do: get from definition.
let destination = media.destination.unwrap();
let title = if let Some(title) = media.title {
format!(" title=\"{}\"", title)
} else {
"".to_string()
};
if media.image {
buf.push(format!(
"",
sanitize_uri(&destination, &protocol_src),
label,
title
));
} else {
buf.push(format!(
"{}",
sanitize_uri(&destination, &protocol_href),
title,
label
));
}
}
// Just output it.
TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
// last_was_tag = false;
buf_tail_mut(buffers).push(encode_opt(
&serialize(codes, &from_exit_event(events, index), false),
ignore_encode,
));
}
TokenType::AutolinkEmail => {
let slice = serialize(codes, &from_exit_event(events, index), false);
let buf = buf_tail_mut(buffers);
buf.push(format!(
"",
sanitize_uri(slice.as_str(), &protocol_href)
));
buf.push(encode_opt(&slice, ignore_encode));
buf.push("".to_string());
}
TokenType::AutolinkProtocol => {
let slice = serialize(codes, &from_exit_event(events, index), false);
let buf = buf_tail_mut(buffers);
buf.push(format!(
"",
sanitize_uri(slice.as_str(), &protocol_href)
));
buf.push(encode_opt(&slice, ignore_encode));
buf.push("".to_string());
}
TokenType::CharacterReferenceMarker => {
character_reference_kind = Some(CharacterReferenceKind::Named);
}
TokenType::CharacterReferenceMarkerNumeric => {
character_reference_kind = Some(CharacterReferenceKind::Decimal);
}
TokenType::CharacterReferenceMarkerHexadecimal => {
character_reference_kind = Some(CharacterReferenceKind::Hexadecimal);
}
TokenType::CharacterReferenceValue => {
let kind = character_reference_kind
.expect("expected `character_reference_kind` to be set");
let reference = serialize(codes, &from_exit_event(events, index), false);
let ref_string = reference.as_str();
let value = match kind {
CharacterReferenceKind::Decimal => {
decode_numeric(ref_string, 10).to_string()
}
CharacterReferenceKind::Hexadecimal => {
decode_numeric(ref_string, 16).to_string()
}
CharacterReferenceKind::Named => decode_named(ref_string),
};
buf_tail_mut(buffers).push(encode_opt(&value, ignore_encode));
character_reference_kind = None;
}
TokenType::CodeFenced | TokenType::CodeIndented => {
let seen_data =
code_flow_seen_data.expect("`code_flow_seen_data` must be defined");
// To do: containers.
// One special case is if we are inside a container, and the fenced code was
// not closed (meaning it runs to the end).
// In that case, the following line ending, is considered *outside* the
// fenced code and block quote by micromark, but CM wants to treat that
// ending as part of the code.
// if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag {
// line_ending();
// }
// But in most cases, it’s simpler: when we’ve seen some data, emit an extra
// line ending when needed.
if seen_data {
line_ending_if_needed(buffers, &line_ending_default);
}
buf_tail_mut(buffers).push("\n
`.
///
/// To create that line ending, the document is checked for the first line
/// ending that is used.
/// If there is no line ending, `default_line_ending` is used.
/// If that isn’t configured, `\n` is used.
///
/// ## Examples
///
/// ```rust
/// use micromark::{micromark, micromark_with_options, Options, LineEnding};
///
/// // micromark is safe by default:
/// assert_eq!(
/// micromark("> a"),
/// // To do: block quote
/// // "\n
"
/// "\r\n
"
/// "".to_string());
}
TokenType::CodeFenced => {
code_flow_seen_data = Some(false);
line_ending_if_needed(buffers, &line_ending_default);
// Note that no `>` is used, which is added later.
buf_tail_mut(buffers).push("
".to_string());
}
TokenType::CodeTextLineEnding => {
buf_tail_mut(buffers).push(" ".to_string());
}
TokenType::Definition => {
resume(buffers);
slurp_one_line_ending = true;
}
TokenType::HardBreakEscape | TokenType::HardBreakTrailing => {
buf_tail_mut(buffers).push("
".to_string());
if let Some(count) = code_fenced_fences_count {
if count < 2 {
line_ending_if_needed(buffers, &line_ending_default);
}
}
code_flow_seen_data = None;
code_fenced_fences_count = None;
slurp_one_line_ending = false;
}
TokenType::CodeFencedFence => {
let count = if let Some(count) = code_fenced_fences_count {
count
} else {
0
};
if count == 0 {
buf_tail_mut(buffers).push(">".to_string());
// tag = true;
slurp_one_line_ending = true;
}
code_fenced_fences_count = Some(count + 1);
}
TokenType::CodeFencedFenceInfo => {
let value = resume(buffers);
buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
// tag = true;
}
TokenType::CodeFencedFenceMeta | TokenType::Resource => {
resume(buffers);
}
TokenType::CodeFlowChunk => {
code_flow_seen_data = Some(true);
buf_tail_mut(buffers).push(encode_opt(
&serialize(codes, &from_exit_event(events, index), false),
ignore_encode,
));
}
TokenType::CodeText => {
let result = resume(buffers);
let mut chars = result.chars();
let mut trim = false;
if Some(' ') == chars.next() && Some(' ') == chars.next_back() {
let mut next = chars.next();
while next != None && !trim {
if Some(' ') != next {
trim = true;
}
next = chars.next();
}
}
buf_tail_mut(buffers).push(if trim {
result[1..(result.len() - 1)].to_string()
} else {
result
});
buf_tail_mut(buffers).push(" {
buf_tail_mut(buffers).push("
".to_string());
buffer(buffers);
}
TokenType::HtmlFlow => {
line_ending_if_needed(buffers, &line_ending_default);
if options.allow_dangerous_html {
ignore_encode = true;
}
}
TokenType::HtmlText => {
if options.allow_dangerous_html {
ignore_encode = true;
}
}
TokenType::Image => {
media_stack.push(Media {
image: true,
label_id: None,
label: None,
// reference_id: "".to_string(),
destination: None,
title: None,
});
// tags = undefined // Disallow tags.
}
TokenType::Link => {
media_stack.push(Media {
image: false,
label_id: None,
label: None,
// reference_id: "".to_string(),
destination: None,
title: None,
});
}
TokenType::Resource => {
buffer(buffers); // We can have line endings in the resource, ignore them.
let media = media_stack.last_mut().unwrap();
media.destination = Some("".to_string());
}
TokenType::ResourceDestinationString => {
buffer(buffers);
// Ignore encoding the result, as we’ll first percent encode the url and
// encode manually after.
ignore_encode = true;
}
TokenType::LabelImage
| TokenType::LabelImageMarker
| TokenType::LabelLink
| TokenType::LabelMarker
| TokenType::LabelEnd
| TokenType::ResourceMarker
| TokenType::ResourceDestination
| TokenType::ResourceDestinationLiteral
| TokenType::ResourceDestinationLiteralMarker
| TokenType::ResourceDestinationRaw
| TokenType::ResourceTitle
| TokenType::ResourceTitleMarker
| TokenType::Reference
| TokenType::ReferenceMarker
| TokenType::ReferenceString
| TokenType::LabelText => {
println!("ignore labels for now");
}
TokenType::Paragraph => {
buf_tail_mut(buffers).push("
".to_string());
}
TokenType::HeadingAtx => {
let rank = atx_opening_sequence_size
.expect("`atx_opening_sequence_size` must be set in headings");
buf_tail_mut(buffers).push(format!("", rank));
atx_opening_sequence_size = None;
}
TokenType::HeadingAtxSequence => {
// First fence we see.
if None == atx_opening_sequence_size {
let rank = serialize(codes, &from_exit_event(events, index), false).len();
atx_opening_sequence_size = Some(rank);
buf_tail_mut(buffers).push(format!("