aboutsummaryrefslogtreecommitdiffstats
path: root/src/compiler.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-24 17:57:10 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-24 17:57:10 +0200
commita3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0 (patch)
tree7b4bf040da23a03f38efe92a252e187a630a14f6 /src/compiler.rs
parente7b3761c8cd6f0f902dd9927e4fbf2589465ed57 (diff)
downloadmarkdown-rs-a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0.tar.gz
markdown-rs-a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0.tar.bz2
markdown-rs-a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0.zip
Add link, images (resource)
This is still some messy code that needs cleaning up, but it adds support for links and images, of the resource kind (`[a](b)`). References (`[a][b]`) are parsed and will soon be supported, but need matching. * Fix bug to pad percent-encoded bytes when normalizing urls * Fix bug with escapes counting as balancing in destination * Add `space_or_tab_one_line_ending`, to parse whitespace including up to one line ending (but not a blank line) * Add `ParserState` to share codes, definitions, etc
Diffstat (limited to 'src/compiler.rs')
-rw-r--r--src/compiler.rs213
1 files changed, 184 insertions, 29 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index cfe749a..11dea29 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1,5 +1,5 @@
//! Turn events into a string of HTML.
-use crate::constant::SAFE_PROTOCOL_HREF;
+use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC};
use crate::construct::character_reference::Kind as CharacterReferenceKind;
use crate::tokenizer::{Code, Event, EventType, TokenType};
use crate::util::{
@@ -17,6 +17,23 @@ pub enum LineEnding {
LineFeed,
}
+/// State of one link or image while it is being compiled; kept on a stack so media can nest.
+#[derive(Debug)]
+struct Media {
+ /// Whether this entry was opened by an image (`true`) or by a link (`false`).
+ image: bool,
+ /// Label text as serialized from the codes when the label text exits.
+ label_id: String,
+ /// Compiled label content, taken from the buffer when the label exits.
+ label: String,
+ // To do: identifier of the reference (`[a][b]`); not tracked yet, so the field stays disabled.
+ // reference_id: String,
+ /// Destination of the resource; `Some("")` once a resource is entered, then the resumed buffer.
+ destination: Option<String>,
+ /// Title of the resource, set only when a title string was compiled.
+ title: Option<String>,
+}
+
impl LineEnding {
/// Turn the line ending into a [str].
fn as_str(&self) -> &str {
@@ -168,7 +185,13 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
} else {
Some(SAFE_PROTOCOL_HREF.to_vec())
};
+ let protocol_src = if options.allow_dangerous_protocol {
+ None
+ } else {
+ Some(SAFE_PROTOCOL_SRC.to_vec())
+ };
let mut line_ending_inferred: Option<LineEnding> = None;
+ let mut media_stack: Vec<Media> = vec![];
// let mut slurp_all_line_endings = false;
while index < events.len() {
@@ -257,7 +280,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
| TokenType::CodeFencedFenceMeta
| TokenType::Definition
| TokenType::HeadingAtxText
- | TokenType::HeadingSetextText => {
+ | TokenType::HeadingSetextText
+ | TokenType::Label
+ | TokenType::ResourceTitleString => {
buffer(buffers);
}
TokenType::CodeIndented => {
@@ -287,6 +312,56 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
ignore_encode = true;
}
}
+ TokenType::Image => {
+ media_stack.push(Media {
+ image: true,
+ label_id: "".to_string(),
+ label: "".to_string(),
+ // reference_id: "".to_string(),
+ destination: None,
+ title: None,
+ });
+ // tags = undefined // Disallow tags.
+ }
+ TokenType::Link => {
+ media_stack.push(Media {
+ image: false,
+ label_id: "".to_string(),
+ label: "".to_string(),
+ // reference_id: "".to_string(),
+ destination: None,
+ title: None,
+ });
+ }
+ TokenType::Resource => {
+ buffer(buffers); // We can have line endings in the resource, ignore them.
+ let media = media_stack.last_mut().unwrap();
+ media.destination = Some("".to_string());
+ }
+ TokenType::ResourceDestinationString => {
+ buffer(buffers);
+ // Ignore encoding the result, as we’ll first percent encode the url and
+ // encode manually after.
+ ignore_encode = true;
+ }
+ TokenType::LabelImage
+ | TokenType::LabelImageMarker
+ | TokenType::LabelLink
+ | TokenType::LabelMarker
+ | TokenType::LabelEnd
+ | TokenType::ResourceMarker
+ | TokenType::ResourceDestination
+ | TokenType::ResourceDestinationLiteral
+ | TokenType::ResourceDestinationLiteralMarker
+ | TokenType::ResourceDestinationRaw
+ | TokenType::ResourceTitle
+ | TokenType::ResourceTitleMarker
+ | TokenType::Reference
+ | TokenType::ReferenceMarker
+ | TokenType::ReferenceString
+ | TokenType::LabelText => {
+ println!("ignore labels for now");
+ }
TokenType::Paragraph => {
buf_tail_mut(buffers).push("<p>".to_string());
}
@@ -324,14 +399,88 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
| TokenType::SpaceOrTab => {
// Ignore.
}
+ TokenType::LabelImage
+ | TokenType::LabelImageMarker
+ | TokenType::LabelLink
+ | TokenType::LabelMarker
+ | TokenType::LabelEnd
+ | TokenType::ResourceMarker
+ | TokenType::ResourceDestination
+ | TokenType::ResourceDestinationLiteral
+ | TokenType::ResourceDestinationLiteralMarker
+ | TokenType::ResourceDestinationRaw
+ | TokenType::ResourceTitle
+ | TokenType::ResourceTitleMarker
+ | TokenType::Reference
+ | TokenType::ReferenceMarker
+ | TokenType::ReferenceString => {
+ println!("ignore labels for now");
+ }
+ TokenType::Label => {
+ let media = media_stack.last_mut().unwrap();
+ media.label = resume(buffers);
+ }
+ TokenType::LabelText => {
+ let media = media_stack.last_mut().unwrap();
+ media.label_id = serialize(codes, &from_exit_event(events, index), false);
+ }
+ TokenType::ResourceDestinationString => {
+ let media = media_stack.last_mut().unwrap();
+ media.destination = Some(resume(buffers));
+ ignore_encode = false;
+ }
+ TokenType::ResourceTitleString => {
+ let media = media_stack.last_mut().unwrap();
+ media.title = Some(resume(buffers));
+ }
+ TokenType::Image | TokenType::Link => {
+ // let mut is_in_image = false;
+ // let mut index = 0;
+ // Skip current.
+ // while index < (media_stack.len() - 1) {
+ // if media_stack[index].image {
+ // is_in_image = true;
+ // break;
+ // }
+ // index += 1;
+ // }
+
+ // tags = is_in_image;
+
+ let media = media_stack.pop().unwrap();
+ println!("media: {:?}", media);
+ let buf = buf_tail_mut(buffers);
+ // To do: get from definition.
+ let destination = media.destination.unwrap();
+ let title = if let Some(title) = media.title {
+ format!(" title=\"{}\"", title)
+ } else {
+ "".to_string()
+ };
+
+ if media.image {
+ buf.push(format!(
+ "<img src=\"{}\" alt=\"{}\"{} />",
+ sanitize_uri(&destination, &protocol_src),
+ media.label,
+ title
+ ));
+ } else {
+ buf.push(format!(
+ "<a href=\"{}\"{}>{}</a>",
+ sanitize_uri(&destination, &protocol_href),
+ title,
+ media.label
+ ));
+ }
+ }
// Just output it.
TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
// last_was_tag = false;
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
TokenType::AutolinkEmail => {
let slice = serialize(codes, &from_exit_event(events, index), false);
@@ -340,7 +489,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
"<a href=\"mailto:{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
- buf.push(encode(&slice));
+ buf.push(encode_opt(&slice, ignore_encode));
buf.push("</a>".to_string());
}
TokenType::AutolinkProtocol => {
@@ -350,7 +499,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
"<a href=\"{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
- buf.push(encode(&slice));
+ buf.push(encode_opt(&slice, ignore_encode));
buf.push("</a>".to_string());
}
TokenType::CharacterReferenceMarker => {
@@ -377,7 +526,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
CharacterReferenceKind::Named => decode_named(ref_string),
};
- buf_tail_mut(buffers).push(encode(&value));
+ buf_tail_mut(buffers).push(encode_opt(&value, ignore_encode));
character_reference_kind = None;
}
TokenType::CodeFenced | TokenType::CodeIndented => {
@@ -432,16 +581,15 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
// tag = true;
}
- TokenType::CodeFencedFenceMeta => {
+ TokenType::CodeFencedFenceMeta | TokenType::Resource => {
resume(buffers);
}
TokenType::CodeFlowChunk => {
code_flow_seen_data = Some(true);
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
TokenType::CodeText => {
let result = resume(buffers);
@@ -492,11 +640,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if let Some(buf) = atx_heading_buffer {
atx_heading_buffer = Some(
buf.to_string()
- + &encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )),
+ + &encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ),
);
}
@@ -512,14 +659,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if let Some(ref buf) = atx_heading_buffer {
if !buf.is_empty() {
- buf_tail_mut(buffers).push(encode(buf));
+ buf_tail_mut(buffers).push(encode_opt(buf, ignore_encode));
atx_heading_buffer = Some("".to_string());
}
} else {
atx_heading_buffer = Some("".to_string());
}
- buf_tail_mut(buffers).push(encode(&result));
+ buf_tail_mut(buffers).push(encode_opt(&result, ignore_encode));
}
TokenType::HeadingSetextText => {
heading_setext_buffer = Some(resume(buffers));
@@ -540,7 +687,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
TokenType::HtmlFlowData | TokenType::HtmlTextData => {
let slice = serialize(codes, &from_exit_event(events, index), false);
// last_was_tag = false;
- buf_tail_mut(buffers).push(if ignore_encode { slice } else { encode(&slice) });
+ buf_tail_mut(buffers).push(encode_opt(&slice, ignore_encode));
}
TokenType::LineEnding => {
// if slurp_all_line_endings {
@@ -549,11 +696,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if slurp_one_line_ending {
slurp_one_line_ending = false;
} else {
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
}
TokenType::Paragraph => {
@@ -605,6 +751,15 @@ fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> {
buffers.last().expect("at least one buffer should exist")
}
+/// Pass `value` through `encode`, or return it unchanged (as an owned string) if `ignore_encode`.
+fn encode_opt(value: &str, ignore_encode: bool) -> String {
+ if ignore_encode {
+ value.to_string()
+ } else {
+ encode(value)
+ }
+}
+
/// Add a line ending.
fn line_ending(buffers: &mut [Vec<String>], default: &LineEnding) {
let tail = buf_tail_mut(buffers);