-rw-r--r--  readme.md                              33
-rw-r--r--  src/compiler.rs                       213
-rw-r--r--  src/constant.rs                         7
-rw-r--r--  src/construct/definition.rs           136
-rw-r--r--  src/construct/label_end.rs            712
-rw-r--r--  src/construct/label_start_image.rs     47
-rw-r--r--  src/construct/label_start_link.rs      30
-rw-r--r--  src/construct/mod.rs                   11
-rw-r--r--  src/construct/partial_destination.rs    3
-rw-r--r--  src/construct/partial_space_or_tab.rs  39
-rw-r--r--  src/construct/partial_title.rs         14
-rw-r--r--  src/content/flow.rs                    20
-rw-r--r--  src/content/text.rs                    17
-rw-r--r--  src/parser.rs                          20
-rw-r--r--  src/subtokenize.rs                     38
-rw-r--r--  src/tokenizer.rs                      111
-rw-r--r--  src/util/sanitize_uri.rs                2
-rw-r--r--  tests/character_escape.rs              11
-rw-r--r--  tests/character_reference.rs           24
-rw-r--r--  tests/image.rs                        229
-rw-r--r--  tests/link_resource.rs                464
-rw-r--r--  tests/misc_dangerous_protocol.rs      324
-rw-r--r--  tests/misc_tabs.rs                     66
-rw-r--r--  tests/misc_url.rs                     107
24 files changed, 2216 insertions, 462 deletions
diff --git a/readme.md b/readme.md
index e5bc638..6dd8cc5 100644
--- a/readme.md
+++ b/readme.md
@@ -82,9 +82,9 @@ cargo doc --document-private-items
- [x] heading (setext)
- [x] html (flow)
- [x] html (text)
-- [ ] (3) label end
-- [ ] (3) label start (image)
-- [ ] (3) label start (link)
+- [x] label end
+- [x] label start (image)
+- [x] label start (link)
- [ ] (8) list
- [x] paragraph
- [x] thematic break
@@ -113,9 +113,9 @@ cargo doc --document-private-items
- [x] hard break (escape)
- [x] hard break (trailing)
- [x] html (text)
- - [ ] label end
- - [ ] label start (image)
- - [ ] label start (link)
+ - [x] label end
+ - [x] label start (image)
+ - [x] label start (link)
- [x] string
- [x] character escape
- [x] character reference
@@ -124,10 +124,28 @@ cargo doc --document-private-items
#### Docs
+- [ ] (1) Media in compiler (`Media`, `encode_opt`)
+- [ ] (1) `LINK_RESOURCE_DESTINATION_BALANCE_MAX` in constants
+- [ ] (1) `label_start_image`, `label_start_link`
+- [ ] (1) `label_end`
+- [ ] (1) `space_or_tab_one_line_ending`
+- [ ] (1) `ParseState`
+- [ ] (1) Image, Link, and other media token types; `LabelStart`, `Media`
+- [ ] (1) Resolvers, push, feed, etc.
- [ ] (1) Go through all bnf
- [ ] (1) Go through all docs
- [ ] (1) Add overview docs on how everything works
+#### Refactor
+
+- [ ] (1) Move map handling from `resolve_media`, reuse in `subtokenize`
+- [ ] (1) Clean shifting, assertions in the above helper
+- [ ] (1) Clean `space_or_tab_one_line_ending`
+- [ ] (1) Use `link_to` (and `space_or_tab_one_line_ending`) in more places?
+ It’s probably better
+- [ ] (1) Force chunks in `link_to`, disallowing `LineEnding` and such
+- [ ] (1) Clean feeding, resolving
+
#### Parse
- [ ] (1) Parse initial and final space_or_tab of paragraphs (in text)\
@@ -136,8 +154,7 @@ cargo doc --document-private-items
`misc_tabs`, `thematic_break`)
- [ ] (3) Interrupting (html flow complete)
- [ ] (5) labels\
- test (`character_escape`, `character_reference`, `definition`,
- `misc_dangerous_protocol`, `misc_tabs`, `misc_url`, `thematic_break`)\
+ test (`character_escape`, `character_reference`, `definition`)\
link link reference (definition)\
link label end (destination, label, title)\
link label start (label)
diff --git a/src/compiler.rs b/src/compiler.rs
index cfe749a..11dea29 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1,5 +1,5 @@
//! Turn events into a string of HTML.
-use crate::constant::SAFE_PROTOCOL_HREF;
+use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC};
use crate::construct::character_reference::Kind as CharacterReferenceKind;
use crate::tokenizer::{Code, Event, EventType, TokenType};
use crate::util::{
@@ -17,6 +17,23 @@ pub enum LineEnding {
LineFeed,
}
+/// Representation of a link or an image, while it’s being compiled.
+#[derive(Debug)]
+struct Media {
+ /// Whether this is an image (`![]`) instead of a link (`[]()`).
+ image: bool,
+ /// Identifier of the label.
+ label_id: String,
+ /// Compiled HTML of the label.
+ label: String,
+ /// To do.
+ // reference_id: String,
+ /// Destination (URL), if any.
+ destination: Option<String>,
+ /// Title, if any.
+ title: Option<String>,
+}
+
impl LineEnding {
/// Turn the line ending into a [str].
fn as_str(&self) -> &str {
@@ -168,7 +185,13 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
} else {
Some(SAFE_PROTOCOL_HREF.to_vec())
};
+ let protocol_src = if options.allow_dangerous_protocol {
+ None
+ } else {
+ Some(SAFE_PROTOCOL_SRC.to_vec())
+ };
let mut line_ending_inferred: Option<LineEnding> = None;
+ let mut media_stack: Vec<Media> = vec![];
// let mut slurp_all_line_endings = false;
while index < events.len() {
@@ -257,7 +280,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
| TokenType::CodeFencedFenceMeta
| TokenType::Definition
| TokenType::HeadingAtxText
- | TokenType::HeadingSetextText => {
+ | TokenType::HeadingSetextText
+ | TokenType::Label
+ | TokenType::ResourceTitleString => {
buffer(buffers);
}
TokenType::CodeIndented => {
@@ -287,6 +312,56 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
ignore_encode = true;
}
}
+ TokenType::Image => {
+ media_stack.push(Media {
+ image: true,
+ label_id: "".to_string(),
+ label: "".to_string(),
+ // reference_id: "".to_string(),
+ destination: None,
+ title: None,
+ });
+ // tags = undefined // Disallow tags.
+ }
+ TokenType::Link => {
+ media_stack.push(Media {
+ image: false,
+ label_id: "".to_string(),
+ label: "".to_string(),
+ // reference_id: "".to_string(),
+ destination: None,
+ title: None,
+ });
+ }
+ TokenType::Resource => {
+ buffer(buffers); // We can have line endings in the resource, ignore them.
+ let media = media_stack.last_mut().unwrap();
+ media.destination = Some("".to_string());
+ }
+ TokenType::ResourceDestinationString => {
+ buffer(buffers);
+ // Ignore encoding the result, as we’ll first percent encode the url and
+ // encode manually after.
+ ignore_encode = true;
+ }
+ TokenType::LabelImage
+ | TokenType::LabelImageMarker
+ | TokenType::LabelLink
+ | TokenType::LabelMarker
+ | TokenType::LabelEnd
+ | TokenType::ResourceMarker
+ | TokenType::ResourceDestination
+ | TokenType::ResourceDestinationLiteral
+ | TokenType::ResourceDestinationLiteralMarker
+ | TokenType::ResourceDestinationRaw
+ | TokenType::ResourceTitle
+ | TokenType::ResourceTitleMarker
+ | TokenType::Reference
+ | TokenType::ReferenceMarker
+ | TokenType::ReferenceString
+ | TokenType::LabelText => {
+ println!("ignore labels for now");
+ }
TokenType::Paragraph => {
buf_tail_mut(buffers).push("<p>".to_string());
}
@@ -324,14 +399,88 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
| TokenType::SpaceOrTab => {
// Ignore.
}
+ TokenType::LabelImage
+ | TokenType::LabelImageMarker
+ | TokenType::LabelLink
+ | TokenType::LabelMarker
+ | TokenType::LabelEnd
+ | TokenType::ResourceMarker
+ | TokenType::ResourceDestination
+ | TokenType::ResourceDestinationLiteral
+ | TokenType::ResourceDestinationLiteralMarker
+ | TokenType::ResourceDestinationRaw
+ | TokenType::ResourceTitle
+ | TokenType::ResourceTitleMarker
+ | TokenType::Reference
+ | TokenType::ReferenceMarker
+ | TokenType::ReferenceString => {
+ println!("ignore labels for now");
+ }
+ TokenType::Label => {
+ let media = media_stack.last_mut().unwrap();
+ media.label = resume(buffers);
+ }
+ TokenType::LabelText => {
+ let media = media_stack.last_mut().unwrap();
+ media.label_id = serialize(codes, &from_exit_event(events, index), false);
+ }
+ TokenType::ResourceDestinationString => {
+ let media = media_stack.last_mut().unwrap();
+ media.destination = Some(resume(buffers));
+ ignore_encode = false;
+ }
+ TokenType::ResourceTitleString => {
+ let media = media_stack.last_mut().unwrap();
+ media.title = Some(resume(buffers));
+ }
+ TokenType::Image | TokenType::Link => {
+ // let mut is_in_image = false;
+ // let mut index = 0;
+ // Skip current.
+ // while index < (media_stack.len() - 1) {
+ // if media_stack[index].image {
+ // is_in_image = true;
+ // break;
+ // }
+ // index += 1;
+ // }
+
+ // tags = is_in_image;
+
+ let media = media_stack.pop().unwrap();
+ println!("media: {:?}", media);
+ let buf = buf_tail_mut(buffers);
+ // To do: get from definition.
+ let destination = media.destination.unwrap();
+ let title = if let Some(title) = media.title {
+ format!(" title=\"{}\"", title)
+ } else {
+ "".to_string()
+ };
+
+ if media.image {
+ buf.push(format!(
+ "<img src=\"{}\" alt=\"{}\"{} />",
+ sanitize_uri(&destination, &protocol_src),
+ media.label,
+ title
+ ));
+ } else {
+ buf.push(format!(
+ "<a href=\"{}\"{}>{}</a>",
+ sanitize_uri(&destination, &protocol_href),
+ title,
+ media.label
+ ));
+ }
+ }
// Just output it.
TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
// last_was_tag = false;
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
TokenType::AutolinkEmail => {
let slice = serialize(codes, &from_exit_event(events, index), false);
@@ -340,7 +489,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
"<a href=\"mailto:{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
- buf.push(encode(&slice));
+ buf.push(encode_opt(&slice, ignore_encode));
buf.push("</a>".to_string());
}
TokenType::AutolinkProtocol => {
@@ -350,7 +499,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
"<a href=\"{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
- buf.push(encode(&slice));
+ buf.push(encode_opt(&slice, ignore_encode));
buf.push("</a>".to_string());
}
TokenType::CharacterReferenceMarker => {
@@ -377,7 +526,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
CharacterReferenceKind::Named => decode_named(ref_string),
};
- buf_tail_mut(buffers).push(encode(&value));
+ buf_tail_mut(buffers).push(encode_opt(&value, ignore_encode));
character_reference_kind = None;
}
TokenType::CodeFenced | TokenType::CodeIndented => {
@@ -432,16 +581,15 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
// tag = true;
}
- TokenType::CodeFencedFenceMeta => {
+ TokenType::CodeFencedFenceMeta | TokenType::Resource => {
resume(buffers);
}
TokenType::CodeFlowChunk => {
code_flow_seen_data = Some(true);
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
TokenType::CodeText => {
let result = resume(buffers);
@@ -492,11 +640,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if let Some(buf) = atx_heading_buffer {
atx_heading_buffer = Some(
buf.to_string()
- + &encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )),
+ + &encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ),
);
}
@@ -512,14 +659,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if let Some(ref buf) = atx_heading_buffer {
if !buf.is_empty() {
- buf_tail_mut(buffers).push(encode(buf));
+ buf_tail_mut(buffers).push(encode_opt(buf, ignore_encode));
atx_heading_buffer = Some("".to_string());
}
} else {
atx_heading_buffer = Some("".to_string());
}
- buf_tail_mut(buffers).push(encode(&result));
+ buf_tail_mut(buffers).push(encode_opt(&result, ignore_encode));
}
TokenType::HeadingSetextText => {
heading_setext_buffer = Some(resume(buffers));
@@ -540,7 +687,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
TokenType::HtmlFlowData | TokenType::HtmlTextData => {
let slice = serialize(codes, &from_exit_event(events, index), false);
// last_was_tag = false;
- buf_tail_mut(buffers).push(if ignore_encode { slice } else { encode(&slice) });
+ buf_tail_mut(buffers).push(encode_opt(&slice, ignore_encode));
}
TokenType::LineEnding => {
// if slurp_all_line_endings {
@@ -549,11 +696,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if slurp_one_line_ending {
slurp_one_line_ending = false;
} else {
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
}
TokenType::Paragraph => {
@@ -605,6 +751,15 @@ fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> {
buffers.last().expect("at least one buffer should exist")
}
+/// Encode `value`, unless encoding is currently ignored (as in raw HTML, or
+/// in destinations, which are percent-encoded instead).
+fn encode_opt(value: &str, ignore_encode: bool) -> String {
+ if ignore_encode {
+ value.to_string()
+ } else {
+ encode(value)
+ }
+}
+
/// Add a line ending.
fn line_ending(buffers: &mut [Vec<String>], default: &LineEnding) {
let tail = buf_tail_mut(buffers);
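
The recurring `encode(&serialize(…))` → `encode_opt(…)` rewrites above all thread the new `ignore_encode` flag through the compiler’s data-emitting branches. A minimal, self-contained sketch of that pattern (the real `encode` in `src/util` handles more characters than this stand-in):

```rust
/// Stand-in for the crate’s `util::encode`; only here to make the sketch run.
fn encode(value: &str) -> String {
    value.replace('&', "&amp;").replace('<', "&lt;")
}

/// Encode `value`, unless encoding is currently being ignored.
fn encode_opt(value: &str, ignore_encode: bool) -> String {
    if ignore_encode {
        value.to_string()
    } else {
        encode(value)
    }
}

fn main() {
    // Inside a resource destination, `ignore_encode` is on: the raw value
    // passes through and is percent-encoded by `sanitize_uri` later.
    assert_eq!(encode_opt("a&b", true), "a&b");
    // Everywhere else, data is HTML-encoded as before.
    assert_eq!(encode_opt("a&b", false), "a&amp;b");
}
```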
diff --git a/src/constant.rs b/src/constant.rs
index 8e1acf3..5cb7826 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -193,6 +193,11 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;
/// To safeguard performance, labels are capped at a large number: `999`.
pub const LINK_REFERENCE_SIZE_MAX: usize = 999;
+/// To safeguard performance, raw (not enclosed) link resource destinations
+/// are capped at `32` levels of balanced parens.
+/// See: <https://spec.commonmark.org/0.30/#link-destination>,
+/// <https://github.com/remarkjs/react-markdown/issues/658#issuecomment-984345577>.
+pub const LINK_RESOURCE_DESTINATION_BALANCE_MAX: usize = 32;
+
/// List of protocols allowed, when operating safely, as `href` on `a`.
///
/// This list is based on what is allowed by GitHub.
@@ -201,8 +206,6 @@ pub const SAFE_PROTOCOL_HREF: [&str; 6] = ["http", "https", "irc", "ircs", "mail
/// List of protocols allowed, when operating safely, as `src` on `img`.
///
/// This list is based on what is allowed by GitHub.
-// To do: image.
-#[allow(dead_code)]
pub const SAFE_PROTOCOL_SRC: [&str; 2] = ["http", "https"];
/// The number of characters that form a tab stop.
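
With `SAFE_PROTOCOL_SRC` no longer dead code, image sources now go through the same protocol filter links already had. A hedged sketch of the observable behavior (the full matrix lives in `tests/misc_dangerous_protocol.rs`, added by this commit but not shown above):

```rust
extern crate micromark;
use micromark::micromark;

fn main() {
    // `javascript:` is not in `SAFE_PROTOCOL_SRC`, so the `src` is emptied.
    assert_eq!(
        micromark("![a](javascript:alert(1))"),
        "<p><img src=\"\" alt=\"a\" /></p>"
    );
    // `https:` is in the list and passes through.
    assert_eq!(
        micromark("![a](https://example.com)"),
        "<p><img src=\"https://example.com\" alt=\"a\" /></p>"
    );
}
```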
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 92d275c..674bd65 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -115,7 +115,7 @@
use crate::construct::{
partial_destination::{start as destination, Options as DestinationOptions},
partial_label::{start as label, Options as LabelOptions},
- partial_space_or_tab::space_or_tab,
+ partial_space_or_tab::{space_or_tab, space_or_tab_one_line_ending},
partial_title::{start as title, Options as TitleOptions},
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -168,7 +168,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::DefinitionMarker);
(
State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), marker_after),
+ tokenizer.go(space_or_tab_one_line_ending(), destination_before),
)),
None,
)
@@ -177,31 +177,6 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// After the marker, after whitespace.
-///
-/// ```markdown
-/// [a]: |b "c"
-///
-/// [a]: |␊
-/// b "c"
-/// ```
-fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), destination_before),
- )),
- None,
- )
- }
- _ => destination_before(tokenizer, code),
- }
-}
-
/// Before a destination.
///
/// ```markdown
@@ -211,35 +186,23 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// |b "c"
/// ```
fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let event = tokenizer.events.last().unwrap();
-
- // Whitespace.
- if (event.token_type == TokenType::LineEnding || event.token_type == TokenType::SpaceOrTab)
- // Blank line not ok.
- && !matches!(
- code,
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
- ) {
- tokenizer.go(
- |t, c| {
- destination(
- t,
- c,
- DestinationOptions {
- limit: usize::MAX,
- destination: TokenType::DefinitionDestination,
- literal: TokenType::DefinitionDestinationLiteral,
- marker: TokenType::DefinitionDestinationLiteralMarker,
- raw: TokenType::DefinitionDestinationRaw,
- string: TokenType::DefinitionDestinationString,
- },
- )
- },
- destination_after,
- )(tokenizer, code)
- } else {
- (State::Nok, None)
- }
+ tokenizer.go(
+ |t, c| {
+ destination(
+ t,
+ c,
+ DestinationOptions {
+ limit: usize::MAX,
+ destination: TokenType::DefinitionDestination,
+ literal: TokenType::DefinitionDestinationLiteral,
+ marker: TokenType::DefinitionDestinationLiteralMarker,
+ raw: TokenType::DefinitionDestinationRaw,
+ string: TokenType::DefinitionDestinationString,
+ },
+ )
+ },
+ destination_after,
+ )(tokenizer, code)
}
/// After a destination.
@@ -289,32 +252,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// "c"
/// ```
fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab(), title_before_after_optional_whitespace)(tokenizer, code)
-}
-
-/// Before a title, after optional whitespace.
-///
-/// ```markdown
-/// [a]: b |"c"
-///
-/// [a]: b |␊
-/// "c"
-/// ```
-fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), title_before_marker),
- )),
- None,
- )
- }
- _ => title_before_marker(tokenizer, code),
- }
+ tokenizer.go(space_or_tab_one_line_ending(), title_before_marker)(tokenizer, code)
}
/// Before a title, after a line ending.
@@ -324,26 +262,20 @@ fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code)
/// | "c"
/// ```
fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let event = tokenizer.events.last().unwrap();
-
- if event.token_type == TokenType::LineEnding || event.token_type == TokenType::SpaceOrTab {
- tokenizer.go(
- |t, c| {
- title(
- t,
- c,
- TitleOptions {
- title: TokenType::DefinitionTitle,
- marker: TokenType::DefinitionTitleMarker,
- string: TokenType::DefinitionTitleString,
- },
- )
- },
- title_after,
- )(tokenizer, code)
- } else {
- (State::Nok, None)
- }
+ tokenizer.go(
+ |t, c| {
+ title(
+ t,
+ c,
+ TitleOptions {
+ title: TokenType::DefinitionTitle,
+ marker: TokenType::DefinitionTitleMarker,
+ string: TokenType::DefinitionTitleString,
+ },
+ )
+ },
+ title_after,
+ )(tokenizer, code)
}
/// After a title.
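
The net effect of this refactor is easiest to see from the outside: each gap in a definition (after the `:`, and between destination and title) may contain at most one line ending, and both spots now share `space_or_tab_one_line_ending`. A hedged check, relying only on definitions compiling to nothing:

```rust
extern crate micromark;
use micromark::micromark;

fn main() {
    // On one line: a definition, so no output.
    assert_eq!(micromark("[a]: b \"c\""), "");
    // The destination and the title may each move to the next line; that is
    // exactly the single line ending `space_or_tab_one_line_ending` allows.
    assert_eq!(micromark("[a]:\nb\n\"c\""), "");
}
```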
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
new file mode 100644
index 0000000..405858d
--- /dev/null
+++ b/src/construct/label_end.rs
@@ -0,0 +1,712 @@
+//! Label end occurs in the text content type: the `]` that can close a link
+//! or an image, optionally followed by a resource (`(…)`) or a reference
+//! (`[…]`).
+
+use crate::constant::LINK_RESOURCE_DESTINATION_BALANCE_MAX;
+use crate::construct::{
+ partial_destination::{start as destination, Options as DestinationOptions},
+ partial_label::{start as label, Options as LabelOptions},
+ partial_space_or_tab::space_or_tab_one_line_ending,
+ partial_title::{start as title, Options as TitleOptions},
+};
+use crate::tokenizer::{
+ Code, Event, EventType, LabelStart, Media, State, StateFnResult, TokenType, Tokenizer,
+};
+use crate::util::{
+ normalize_identifier::normalize_identifier,
+ span::{serialize, Span},
+};
+/// To do: could we do without `HashMap`, so we don’t need `std`?
+use std::collections::HashMap;
+
+/// State needed to parse label ends.
+#[derive(Debug)]
+struct Info {
+ /// Index into `label_start_stack` of the matching label start.
+ label_start_index: usize,
+ /// The media (link or image) that this label end closes.
+ media: Media,
+}
+
+/// Resolve media: turn matched label starts and label ends into links and
+/// images, and turn unmatched label starts back into data.
+#[allow(clippy::too_many_lines)]
+pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut left: Vec<LabelStart> = tokenizer.label_start_list_loose.drain(..).collect();
+ let mut left_2: Vec<LabelStart> = tokenizer.label_start_stack.drain(..).collect();
+ let media: Vec<Media> = tokenizer.media_list.drain(..).collect();
+ left.append(&mut left_2);
+
+ let mut map: HashMap<usize, (usize, Vec<Event>)> = HashMap::new();
+ let events = &tokenizer.events;
+
+ let mut index = 0;
+ while index < left.len() {
+ let label_start = &left[index];
+ let data_enter_index = label_start.start.0;
+ let data_exit_index = label_start.start.1;
+
+ map.insert(
+ data_enter_index,
+ (
+ data_exit_index - data_enter_index,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::Data,
+ point: events[data_enter_index].point.clone(),
+ index: events[data_enter_index].index,
+ previous: None,
+ next: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::Data,
+ point: events[data_exit_index].point.clone(),
+ index: events[data_exit_index].index,
+ previous: None,
+ next: None,
+ },
+ ],
+ ),
+ );
+
+ index += 1;
+ }
+
+ let mut index = 0;
+ while index < media.len() {
+ let media = &media[index];
+ // LabelLink:Enter or LabelImage:Enter.
+ let group_enter_index = media.start.0;
+ let group_enter_event = &events[group_enter_index];
+ // LabelLink:Exit or LabelImage:Exit.
+ let text_enter_index = media.start.0
+ + (if group_enter_event.token_type == TokenType::LabelLink {
+ 4
+ } else {
+ 6
+ });
+ // LabelEnd:Enter.
+ let text_exit_index = media.end.0;
+ // LabelEnd:Exit.
+ let label_exit_index = media.end.0 + 3;
+ // Resource:Exit, etc.
+ let group_end_index = media.end.1;
+
+ // Insert a group enter and label enter.
+ add(
+ &mut map,
+ group_enter_index,
+ 0,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: if group_enter_event.token_type == TokenType::LabelLink {
+ TokenType::Link
+ } else {
+ TokenType::Image
+ },
+ point: group_enter_event.point.clone(),
+ index: group_enter_event.index,
+ previous: None,
+ next: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::Label,
+ point: group_enter_event.point.clone(),
+ index: group_enter_event.index,
+ previous: None,
+ next: None,
+ },
+ ],
+ );
+
+ // Empty events not allowed.
+ if text_enter_index != text_exit_index {
+ // Insert a text enter.
+ add(
+ &mut map,
+ text_enter_index,
+ 0,
+ vec![Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::LabelText,
+ point: events[text_enter_index].point.clone(),
+ index: events[text_enter_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+
+ // Insert a text exit.
+ add(
+ &mut map,
+ text_exit_index,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::LabelText,
+ point: events[text_exit_index].point.clone(),
+ index: events[text_exit_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+ }
+
+ // Insert a label exit.
+ add(
+ &mut map,
+ label_exit_index + 1,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::Label,
+ point: events[label_exit_index].point.clone(),
+ index: events[label_exit_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+
+ // Insert a group exit.
+ add(
+ &mut map,
+ group_end_index + 1,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ // Match the group enter’s token type (link or image).
+ token_type: if group_enter_event.token_type == TokenType::LabelLink {
+ TokenType::Link
+ } else {
+ TokenType::Image
+ },
+ point: events[group_end_index].point.clone(),
+ index: events[group_end_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+
+ index += 1;
+ }
+
+ let mut indices: Vec<&usize> = map.keys().collect();
+ indices.sort_unstable();
+ let mut next_events: Vec<Event> = vec![];
+ let mut index_into_indices = 0;
+ let mut start = 0;
+ let events = &mut tokenizer.events;
+ let mut shift: i32 = 0;
+
+ while index_into_indices < indices.len() {
+ let index = *indices[index_into_indices];
+
+ if start < index {
+ let append = &mut events[start..index].to_vec();
+ let mut index = 0;
+
+ while index < append.len() {
+ let ev = &mut append[index];
+
+ if let Some(x) = ev.previous {
+ let next = (x as i32 + shift) as usize;
+ ev.previous = Some(next);
+ println!("todo: y: previous {:?} {:?} {:?}", x, shift, start);
+ }
+
+ if let Some(x) = ev.next {
+ let next = (x as i32 + shift) as usize;
+ ev.next = Some(next);
+ println!("todo: y: next {:?} {:?} {:?}", x, shift, start);
+ }
+
+ index += 1;
+ }
+
+ next_events.append(append);
+ }
+
+ let (remove, add) = map.get(&index).unwrap();
+ shift += (add.len() as i32) - (*remove as i32);
+
+ if !add.is_empty() {
+ let append = &mut add.clone();
+ let mut index = 0;
+
+ while index < append.len() {
+ let ev = &mut append[index];
+
+ if let Some(x) = ev.previous {
+ println!("todo: x: previous {:?} {:?} {:?}", x, shift, start);
+ }
+
+ if let Some(x) = ev.next {
+ println!("todo: x: next {:?} {:?} {:?}", x, shift, start);
+ }
+
+ index += 1;
+ }
+
+ next_events.append(append);
+ }
+
+ start = index + remove;
+ index_into_indices += 1;
+ }
+
+ if start < events.len() {
+ next_events.append(&mut events[start..].to_vec());
+ }
+
+ next_events
+}
+
+/// Start of label end.
+///
+/// ```markdown
+/// [a|](b) c
+/// [a|][b] c
+/// [a|][] b
+/// [a|] b
+///
+/// [a]: z
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ if Code::Char(']') == code {
+ let mut label_start_index: Option<usize> = None;
+ let mut index = tokenizer.label_start_stack.len();
+
+ while index > 0 {
+ index -= 1;
+
+ if !tokenizer.label_start_stack[index].balanced {
+ label_start_index = Some(index);
+ break;
+ }
+ }
+
+ // If there is an okay opening:
+ if let Some(label_start_index) = label_start_index {
+ let label_start = tokenizer
+ .label_start_stack
+ .get_mut(label_start_index)
+ .unwrap();
+
+ // Mark as balanced if the info is inactive.
+ if label_start.inactive {
+ return nok(tokenizer, code, label_start_index);
+ }
+
+ let label_end_start = tokenizer.events.len();
+ let info = Info {
+ label_start_index,
+ media: Media {
+ start: label_start.start,
+ end: (label_end_start, label_end_start + 3),
+ id: normalize_identifier(&serialize(
+ &tokenizer.parse_state.codes,
+ &Span {
+ start_index: tokenizer.events[label_start.start.1].index,
+ end_index: tokenizer.events[label_end_start - 1].index,
+ },
+ false,
+ )),
+ },
+ };
+
+ tokenizer.enter(TokenType::LabelEnd);
+ tokenizer.enter(TokenType::LabelMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelMarker);
+ tokenizer.exit(TokenType::LabelEnd);
+
+ return (State::Fn(Box::new(move |t, c| after(t, c, info))), None);
+ }
+ }
+
+ (State::Nok, None)
+}
+
+/// After `]`.
+///
+/// ```markdown
+/// [a]|(b) c
+/// [a]|[b] c
+/// [a]|[] b
+/// [a]| b
+///
+/// [a]: z
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ // let label_start = tokenizer
+ // .label_start_stack
+ // .get_mut(info.label_start_index)
+ // .unwrap();
+ // To do: figure out if defined or not.
+ let defined = false;
+ println!("to do: is `{:?}` defined?", info);
+ match code {
+ // Resource (`[asd](fgh)`)?
+ Code::Char('(') => tokenizer.attempt(resource, move |is_ok| {
+ Box::new(move |t, c| {
+ // Also fine if `defined`, as then it’s a valid shortcut.
+ if is_ok || defined {
+ ok(t, c, info)
+ } else {
+ nok(t, c, info.label_start_index)
+ }
+ })
+ })(tokenizer, code),
+ // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
+ Code::Char('[') => tokenizer.attempt(full_reference, move |is_ok| {
+ Box::new(move |t, c| {
+ if is_ok {
+ ok(t, c, info)
+ } else if defined {
+ reference_not_full(t, c, info)
+ } else {
+ nok(t, c, info.label_start_index)
+ }
+ })
+ })(tokenizer, code),
+ // Shortcut reference: `[asd]`?
+ _ => {
+ if defined {
+ ok(tokenizer, code, info)
+ } else {
+ nok(tokenizer, code, info.label_start_index)
+ }
+ }
+ }
+}
+
+/// After `]`, at `[`, but not at a full reference.
+///
+/// > 👉 **Note**: we only get here if the label is defined.
+///
+/// ```markdown
+/// [a]|[] b
+///
+/// [a]: z
+/// ```
+fn reference_not_full(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ tokenizer.attempt(collapsed_reference, move |is_ok| {
+ Box::new(move |t, c| {
+ if is_ok {
+ ok(t, c, info)
+ } else {
+ nok(t, c, info.label_start_index)
+ }
+ })
+ })(tokenizer, code)
+}
+
+/// Done, we found something.
+///
+/// ```markdown
+/// [a](b)| c
+/// [a][b]| c
+/// [a][]| b
+/// [a]| b
+///
+/// [a]: z
+/// ```
+fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
+ println!(
+ "ok res, ref full, ref, collapsed, or ref shortcut: {:?}",
+ info.media
+ );
+ // Remove this one and everything after it.
+ let mut left: Vec<LabelStart> = tokenizer
+ .label_start_stack
+ .drain(info.label_start_index..)
+ .collect();
+ // Remove this one from `left`, as we’ll move it to `media_list`.
+ left.remove(0);
+ tokenizer.label_start_list_loose.append(&mut left);
+
+ let is_link = tokenizer.events[info.media.start.0].token_type == TokenType::LabelLink;
+
+ if is_link {
+ let mut index = 0;
+ while index < tokenizer.label_start_stack.len() {
+ let label_start = &mut tokenizer.label_start_stack[index];
+ if tokenizer.events[label_start.start.0].token_type == TokenType::LabelLink {
+ label_start.inactive = true;
+ }
+ index += 1;
+ }
+ }
+
+ info.media.end.1 = tokenizer.events.len() - 1;
+ tokenizer.media_list.push(info.media);
+ tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ (State::Ok, Some(vec![code]))
+}
+
+/// Done, it’s nothing.
+///
+/// There was an okay opening, but we didn’t match anything.
+///
+/// ```markdown
+/// [a]|(b c
+/// [a]|[b c
+/// [b]|[ c
+/// [b]| c
+///
+/// [a]: z
+/// ```
+fn nok(tokenizer: &mut Tokenizer, _code: Code, label_start_index: usize) -> StateFnResult {
+ let label_start = tokenizer
+ .label_start_stack
+ .get_mut(label_start_index)
+ .unwrap();
+ println!("just balanced braces: {:?}", label_start);
+ label_start.balanced = true;
+ // To do: pop things off the list?
+ (State::Nok, None)
+}
+
+/// Before a resource, at `(`.
+///
+/// ```markdown
+/// [a]|(b) c
+/// ```
+fn resource(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('(') => {
+ tokenizer.enter(TokenType::Resource);
+ tokenizer.enter(TokenType::ResourceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ResourceMarker);
+ (State::Fn(Box::new(resource_start)), None)
+ }
+ _ => unreachable!("expected `(`"),
+ }
+}
+
+/// At the start of a resource, after `(`, before a definition.
+///
+/// ```markdown
+/// [a](|b) c
+/// ```
+fn resource_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_open)(tokenizer, code)
+}
+
+/// At the start of a resource, after optional whitespace.
+///
+/// ```markdown
+/// [a](|b) c
+/// ```
+fn resource_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(')') => resource_end(tokenizer, code),
+ _ => tokenizer.go(
+ |t, c| {
+ destination(
+ t,
+ c,
+ DestinationOptions {
+ limit: LINK_RESOURCE_DESTINATION_BALANCE_MAX,
+ destination: TokenType::ResourceDestination,
+ literal: TokenType::ResourceDestinationLiteral,
+ marker: TokenType::ResourceDestinationLiteralMarker,
+ raw: TokenType::ResourceDestinationRaw,
+ string: TokenType::ResourceDestinationString,
+ },
+ )
+ },
+ destination_after,
+ )(tokenizer, code),
+ }
+}
+
+/// In a resource, after a destination, before optional whitespace.
+///
+/// ```markdown
+/// [a](b|) c
+/// [a](b| "c") d
+/// ```
+fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt(space_or_tab_one_line_ending(), |ok| {
+ Box::new(if ok { resource_between } else { resource_end })
+ })(tokenizer, code)
+}
+
+/// In a resource, after a destination, after whitespace.
+///
+/// ```markdown
+/// [a](b |) c
+/// [a](b |"c") d
+/// ```
+fn resource_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('"' | '\'' | '(') => tokenizer.go(
+ |t, c| {
+ title(
+ t,
+ c,
+ TitleOptions {
+ title: TokenType::ResourceTitle,
+ marker: TokenType::ResourceTitleMarker,
+ string: TokenType::ResourceTitleString,
+ },
+ )
+ },
+ title_after,
+ )(tokenizer, code),
+ _ => resource_end(tokenizer, code),
+ }
+}
+
+/// In a resource, after a title.
+///
+/// ```markdown
+/// [a](b "c"|) d
+/// ```
+fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_end)(tokenizer, code)
+}
+
+/// In a resource, at the `)`.
+///
+/// ```markdown
+/// [a](b|) c
+/// [a](b |) c
+/// [a](b "c"|) d
+/// ```
+fn resource_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(')') => {
+ tokenizer.enter(TokenType::ResourceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ResourceMarker);
+ tokenizer.exit(TokenType::Resource);
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// In a reference (full), at the `[`.
+///
+/// ```markdown
+/// [a]|[b]
+/// ```
+fn full_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => tokenizer.go(
+ |t, c| {
+ label(
+ t,
+ c,
+ LabelOptions {
+ label: TokenType::Reference,
+ marker: TokenType::ReferenceMarker,
+ string: TokenType::ReferenceString,
+ },
+ )
+ },
+ full_reference_after,
+ )(tokenizer, code),
+ _ => unreachable!("expected `[`"),
+ }
+}
+
+/// In a reference (full), after `]`.
+///
+/// ```markdown
+/// [a][b]|
+/// ```
+fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ let events = &tokenizer.events;
+ let mut index = events.len() - 1;
+ let mut start: Option<usize> = None;
+ let mut end: Option<usize> = None;
+
+ while index > 0 {
+ index -= 1;
+ let event = &events[index];
+ if event.token_type == TokenType::ReferenceString {
+ if event.event_type == EventType::Exit {
+ end = Some(event.index);
+ } else {
+ start = Some(event.index);
+ break;
+ }
+ }
+ }
+
+ // Always found, otherwise we don’t get here.
+ let start = start.unwrap();
+ let end = end.unwrap();
+
+ let id = normalize_identifier(&serialize(
+ &tokenizer.parse_state.codes,
+ &Span {
+ start_index: start,
+ end_index: end,
+ },
+ false,
+ ));
+ println!("to do: is `{:?}` defined?", id);
+ let defined = false;
+
+ if defined {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+}
+
+/// In a reference (collapsed), at the `[`.
+///
+/// > 👉 **Note**: we only get here if the label is defined.
+///
+/// ```markdown
+/// [a]|[]
+/// ```
+fn collapsed_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => {
+ tokenizer.enter(TokenType::Reference);
+ tokenizer.enter(TokenType::ReferenceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ReferenceMarker);
+ (State::Fn(Box::new(collapsed_reference_open)), None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// In a reference (collapsed), at the `]`.
+///
+/// > 👉 **Note**: we only get here if the label is defined.
+///
+/// ```markdown
+/// [a][|]
+/// ```
+fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(']') => {
+ tokenizer.enter(TokenType::ReferenceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ReferenceMarker);
+ tokenizer.exit(TokenType::Reference);
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// Plan an edit: at `index`, remove `remove` events and insert `add` instead,
+/// merging with any edit already planned there.
+pub fn add(
+ map: &mut HashMap<usize, (usize, Vec<Event>)>,
+ index: usize,
+ mut remove: usize,
+ mut add: Vec<Event>,
+) {
+ let curr = map.remove(&index);
+
+ if let Some((curr_rm, mut curr_add)) = curr {
+ remove += curr_rm;
+ curr_add.append(&mut add);
+ add = curr_add;
+ }
+
+ map.insert(index, (remove, add));
+}
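
The `add` helper at the bottom is the core of the map-based splicing above (and what the readme’s refactor note wants to share with `subtokenize`). A self-contained sketch with `&str`s standing in for `Event`s:

```rust
use std::collections::HashMap;

// Same shape as `label_end::add`, with `&'static str` instead of `Event`.
fn add(
    map: &mut HashMap<usize, (usize, Vec<&'static str>)>,
    index: usize,
    mut remove: usize,
    mut add: Vec<&'static str>,
) {
    if let Some((curr_remove, mut curr_add)) = map.remove(&index) {
        remove += curr_remove;
        curr_add.append(&mut add);
        add = curr_add;
    }
    map.insert(index, (remove, add));
}

fn main() {
    let mut map = HashMap::new();
    // Two edits planned at the same event index merge instead of clobbering:
    add(&mut map, 4, 0, vec!["Link:Enter"]);
    add(&mut map, 4, 2, vec!["Label:Enter"]);
    assert_eq!(map.get(&4), Some(&(2, vec!["Link:Enter", "Label:Enter"])));
}
```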
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
new file mode 100644
index 0000000..2e96977
--- /dev/null
+++ b/src/construct/label_start_image.rs
@@ -0,0 +1,47 @@
+//! Label start (image) occurs in the text content type: the `![` that can
+//! start an image.
+
+use super::label_end::resolve_media;
+use crate::tokenizer::{Code, LabelStart, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of label (image) start.
+///
+/// ```markdown
+/// a |![ b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('!') => {
+ tokenizer.enter(TokenType::LabelImage);
+ tokenizer.enter(TokenType::LabelImageMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelImageMarker);
+ (State::Fn(Box::new(open)), None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// After `!`, before a `[`.
+///
+/// ```markdown
+/// a !|[ b
+/// ```
+pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => {
+ tokenizer.enter(TokenType::LabelMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelMarker);
+ tokenizer.exit(TokenType::LabelImage);
+ let end = tokenizer.events.len() - 1;
+ tokenizer.label_start_stack.push(LabelStart {
+ start: (end - 5, end),
+ balanced: false,
+ inactive: false,
+ });
+ tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
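
The `(end - 5, end)` bookkeeping follows from the exact events `![` pushes, and it is also why `resolve_media` skips six events past an image start but only four past a link start. Spelled out (an illustration, not code from this commit):

```rust
// After `open` consumes `[`, the tail of `tokenizer.events` holds six events
// for the two characters of `![`, with `end = events.len() - 1`:
//
//   end - 5  LabelImage:Enter
//   end - 4  LabelImageMarker:Enter   // `!`
//   end - 3  LabelImageMarker:Exit
//   end - 2  LabelMarker:Enter        // `[`
//   end - 1  LabelMarker:Exit
//   end      LabelImage:Exit
//
// So `start: (end - 5, end)` spans the whole group, mirroring the four-event
// `(start, events.len() - 1)` pair `label_start_link` stores for a lone `[`.
```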
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
new file mode 100644
index 0000000..35c9dcd
--- /dev/null
+++ b/src/construct/label_start_link.rs
@@ -0,0 +1,30 @@
+//! Label start (link) occurs in the text content type: the `[` that can
+//! start a link.
+
+use super::label_end::resolve_media;
+use crate::tokenizer::{Code, LabelStart, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of label (link) start.
+///
+/// ```markdown
+/// a |[ b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => {
+ let start = tokenizer.events.len();
+ tokenizer.enter(TokenType::LabelLink);
+ tokenizer.enter(TokenType::LabelMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelMarker);
+ tokenizer.exit(TokenType::LabelLink);
+ tokenizer.label_start_stack.push(LabelStart {
+ start: (start, tokenizer.events.len() - 1),
+ balanced: false,
+ inactive: false,
+ });
+ tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 9e5da0e..8565b2f 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -30,9 +30,9 @@
//! * [heading (setext)][heading_setext]
//! * [html (flow)][html_flow]
//! * [html (text)][html_text]
-//! * label end
-//! * label start (image)
-//! * label start (link)
+//! * [label end][label_end]
+//! * [label start (image)][label_start_image]
+//! * [label start (link)][label_start_link]
//! * list
//! * [paragraph][]
//! * [thematic break][thematic_break]
@@ -59,8 +59,6 @@
//! They also contain references to character as defined by [char][], so for
//! example `ascii_punctuation` refers to
//! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
-//!
-//!
pub mod autolink;
pub mod blank_line;
@@ -76,6 +74,9 @@ pub mod heading_atx;
pub mod heading_setext;
pub mod html_flow;
pub mod html_text;
+pub mod label_end;
+pub mod label_start_image;
+pub mod label_start_link;
pub mod paragraph;
pub mod partial_data;
pub mod partial_destination;
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 03dcbee..7887a44 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -267,11 +267,10 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
/// ```markdown
/// a\|)b
/// ```
-fn raw_escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
+fn raw_escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('(' | ')' | '\\') => {
tokenizer.consume(code);
- info.balance += 1;
(State::Fn(Box::new(move |t, c| raw(t, c, info))), None)
}
_ => raw(tokenizer, code, info),
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 024a4b2..43bdc53 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -35,6 +35,45 @@ pub fn space_or_tab() -> Box<StateFn> {
space_or_tab_min_max(1, usize::MAX)
}
+/// `space_or_tab`, or optionally `space_or_tab`, one line ending, and more
+/// optional `space_or_tab`, but not a blank line.
+pub fn space_or_tab_one_line_ending() -> Box<StateFn> {
+ Box::new(|tokenizer, code| {
+ tokenizer.attempt(space_or_tab(), move |ok| {
+ Box::new(move |tokenizer, code| match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.enter(TokenType::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (
+ State::Fn(Box::new(tokenizer.attempt_opt(
+ space_or_tab(),
+ move |_t, code| {
+ if !matches!(
+ code,
+ Code::None
+ | Code::CarriageReturnLineFeed
+ | Code::Char('\r' | '\n')
+ ) {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+ },
+ ))),
+ None,
+ )
+ }
+ _ => {
+ if ok {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+ }
+ })
+ })(tokenizer, code)
+ })
+}
+
/// Between `x` and `y` `space_or_tab`
///
/// ```bnf
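
A hedged summary of what the new helper accepts, in the bnf style this file’s other doc comments already use:

```rust
// Sketch, not from the diff:
//
//   space_or_tab_one_line_ending ::=
//     1*( ' ' '\t' ) |
//     0*( ' ' '\t' ) eol 0*( ' ' '\t' )
//
// with the extra restriction, enforced by the innermost state function, that
// the code after the match must not be another eol or EOF: that would be a
// blank line, which ends constructs such as definitions and resources
// instead of continuing them.
```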
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 3e61788..78ae311 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -32,7 +32,7 @@
//! <!-- To do: link label end. -->
use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::subtokenize::link;
+use crate::subtokenize::link_to;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
@@ -109,7 +109,7 @@ impl Kind {
#[derive(Debug)]
struct Info {
/// Whether we’ve seen our first `ChunkString`.
- connect: bool,
+ connect_index: Option<usize>,
/// Kind of title.
kind: Kind,
/// Configuration.
@@ -125,9 +125,9 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFnResult {
match code {
- Code::Char(char) if char == '(' || char == '"' || char == '\'' => {
+ Code::Char(char) if char == '"' || char == '\'' || char == '(' => {
let info = Info {
- connect: false,
+ connect_index: None,
kind: Kind::from_char(char),
options,
};
@@ -184,11 +184,11 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
_ => {
tokenizer.enter(TokenType::ChunkString);
- if info.connect {
+ if let Some(connect_index) = info.connect_index {
let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
+ link_to(&mut tokenizer.events, connect_index, index);
} else {
- info.connect = true;
+ info.connect_index = Some(tokenizer.events.len() - 1);
}
title(tokenizer, code, info)
diff --git a/src/content/flow.rs b/src/content/flow.rs
index e71d25a..546712f 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -26,6 +26,7 @@ use crate::construct::{
html_flow::start as html_flow, paragraph::start as paragraph,
thematic_break::start as thematic_break,
};
+use crate::parser::ParseState;
use crate::subtokenize::subtokenize;
use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
use crate::util::{
@@ -34,9 +35,10 @@ use crate::util::{
};
/// Turn `codes` as the flow content type into events.
-pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
- let mut tokenizer = Tokenizer::new(point, index);
- tokenizer.feed(codes, Box::new(start), true);
+pub fn flow(parse_state: &ParseState, point: Point, index: usize) -> Vec<Event> {
+ let mut tokenizer = Tokenizer::new(point, index, parse_state);
+
+ tokenizer.push(&parse_state.codes, Box::new(start), true);
let mut index = 0;
@@ -47,9 +49,14 @@ pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
&& event.token_type == TokenType::DefinitionLabelString
{
let id = normalize_identifier(
- serialize(codes, &from_exit_event(&tokenizer.events, index), false).as_str(),
+ serialize(
+ &parse_state.codes,
+ &from_exit_event(&tokenizer.events, index),
+ false,
+ )
+ .as_str(),
);
- println!("to do: use identifier {:?}", id);
+ println!("to do: use definition identifier {:?}", id);
}
index += 1;
@@ -58,8 +65,9 @@ pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
let mut result = (tokenizer.events, false);
while !result.1 {
- result = subtokenize(result.0, codes);
+ result = subtokenize(result.0, parse_state);
}
+
result.0
}
diff --git a/src/content/text.rs b/src/content/text.rs
index 1224064..5718617 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -21,15 +21,19 @@ use crate::construct::{
character_reference::start as character_reference, code_text::start as code_text,
hard_break_escape::start as hard_break_escape,
hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
- partial_data::start as data,
+ label_end::start as label_end, label_start_image::start as label_start_image,
+ label_start_link::start as label_start_link, partial_data::start as data,
};
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
-const MARKERS: [Code; 5] = [
+const MARKERS: [Code; 8] = [
Code::Char(' '), // `hard_break_trailing`
+ Code::Char('!'), // `label_start_image`
Code::Char('&'), // `character_reference`
Code::Char('<'), // `autolink`, `html_text`
+ Code::Char('['), // `label_start_link`
Code::Char('\\'), // `character_escape`, `hard_break_escape`
+ Code::Char(']'), // `label_end`
Code::Char('`'), // `code_text`
];
@@ -47,13 +51,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Code::None => (State::Ok, None),
_ => tokenizer.attempt_n(
vec![
- Box::new(character_reference),
+ Box::new(autolink),
Box::new(character_escape),
+ Box::new(character_reference),
+ Box::new(code_text),
Box::new(hard_break_escape),
Box::new(hard_break_trailing),
- Box::new(autolink),
Box::new(html_text),
- Box::new(code_text),
+ Box::new(label_end),
+ Box::new(label_start_image),
+ Box::new(label_start_link),
],
|ok| Box::new(if ok { start } else { before_data }),
)(tokenizer, code),
diff --git a/src/parser.rs b/src/parser.rs
index 49d99d3..32b7f36 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4,14 +4,24 @@
use crate::content::flow::flow;
use crate::tokenizer::{as_codes, Code, Event, Point};
+/// State shared between the parser, tokenizers, and the compiler.
+pub struct ParseState {
+ /// The codes of the whole document.
+ pub codes: Vec<Code>,
+ /// Identifiers of found definitions.
+ pub definitions: Vec<String>,
+}
+
/// Turn a string of markdown into events.
///
/// Passes the codes back so the compiler can access the source.
pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) {
- let codes = as_codes(value);
- // To do: pass a reference to this around, and slices in the (back)feeding. Might be tough.
+ let parse_state = ParseState {
+ codes: as_codes(value),
+ definitions: vec![],
+ };
+
let events = flow(
- &codes,
+ &parse_state,
Point {
line: 1,
column: 1,
@@ -19,5 +29,7 @@ pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) {
},
0,
);
- (events, codes)
+
+ // To do: pass whole `parse_state` back?
+ (events, parse_state.codes)
}
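
A sketch of how `ParseState` now threads through the pipeline (names from this diff; layout illustrative):

```rust
// parse(value)
//   ├─ ParseState { codes: as_codes(value), definitions: vec![] }
//   ├─ flow(&parse_state, point, 0)
//   │    ├─ Tokenizer::new(point, 0, &parse_state)
//   │    │    └─ constructs reach the source via `tokenizer.parse_state.codes`
//   │    └─ subtokenize(events, &parse_state)   // repeated until done
//   └─ (events, parse_state.codes)              // handed to `compile`
```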
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 4ee2242..58db3c6 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -28,9 +28,8 @@
use std::collections::HashMap;
use crate::content::{string::start as string, text::start as text};
-use crate::tokenizer::{
- Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer,
-};
+use crate::parser::ParseState;
+use crate::tokenizer::{Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer};
use crate::util::span;
/// Create a link between two [`Event`][]s.
@@ -39,25 +38,36 @@ use crate::util::span;
/// This optimizes for the common case where the token at `index` is connected
/// to the previous void token.
pub fn link(events: &mut [Event], index: usize) {
- let prev = &mut events[index - 2];
+ link_to(events, index - 2, index);
+}
+
+/// Create a link between any two [`Event`][]s.
+pub fn link_to(events: &mut [Event], previous: usize, next: usize) {
+ let prev = &mut events[previous];
+ // To do: force chunks?
+ // assert!(
+ // prev.token_type == TokenType::ChunkString || prev.token_type == TokenType::ChunkText,
+ // "{:?}",
+ // prev.token_type.to_owned()
+ // );
assert_eq!(prev.event_type, EventType::Enter);
- prev.next = Some(index);
+ prev.next = Some(next);
- let prev_ref = &events[index - 2];
- let prev_exit_ref = &events[index - 1];
+ let prev_ref = &events[previous];
+ let prev_exit_ref = &events[previous + 1];
assert_eq!(prev_exit_ref.event_type, EventType::Exit);
assert_eq!(prev_exit_ref.token_type, prev_ref.token_type);
- let curr = &mut events[index];
+ let curr = &mut events[next];
assert_eq!(curr.event_type, EventType::Enter);
- curr.previous = Some(index - 2);
+ curr.previous = Some(previous);
// Note: the exit of this event may not exist, so don’t check for that.
}
/// Parse linked events.
///
/// Supposed to be called repeatedly, returns `1: true` when done.
-pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
+pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) {
let mut index = 0;
// Map of first chunks to their tokenizer.
let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new();
@@ -83,7 +93,7 @@ pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool)
// Index into `events` pointing to a chunk.
let mut index_opt: Option<usize> = Some(index);
// Subtokenizer.
- let mut tokenizer = Tokenizer::new(event.point.clone(), event.index);
+ let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
// Substate.
let mut result: StateFnResult = (
State::Fn(Box::new(if event.token_type == TokenType::ChunkString {
@@ -115,7 +125,11 @@ pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool)
_ => unreachable!("cannot be ok/nok"),
};
- result = tokenizer.feed(span::codes(codes, &span), func, enter.next == None);
+ result = tokenizer.push(
+ span::codes(&parse_state.codes, &span),
+ func,
+ enter.next == None,
+ );
assert!(result.1.is_none(), "expected no remainder");
index_opt = enter.next;
}
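
A minimal model of what `link_to` maintains, with a stand-in `Event` so it runs on its own (the real function additionally asserts that the enter/exit pairs line up):

```rust
struct Event {
    previous: Option<usize>,
    next: Option<usize>,
}

// Same linking as `subtokenize::link_to`, minus the chunk assertions.
fn link_to(events: &mut [Event], previous: usize, next: usize) {
    events[previous].next = Some(next);
    events[next].previous = Some(previous);
}

fn main() {
    let mut events = vec![
        Event { previous: None, next: None },
        Event { previous: None, next: None },
        Event { previous: None, next: None },
    ];
    // The old `link(events, index)` is now `link_to(events, index - 2, index)`;
    // `partial_title` instead remembers `connect_index` and links to it.
    link_to(&mut events, 0, 2);
    assert_eq!(events[0].next, Some(2));
    assert_eq!(events[2].previous, Some(0));
}
```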
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7b71308..a692a4d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -15,6 +15,7 @@
use std::collections::HashMap;
use crate::constant::TAB_SIZE;
+use crate::parser::ParseState;
/// Semantic label of a span.
// To do: figure out how to share this so extensions can add their own stuff,
@@ -1073,6 +1074,32 @@ pub enum TokenType {
/// ^^^
/// ```
HtmlTextData,
+ /// Whole label start (image): `![`.
+ LabelImage,
+ /// Label start (image) marker: `!`.
+ LabelImageMarker,
+ /// Whole label start (link): `[`.
+ LabelLink,
+ /// Label start or label end marker: `[` or `]`.
+ LabelMarker,
+ /// Whole label end: `]`.
+ LabelEnd,
+ /// Whole resource: `(destination "title")`.
+ Resource,
+ /// Resource marker: `(` or `)`.
+ ResourceMarker,
+ /// Whole resource destination.
+ ResourceDestination,
+ /// Enclosed resource destination: `<destination>`.
+ ResourceDestinationLiteral,
+ /// Enclosed resource destination marker: `<` or `>`.
+ ResourceDestinationLiteralMarker,
+ /// Raw (not enclosed) resource destination.
+ ResourceDestinationRaw,
+ /// Resource destination text.
+ ResourceDestinationString,
+ /// Whole resource title: `"title"`, `'title'`, or `(title)`.
+ ResourceTitle,
+ /// Resource title marker: `"`, `'`, `(`, or `)`.
+ ResourceTitleMarker,
+ /// Resource title text.
+ ResourceTitleString,
+ /// Whole reference: `[label]` or `[]`.
+ Reference,
+ /// Reference marker: `[` or `]`.
+ ReferenceMarker,
+ /// Reference text.
+ ReferenceString,
+ /// Whole link (resolved).
+ Link,
+ /// Whole image (resolved).
+ Image,
+ /// Whole label of a link or image (resolved).
+ Label,
+ /// Text in the label of a link or image (resolved).
+ LabelText,
/// Line ending.
///
/// ## Info
@@ -1243,6 +1270,9 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult;
/// In certain cases, it can also yield back up parsed codes that were passed down.
pub type StateFnResult = (State, Option<Vec<Code>>);
+/// A resolver: a function that runs when the tokenizer is done, to rewrite
+/// the events.
+pub type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<Event>;
+
/// The result of a state.
pub enum State {
/// There is a future state: a boxed [`StateFn`][] to pass the next code to.
@@ -1253,6 +1283,30 @@ pub enum State {
Nok,
}
+/// A label start: a `[` or `![` that could, if matched with a label end,
+/// become a link or an image.
+#[derive(Debug)]
+pub struct LabelStart {
+ /// Indices of the enter and exit events of this label start.
+ pub start: (usize, usize),
+ /// A boolean used internally to figure out if a label start link can’t be
+ /// used (because links in links are incorrect).
+ pub inactive: bool,
+ /// A boolean used internally to figure out if a label is balanced: they’re
+ /// not media, it’s just balanced brackets.
+}
+
+/// A media object: a link or an image.
+#[derive(Debug)]
+pub struct Media {
+ /// Indices of the enter and exit events of the label start.
+ pub start: (usize, usize),
+ /// Indices of the enter event of the label end, and of the last event of
+ /// the whole media (such as the exit of its resource).
+ pub end: (usize, usize),
+ /// Normalized identifier of the label.
+ pub id: String,
+}
+
/// The internal state of a tokenizer, not to be confused with states from the
/// state machine, this instead is all the information about where we currently
/// are and what’s going on.
@@ -1272,9 +1326,10 @@ struct InternalState {
point: Point,
}
+// Note: `Debug` can no longer be derived: `resolvers` contains functions.
+
/// A tokenizer itself.
-#[derive(Debug)]
-pub struct Tokenizer {
+pub struct Tokenizer<'a> {
column_start: HashMap<usize, usize>,
/// Track whether a character is expected to be consumed, and whether it’s
/// actually consumed
@@ -1295,11 +1350,22 @@ pub struct Tokenizer {
index: usize,
/// Current relative and absolute place in the file.
point: Point,
+ /// Shared parsing state of the whole document.
+ pub parse_state: &'a ParseState,
+ /// Stack of label starts that could still match a label end.
+ pub label_start_stack: Vec<LabelStart>,
+ /// Label starts that can no longer match, to be turned back into data.
+ pub label_start_list_loose: Vec<LabelStart>,
+ /// Media (links and images) found so far.
+ pub media_list: Vec<Media>,
+ /// Resolvers to run, once, when tokenizing is done.
+ resolvers: Vec<Box<Resolver>>,
+ /// Ids of registered resolvers, to prevent double registration.
+ resolver_ids: Vec<String>,
}
-impl Tokenizer {
+impl<'a> Tokenizer<'a> {
/// Create a new tokenizer.
- pub fn new(point: Point, index: usize) -> Tokenizer {
+ pub fn new(point: Point, index: usize, parse_state: &'a ParseState) -> Tokenizer {
Tokenizer {
previous: Code::None,
current: Code::None,
@@ -1309,6 +1375,20 @@ impl Tokenizer {
point,
stack: vec![],
events: vec![],
+ parse_state,
+ label_start_stack: vec![],
+ label_start_list_loose: vec![],
+ media_list: vec![],
+ resolvers: vec![],
+ resolver_ids: vec![],
+ }
+ }
+
+ /// Register a resolver: a function that runs when the tokenizer is done,
+ /// unless a resolver with the same `id` was registered before.
+ pub fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) {
+ if !self.resolver_ids.contains(&id) {
+ self.resolver_ids.push(id);
+ self.resolvers.push(resolver);
}
}
@@ -1582,7 +1662,8 @@ impl Tokenizer {
/// This is set up to support repeatedly calling `feed`, and thus streaming
/// markdown into the state machine, and normally pauses after feeding.
/// When `done: true` is passed, the EOF is fed.
- pub fn feed(
+ // To do: call this `feed_impl`, and rename `push` to `feed`?
+ fn feed(
&mut self,
codes: &[Code],
start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
@@ -1643,6 +1724,26 @@ impl Tokenizer {
check_statefn_result((state, None))
}
+
+ /// Feed codes into the state machine, and, when draining, run the
+ /// registered resolvers afterwards.
+ // To do: set a `drained` to prevent passing after draining?
+ pub fn push(
+ &mut self,
+ codes: &[Code],
+ start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ drain: bool,
+ ) -> StateFnResult {
+ let result = self.feed(codes, start, drain);
+
+ if drain {
+ while !self.resolvers.is_empty() {
+ let resolver = self.resolvers.remove(0);
+ self.events = resolver(self);
+ }
+ }
+
+ result
+ }
}
/// Internal utility to wrap states to also capture codes.
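
A hedged, self-contained model of `register_resolver` plus the drain loop in `push`, with `String`s standing in for events:

```rust
type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<String>;

struct Tokenizer {
    events: Vec<String>,
    resolvers: Vec<Box<Resolver>>,
    resolver_ids: Vec<String>,
}

impl Tokenizer {
    fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) {
        // Registering the same id twice is fine: only the first wins.
        if !self.resolver_ids.contains(&id) {
            self.resolver_ids.push(id);
            self.resolvers.push(resolver);
        }
    }

    fn drain(&mut self) {
        // What `push` does after the EOF has been fed.
        while !self.resolvers.is_empty() {
            let resolver = self.resolvers.remove(0);
            self.events = resolver(self);
        }
    }
}

fn main() {
    let mut tokenizer = Tokenizer {
        events: vec!["Data:Enter".into(), "Data:Exit".into()],
        resolvers: vec![],
        resolver_ids: vec![],
    };
    let resolve = |t: &mut Tokenizer| {
        let mut events = t.events.clone();
        events.push("resolved".into());
        events
    };
    // Label starts and label ends both register “media”; it runs once.
    tokenizer.register_resolver("media".to_string(), Box::new(resolve));
    tokenizer.register_resolver("media".to_string(), Box::new(resolve));
    tokenizer.drain();
    assert_eq!(tokenizer.events.len(), 3);
}
```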
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index d66978e..55b15e4 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -115,7 +115,7 @@ fn normalize_uri(value: &str) -> String {
result.push(
buff[0..char.len_utf8()]
.iter()
- .map(|&byte| format!("%{:X}", byte))
+ .map(|&byte| format!("%{:>02X}", byte))
.collect::<String>(),
);
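
The one-character format-string change fixes percent-encoding of bytes below `0x10`, which previously lost their leading zero. A self-contained check:

```rust
fn main() {
    // Old: a single hex digit, which is not a valid percent-escape.
    assert_eq!(format!("%{:X}", 0x0Au8), "%A");
    // New: zero-padded to the two digits percent-encoding requires.
    assert_eq!(format!("%{:>02X}", 0x0Au8), "%0A");
}
```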
diff --git a/tests/character_escape.rs b/tests/character_escape.rs
index e4f23d2..3e3e839 100644
--- a/tests/character_escape.rs
+++ b/tests/character_escape.rs
@@ -61,12 +61,11 @@ fn character_escape() {
"should not escape in flow html"
);
- // To do: link (reference).
- // assert_eq!(
- // micromark("[foo](/bar\\* \"ti\\*tle\")"),
- // "<p><a href=\"/bar*\" title=\"ti*tle\">foo</a></p>",
- // "should escape in resource and title"
- // );
+ assert_eq!(
+ micromark("[foo](/bar\\* \"ti\\*tle\")"),
+ "<p><a href=\"/bar*\" title=\"ti*tle\">foo</a></p>",
+ "should escape in resource and title"
+ );
// To do: link (reference).
// assert_eq!(
diff --git a/tests/character_reference.rs b/tests/character_reference.rs
index 136ce17..3d2111e 100644
--- a/tests/character_reference.rs
+++ b/tests/character_reference.rs
@@ -55,14 +55,13 @@ fn character_reference() {
"should not care about character references in html"
);
- // To do: link (resource).
- // assert_eq!(
- // micromark("[foo](/f&ouml;&ouml; \"f&ouml;&ouml;\")"),
- // "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
- // "should support character references in resource URLs and titles"
- // );
+ assert_eq!(
+ micromark("[foo](/f&ouml;&ouml; \"f&ouml;&ouml;\")"),
+ "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
+ "should support character references in resource URLs and titles"
+ );
- // To do: link (resource).
+ // To do: link (reference).
// assert_eq!(
// micromark("[foo]: /f&ouml;&ouml; \"f&ouml;&ouml;\"\n\n[foo]"),
// "<p><a href=\"/f%C3%B6%C3%B6\" title=\"föö\">foo</a></p>",
@@ -101,12 +100,11 @@ fn character_reference() {
// "should not support character references as construct markers (2)"
// );
- // To do: link (resource).
- // assert_eq!(
- // micromark("[a](url &quot;tit&quot;)"),
- // "<p>[a](url &quot;tit&quot;)</p>",
- // "should not support character references as construct markers (3)"
- // );
+ assert_eq!(
+ micromark("[a](url &quot;tit&quot;)"),
+ "<p>[a](url &quot;tit&quot;)</p>",
+ "should not support character references as construct markers (3)"
+ );
assert_eq!(
micromark("foo&#10;&#10;bar"),
diff --git a/tests/image.rs b/tests/image.rs
new file mode 100644
index 0000000..a54c8d2
--- /dev/null
+++ b/tests/image.rs
@@ -0,0 +1,229 @@
+extern crate micromark;
+use micromark::micromark;
+
+#[test]
+fn image() {
+ assert_eq!(
+ micromark("[link](/uri \"title\")"),
+ "<p><a href=\"/uri\" title=\"title\">link</a></p>",
+ "should support links"
+ );
+ assert_eq!(
+ micromark("![foo](/url \"title\")"),
+ "<p><img src=\"/url\" alt=\"foo\" title=\"title\" /></p>",
+ "should support image w/ resource"
+ );
+
+ // To do: attention.
+ // assert_eq!(
+ // micromark("[foo *bar*]: train.jpg \"train & tracks\"\n\n![foo *bar*]"),
+ // "<p><img src=\"train.jpg\" alt=\"foo bar\" title=\"train &amp; tracks\" /></p>",
+ // "should support image as shortcut reference"
+ // );
+
+ // To do: tags in images.
+ // assert_eq!(
+ // micromark("![foo ![bar](/url)](/url2)"),
+ // "<p><img src=\"/url2\" alt=\"foo bar\" /></p>",
+ // "should “support” images in images"
+ // );
+
+ // To do: tags in images.
+ // assert_eq!(
+ // micromark("![foo [bar](/url)](/url2)"),
+ // "<p><img src=\"/url2\" alt=\"foo bar\" /></p>",
+ // "should “support” links in images"
+ // );
+
+ // To do: tags in images.
+ // assert_eq!(
+ // micromark("[foo *bar*]: train.jpg \"train & tracks\"\n\n![foo *bar*][]"),
+ // "<p><img src=\"train.jpg\" alt=\"foo bar\" title=\"train &amp; tracks\" /></p>",
+ // "should support “content” in images"
+ // );
+
+ // To do: tags in images, attention, references.
+ // assert_eq!(
+ // micromark("[FOOBAR]: train.jpg \"train & tracks\"\n\n![foo *bar*][foobar]"),
+ // "<p><img src=\"train.jpg\" alt=\"foo bar\" title=\"train &amp; tracks\" /></p>",
+ // "should support “content” in images"
+ // );
+
+ assert_eq!(
+ micromark("![foo](train.jpg)"),
+ "<p><img src=\"train.jpg\" alt=\"foo\" /></p>",
+ "should support images w/o title"
+ );
+
+ assert_eq!(
+ micromark("My ![foo bar](/path/to/train.jpg \"title\" )"),
+ "<p>My <img src=\"/path/to/train.jpg\" alt=\"foo bar\" title=\"title\" /></p>",
+ "should support images w/ lots of whitespace"
+ );
+
+ assert_eq!(
+ micromark("![foo](<url>)"),
+ "<p><img src=\"url\" alt=\"foo\" /></p>",
+ "should support images w/ enclosed destinations"
+ );
+
+ assert_eq!(
+ micromark("![](/url)"),
+ "<p><img src=\"/url\" alt=\"\" /></p>",
+ "should support images w/ empty labels"
+ );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[bar]: /url\n\n![foo][bar]"),
+ // "<p><img src=\"/url\" alt=\"foo\" /></p>",
+ // "should support full references (1)"
+ // );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[BAR]: /url\n\n![foo][bar]"),
+ // "<p><img src=\"/url\" alt=\"foo\" /></p>",
+ // "should support full references (2)"
+ // );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[foo]: /url \"title\"\n\n![foo][]"),
+ // "<p><img src=\"/url\" alt=\"foo\" title=\"title\" /></p>",
+ // "should support collapsed references (1)"
+ // );
+
+ // To do: references, attention, tags in images.
+ // assert_eq!(
+ // micromark("[*foo* bar]: /url \"title\"\n\n![*foo* bar][]"),
+ // "<p><img src=\"/url\" alt=\"foo bar\" title=\"title\" /></p>",
+ // "should support collapsed references (2)"
+ // );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[foo]: /url \"title\"\n\n![Foo][]"),
+ // "<p><img src=\"/url\" alt=\"Foo\" title=\"title\" /></p>",
+ // "should support case-insensitive labels"
+ // );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[foo]: /url \"title\"\n\n![foo] \n[]"),
+ // "<p><img src=\"/url\" alt=\"foo\" title=\"title\" />\n[]</p>",
+ // "should not support whitespace between sets of brackets"
+ // );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[foo]: /url \"title\"\n\n![foo]"),
+ // "<p><img src=\"/url\" alt=\"foo\" title=\"title\" /></p>",
+ // "should support shortcut references (1)"
+ // );
+
+ // To do: references, tags in images, attention.
+ // assert_eq!(
+ // micromark("[*foo* bar]: /url \"title\"\n\n![*foo* bar]"),
+ // "<p><img src=\"/url\" alt=\"foo bar\" title=\"title\" /></p>",
+ // "should support shortcut references (2)"
+ // );
+
+ assert_eq!(
+ micromark("[[foo]]: /url \"title\"\n\n![[foo]]"),
+ "<p>[[foo]]: /url &quot;title&quot;</p>\n<p>![[foo]]</p>",
+ "should not support link labels w/ unescaped brackets"
+ );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[foo]: /url \"title\"\n\n![Foo]"),
+ // "<p><img src=\"/url\" alt=\"Foo\" title=\"title\" /></p>",
+ // "should support case-insensitive label matching"
+ // );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[foo]: /url \"title\"\n\n!\\[foo]"),
+ // "<p>![foo]</p>",
+ // "should “support” an escaped bracket instead of an image"
+ // );
+
+ // To do: references.
+ // assert_eq!(
+ // micromark("[foo]: /url \"title\"\n\n\\![foo]"),
+ // "<p>!<a href=\"/url\" title=\"title\">foo</a></p>",
+ // "should support an escaped bang instead of an image, but still have a link"
+ // );
+
+ // Extra
+ assert_eq!(
+ micromark("![foo]()"),
+ "<p><img src=\"\" alt=\"foo\" /></p>",
+ "should support images w/o destination"
+ );
+
+ assert_eq!(
+ micromark("![foo](<>)"),
+ "<p><img src=\"\" alt=\"foo\" /></p>",
+ "should support images w/ explicit empty destination"
+ );
+
+ assert_eq!(
+ micromark("![](example.png)"),
+ "<p><img src=\"example.png\" alt=\"\" /></p>",
+ "should support images w/o alt"
+ );
+
+ assert_eq!(
+ micromark("![alpha](bravo.png \"\")"),
+ "<p><img src=\"bravo.png\" alt=\"alpha\" /></p>",
+ "should support images w/ empty title (1)"
+ );
+
+ assert_eq!(
+ micromark("![alpha](bravo.png '')"),
+ "<p><img src=\"bravo.png\" alt=\"alpha\" /></p>",
+ "should support images w/ empty title (2)"
+ );
+
+ assert_eq!(
+ micromark("![alpha](bravo.png ())"),
+ "<p><img src=\"bravo.png\" alt=\"alpha\" /></p>",
+ "should support images w/ empty title (3)"
+ );
+
+ assert_eq!(
+ micromark("![&amp;&copy;&](example.com/&amp;&copy;& \"&amp;&copy;&\")"),
+ "<p><img src=\"example.com/&amp;%C2%A9&amp;\" alt=\"&amp;©&amp;\" title=\"&amp;©&amp;\" /></p>",
+ "should support character references in images"
+ );
+
+ // Extra
+ // See: <https://github.com/commonmark/commonmark.js/issues/192>
+ assert_eq!(
+ micromark("![](<> \"\")"),
+ "<p><img src=\"\" alt=\"\" /></p>",
+ "should ignore an empty title"
+ );
+
+ // To do: extensions
+ // assert_eq!(
+ // micromark("![x]()", {extensions: [{disable: {null: ["labelStartImage"]}}]}),
+ // "<p>!<a href=\"\">x</a></p>",
+ // "should support turning off label start (image)"
+ // );
+
+ assert_eq!(
+ micromark("![](javascript:alert(1))"),
+ "<p><img src=\"\" alt=\"\" /></p>",
+ "should ignore non-http protocols by default"
+ );
+
+ // To do: extensions
+ // assert_eq!(
+ // micromark("![](javascript:alert(1))", {allowDangerousProtocol: true}),
+ // "<p><img src=\"javascript:alert(1)\" alt=\"\" /></p>",
+ // "should allow non-http protocols w/ `allowDangerousProtocol`"
+ // );
+}
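The `javascript:` test at the end is the image half of URL sanitizing: a destination is dropped when a colon appears before any `/`, `?`, or `#` and the scheme is not on the allow-list for `src`. A minimal sketch of that rule, with illustrative names, not the crate's actual `sanitize_uri` code:

```rust
// Sketch: is `url` safe against a protocol allow-list?
fn is_safe(url: &str, protocols: &[&str]) -> bool {
    match url.find(|c: char| matches!(c, ':' | '/' | '?' | '#')) {
        // A colon before any `/`, `?`, or `#` means `url` has a scheme…
        Some(index) if url.as_bytes()[index] == b':' => {
            let scheme = &url[..index];
            protocols.iter().any(|p| p.eq_ignore_ascii_case(scheme))
        }
        // …otherwise it is relative, or the colon is part of a path,
        // search, or fragment, and therefore safe.
        _ => true,
    }
}

fn main() {
    let src = ["http", "https"];
    assert!(is_safe("https://a", &src));
    assert!(is_safe("a#b:c", &src)); // colon after `#`
    assert!(!is_safe("javascript:alert(1)", &src)); // compiled as `src=""`
}
```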
diff --git a/tests/link_resource.rs b/tests/link_resource.rs
new file mode 100644
index 0000000..b1e1905
--- /dev/null
+++ b/tests/link_resource.rs
@@ -0,0 +1,464 @@
+extern crate micromark;
+use micromark::{micromark, micromark_with_options, Options};
+
+const DANGER: &Options = &Options {
+ allow_dangerous_html: true,
+ allow_dangerous_protocol: true,
+ default_line_ending: None,
+};
+
+#[test]
+fn link_resource() {
+ assert_eq!(
+ micromark("[link](/uri \"title\")"),
+ "<p><a href=\"/uri\" title=\"title\">link</a></p>",
+ "should support links"
+ );
+
+ assert_eq!(
+ micromark("[link](/uri)"),
+ "<p><a href=\"/uri\">link</a></p>",
+ "should support links w/o title"
+ );
+
+ assert_eq!(
+ micromark("[link]()"),
+ "<p><a href=\"\">link</a></p>",
+ "should support links w/o destination"
+ );
+
+ assert_eq!(
+ micromark("[link](<>)"),
+ "<p><a href=\"\">link</a></p>",
+ "should support links w/ empty enclosed destination"
+ );
+
+ assert_eq!(
+ micromark("[link](/my uri)"),
+ "<p>[link](/my uri)</p>",
+ "should not support links w/ spaces in destination"
+ );
+
+ assert_eq!(
+ micromark("[link](</my uri>)"),
+ "<p><a href=\"/my%20uri\">link</a></p>",
+ "should support links w/ spaces in enclosed destination"
+ );
+
+ assert_eq!(
+ micromark("[link](foo\nbar)"),
+ "<p>[link](foo\nbar)</p>",
+ "should not support links w/ line endings in destination"
+ );
+
+ assert_eq!(
+ micromark_with_options("[link](<foo\nbar>)", DANGER),
+ "<p>[link](<foo\nbar>)</p>",
+ "should not support links w/ line endings in enclosed destination"
+ );
+
+ assert_eq!(
+ micromark("[a](<b)c>)"),
+ "<p><a href=\"b)c\">a</a></p>",
+ "should support links w/ closing parens in destination"
+ );
+
+ assert_eq!(
+ micromark("[link](<foo\\>)"),
+ "<p>[link](&lt;foo&gt;)</p>",
+ "should not support links w/ enclosed destinations w/o end"
+ );
+
+ assert_eq!(
+ micromark_with_options("[a](<b)c\n[a](<b)c>\n[a](<b>c)", DANGER),
+ "<p>[a](&lt;b)c\n[a](&lt;b)c&gt;\n[a](<b>c)</p>",
+ "should not support links w/ unmatched enclosed destinations"
+ );
+
+ assert_eq!(
+ micromark("[link](\\(foo\\))"),
+ "<p><a href=\"(foo)\">link</a></p>",
+ "should support links w/ destinations w/ escaped parens"
+ );
+
+ assert_eq!(
+ micromark("[link](foo(and(bar)))"),
+ "<p><a href=\"foo(and(bar))\">link</a></p>",
+ "should support links w/ destinations w/ balanced parens"
+ );
+
+ assert_eq!(
+ micromark("[link](foo\\(and\\(bar\\))"),
+ "<p><a href=\"foo(and(bar)\">link</a></p>",
+ "should support links w/ destinations w/ escaped parens"
+ );
+
+ assert_eq!(
+ micromark("[link](<foo(and(bar)>)"),
+ "<p><a href=\"foo(and(bar)\">link</a></p>",
+ "should support links w/ enclosed destinations w/ parens"
+ );
+
+ assert_eq!(
+ micromark_with_options("[link](foo\\)\\:)", DANGER),
+ "<p><a href=\"foo):\">link</a></p>",
+ "should support links w/ escapes in destinations"
+ );
+
+ assert_eq!(
+ micromark("[link](#fragment)"),
+ "<p><a href=\"#fragment\">link</a></p>",
+ "should support links w/ destinations to fragments"
+ );
+
+ assert_eq!(
+ micromark("[link](http://example.com#fragment)"),
+ "<p><a href=\"http://example.com#fragment\">link</a></p>",
+ "should support links w/ destinations to URLs w/ fragments"
+ );
+
+ assert_eq!(
+ micromark("[link](http://example.com?foo=3#frag)"),
+ "<p><a href=\"http://example.com?foo=3#frag\">link</a></p>",
+ "should support links w/ destinations to URLs w/ search and fragments"
+ );
+
+ assert_eq!(
+ micromark("[link](foo\\bar)"),
+ "<p><a href=\"foo%5Cbar\">link</a></p>",
+ "should not support non-punctuation character escapes in links"
+ );
+
+ assert_eq!(
+ micromark("[link](foo%20b&auml;)"),
+ "<p><a href=\"foo%20b%C3%A4\">link</a></p>",
+ "should support character references in links"
+ );
+
+ assert_eq!(
+ micromark("[link](\"title\")"),
+ "<p><a href=\"%22title%22\">link</a></p>",
+ "should not support links w/ only a title"
+ );
+
+ assert_eq!(
+ micromark("[link](/url \"title\")"),
+ "<p><a href=\"/url\" title=\"title\">link</a></p>",
+ "should support titles w/ double quotes"
+ );
+
+ assert_eq!(
+ micromark("[link](/url 'title')"),
+ "<p><a href=\"/url\" title=\"title\">link</a></p>",
+ "should support titles w/ single quotes"
+ );
+
+ assert_eq!(
+ micromark("[link](/url (title))"),
+ "<p><a href=\"/url\" title=\"title\">link</a></p>",
+ "should support titles w/ parens"
+ );
+
+ assert_eq!(
+ micromark("[link](/url \"title \\\"&quot;\")"),
+ "<p><a href=\"/url\" title=\"title &quot;&quot;\">link</a></p>",
+ "should support character references and escapes in titles"
+ );
+
+ assert_eq!(
+ micromark("[link](/url \"title\")"),
+ "<p><a href=\"/url%C2%A0%22title%22\">link</a></p>",
+ "should not support unicode whitespace between destination and title"
+ );
+
+ assert_eq!(
+ micromark("[link](/url \"title \"and\" title\")"),
+ "<p>[link](/url &quot;title &quot;and&quot; title&quot;)</p>",
+ "should not support nested balanced quotes in title"
+ );
+
+ assert_eq!(
+ micromark("[link](/url 'title \"and\" title')"),
+ "<p><a href=\"/url\" title=\"title &quot;and&quot; title\">link</a></p>",
+ "should support the other quotes in titles"
+ );
+
+ assert_eq!(
+ micromark("[link]( /uri\n \"title\" )"),
+ "<p><a href=\"/uri\" title=\"title\">link</a></p>",
+ "should support whitespace around destination and title (1)"
+ );
+
+ assert_eq!(
+ micromark("[link](\t\n/uri \"title\")"),
+ "<p><a href=\"/uri\" title=\"title\">link</a></p>",
+ "should support whitespace around destination and title (2)"
+ );
+
+ assert_eq!(
+ micromark("[link](/uri \"title\"\t\n)"),
+ "<p><a href=\"/uri\" title=\"title\">link</a></p>",
+ "should support whitespace around destination and title (3)"
+ );
+
+ assert_eq!(
+ micromark("[link] (/uri)"),
+ "<p>[link] (/uri)</p>",
+ "should not support whitespace between label and resource"
+ );
+
+ assert_eq!(
+ micromark("[link [foo [bar]]](/uri)"),
+ "<p><a href=\"/uri\">link [foo [bar]]</a></p>",
+ "should support balanced brackets"
+ );
+
+ assert_eq!(
+ micromark("[link] bar](/uri)"),
+ "<p>[link] bar](/uri)</p>",
+ "should not support unbalanced brackets (1)"
+ );
+
+ assert_eq!(
+ micromark("[link [bar](/uri)"),
+ "<p>[link <a href=\"/uri\">bar</a></p>",
+ "should not support unbalanced brackets (2)"
+ );
+
+ assert_eq!(
+ micromark("[link \\[bar](/uri)"),
+ "<p><a href=\"/uri\">link [bar</a></p>",
+ "should support characer escapes"
+ );
+
+ // To do: attention.
+ // assert_eq!(
+ // micromark("[link *foo **bar** `#`*](/uri)"),
+ // "<p><a href=\"/uri\">link <em>foo <strong>bar</strong> <code>#</code></em></a></p>",
+ // "should support content"
+ // );
+
+ assert_eq!(
+ micromark("[![moon](moon.jpg)](/uri)"),
+ "<p><a href=\"/uri\"><img src=\"moon.jpg\" alt=\"moon\" /></a></p>",
+ "should support an image as content"
+ );
+
+ assert_eq!(
+ micromark("[foo [bar](/uri)](/uri)"),
+ "<p>[foo <a href=\"/uri\">bar</a>](/uri)</p>",
+ "should not support links in links (1)"
+ );
+
+ // To do: attention.
+ // assert_eq!(
+ // micromark("[foo *[bar [baz](/uri)](/uri)*](/uri)"),
+ // "<p>[foo <em>[bar <a href=\"/uri\">baz</a>](/uri)</em>](/uri)</p>",
+ // "should not support links in links (2)"
+ // );
+
+ // To do: tags in images.
+ // assert_eq!(
+ // micromark("![[[foo](uri1)](uri2)](uri3)"),
+ // "<p><img src=\"uri3\" alt=\"[foo](uri2)\" /></p>",
+ // "should not support links in links (3)"
+ // );
+
+ assert_eq!(
+ micromark("*[foo*](/uri)"),
+ "<p>*<a href=\"/uri\">foo*</a></p>",
+ "should prefer links over emphasis (1)"
+ );
+
+ assert_eq!(
+ micromark("[foo *bar](baz*)"),
+ "<p><a href=\"baz*\">foo *bar</a></p>",
+ "should prefer links over emphasis (2)"
+ );
+
+ assert_eq!(
+ micromark_with_options("[foo <bar attr=\"](baz)\">", DANGER),
+ "<p>[foo <bar attr=\"](baz)\"></p>",
+ "should prefer HTML over links"
+ );
+
+ assert_eq!(
+ micromark("[foo`](/uri)`"),
+ "<p>[foo<code>](/uri)</code></p>",
+ "should prefer code over links"
+ );
+
+ assert_eq!(
+ micromark("[foo<http://example.com/?search=](uri)>"),
+ "<p>[foo<a href=\"http://example.com/?search=%5D(uri)\">http://example.com/?search=](uri)</a></p>",
+ "should prefer autolinks over links"
+ );
+
+ // Extra
+ assert_eq!(
+ micromark("[]()"),
+ "<p><a href=\"\"></a></p>",
+ "should support an empty link"
+ );
+
+ // See: <https://github.com/commonmark/commonmark.js/issues/192>
+ assert_eq!(
+ micromark("[](<> \"\")"),
+ "<p><a href=\"\"></a></p>",
+ "should ignore an empty title"
+ );
+
+ assert_eq!(
+ micromark_with_options("[a](<b>\"c\")", DANGER),
+ "<p>[a](<b>&quot;c&quot;)</p>",
+ "should require whitespace between enclosed destination and title"
+ );
+
+ assert_eq!(
+ micromark("[](<"),
+ "<p>[](&lt;</p>",
+ "should not support an unclosed enclosed destination"
+ );
+
+ assert_eq!(
+ micromark("[]("),
+ "<p>[](</p>",
+ "should not support an unclosed destination"
+ );
+
+ assert_eq!(
+ micromark("[](\\<)"),
+ "<p><a href=\"%3C\"></a></p>",
+ "should support unenclosed link destination starting w/ escapes"
+ );
+
+ assert_eq!(
+ micromark("[](<\\<>)"),
+ "<p><a href=\"%3C\"></a></p>",
+ "should support enclosed link destination starting w/ escapes"
+ );
+
+ assert_eq!(
+ micromark("[](\\"),
+ "<p>[](\\</p>",
+ "should not support unenclosed link destination starting w/ an incorrect escape"
+ );
+
+ assert_eq!(
+ micromark("[](<\\"),
+ "<p>[](&lt;\\</p>",
+ "should not support enclosed link destination starting w/ an incorrect escape"
+ );
+
+ assert_eq!(
+ micromark("[](a \""),
+ "<p>[](a &quot;</p>",
+ "should not support an eof in a link title (1)"
+ );
+
+ assert_eq!(
+ micromark("[](a '"),
+ "<p>[](a '</p>",
+ "should not support an eof in a link title (2)"
+ );
+
+ assert_eq!(
+ micromark("[](a ("),
+ "<p>[](a (</p>",
+ "should not support an eof in a link title (3)"
+ );
+
+ assert_eq!(
+ micromark("[](a \"\\"),
+ "<p>[](a &quot;\\</p>",
+ "should not support an eof in a link title escape (1)"
+ );
+
+ assert_eq!(
+ micromark("[](a '\\"),
+ "<p>[](a '\\</p>",
+ "should not support an eof in a link title escape (2)"
+ );
+
+ assert_eq!(
+ micromark("[](a (\\"),
+ "<p>[](a (\\</p>",
+ "should not support an eof in a link title escape (3)"
+ );
+
+ assert_eq!(
+ micromark("[](a \"\\\"\")"),
+ "<p><a href=\"a\" title=\"&quot;\"></a></p>",
+ "should support a character escape to start a link title (1)"
+ );
+
+ assert_eq!(
+ micromark("[](a '\\'')"),
+ "<p><a href=\"a\" title=\"\'\"></a></p>",
+ "should support a character escape to start a link title (2)"
+ );
+
+ assert_eq!(
+ micromark("[](a (\\)))"),
+ "<p><a href=\"a\" title=\")\"></a></p>",
+ "should support a character escape to start a link title (3)"
+ );
+
+ assert_eq!(
+ micromark("[&amp;&copy;&](example.com/&amp;&copy;& \"&amp;&copy;&\")"),
+ "<p><a href=\"example.com/&amp;%C2%A9&amp;\" title=\"&amp;©&amp;\">&amp;©&amp;</a></p>",
+ "should support character references in links"
+ );
+
+ assert_eq!(
+ micromark("[a](1())"),
+ "<p><a href=\"1()\">a</a></p>",
+ "should support 1 set of parens"
+ );
+
+ assert_eq!(
+ micromark("[a](1(2()))"),
+ "<p><a href=\"1(2())\">a</a></p>",
+ "should support 2 sets of parens"
+ );
+
+ assert_eq!(
+ micromark(
+ "[a](1(2(3(4(5(6(7(8(9(10(11(12(13(14(15(16(17(18(19(20(21(22(23(24(25(26(27(28(29(30(31(32()))))))))))))))))))))))))))))))))"
+ ),
+ "<p><a href=\"1(2(3(4(5(6(7(8(9(10(11(12(13(14(15(16(17(18(19(20(21(22(23(24(25(26(27(28(29(30(31(32())))))))))))))))))))))))))))))))\">a</a></p>",
+ "should support 32 sets of parens"
+ );
+
+ assert_eq!(
+ micromark(
+ "[a](1(2(3(4(5(6(7(8(9(10(11(12(13(14(15(16(17(18(19(20(21(22(23(24(25(26(27(28(29(30(31(32(33())))))))))))))))))))))))))))))))))"
+ ),
+ "<p>[a](1(2(3(4(5(6(7(8(9(10(11(12(13(14(15(16(17(18(19(20(21(22(23(24(25(26(27(28(29(30(31(32(33())))))))))))))))))))))))))))))))))</p>",
+ "should not support 33 or more sets of parens"
+ );
+
+ assert_eq!(
+ micromark("[a](b \"\n c\")"),
+ "<p><a href=\"b\" title=\"\nc\">a</a></p>",
+ "should support an eol at the start of a title"
+ );
+
+ assert_eq!(
+ micromark("[a](b( \"c\")"),
+ "<p>[a](b( &quot;c&quot;)</p>",
+ "should not support whitespace when unbalanced in a raw destination"
+ );
+
+ assert_eq!(
+ micromark("[a](\0)"),
+ "<p><a href=\"%EF%BF%BD\">a</a></p>",
+ "should support a single NUL character as a link resource"
+ );
+}
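The 32/33 parens pair pins down the destination balance limit (presumably the new `LINK_RESOURCE_DESTINATION_BALANCE_MAX` constant in `src/constant.rs`): unescaped parens in a raw destination may nest, but only so deep. A sketch of the counting rule, ignoring character escapes for brevity:

```rust
// Sketch: a raw destination is rejected once unescaped `(` nest past the
// maximum; escapes such as `\(` are ignored here for brevity.
fn within_balance_limit(destination: &str, max: usize) -> bool {
    let mut depth = 0usize;
    for byte in destination.bytes() {
        match byte {
            b'(' => {
                depth += 1;
                if depth > max {
                    return false;
                }
            }
            b')' => depth = depth.saturating_sub(1),
            _ => {}
        }
    }
    true
}

fn main() {
    let deep = |n: usize| "(".repeat(n) + &")".repeat(n);
    assert!(within_balance_limit(&deep(32), 32)); // 32 deep: ok
    assert!(!within_balance_limit(&deep(33), 32)); // 33 deep: rejected
}
```

The final NUL assertion follows CommonMark's replacement of the insecure U+0000 by U+FFFD, whose UTF-8 bytes `EF BF BD` then percent-encode to the `%EF%BF%BD` in the expected href.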
diff --git a/tests/misc_dangerous_protocol.rs b/tests/misc_dangerous_protocol.rs
index 6f759e3..3aa042a 100644
--- a/tests/misc_dangerous_protocol.rs
+++ b/tests/misc_dangerous_protocol.rs
@@ -34,166 +34,164 @@ fn dangerous_protocol_autolink() {
);
}
-// To do: image.
-// #[test]
-// fn dangerous_protocol_image() {
-// assert_eq!(
-// micromark("![](javascript:alert(1))"),
-// "<p><img src=\"\" alt=\"\" /></p>",
-// "should be safe by default"
-// );
-
-// assert_eq!(
-// micromark("![](http://a)"),
-// "<p><img src=\"http://a\" alt=\"\" /></p>",
-// "should allow `http:`"
-// );
-
-// assert_eq!(
-// micromark("![](https://a)"),
-// "<p><img src=\"https://a\" alt=\"\" /></p>",
-// "should allow `https:`"
-// );
-
-// assert_eq!(
-// micromark("![](irc:///help)"),
-// "<p><img src=\"\" alt=\"\" /></p>",
-// "should not allow `irc:`"
-// );
-
-// assert_eq!(
-// micromark("![](mailto:a)"),
-// "<p><img src=\"\" alt=\"\" /></p>",
-// "should not allow `mailto:`"
-// );
-
-// assert_eq!(
-// micromark("![](#a)"),
-// "<p><img src=\"#a\" alt=\"\" /></p>",
-// "should allow a hash"
-// );
-
-// assert_eq!(
-// micromark("![](?a)"),
-// "<p><img src=\"?a\" alt=\"\" /></p>",
-// "should allow a search"
-// );
-
-// assert_eq!(
-// micromark("![](/a)"),
-// "<p><img src=\"/a\" alt=\"\" /></p>",
-// "should allow an absolute"
-// );
-
-// assert_eq!(
-// micromark("![](./a)"),
-// "<p><img src=\"./a\" alt=\"\" /></p>",
-// "should allow an relative"
-// );
-
-// assert_eq!(
-// micromark("![](../a)"),
-// "<p><img src=\"../a\" alt=\"\" /></p>",
-// "should allow an upwards relative"
-// );
-
-// assert_eq!(
-// micromark("![](a#b:c)"),
-// "<p><img src=\"a#b:c\" alt=\"\" /></p>",
-// "should allow a colon in a hash"
-// );
-
-// assert_eq!(
-// micromark("![](a?b:c)"),
-// "<p><img src=\"a?b:c\" alt=\"\" /></p>",
-// "should allow a colon in a search"
-// );
-
-// assert_eq!(
-// micromark("![](a/b:c)"),
-// "<p><img src=\"a/b:c\" alt=\"\" /></p>",
-// "should allow a colon in a path"
-// );
-// }
-
-// To do: link.
-// #[test]
-// fn dangerous_protocol_link() {
-// assert_eq!(
-// micromark("[](javascript:alert(1))"),
-// "<p><a href=\"\"></a></p>",
-// "should be safe by default"
-// );
-
-// assert_eq!(
-// micromark("[](http://a)"),
-// "<p><a href=\"http://a\"></a></p>",
-// "should allow `http:`"
-// );
-
-// assert_eq!(
-// micromark("[](https://a)"),
-// "<p><a href=\"https://a\"></a></p>",
-// "should allow `https:`"
-// );
-
-// assert_eq!(
-// micromark("[](irc:///help)"),
-// "<p><a href=\"irc:///help\"></a></p>",
-// "should allow `irc:`"
-// );
-
-// assert_eq!(
-// micromark("[](mailto:a)"),
-// "<p><a href=\"mailto:a\"></a></p>",
-// "should allow `mailto:`"
-// );
-
-// assert_eq!(
-// micromark("[](#a)"),
-// "<p><a href=\"#a\"></a></p>",
-// "should allow a hash"
-// );
-
-// assert_eq!(
-// micromark("[](?a)"),
-// "<p><a href=\"?a\"></a></p>",
-// "should allow a search"
-// );
-
-// assert_eq!(
-// micromark("[](/a)"),
-// "<p><a href=\"/a\"></a></p>",
-// "should allow an absolute"
-// );
-
-// assert_eq!(
-// micromark("[](./a)"),
-// "<p><a href=\"./a\"></a></p>",
-// "should allow an relative"
-// );
-
-// assert_eq!(
-// micromark("[](../a)"),
-// "<p><a href=\"../a\"></a></p>",
-// "should allow an upwards relative"
-// );
-
-// assert_eq!(
-// micromark("[](a#b:c)"),
-// "<p><a href=\"a#b:c\"></a></p>",
-// "should allow a colon in a hash"
-// );
-
-// assert_eq!(
-// micromark("[](a?b:c)"),
-// "<p><a href=\"a?b:c\"></a></p>",
-// "should allow a colon in a search"
-// );
-
-// assert_eq!(
-// micromark("[](a/b:c)"),
-// "<p><a href=\"a/b:c\"></a></p>",
-// "should allow a colon in a path"
-// );
-// }
+#[test]
+fn dangerous_protocol_image() {
+ assert_eq!(
+ micromark("![](javascript:alert(1))"),
+ "<p><img src=\"\" alt=\"\" /></p>",
+ "should be safe by default"
+ );
+
+ assert_eq!(
+ micromark("![](http://a)"),
+ "<p><img src=\"http://a\" alt=\"\" /></p>",
+ "should allow `http:`"
+ );
+
+ assert_eq!(
+ micromark("![](https://a)"),
+ "<p><img src=\"https://a\" alt=\"\" /></p>",
+ "should allow `https:`"
+ );
+
+ assert_eq!(
+ micromark("![](irc:///help)"),
+ "<p><img src=\"\" alt=\"\" /></p>",
+ "should not allow `irc:`"
+ );
+
+ assert_eq!(
+ micromark("![](mailto:a)"),
+ "<p><img src=\"\" alt=\"\" /></p>",
+ "should not allow `mailto:`"
+ );
+
+ assert_eq!(
+ micromark("![](#a)"),
+ "<p><img src=\"#a\" alt=\"\" /></p>",
+ "should allow a hash"
+ );
+
+ assert_eq!(
+ micromark("![](?a)"),
+ "<p><img src=\"?a\" alt=\"\" /></p>",
+ "should allow a search"
+ );
+
+ assert_eq!(
+ micromark("![](/a)"),
+ "<p><img src=\"/a\" alt=\"\" /></p>",
+ "should allow an absolute"
+ );
+
+ assert_eq!(
+ micromark("![](./a)"),
+ "<p><img src=\"./a\" alt=\"\" /></p>",
+ "should allow an relative"
+ );
+
+ assert_eq!(
+ micromark("![](../a)"),
+ "<p><img src=\"../a\" alt=\"\" /></p>",
+ "should allow an upwards relative"
+ );
+
+ assert_eq!(
+ micromark("![](a#b:c)"),
+ "<p><img src=\"a#b:c\" alt=\"\" /></p>",
+ "should allow a colon in a hash"
+ );
+
+ assert_eq!(
+ micromark("![](a?b:c)"),
+ "<p><img src=\"a?b:c\" alt=\"\" /></p>",
+ "should allow a colon in a search"
+ );
+
+ assert_eq!(
+ micromark("![](a/b:c)"),
+ "<p><img src=\"a/b:c\" alt=\"\" /></p>",
+ "should allow a colon in a path"
+ );
+}
+
+#[test]
+fn dangerous_protocol_link() {
+ assert_eq!(
+ micromark("[](javascript:alert(1))"),
+ "<p><a href=\"\"></a></p>",
+ "should be safe by default"
+ );
+
+ assert_eq!(
+ micromark("[](http://a)"),
+ "<p><a href=\"http://a\"></a></p>",
+ "should allow `http:`"
+ );
+
+ assert_eq!(
+ micromark("[](https://a)"),
+ "<p><a href=\"https://a\"></a></p>",
+ "should allow `https:`"
+ );
+
+ assert_eq!(
+ micromark("[](irc:///help)"),
+ "<p><a href=\"irc:///help\"></a></p>",
+ "should allow `irc:`"
+ );
+
+ assert_eq!(
+ micromark("[](mailto:a)"),
+ "<p><a href=\"mailto:a\"></a></p>",
+ "should allow `mailto:`"
+ );
+
+ assert_eq!(
+ micromark("[](#a)"),
+ "<p><a href=\"#a\"></a></p>",
+ "should allow a hash"
+ );
+
+ assert_eq!(
+ micromark("[](?a)"),
+ "<p><a href=\"?a\"></a></p>",
+ "should allow a search"
+ );
+
+ assert_eq!(
+ micromark("[](/a)"),
+ "<p><a href=\"/a\"></a></p>",
+ "should allow an absolute"
+ );
+
+ assert_eq!(
+ micromark("[](./a)"),
+ "<p><a href=\"./a\"></a></p>",
+ "should allow an relative"
+ );
+
+ assert_eq!(
+ micromark("[](../a)"),
+ "<p><a href=\"../a\"></a></p>",
+ "should allow an upwards relative"
+ );
+
+ assert_eq!(
+ micromark("[](a#b:c)"),
+ "<p><a href=\"a#b:c\"></a></p>",
+ "should allow a colon in a hash"
+ );
+
+ assert_eq!(
+ micromark("[](a?b:c)"),
+ "<p><a href=\"a?b:c\"></a></p>",
+ "should allow a colon in a search"
+ );
+
+ assert_eq!(
+ micromark("[](a/b:c)"),
+ "<p><a href=\"a/b:c\"></a></p>",
+ "should allow a colon in a path"
+ );
+}
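Side by side, the two functions pin down how the two allow-lists differ: `irc:` and `mailto:` are fine in `href` but not in `src`. Inferred from the assertions alone (the real constants in `src/constant.rs` may list more schemes):

```rust
// Allow-lists as the tests above exercise them; the actual constants may
// include additional schemes (e.g. `ircs` or `xmpp`).
const SAFE_PROTOCOL_HREF: [&str; 4] = ["http", "https", "irc", "mailto"];
const SAFE_PROTOCOL_SRC: [&str; 2] = ["http", "https"];

fn main() {
    assert!(SAFE_PROTOCOL_HREF.contains(&"mailto")); // `[](mailto:a)` keeps its href
    assert!(!SAFE_PROTOCOL_SRC.contains(&"mailto")); // `![](mailto:a)` gets `src=""`
}
```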
diff --git a/tests/misc_tabs.rs b/tests/misc_tabs.rs
index e9a0b72..568172e 100644
--- a/tests/misc_tabs.rs
+++ b/tests/misc_tabs.rs
@@ -221,48 +221,42 @@ fn tabs_text() {
// "should support an initial tab after a line ending in a paragraph"
// );
- // To do: link (reference).
- // assert_eq!(
- // micromark("x[\ty](z)"),
- // "<p>x<a href=\"z\">\ty</a></p>",
- // "should support an initial tab in a link label"
- // );
+ assert_eq!(
+ micromark("x[\ty](z)"),
+ "<p>x<a href=\"z\">\ty</a></p>",
+ "should support an initial tab in a link label"
+ );
- // To do: link (reference).
- // assert_eq!(
- // micromark("x[y\t](z)"),
- // "<p>x<a href=\"z\">y\t</a></p>",
- // "should support a final tab in a link label"
- // );
+ assert_eq!(
+ micromark("x[y\t](z)"),
+ "<p>x<a href=\"z\">y\t</a></p>",
+ "should support a final tab in a link label"
+ );
- // To do: link (reference).
- // assert_eq!(
- // micromark("[x\ty](z)"),
- // "<p><a href=\"z\">x\ty</a></p>",
- // "should support a tab in a link label"
- // );
+ assert_eq!(
+ micromark("[x\ty](z)"),
+ "<p><a href=\"z\">x\ty</a></p>",
+ "should support a tab in a link label"
+ );
- // To do: link (resource).
// Note: CM.js bug, see: <https://github.com/commonmark/commonmark.js/issues/191>
- // assert_eq!(
- // micromark("[x](\ty)"),
- // "<p><a href=\"y\">x</a></p>",
- // "should support a tab starting a link resource"
- // );
+ assert_eq!(
+ micromark("[x](\ty)"),
+ "<p><a href=\"y\">x</a></p>",
+ "should support a tab starting a link resource"
+ );
- // To do: link (resource).
- // assert_eq!(
- // micromark("[x](y\t)"),
- // "<p><a href=\"y\">x</a></p>",
- // "should support a tab ending a link resource"
- // );
+ assert_eq!(
+ micromark("[x](y\t)"),
+ "<p><a href=\"y\">x</a></p>",
+ "should support a tab ending a link resource"
+ );
- // To do: link (resource).
- // assert_eq!(
- // micromark("[x](y\t\"z\")"),
- // "<p><a href=\"y\" title=\"z\">x</a></p>",
- // "should support a tab between a link destination and title"
- // );
+ assert_eq!(
+ micromark("[x](y\t\"z\")"),
+ "<p><a href=\"y\" title=\"z\">x</a></p>",
+ "should support a tab between a link destination and title"
+ );
}
#[test]
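The rule these tests pin down: a tab inside a link label is literal text and survives into the output, while tabs around the destination and title are `space_or_tab` whitespace the parser eats. Only ASCII space and tab count; compare the Unicode-whitespace case in `link_resource.rs`. A minimal sketch of the trimming, with a hypothetical helper name:

```rust
// Sketch: resources trim only ASCII space and tab, not Unicode whitespace.
fn trim_space_or_tab(value: &str) -> &str {
    value.trim_matches(|c: char| c == ' ' || c == '\t')
}

fn main() {
    assert_eq!(trim_space_or_tab("\ty\t"), "y"); // `[x](\ty\t)` gives href="y"
    assert_eq!(trim_space_or_tab("\u{A0}y"), "\u{A0}y"); // NBSP is kept
}
```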
diff --git a/tests/misc_url.rs b/tests/misc_url.rs
index a6f8ead..5e94366 100644
--- a/tests/misc_url.rs
+++ b/tests/misc_url.rs
@@ -9,28 +9,25 @@ fn url() {
"should support incorrect percentage encoded values (0)"
);
- // To do: link.
- // assert_eq!(
- // micromark("[](<%>)"),
- // "<p><a href=\"%25\"></a></p>",
- // "should support incorrect percentage encoded values (1)"
- // );
-
- // To do: link.
- // assert_eq!(
- // micromark("[](<%%20>)"),
- // "<p><a href=\"%25%20\"></a></p>",
- // "should support incorrect percentage encoded values (2)"
- // );
-
- // To do: link.
- // assert_eq!(
- // micromark("[](<%a%20>)"),
- // "<p><a href=\"%25a%20\"></a></p>",
- // "should support incorrect percentage encoded values (3)"
- // );
+ assert_eq!(
+ micromark("[](<%>)"),
+ "<p><a href=\"%25\"></a></p>",
+ "should support incorrect percentage encoded values (1)"
+ );
- // Surrogate handling not needed in Rust.
+ assert_eq!(
+ micromark("[](<%%20>)"),
+ "<p><a href=\"%25%20\"></a></p>",
+ "should support incorrect percentage encoded values (2)"
+ );
+
+ assert_eq!(
+ micromark("[](<%a%20>)"),
+ "<p><a href=\"%25a%20\"></a></p>",
+ "should support incorrect percentage encoded values (3)"
+ );
+
+ // Note: Surrogate handling not needed in Rust.
// assert_eq!(
// micromark("[](<foo\u{D800}bar>)"),
// "<p><a href=\"foo%EF%BF%BDbar\"></a></p>",
@@ -114,39 +111,37 @@ fn url() {
// "should support a lone low surrogate at the end (highest)"
// );
- // To do: link.
- // assert_eq!(
- // micromark("[](<🤔>)"),
- // "<p><a href=\"%F0%9F%A4%94\"></a></p>",
- // "should support an emoji"
- // );
-
- // To do: link.
- // let mut ascii: Vec<char> = vec![];
- // let mut code = 0;
-
- // while code < 128 {
- // // LF and CR can’t be in resources.
- // if code == 10 || code == 13 {
- // code += 1;
- // continue;
- // }
-
- // // `<`, `>`, `\` need to be escaped.
- // if code == 60 || code == 62 || code == 92 {
- // ascii.push('\\');
- // }
-
- // ascii.push(char::from_u32(code).unwrap());
-
- // code += 1;
- // }
-
- // let ascii_in = ascii.into_iter().collect::<String>();
- // let ascii_out = "%EF%BF%BD%01%02%03%04%05%06%07%08%09%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22#$%25&amp;\"()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F";
- // assert_eq!(
- // micromark(&format!("[](<{}>)", ascii_in)),
- // format!("<p><a href=\"{}\"></a></p>", ascii_out),
- // "should support ascii characters"
- // );
+ assert_eq!(
+ micromark("[](<🤔>)"),
+ "<p><a href=\"%F0%9F%A4%94\"></a></p>",
+ "should support an emoji"
+ );
+
+ let mut ascii: Vec<char> = vec![];
+ let mut code = 0;
+
+ while code < 128 {
+ // LF and CR can’t be in resources.
+ if code == 10 || code == 13 {
+ code += 1;
+ continue;
+ }
+
+ // `<`, `>`, `\` need to be escaped.
+ if code == 60 || code == 62 || code == 92 {
+ ascii.push('\\');
+ }
+
+ ascii.push(char::from_u32(code).unwrap());
+
+ code += 1;
+ }
+
+ let ascii_in = ascii.into_iter().collect::<String>();
+ let ascii_out = "%EF%BF%BD%01%02%03%04%05%06%07%08%09%0B%0C%0E%0F%10%11%12%13%14%15%16%17%18%19%1A%1B%1C%1D%1E%1F%20!%22#$%25&amp;'()*+,-./0123456789:;%3C=%3E?@ABCDEFGHIJKLMNOPQRSTUVWXYZ%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz%7B%7C%7D~%7F";
+ assert_eq!(
+ micromark(&format!("[](<{}>)", ascii_in)),
+ format!("<p><a href=\"{}\"></a></p>", ascii_out),
+ "should support ascii characters"
+ );
}