Diffstat (limited to 'src')
-rw-r--r--  src/compiler.rs                        213
-rw-r--r--  src/constant.rs                          7
-rw-r--r--  src/construct/definition.rs            136
-rw-r--r--  src/construct/label_end.rs             712
-rw-r--r--  src/construct/label_start_image.rs      47
-rw-r--r--  src/construct/label_start_link.rs       30
-rw-r--r--  src/construct/mod.rs                    11
-rw-r--r--  src/construct/partial_destination.rs     3
-rw-r--r--  src/construct/partial_space_or_tab.rs   39
-rw-r--r--  src/construct/partial_title.rs          14
-rw-r--r--  src/content/flow.rs                     20
-rw-r--r--  src/content/text.rs                     17
-rw-r--r--  src/parser.rs                           20
-rw-r--r--  src/subtokenize.rs                      38
-rw-r--r--  src/tokenizer.rs                       111
-rw-r--r--  src/util/sanitize_uri.rs                 2
16 files changed, 1240 insertions, 180 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index cfe749a..11dea29 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -1,5 +1,5 @@
//! Turn events into a string of HTML.
-use crate::constant::SAFE_PROTOCOL_HREF;
+use crate::constant::{SAFE_PROTOCOL_HREF, SAFE_PROTOCOL_SRC};
use crate::construct::character_reference::Kind as CharacterReferenceKind;
use crate::tokenizer::{Code, Event, EventType, TokenType};
use crate::util::{
@@ -17,6 +17,23 @@ pub enum LineEnding {
LineFeed,
}
+/// To do.
+#[derive(Debug)]
+struct Media {
+ /// To do.
+ image: bool,
+ /// To do.
+ label_id: String,
+ /// To do.
+ label: String,
+ /// To do.
+ // reference_id: String,
+ /// To do.
+ destination: Option<String>,
+ /// To do.
+ title: Option<String>,
+}
+
impl LineEnding {
/// Turn the line ending into a [str].
fn as_str(&self) -> &str {
@@ -168,7 +185,13 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
} else {
Some(SAFE_PROTOCOL_HREF.to_vec())
};
+ let protocol_src = if options.allow_dangerous_protocol {
+ None
+ } else {
+ Some(SAFE_PROTOCOL_SRC.to_vec())
+ };
let mut line_ending_inferred: Option<LineEnding> = None;
+ let mut media_stack: Vec<Media> = vec![];
// let mut slurp_all_line_endings = false;
while index < events.len() {
@@ -257,7 +280,9 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
| TokenType::CodeFencedFenceMeta
| TokenType::Definition
| TokenType::HeadingAtxText
- | TokenType::HeadingSetextText => {
+ | TokenType::HeadingSetextText
+ | TokenType::Label
+ | TokenType::ResourceTitleString => {
buffer(buffers);
}
TokenType::CodeIndented => {
@@ -287,6 +312,56 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
ignore_encode = true;
}
}
+ TokenType::Image => {
+ media_stack.push(Media {
+ image: true,
+ label_id: "".to_string(),
+ label: "".to_string(),
+ // reference_id: "".to_string(),
+ destination: None,
+ title: None,
+ });
+ // tags = undefined // Disallow tags.
+ }
+ TokenType::Link => {
+ media_stack.push(Media {
+ image: false,
+ label_id: "".to_string(),
+ label: "".to_string(),
+ // reference_id: "".to_string(),
+ destination: None,
+ title: None,
+ });
+ }
+ TokenType::Resource => {
+ buffer(buffers); // We can have line endings in the resource, ignore them.
+ let media = media_stack.last_mut().unwrap();
+ media.destination = Some("".to_string());
+ }
+ TokenType::ResourceDestinationString => {
+ buffer(buffers);
+ // Ignore encoding the result, as we’ll first percent-encode the URL and
+ // encode manually after.
+ ignore_encode = true;
+ }
+ TokenType::LabelImage
+ | TokenType::LabelImageMarker
+ | TokenType::LabelLink
+ | TokenType::LabelMarker
+ | TokenType::LabelEnd
+ | TokenType::ResourceMarker
+ | TokenType::ResourceDestination
+ | TokenType::ResourceDestinationLiteral
+ | TokenType::ResourceDestinationLiteralMarker
+ | TokenType::ResourceDestinationRaw
+ | TokenType::ResourceTitle
+ | TokenType::ResourceTitleMarker
+ | TokenType::Reference
+ | TokenType::ReferenceMarker
+ | TokenType::ReferenceString
+ | TokenType::LabelText => {
+ println!("ignore labels for now");
+ }
TokenType::Paragraph => {
buf_tail_mut(buffers).push("<p>".to_string());
}
@@ -324,14 +399,88 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
| TokenType::SpaceOrTab => {
// Ignore.
}
+ TokenType::LabelImage
+ | TokenType::LabelImageMarker
+ | TokenType::LabelLink
+ | TokenType::LabelMarker
+ | TokenType::LabelEnd
+ | TokenType::ResourceMarker
+ | TokenType::ResourceDestination
+ | TokenType::ResourceDestinationLiteral
+ | TokenType::ResourceDestinationLiteralMarker
+ | TokenType::ResourceDestinationRaw
+ | TokenType::ResourceTitle
+ | TokenType::ResourceTitleMarker
+ | TokenType::Reference
+ | TokenType::ReferenceMarker
+ | TokenType::ReferenceString => {
+ println!("ignore labels for now");
+ }
+ TokenType::Label => {
+ let media = media_stack.last_mut().unwrap();
+ media.label = resume(buffers);
+ }
+ TokenType::LabelText => {
+ let media = media_stack.last_mut().unwrap();
+ media.label_id = serialize(codes, &from_exit_event(events, index), false);
+ }
+ TokenType::ResourceDestinationString => {
+ let media = media_stack.last_mut().unwrap();
+ media.destination = Some(resume(buffers));
+ ignore_encode = false;
+ }
+ TokenType::ResourceTitleString => {
+ let media = media_stack.last_mut().unwrap();
+ media.title = Some(resume(buffers));
+ }
+ TokenType::Image | TokenType::Link => {
+ // let mut is_in_image = false;
+ // let mut index = 0;
+ // Skip current.
+ // while index < (media_stack.len() - 1) {
+ // if media_stack[index].image {
+ // is_in_image = true;
+ // break;
+ // }
+ // index += 1;
+ // }
+
+ // tags = is_in_image;
+
+ let media = media_stack.pop().unwrap();
+ println!("media: {:?}", media);
+ let buf = buf_tail_mut(buffers);
+ // To do: get from definition.
+ let destination = media.destination.unwrap();
+ let title = if let Some(title) = media.title {
+ format!(" title=\"{}\"", title)
+ } else {
+ "".to_string()
+ };
+
+ if media.image {
+ buf.push(format!(
+ "<img src=\"{}\" alt=\"{}\"{} />",
+ sanitize_uri(&destination, &protocol_src),
+ media.label,
+ title
+ ));
+ } else {
+ buf.push(format!(
+ "<a href=\"{}\"{}>{}</a>",
+ sanitize_uri(&destination, &protocol_href),
+ title,
+ media.label
+ ));
+ }
+ }
// Just output it.
TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
// last_was_tag = false;
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
TokenType::AutolinkEmail => {
let slice = serialize(codes, &from_exit_event(events, index), false);
@@ -340,7 +489,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
"<a href=\"mailto:{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
- buf.push(encode(&slice));
+ buf.push(encode_opt(&slice, ignore_encode));
buf.push("</a>".to_string());
}
TokenType::AutolinkProtocol => {
@@ -350,7 +499,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
"<a href=\"{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
- buf.push(encode(&slice));
+ buf.push(encode_opt(&slice, ignore_encode));
buf.push("</a>".to_string());
}
TokenType::CharacterReferenceMarker => {
@@ -377,7 +526,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
CharacterReferenceKind::Named => decode_named(ref_string),
};
- buf_tail_mut(buffers).push(encode(&value));
+ buf_tail_mut(buffers).push(encode_opt(&value, ignore_encode));
character_reference_kind = None;
}
TokenType::CodeFenced | TokenType::CodeIndented => {
@@ -432,16 +581,15 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
// tag = true;
}
- TokenType::CodeFencedFenceMeta => {
+ TokenType::CodeFencedFenceMeta | TokenType::Resource => {
resume(buffers);
}
TokenType::CodeFlowChunk => {
code_flow_seen_data = Some(true);
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
TokenType::CodeText => {
let result = resume(buffers);
@@ -492,11 +640,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if let Some(buf) = atx_heading_buffer {
atx_heading_buffer = Some(
buf.to_string()
- + &encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )),
+ + &encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ),
);
}
@@ -512,14 +659,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if let Some(ref buf) = atx_heading_buffer {
if !buf.is_empty() {
- buf_tail_mut(buffers).push(encode(buf));
+ buf_tail_mut(buffers).push(encode_opt(buf, ignore_encode));
atx_heading_buffer = Some("".to_string());
}
} else {
atx_heading_buffer = Some("".to_string());
}
- buf_tail_mut(buffers).push(encode(&result));
+ buf_tail_mut(buffers).push(encode_opt(&result, ignore_encode));
}
TokenType::HeadingSetextText => {
heading_setext_buffer = Some(resume(buffers));
@@ -540,7 +687,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
TokenType::HtmlFlowData | TokenType::HtmlTextData => {
let slice = serialize(codes, &from_exit_event(events, index), false);
// last_was_tag = false;
- buf_tail_mut(buffers).push(if ignore_encode { slice } else { encode(&slice) });
+ buf_tail_mut(buffers).push(encode_opt(&slice, ignore_encode));
}
TokenType::LineEnding => {
// if slurp_all_line_endings {
@@ -549,11 +696,10 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
if slurp_one_line_ending {
slurp_one_line_ending = false;
} else {
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(encode_opt(
+ &serialize(codes, &from_exit_event(events, index), false),
+ ignore_encode,
+ ));
}
}
TokenType::Paragraph => {
@@ -605,6 +751,15 @@ fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> {
buffers.last().expect("at least one buffer should exist")
}
+/// To do.
+fn encode_opt(value: &str, ignore_encode: bool) -> String {
+ if ignore_encode {
+ value.to_string()
+ } else {
+ encode(value)
+ }
+}
+
/// Add a line ending.
fn line_ending(buffers: &mut [Vec<String>], default: &LineEnding) {
let tail = buf_tail_mut(buffers);
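The new `Image | Link` exit arm is where the media stack pays off: the popped `Media` becomes an `<img>` or `<a>` tag. A self-contained sketch of just that branch, omitting `sanitize_uri`, HTML-encoding, and the definition lookup that the diff still marks as to do:

```rust
#[derive(Debug)]
struct Media {
    image: bool,
    label: String,
    destination: Option<String>,
    title: Option<String>,
}

// Mirrors the `TokenType::Image | TokenType::Link` exit arm.
fn compile_media(media: Media) -> String {
    let destination = media.destination.unwrap_or_default();
    let title = media
        .title
        .map(|title| format!(" title=\"{}\"", title))
        .unwrap_or_default();

    if media.image {
        format!("<img src=\"{}\" alt=\"{}\"{} />", destination, media.label, title)
    } else {
        format!("<a href=\"{}\"{}>{}</a>", destination, title, media.label)
    }
}

fn main() {
    let media = Media {
        image: false,
        label: "alpha".to_string(),
        destination: Some("https://example.com".to_string()),
        title: Some("bravo".to_string()),
    };
    assert_eq!(
        compile_media(media),
        "<a href=\"https://example.com\" title=\"bravo\">alpha</a>"
    );
}
```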
diff --git a/src/constant.rs b/src/constant.rs
index 8e1acf3..5cb7826 100644
--- a/src/constant.rs
+++ b/src/constant.rs
@@ -193,6 +193,11 @@ pub const HTML_RAW_SIZE_MAX: usize = 8;
/// To safeguard performance, labels are capped at a large number: `999`.
pub const LINK_REFERENCE_SIZE_MAX: usize = 999;
+/// To do.
+/// See: <https://spec.commonmark.org/0.30/#link-destination>,
+/// <https://github.com/remarkjs/react-markdown/issues/658#issuecomment-984345577>.
+pub const LINK_RESOURCE_DESTINATION_BALANCE_MAX: usize = 32;
+
/// List of protocols allowed, when operating safely, as `href` on `a`.
///
/// This list is based on what is allowed by GitHub.
@@ -201,8 +206,6 @@ pub const SAFE_PROTOCOL_HREF: [&str; 6] = ["http", "https", "irc", "ircs", "mail
/// List of protocols allowed, when operating safely, as `src` on `img`.
///
/// This list is based on what is allowed by GitHub.
-// To do: image.
-#[allow(dead_code)]
pub const SAFE_PROTOCOL_SRC: [&str; 2] = ["http", "https"];
/// The number of characters that form a tab stop.
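A hypothetical helper to illustrate what the new constant caps: raw (unenclosed) resource destinations may contain balanced parentheses, but nesting beyond `LINK_RESOURCE_DESTINATION_BALANCE_MAX` is rejected so adversarial input (see the linked react-markdown issue) stays cheap to parse. This is the rule in isolation, not the parser’s actual state machine:

```rust
const LINK_RESOURCE_DESTINATION_BALANCE_MAX: usize = 32;

/// Whether a raw destination stays within the paren balance limit.
fn within_balance_limit(destination: &str) -> bool {
    let mut balance: usize = 0;
    for ch in destination.chars() {
        match ch {
            '(' => {
                balance += 1;
                if balance > LINK_RESOURCE_DESTINATION_BALANCE_MAX {
                    return false;
                }
            }
            ')' => {
                if balance == 0 {
                    return false;
                }
                balance -= 1;
            }
            _ => {}
        }
    }
    balance == 0
}

fn main() {
    assert!(within_balance_limit("a(b(c))d"));
    assert!(!within_balance_limit(&"(".repeat(33)));
}
```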
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 92d275c..674bd65 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -115,7 +115,7 @@
use crate::construct::{
partial_destination::{start as destination, Options as DestinationOptions},
partial_label::{start as label, Options as LabelOptions},
- partial_space_or_tab::space_or_tab,
+ partial_space_or_tab::{space_or_tab, space_or_tab_one_line_ending},
partial_title::{start as title, Options as TitleOptions},
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
@@ -168,7 +168,7 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.exit(TokenType::DefinitionMarker);
(
State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), marker_after),
+ tokenizer.go(space_or_tab_one_line_ending(), destination_before),
)),
None,
)
@@ -177,31 +177,6 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
-/// After the marker, after whitespace.
-///
-/// ```markdown
-/// [a]: |b "c"
-///
-/// [a]: |␊
-/// b "c"
-/// ```
-fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), destination_before),
- )),
- None,
- )
- }
- _ => destination_before(tokenizer, code),
- }
-}
-
/// Before a destination.
///
/// ```markdown
@@ -211,35 +186,23 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// |b "c"
/// ```
fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let event = tokenizer.events.last().unwrap();
-
- // Whitespace.
- if (event.token_type == TokenType::LineEnding || event.token_type == TokenType::SpaceOrTab)
- // Blank line not ok.
- && !matches!(
- code,
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
- ) {
- tokenizer.go(
- |t, c| {
- destination(
- t,
- c,
- DestinationOptions {
- limit: usize::MAX,
- destination: TokenType::DefinitionDestination,
- literal: TokenType::DefinitionDestinationLiteral,
- marker: TokenType::DefinitionDestinationLiteralMarker,
- raw: TokenType::DefinitionDestinationRaw,
- string: TokenType::DefinitionDestinationString,
- },
- )
- },
- destination_after,
- )(tokenizer, code)
- } else {
- (State::Nok, None)
- }
+ tokenizer.go(
+ |t, c| {
+ destination(
+ t,
+ c,
+ DestinationOptions {
+ limit: usize::MAX,
+ destination: TokenType::DefinitionDestination,
+ literal: TokenType::DefinitionDestinationLiteral,
+ marker: TokenType::DefinitionDestinationLiteralMarker,
+ raw: TokenType::DefinitionDestinationRaw,
+ string: TokenType::DefinitionDestinationString,
+ },
+ )
+ },
+ destination_after,
+ )(tokenizer, code)
}
/// After a destination.
@@ -289,32 +252,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// "c"
/// ```
fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab(), title_before_after_optional_whitespace)(tokenizer, code)
-}
-
-/// Before a title, after optional whitespace.
-///
-/// ```markdown
-/// [a]: b |"c"
-///
-/// [a]: b |␊
-/// "c"
-/// ```
-fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- match code {
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), title_before_marker),
- )),
- None,
- )
- }
- _ => title_before_marker(tokenizer, code),
- }
+ tokenizer.go(space_or_tab_one_line_ending(), title_before_marker)(tokenizer, code)
}
/// Before a title, after a line ending.
@@ -324,26 +262,20 @@ fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code)
/// | "c"
/// ```
fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let event = tokenizer.events.last().unwrap();
-
- if event.token_type == TokenType::LineEnding || event.token_type == TokenType::SpaceOrTab {
- tokenizer.go(
- |t, c| {
- title(
- t,
- c,
- TitleOptions {
- title: TokenType::DefinitionTitle,
- marker: TokenType::DefinitionTitleMarker,
- string: TokenType::DefinitionTitleString,
- },
- )
- },
- title_after,
- )(tokenizer, code)
- } else {
- (State::Nok, None)
- }
+ tokenizer.go(
+ |t, c| {
+ title(
+ t,
+ c,
+ TitleOptions {
+ title: TokenType::DefinitionTitle,
+ marker: TokenType::DefinitionTitleMarker,
+ string: TokenType::DefinitionTitleString,
+ },
+ )
+ },
+ title_after,
+ )(tokenizer, code)
}
/// After a title.
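The net effect of replacing the hand-rolled states with `space_or_tab_one_line_ending`: whitespace around the `:` marker and before the title may include at most one line ending, and a blank line still breaks the definition. Hypothetical cases, with `\n` standing in for any line ending:

```rust
fn main() {
    // One line ending is fine after the marker or before the title…
    let definitions = [
        "[a]: b \"c\"",  // everything on one line
        "[a]:\nb \"c\"", // line ending after the marker
        "[a]: b\n\"c\"", // line ending before the title
    ];
    // …but a blank line is not: it ends the (potential) definition.
    let not_definitions = ["[a]:\n\nb \"c\""];

    for case in definitions {
        println!("definition: {:?}", case);
    }
    for case in not_definitions {
        println!("not a definition: {:?}", case);
    }
}
```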
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
new file mode 100644
index 0000000..405858d
--- /dev/null
+++ b/src/construct/label_end.rs
@@ -0,0 +1,712 @@
+//! To do
+
+use crate::constant::LINK_RESOURCE_DESTINATION_BALANCE_MAX;
+use crate::construct::{
+ partial_destination::{start as destination, Options as DestinationOptions},
+ partial_label::{start as label, Options as LabelOptions},
+ partial_space_or_tab::space_or_tab_one_line_ending,
+ partial_title::{start as title, Options as TitleOptions},
+};
+use crate::tokenizer::{
+ Code, Event, EventType, LabelStart, Media, State, StateFnResult, TokenType, Tokenizer,
+};
+use crate::util::{
+ normalize_identifier::normalize_identifier,
+ span::{serialize, Span},
+};
+/// To do: could we do without `HashMap`, so we don’t need `std`?
+use std::collections::HashMap;
+
+#[derive(Debug)]
+struct Info {
+ /// To do.
+ label_start_index: usize,
+ /// To do.
+ media: Media,
+}
+
+/// Turn the collected label starts and media into events: `Link`/`Image`,
+/// `Label`, and `LabelText` around each used label, and plain `Data` for
+/// label starts that were never used.
+#[allow(clippy::too_many_lines)]
+pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut left: Vec<LabelStart> = tokenizer.label_start_list_loose.drain(..).collect();
+ let mut left_2: Vec<LabelStart> = tokenizer.label_start_stack.drain(..).collect();
+ let media: Vec<Media> = tokenizer.media_list.drain(..).collect();
+ left.append(&mut left_2);
+
+ let mut map: HashMap<usize, (usize, Vec<Event>)> = HashMap::new();
+ let events = &tokenizer.events;
+
+ let mut index = 0;
+ while index < left.len() {
+ let label_start = &left[index];
+ let data_enter_index = label_start.start.0;
+ let data_exit_index = label_start.start.1;
+
+ map.insert(
+ data_enter_index,
+ (
+ data_exit_index - data_enter_index,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::Data,
+ point: events[data_enter_index].point.clone(),
+ index: events[data_enter_index].index,
+ previous: None,
+ next: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::Data,
+ point: events[data_exit_index].point.clone(),
+ index: events[data_exit_index].index,
+ previous: None,
+ next: None,
+ },
+ ],
+ ),
+ );
+
+ index += 1;
+ }
+
+ let mut index = 0;
+ while index < media.len() {
+ let media = &media[index];
+ // LabelLink:Enter or LabelImage:Enter.
+ let group_enter_index = media.start.0;
+ let group_enter_event = &events[group_enter_index];
+ // LabelLink:Exit or LabelImage:Exit.
+ let text_enter_index = media.start.0
+ + (if group_enter_event.token_type == TokenType::LabelLink {
+ 4
+ } else {
+ 6
+ });
+ // LabelEnd:Enter.
+ let text_exit_index = media.end.0;
+ // LabelEnd:Exit.
+ let label_exit_index = media.end.0 + 3;
+ // Resource:Exit, etc.
+ let group_end_index = media.end.1;
+
+ // Insert a group enter and label enter.
+ add(
+ &mut map,
+ group_enter_index,
+ 0,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: if group_enter_event.token_type == TokenType::LabelLink {
+ TokenType::Link
+ } else {
+ TokenType::Image
+ },
+ point: group_enter_event.point.clone(),
+ index: group_enter_event.index,
+ previous: None,
+ next: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::Label,
+ point: group_enter_event.point.clone(),
+ index: group_enter_event.index,
+ previous: None,
+ next: None,
+ },
+ ],
+ );
+
+ // Empty events not allowed.
+ if text_enter_index != text_exit_index {
+ // Insert a text enter.
+ add(
+ &mut map,
+ text_enter_index,
+ 0,
+ vec![Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::LabelText,
+ point: events[text_enter_index].point.clone(),
+ index: events[text_enter_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+
+ // Insert a text exit.
+ add(
+ &mut map,
+ text_exit_index,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::LabelText,
+ point: events[text_exit_index].point.clone(),
+ index: events[text_exit_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+ }
+
+ // Insert a label exit.
+ add(
+ &mut map,
+ label_exit_index + 1,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::Label,
+ point: events[label_exit_index].point.clone(),
+ index: events[label_exit_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+
+ // Insert a group exit.
+ add(
+ &mut map,
+ group_end_index + 1,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::Link,
+ point: events[group_end_index].point.clone(),
+ index: events[group_end_index].index,
+ previous: None,
+ next: None,
+ }],
+ );
+
+ index += 1;
+ }
+
+ let mut indices: Vec<&usize> = map.keys().collect();
+ indices.sort_unstable();
+ let mut next_events: Vec<Event> = vec![];
+ let mut index_into_indices = 0;
+ let mut start = 0;
+ let events = &mut tokenizer.events;
+ let mut shift: i32 = 0;
+
+ while index_into_indices < indices.len() {
+ let index = *indices[index_into_indices];
+
+ if start < index {
+ let append = &mut events[start..index].to_vec();
+ let mut index = 0;
+
+ while index < append.len() {
+ let ev = &mut append[index];
+
+ if let Some(x) = ev.previous {
+ let next = (x as i32 + shift) as usize;
+ ev.previous = Some(next);
+ println!("todo: y: previous {:?} {:?} {:?}", x, shift, start);
+ }
+
+ if let Some(x) = ev.next {
+ let next = (x as i32 + shift) as usize;
+ ev.next = Some(next);
+ println!("todo: y: next {:?} {:?} {:?}", x, shift, start);
+ }
+
+ index += 1;
+ }
+
+ next_events.append(append);
+ }
+
+ let (remove, add) = map.get(&index).unwrap();
+ shift += (add.len() as i32) - (*remove as i32);
+
+ if !add.is_empty() {
+ let append = &mut add.clone();
+ let mut index = 0;
+
+ while index < append.len() {
+ let ev = &mut append[index];
+
+ if let Some(x) = ev.previous {
+ println!("todo: x: previous {:?} {:?} {:?}", x, shift, start);
+ }
+
+ if let Some(x) = ev.next {
+ println!("todo: x: next {:?} {:?} {:?}", x, shift, start);
+ }
+
+ index += 1;
+ }
+
+ next_events.append(append);
+ }
+
+ start = index + remove;
+ index_into_indices += 1;
+ }
+
+ if start < events.len() {
+ next_events.append(&mut events[start..].to_vec());
+ }
+
+ next_events
+}
+
+/// Start of label end.
+///
+/// ```markdown
+/// [a|](b) c
+/// [a|][b] c
+/// [a|][] b
+/// [a|] b
+///
+/// [a]: z
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ if Code::Char(']') == code {
+ let mut label_start_index: Option<usize> = None;
+ let mut index = tokenizer.label_start_stack.len();
+
+ while index > 0 {
+ index -= 1;
+
+ if !tokenizer.label_start_stack[index].balanced {
+ label_start_index = Some(index);
+ break;
+ }
+ }
+
+ // If there is an okay opening:
+ if let Some(label_start_index) = label_start_index {
+ let label_start = tokenizer
+ .label_start_stack
+ .get_mut(label_start_index)
+ .unwrap();
+
+ // Mark as balanced if the info is inactive.
+ if label_start.inactive {
+ return nok(tokenizer, code, label_start_index);
+ }
+
+ let label_end_start = tokenizer.events.len();
+ let info = Info {
+ label_start_index,
+ media: Media {
+ start: label_start.start,
+ end: (label_end_start, label_end_start + 3),
+ id: normalize_identifier(&serialize(
+ &tokenizer.parse_state.codes,
+ &Span {
+ start_index: tokenizer.events[label_start.start.1].index,
+ end_index: tokenizer.events[label_end_start - 1].index,
+ },
+ false,
+ )),
+ },
+ };
+
+ tokenizer.enter(TokenType::LabelEnd);
+ tokenizer.enter(TokenType::LabelMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelMarker);
+ tokenizer.exit(TokenType::LabelEnd);
+
+ return (State::Fn(Box::new(move |t, c| after(t, c, info))), None);
+ }
+ }
+
+ (State::Nok, None)
+}
+
+/// After `]`.
+///
+/// ```markdown
+/// [a]|(b) c
+/// [a]|[b] c
+/// [a]|[] b
+/// [a]| b
+///
+/// [a]: z
+/// ```
+fn after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ // let label_start = tokenizer
+ // .label_start_stack
+ // .get_mut(info.label_start_index)
+ // .unwrap();
+ // To do: figure out if defined or not.
+ let defined = false;
+ println!("to do: is `{:?}` defined?", info);
+ match code {
+ // Resource (`[asd](fgh)`)?
+ Code::Char('(') => tokenizer.attempt(resource, move |is_ok| {
+ Box::new(move |t, c| {
+ // Also fine if `defined`, as then it’s a valid shortcut.
+ if is_ok || defined {
+ ok(t, c, info)
+ } else {
+ nok(t, c, info.label_start_index)
+ }
+ })
+ })(tokenizer, code),
+ // Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
+ Code::Char('[') => tokenizer.attempt(full_reference, move |is_ok| {
+ Box::new(move |t, c| {
+ if is_ok {
+ ok(t, c, info)
+ } else if defined {
+ reference_not_full(t, c, info)
+ } else {
+ nok(t, c, info.label_start_index)
+ }
+ })
+ })(tokenizer, code),
+ // Shortcut reference: `[asd]`?
+ _ => {
+ if defined {
+ ok(tokenizer, code, info)
+ } else {
+ nok(tokenizer, code, info.label_start_index)
+ }
+ }
+ }
+}
+
+/// After `]`, at `[`, but not at a full reference.
+///
+/// > 👉 **Note**: we only get here if the label is defined.
+///
+/// ```markdown
+/// [a]|[] b
+///
+/// [a]: z
+/// ```
+fn reference_not_full(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ tokenizer.attempt(collapsed_reference, move |is_ok| {
+ Box::new(move |t, c| {
+ if is_ok {
+ ok(t, c, info)
+ } else {
+ nok(t, c, info.label_start_index)
+ }
+ })
+ })(tokenizer, code)
+}
+
+/// Done, we found something.
+///
+/// ```markdown
+/// [a](b)| c
+/// [a][b]| c
+/// [a][]| b
+/// [a]| b
+///
+/// [a]: z
+/// ```
+fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
+ println!(
+ "ok res, ref full, ref, collapsed, or ref shortcut: {:?}",
+ info.media
+ );
+ // Remove this one and everything after it.
+ let mut left: Vec<LabelStart> = tokenizer
+ .label_start_stack
+ .drain(info.label_start_index..)
+ .collect();
+ // Remove this one from `left`, as we’ll move it to `media_list`.
+ left.remove(0);
+ tokenizer.label_start_list_loose.append(&mut left);
+
+ let is_link = tokenizer.events[info.media.start.0].token_type == TokenType::LabelLink;
+
+ if is_link {
+ let mut index = 0;
+ while index < tokenizer.label_start_stack.len() {
+ let label_start = &mut tokenizer.label_start_stack[index];
+ if tokenizer.events[label_start.start.0].token_type == TokenType::LabelLink {
+ label_start.inactive = true;
+ }
+ index += 1;
+ }
+ }
+
+ info.media.end.1 = tokenizer.events.len() - 1;
+ tokenizer.media_list.push(info.media);
+ tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ (State::Ok, Some(vec![code]))
+}
+
+/// Done, it’s nothing.
+///
+/// There was an okay opening, but we didn’t match anything.
+///
+/// ```markdown
+/// [a]|(b c
+/// [a]|[b c
+/// [b]|[ c
+/// [b]| c
+///
+/// [a]: z
+/// ```
+fn nok(tokenizer: &mut Tokenizer, _code: Code, label_start_index: usize) -> StateFnResult {
+ let label_start = tokenizer
+ .label_start_stack
+ .get_mut(label_start_index)
+ .unwrap();
+ println!("just balanced brackets: {:?}", label_start);
+ label_start.balanced = true;
+ // To do: pop things off the list?
+ (State::Nok, None)
+}
+
+/// Before a resource, at `(`.
+///
+/// ```markdown
+/// [a]|(b) c
+/// ```
+fn resource(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('(') => {
+ tokenizer.enter(TokenType::Resource);
+ tokenizer.enter(TokenType::ResourceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ResourceMarker);
+ (State::Fn(Box::new(resource_start)), None)
+ }
+ _ => unreachable!("expected `(`"),
+ }
+}
+
+/// At the start of a resource, after `(`, before a definition.
+///
+/// ```markdown
+/// [a](|b) c
+/// ```
+fn resource_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_open)(tokenizer, code)
+}
+
+/// At the start of a resource, after optional whitespace.
+///
+/// ```markdown
+/// [a](|b) c
+/// ```
+fn resource_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(')') => resource_end(tokenizer, code),
+ _ => tokenizer.go(
+ |t, c| {
+ destination(
+ t,
+ c,
+ DestinationOptions {
+ limit: LINK_RESOURCE_DESTINATION_BALANCE_MAX,
+ destination: TokenType::ResourceDestination,
+ literal: TokenType::ResourceDestinationLiteral,
+ marker: TokenType::ResourceDestinationLiteralMarker,
+ raw: TokenType::ResourceDestinationRaw,
+ string: TokenType::ResourceDestinationString,
+ },
+ )
+ },
+ destination_after,
+ )(tokenizer, code),
+ }
+}
+
+/// In a resource, after a destination, before optional whitespace.
+///
+/// ```markdown
+/// [a](b|) c
+/// [a](b| "c") d
+/// ```
+fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt(space_or_tab_one_line_ending(), |ok| {
+ Box::new(if ok { resource_between } else { resource_end })
+ })(tokenizer, code)
+}
+
+/// In a resource, after a destination, after whitespace.
+///
+/// ```markdown
+/// [a](b |) c
+/// [a](b |"c") d
+/// ```
+fn resource_between(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('"' | '\'' | '(') => tokenizer.go(
+ |t, c| {
+ title(
+ t,
+ c,
+ TitleOptions {
+ title: TokenType::ResourceTitle,
+ marker: TokenType::ResourceTitleMarker,
+ string: TokenType::ResourceTitleString,
+ },
+ )
+ },
+ title_after,
+ )(tokenizer, code),
+ _ => resource_end(tokenizer, code),
+ }
+}
+
+/// In a resource, after a title.
+///
+/// ```markdown
+/// [a](b "c"|) d
+/// ```
+fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt_opt(space_or_tab_one_line_ending(), resource_end)(tokenizer, code)
+}
+
+/// In a resource, at the `)`.
+///
+/// ```markdown
+/// [a](b|) c
+/// [a](b |) c
+/// [a](b "c"|) d
+/// ```
+fn resource_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(')') => {
+ tokenizer.enter(TokenType::ResourceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ResourceMarker);
+ tokenizer.exit(TokenType::Resource);
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// In a reference (full), at the `[`.
+///
+/// ```markdown
+/// [a]|[b]
+/// ```
+fn full_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => tokenizer.go(
+ |t, c| {
+ label(
+ t,
+ c,
+ LabelOptions {
+ label: TokenType::Reference,
+ marker: TokenType::ReferenceMarker,
+ string: TokenType::ReferenceString,
+ },
+ )
+ },
+ full_reference_after,
+ )(tokenizer, code),
+ _ => unreachable!("expected `[`"),
+ }
+}
+
+/// In a reference (full), after `]`.
+///
+/// ```markdown
+/// [a][b]|
+/// ```
+fn full_reference_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ let events = &tokenizer.events;
+ let mut index = events.len() - 1;
+ let mut start: Option<usize> = None;
+ let mut end: Option<usize> = None;
+
+ while index > 0 {
+ index -= 1;
+ let event = &events[index];
+ if event.token_type == TokenType::ReferenceString {
+ if event.event_type == EventType::Exit {
+ end = Some(event.index);
+ } else {
+ start = Some(event.index);
+ break;
+ }
+ }
+ }
+
+ // Always found, otherwise we don’t get here.
+ let start = start.unwrap();
+ let end = end.unwrap();
+
+ let id = normalize_identifier(&serialize(
+ &tokenizer.parse_state.codes,
+ &Span {
+ start_index: start,
+ end_index: end,
+ },
+ false,
+ ));
+ println!("to do: is `{:?}` defined?", id);
+ let defined = false;
+
+ if defined {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+}
+
+/// In a reference (collapsed), at the `[`.
+///
+/// > 👉 **Note**: we only get here if the label is defined.
+///
+/// ```markdown
+/// [a]|[]
+/// ```
+fn collapsed_reference(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => {
+ tokenizer.enter(TokenType::Reference);
+ tokenizer.enter(TokenType::ReferenceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ReferenceMarker);
+ (State::Fn(Box::new(collapsed_reference_open)), None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// In a reference (collapsed), at the `]`.
+///
+/// > 👉 **Note**: we only get here if the label is defined.
+///
+/// ```markdown
+/// [a][|]
+/// ```
+fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(']') => {
+ tokenizer.enter(TokenType::ReferenceMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ReferenceMarker);
+ tokenizer.exit(TokenType::Reference);
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// Add an edit (`remove` events to delete, `add` events to insert) to the
+/// edit map at `index`, merging with any edit already registered there.
+pub fn add(
+ map: &mut HashMap<usize, (usize, Vec<Event>)>,
+ index: usize,
+ mut remove: usize,
+ mut add: Vec<Event>,
+) {
+ let curr = map.remove(&index);
+
+ if let Some((curr_rm, mut curr_add)) = curr {
+ remove += curr_rm;
+ curr_add.append(&mut add);
+ add = curr_add;
+ }
+
+ map.insert(index, (remove, add));
+}
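The `add` helper at the bottom builds the edit map that `resolve_media` splices with: per event index, how many events to remove and which to insert, merging edits that land on the same index. Its merge behavior in a self-contained sketch, with `&str` standing in for `Event`:

```rust
use std::collections::HashMap;

fn add(
    map: &mut HashMap<usize, (usize, Vec<&'static str>)>,
    index: usize,
    mut remove: usize,
    mut add: Vec<&'static str>,
) {
    // Edits at the same index merge: removals sum, insertions concatenate.
    if let Some((curr_remove, mut curr_add)) = map.remove(&index) {
        remove += curr_remove;
        curr_add.append(&mut add);
        add = curr_add;
    }

    map.insert(index, (remove, add));
}

fn main() {
    let mut map = HashMap::new();
    add(&mut map, 3, 0, vec!["Link:Enter", "Label:Enter"]);
    add(&mut map, 3, 1, vec!["LabelText:Enter"]);
    assert_eq!(
        map[&3],
        (1, vec!["Link:Enter", "Label:Enter", "LabelText:Enter"])
    );
}
```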
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
new file mode 100644
index 0000000..2e96977
--- /dev/null
+++ b/src/construct/label_start_image.rs
@@ -0,0 +1,47 @@
+//! To do
+
+use super::label_end::resolve_media;
+use crate::tokenizer::{Code, LabelStart, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of label (image) start.
+///
+/// ```markdown
+/// a |![ b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('!') => {
+ tokenizer.enter(TokenType::LabelImage);
+ tokenizer.enter(TokenType::LabelImageMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelImageMarker);
+ (State::Fn(Box::new(open)), None)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// After `!`, before a `[`.
+///
+/// ```markdown
+/// a !|[ b
+/// ```
+pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => {
+ tokenizer.enter(TokenType::LabelMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelMarker);
+ tokenizer.exit(TokenType::LabelImage);
+ let end = tokenizer.events.len() - 1;
+ tokenizer.label_start_stack.push(LabelStart {
+ start: (end - 5, end),
+ balanced: false,
+ inactive: false,
+ });
+ tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
new file mode 100644
index 0000000..35c9dcd
--- /dev/null
+++ b/src/construct/label_start_link.rs
@@ -0,0 +1,30 @@
+//! To do
+
+use super::label_end::resolve_media;
+use crate::tokenizer::{Code, LabelStart, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of label (link) start.
+///
+/// ```markdown
+/// a |[ b
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char('[') => {
+ let start = tokenizer.events.len();
+ tokenizer.enter(TokenType::LabelLink);
+ tokenizer.enter(TokenType::LabelMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LabelMarker);
+ tokenizer.exit(TokenType::LabelLink);
+ tokenizer.label_start_stack.push(LabelStart {
+ start: (start, tokenizer.events.len() - 1),
+ balanced: false,
+ inactive: false,
+ });
+ tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ (State::Ok, None)
+ }
+ _ => (State::Nok, None),
+ }
+}
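Both label start constructs push onto `label_start_stack`, and `label_end::start` later walks that stack from the top looking for the closest start not yet marked balanced. That scan in isolation, with a simplified `LabelStart` (the real one also carries `start` and `inactive`):

```rust
struct LabelStart {
    balanced: bool,
}

// Mirror of the loop at the top of `label_end::start`.
fn closest_open(stack: &[LabelStart]) -> Option<usize> {
    let mut index = stack.len();
    while index > 0 {
        index -= 1;
        if !stack[index].balanced {
            return Some(index);
        }
    }
    None
}

fn main() {
    let stack = vec![
        LabelStart { balanced: false },
        LabelStart { balanced: true },
    ];
    // The topmost start is balanced, so the one below it is used.
    assert_eq!(closest_open(&stack), Some(0));
}
```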
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 9e5da0e..8565b2f 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -30,9 +30,9 @@
//! * [heading (setext)][heading_setext]
//! * [html (flow)][html_flow]
//! * [html (text)][html_text]
-//! * label end
-//! * label start (image)
-//! * label start (link)
+//! * [label end][label_end]
+//! * [label start (image)][label_start_image]
+//! * [label start (link)][label_start_link]
//! * list
//! * [paragraph][]
//! * [thematic break][thematic_break]
@@ -59,8 +59,6 @@
//! They also contain references to character as defined by [char][], so for
//! example `ascii_punctuation` refers to
//! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
-//!
-//!
pub mod autolink;
pub mod blank_line;
@@ -76,6 +74,9 @@ pub mod heading_atx;
pub mod heading_setext;
pub mod html_flow;
pub mod html_text;
+pub mod label_end;
+pub mod label_start_image;
+pub mod label_start_link;
pub mod paragraph;
pub mod partial_data;
pub mod partial_destination;
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 03dcbee..7887a44 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -267,11 +267,10 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
/// ```markdown
/// a\|)b
/// ```
-fn raw_escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
+fn raw_escape(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('(' | ')' | '\\') => {
tokenizer.consume(code);
- info.balance += 1;
(State::Fn(Box::new(move |t, c| raw(t, c, info))), None)
}
_ => raw(tokenizer, code, info),
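The dropped `info.balance += 1` fixes the balance count: an escaped `(` or `)` is consumed literally and must not affect how many parentheses are open. The corrected rule as a hypothetical stand-alone counter, not the actual state machine:

```rust
fn balance_of(raw: &str) -> i32 {
    let mut balance = 0;
    let mut chars = raw.chars();
    while let Some(ch) = chars.next() {
        match ch {
            // An escape consumes the next character with no effect on the
            // balance (the removed `info.balance += 1` wrongly counted these).
            '\\' => {
                chars.next();
            }
            '(' => balance += 1,
            ')' => balance -= 1,
            _ => {}
        }
    }
    balance
}

fn main() {
    assert_eq!(balance_of(r"a(b)c"), 0);
    // The escaped `)` is literal: nothing is opened or closed.
    assert_eq!(balance_of(r"a\)b"), 0);
}
```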
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 024a4b2..43bdc53 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -35,6 +35,45 @@ pub fn space_or_tab() -> Box<StateFn> {
space_or_tab_min_max(1, usize::MAX)
}
+/// `space_or_tab`, or a line ending surrounded by optional `space_or_tab`,
+/// but not a blank line: the line ending must be followed by something other
+/// than another line ending or EOF.
+pub fn space_or_tab_one_line_ending() -> Box<StateFn> {
+ Box::new(|tokenizer, code| {
+ tokenizer.attempt(space_or_tab(), move |ok| {
+ Box::new(move |tokenizer, code| match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.enter(TokenType::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (
+ State::Fn(Box::new(tokenizer.attempt_opt(
+ space_or_tab(),
+ move |_t, code| {
+ if !matches!(
+ code,
+ Code::None
+ | Code::CarriageReturnLineFeed
+ | Code::Char('\r' | '\n')
+ ) {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+ },
+ ))),
+ None,
+ )
+ }
+ _ => {
+ if ok {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+ }
+ })
+ })(tokenizer, code)
+ })
+}
+
/// Between `x` and `y` `space_or_tab`
///
/// ```bnf
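What `space_or_tab_one_line_ending` accepts, reduced to a predicate: spaces and tabs with at most one line ending, where a line ending must not be followed by another line ending or EOF (no blank lines). A simplified sketch handling `\n` only; the real code also covers `\r` and `\r\n`:

```rust
fn accepts(whitespace: &str, next: Option<char>) -> bool {
    let mut endings = 0;
    for ch in whitespace.chars() {
        match ch {
            ' ' | '\t' => {}
            '\n' => endings += 1,
            _ => return false,
        }
    }
    // At least something, at most one line ending, and a line ending must
    // not sit right before another line ending or EOF.
    !whitespace.is_empty()
        && endings <= 1
        && (endings == 0 || !matches!(next, None | Some('\n')))
}

fn main() {
    assert!(accepts("  ", Some('b')));
    assert!(accepts(" \n  ", Some('b')));
    assert!(!accepts(" \n\n", Some('b'))); // blank line
    assert!(!accepts(" \n", None)); // line ending at EOF
}
```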
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 3e61788..78ae311 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -32,7 +32,7 @@
//! <!-- To do: link label end. -->
use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::subtokenize::link;
+use crate::subtokenize::link_to;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
@@ -109,7 +109,7 @@ impl Kind {
#[derive(Debug)]
struct Info {
/// Whether we’ve seen our first `ChunkString`.
- connect: bool,
+ connect_index: Option<usize>,
/// Kind of title.
kind: Kind,
/// Configuration.
@@ -125,9 +125,9 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFnResult {
match code {
- Code::Char(char) if char == '(' || char == '"' || char == '\'' => {
+ Code::Char(char) if char == '"' || char == '\'' || char == '(' => {
let info = Info {
- connect: false,
+ connect_index: None,
kind: Kind::from_char(char),
options,
};
@@ -184,11 +184,11 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
_ => {
tokenizer.enter(TokenType::ChunkString);
- if info.connect {
+ if let Some(connect_index) = info.connect_index {
let index = tokenizer.events.len() - 1;
- link(&mut tokenizer.events, index);
+ link_to(&mut tokenizer.events, connect_index, index);
} else {
- info.connect = true;
+ info.connect_index = Some(tokenizer.events.len() - 1);
}
title(tokenizer, code, info)
diff --git a/src/content/flow.rs b/src/content/flow.rs
index e71d25a..546712f 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -26,6 +26,7 @@ use crate::construct::{
html_flow::start as html_flow, paragraph::start as paragraph,
thematic_break::start as thematic_break,
};
+use crate::parser::ParseState;
use crate::subtokenize::subtokenize;
use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
use crate::util::{
@@ -34,9 +35,10 @@ use crate::util::{
};
/// Turn `codes` as the flow content type into events.
-pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
- let mut tokenizer = Tokenizer::new(point, index);
- tokenizer.feed(codes, Box::new(start), true);
+pub fn flow(parse_state: &ParseState, point: Point, index: usize) -> Vec<Event> {
+ let mut tokenizer = Tokenizer::new(point, index, parse_state);
+
+ tokenizer.push(&parse_state.codes, Box::new(start), true);
let mut index = 0;
@@ -47,9 +49,14 @@ pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
&& event.token_type == TokenType::DefinitionLabelString
{
let id = normalize_identifier(
- serialize(codes, &from_exit_event(&tokenizer.events, index), false).as_str(),
+ serialize(
+ &parse_state.codes,
+ &from_exit_event(&tokenizer.events, index),
+ false,
+ )
+ .as_str(),
);
- println!("to do: use identifier {:?}", id);
+ println!("to do: use definition identifier {:?}", id);
}
index += 1;
@@ -58,8 +65,9 @@ pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
let mut result = (tokenizer.events, false);
while !result.1 {
- result = subtokenize(result.0, codes);
+ result = subtokenize(result.0, parse_state);
}
+
result.0
}
diff --git a/src/content/text.rs b/src/content/text.rs
index 1224064..5718617 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -21,15 +21,19 @@ use crate::construct::{
character_reference::start as character_reference, code_text::start as code_text,
hard_break_escape::start as hard_break_escape,
hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
- partial_data::start as data,
+ label_end::start as label_end, label_start_image::start as label_start_image,
+ label_start_link::start as label_start_link, partial_data::start as data,
};
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
-const MARKERS: [Code; 5] = [
+const MARKERS: [Code; 8] = [
Code::Char(' '), // `hard_break_trailing`
+ Code::Char('!'), // `label_start_image`
Code::Char('&'), // `character_reference`
Code::Char('<'), // `autolink`, `html_text`
+ Code::Char('['), // `label_start_link`
Code::Char('\\'), // `character_escape`, `hard_break_escape`
+ Code::Char(']'), // `label_end`
Code::Char('`'), // `code_text`
];
@@ -47,13 +51,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Code::None => (State::Ok, None),
_ => tokenizer.attempt_n(
vec![
- Box::new(character_reference),
+ Box::new(autolink),
Box::new(character_escape),
+ Box::new(character_reference),
+ Box::new(code_text),
Box::new(hard_break_escape),
Box::new(hard_break_trailing),
- Box::new(autolink),
Box::new(html_text),
- Box::new(code_text),
+ Box::new(label_end),
+ Box::new(label_start_image),
+ Box::new(label_start_link),
],
|ok| Box::new(if ok { start } else { before_data }),
)(tokenizer, code),
diff --git a/src/parser.rs b/src/parser.rs
index 49d99d3..32b7f36 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4,14 +4,24 @@
use crate::content::flow::flow;
use crate::tokenizer::{as_codes, Code, Event, Point};
+pub struct ParseState {
+ /// To do.
+ pub codes: Vec<Code>,
+ /// To do.
+ pub definitions: Vec<String>,
+}
+
/// Turn a string of markdown into events.
///
/// Passes the codes back so the compiler can access the source.
pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) {
- let codes = as_codes(value);
- // To do: pass a reference to this around, and slices in the (back)feeding. Might be tough.
+ let parse_state = ParseState {
+ codes: as_codes(value),
+ definitions: vec![],
+ };
+
let events = flow(
- &codes,
+ &parse_state,
Point {
line: 1,
column: 1,
@@ -19,5 +29,7 @@ pub fn parse(value: &str) -> (Vec<Event>, Vec<Code>) {
},
0,
);
- (events, codes)
+
+ // To do: pass whole `parse_state` back?
+ (events, parse_state.codes)
}
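`ParseState` bundles everything the tokenizers need to share (the codes now, definitions next) so a single immutable reference can be passed around instead of threading `&[Code]` everywhere. In miniature, with stand-in types:

```rust
// One shared, read-only parse state, borrowed by every (sub)tokenizer.
struct ParseState {
    codes: Vec<char>, // stand-in for `Vec<Code>`
    definitions: Vec<String>,
}

struct Tokenizer<'a> {
    parse_state: &'a ParseState,
}

fn main() {
    let parse_state = ParseState {
        codes: "[a](b)".chars().collect(),
        definitions: vec![],
    };
    // Any number of tokenizers can borrow the same state.
    let flow = Tokenizer { parse_state: &parse_state };
    let text = Tokenizer { parse_state: &parse_state };
    assert_eq!(flow.parse_state.codes.len(), 6);
    assert_eq!(text.parse_state.definitions.len(), 0);
}
```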
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 4ee2242..58db3c6 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -28,9 +28,8 @@
use std::collections::HashMap;
use crate::content::{string::start as string, text::start as text};
-use crate::tokenizer::{
- Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer,
-};
+use crate::parser::ParseState;
+use crate::tokenizer::{Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer};
use crate::util::span;
/// Create a link between two [`Event`][]s.
@@ -39,25 +38,36 @@ use crate::util::span;
/// This optimizes for the common case where the token at `index` is connected
/// to the previous void token.
pub fn link(events: &mut [Event], index: usize) {
- let prev = &mut events[index - 2];
+ link_to(events, index - 2, index);
+}
+
+/// To do
+pub fn link_to(events: &mut [Event], previous: usize, next: usize) {
+ let prev = &mut events[previous];
+ // To do: force chunks?
+ // assert!(
+ // prev.token_type == TokenType::ChunkString || prev.token_type == TokenType::ChunkText,
+ // "{:?}",
+ // prev.token_type.to_owned()
+ // );
assert_eq!(prev.event_type, EventType::Enter);
- prev.next = Some(index);
+ prev.next = Some(next);
- let prev_ref = &events[index - 2];
- let prev_exit_ref = &events[index - 1];
+ let prev_ref = &events[previous];
+ let prev_exit_ref = &events[previous + 1];
assert_eq!(prev_exit_ref.event_type, EventType::Exit);
assert_eq!(prev_exit_ref.token_type, prev_ref.token_type);
- let curr = &mut events[index];
+ let curr = &mut events[next];
assert_eq!(curr.event_type, EventType::Enter);
- curr.previous = Some(index - 2);
+ curr.previous = Some(previous);
// Note: the exit of this event may not exist, so don’t check for that.
}
/// Parse linked events.
///
/// Supposed to be called repeatedly, returns `1: true` when done.
-pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
+pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) {
let mut index = 0;
// Map of first chunks to their tokenizer.
let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new();
@@ -83,7 +93,7 @@ pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool)
// Index into `events` pointing to a chunk.
let mut index_opt: Option<usize> = Some(index);
// Subtokenizer.
- let mut tokenizer = Tokenizer::new(event.point.clone(), event.index);
+ let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
// Substate.
let mut result: StateFnResult = (
State::Fn(Box::new(if event.token_type == TokenType::ChunkString {
@@ -115,7 +125,11 @@ pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool)
_ => unreachable!("cannot be ok/nok"),
};
- result = tokenizer.feed(span::codes(codes, &span), func, enter.next == None);
+ result = tokenizer.push(
+ span::codes(&parse_state.codes, &span),
+ func,
+ enter.next == None,
+ );
assert!(result.1.is_none(), "expected no remainder");
index_opt = enter.next;
}
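`link_to` generalizes `link`: chunks form a doubly linked list through their `previous`/`next` event indices, and `link(events, index)` is now just `link_to(events, index - 2, index)`. The essential pointer update, minus the assertions and with a simplified `Event`:

```rust
#[derive(Debug, Default)]
struct Event {
    previous: Option<usize>,
    next: Option<usize>,
}

fn link_to(events: &mut [Event], previous: usize, next: usize) {
    events[previous].next = Some(next);
    events[next].previous = Some(previous);
}

fn main() {
    // Three chunk enters at indices 0, 2, and 4 (exits at 1, 3, and 5).
    let mut events: Vec<Event> = (0..6).map(|_| Event::default()).collect();
    link_to(&mut events, 0, 2); // adjacent: what plain `link` does
    link_to(&mut events, 2, 4); // arbitrary: new with `link_to`
    assert_eq!(events[2].previous, Some(0));
    assert_eq!(events[2].next, Some(4));
}
```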
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 7b71308..a692a4d 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -15,6 +15,7 @@
use std::collections::HashMap;
use crate::constant::TAB_SIZE;
+use crate::parser::ParseState;
/// Semantic label of a span.
// To do: figure out how to share this so extensions can add their own stuff,
@@ -1073,6 +1074,32 @@ pub enum TokenType {
/// ^^^
/// ```
HtmlTextData,
+ /// To do,
+ LabelImage,
+ /// To do,
+ LabelImageMarker,
+ /// To do,
+ LabelLink,
+ /// To do,
+ LabelMarker,
+ LabelEnd,
+ Resource,
+ ResourceMarker,
+ ResourceDestination,
+ ResourceDestinationLiteral,
+ ResourceDestinationLiteralMarker,
+ ResourceDestinationRaw,
+ ResourceDestinationString,
+ ResourceTitle,
+ ResourceTitleMarker,
+ ResourceTitleString,
+ Reference,
+ ReferenceMarker,
+ ReferenceString,
+ Link,
+ Image,
+ Label,
+ LabelText,
/// Line ending.
///
/// ## Info
@@ -1243,6 +1270,9 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult;
/// In certain cases, it can also yield back up parsed codes that were passed down.
pub type StateFnResult = (State, Option<Vec<Code>>);
+/// To do.
+pub type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<Event>;
+
/// The result of a state.
pub enum State {
/// There is a future state: a boxed [`StateFn`][] to pass the next code to.
@@ -1253,6 +1283,30 @@ pub enum State {
Nok,
}
+/// To do.
+#[derive(Debug)]
+pub struct LabelStart {
+ /// To do.
+ pub start: (usize, usize),
+ /// A boolean used internally to figure out if a label start (link) can no
+ /// longer be used (because links cannot be nested in links).
+ pub inactive: bool,
+ /// A boolean used internally to figure out if a label is balanced: it’s
+ /// not media, just a pair of balanced brackets.
+ pub balanced: bool,
+}
+
+/// To do.
+#[derive(Debug)]
+pub struct Media {
+ /// To do.
+ pub start: (usize, usize),
+ /// To do.
+ pub end: (usize, usize),
+ /// To do.
+ pub id: String,
+}
+
/// The internal state of a tokenizer, not to be confused with states from the
/// state machine, this instead is all the information about where we currently
/// are and what’s going on.
@@ -1272,9 +1326,10 @@ struct InternalState {
point: Point,
}
+// #[derive(Debug)]
+
/// A tokenizer itself.
-#[derive(Debug)]
-pub struct Tokenizer {
+pub struct Tokenizer<'a> {
column_start: HashMap<usize, usize>,
/// Track whether a character is expected to be consumed, and whether it’s
/// actually consumed
@@ -1295,11 +1350,22 @@ pub struct Tokenizer {
index: usize,
/// Current relative and absolute place in the file.
point: Point,
+ /// To do.
+ pub parse_state: &'a ParseState,
+ /// To do.
+ pub label_start_stack: Vec<LabelStart>,
+ /// To do.
+ pub label_start_list_loose: Vec<LabelStart>,
+ /// To do.
+ pub media_list: Vec<Media>,
+ /// To do.
+ resolvers: Vec<Box<Resolver>>,
+ resolver_ids: Vec<String>,
}
-impl Tokenizer {
+impl<'a> Tokenizer<'a> {
/// Create a new tokenizer.
- pub fn new(point: Point, index: usize) -> Tokenizer {
+ pub fn new(point: Point, index: usize, parse_state: &'a ParseState) -> Tokenizer {
Tokenizer {
previous: Code::None,
current: Code::None,
@@ -1309,6 +1375,20 @@ impl Tokenizer {
point,
stack: vec![],
events: vec![],
+ parse_state,
+ label_start_stack: vec![],
+ label_start_list_loose: vec![],
+ media_list: vec![],
+ resolvers: vec![],
+ resolver_ids: vec![],
+ }
+ }
+
+ /// To do.
+ pub fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) {
+ if !self.resolver_ids.contains(&id) {
+ self.resolver_ids.push(id);
+ self.resolvers.push(resolver);
}
}
@@ -1582,7 +1662,8 @@ impl Tokenizer {
/// This is set up to support repeatedly calling `feed`, and thus streaming
/// markdown into the state machine, and normally pauses after feeding.
/// When `done: true` is passed, the EOF is fed.
- pub fn feed(
+ // To do: call this `feed_impl`, and rename `push` to `feed`?
+ fn feed(
&mut self,
codes: &[Code],
start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
@@ -1643,6 +1724,26 @@ impl Tokenizer {
check_statefn_result((state, None))
}
+
+ /// To do.
+ // To do: set a `drained` to prevent passing after draining?
+ pub fn push(
+ &mut self,
+ codes: &[Code],
+ start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+ drain: bool,
+ ) -> StateFnResult {
+ let result = self.feed(codes, start, drain);
+
+ if drain {
+ while !self.resolvers.is_empty() {
+ let resolver = self.resolvers.remove(0);
+ self.events = resolver(self);
+ }
+ }
+
+ result
+ }
}
/// Internal utility to wrap states to also capture codes.
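The resolver mechanism in miniature: `register_resolver` deduplicates by id, and draining (what `push` does once `drain` is true) runs each registered resolver exactly once over the events. Stand-in types, with `Vec<u32>` playing the role of the event list:

```rust
type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<u32>;

struct Tokenizer {
    events: Vec<u32>,
    resolvers: Vec<Box<Resolver>>,
    resolver_ids: Vec<String>,
}

impl Tokenizer {
    fn register_resolver(&mut self, id: String, resolver: Box<Resolver>) {
        // Registering the same id twice is a no-op.
        if !self.resolver_ids.contains(&id) {
            self.resolver_ids.push(id);
            self.resolvers.push(resolver);
        }
    }

    fn drain(&mut self) {
        while !self.resolvers.is_empty() {
            let resolver = self.resolvers.remove(0);
            self.events = resolver(self);
        }
    }
}

fn main() {
    let mut tokenizer = Tokenizer {
        events: vec![1, 2, 3],
        resolvers: vec![],
        resolver_ids: vec![],
    };
    tokenizer.register_resolver(
        "media".to_string(),
        Box::new(|t: &mut Tokenizer| -> Vec<u32> {
            t.events.iter().rev().copied().collect()
        }),
    );
    // Ignored: a resolver with the id `media` is already registered.
    tokenizer.register_resolver(
        "media".to_string(),
        Box::new(|_: &mut Tokenizer| -> Vec<u32> { vec![] }),
    );
    tokenizer.drain();
    assert_eq!(tokenizer.events, vec![3, 2, 1]);
}
```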
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index d66978e..55b15e4 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -115,7 +115,7 @@ fn normalize_uri(value: &str) -> String {
result.push(
buff[0..char.len_utf8()]
.iter()
- .map(|&byte| format!("%{:X}", byte))
+ .map(|&byte| format!("%{:>02X}", byte))
.collect::<String>(),
);
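The one-character change in `normalize_uri` zero-pads single-digit bytes, which percent-encoding requires: `%7` is not a valid escape, `%07` is. The difference in isolation:

```rust
fn main() {
    let byte: u8 = 0x07;
    assert_eq!(format!("%{:X}", byte), "%7"); // before: too short
    assert_eq!(format!("%{:>02X}", byte), "%07"); // after: zero-padded
}
```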