about | summary | refs | log | tree | commit | diff | stats
path: root/src/construct
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com> 2022-06-28 14:18:17 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-06-28 14:18:17 +0200
commit: dfd11b1bc155ae1fba9975a90c2dc83dc07697b4 (patch)
tree: 0dd150365a6ae1df4c4845518efafe02ab61cb77 /src/construct
parent: a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0 (diff)
download: markdown-rs-dfd11b1bc155ae1fba9975a90c2dc83dc07697b4.tar.gz
markdown-rs-dfd11b1bc155ae1fba9975a90c2dc83dc07697b4.tar.bz2
markdown-rs-dfd11b1bc155ae1fba9975a90c2dc83dc07697b4.zip
Fix jumps in `edit_map`
* Use resolve more often (e.g., heading (atx, setext))
* Fix to link whole phrasing (e.g., one big chunk of text in heading (atx, setext), titles, labels)
* Replace `ChunkText`, `ChunkString`, with `event.content_type: Option<ContentType>`
* Refactor to externalize `edit_map` from `label`
Diffstat (limited to '')
-rw-r--r-- src/construct/code_fenced.rs 12
-rw-r--r-- src/construct/heading_atx.rs 107
-rw-r--r-- src/construct/heading_setext.rs 32
-rw-r--r-- src/construct/label_end.rs 159
-rw-r--r-- src/construct/paragraph.rs 10
-rw-r--r-- src/construct/partial_destination.rs 12
-rw-r--r-- src/construct/partial_label.rs 54
-rw-r--r-- src/construct/partial_space_or_tab.rs 161
-rw-r--r-- src/construct/partial_title.rs 67
9 files changed, 316 insertions, 298 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 5b1426c..1602aad 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -103,7 +103,7 @@
use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
use crate::util::span::from_exit_event;
/// Kind of fences.
@@ -259,7 +259,7 @@ fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
}
_ => {
tokenizer.enter(TokenType::CodeFencedFenceInfo);
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
info_inside(tokenizer, code, info, vec![])
}
}
@@ -280,13 +280,13 @@ fn info_inside(
) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::CodeFencedFenceInfo);
tokenizer.exit(TokenType::CodeFencedFence);
at_break(tokenizer, code, info)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::CodeFencedFenceInfo);
tokenizer.attempt_opt(space_or_tab(), |t, c| meta_before(t, c, info))(tokenizer, code)
}
@@ -317,7 +317,7 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
}
_ => {
tokenizer.enter(TokenType::CodeFencedFenceMeta);
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
meta(tokenizer, code, info)
}
}
@@ -333,7 +333,7 @@ fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::CodeFencedFenceMeta);
tokenizer.exit(TokenType::CodeFencedFence);
at_break(tokenizer, code, info)
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 1e5fe3d..2811894 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -40,7 +40,7 @@
//! * [`HeadingAtx`][TokenType::HeadingAtx]
//! * [`HeadingAtxSequence`][TokenType::HeadingAtxSequence]
//! * [`HeadingAtxText`][TokenType::HeadingAtxText]
-//! * [`HeadingAtxSpaceOrTab`][TokenType::HeadingAtxSpaceOrTab]
+//! * [`SpaceOrTab`][TokenType::SpaceOrTab]
//!
//! ## References
//!
@@ -54,11 +54,12 @@
//! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
//! [atx]: http://www.aaronsw.com/2002/atx/
-use super::partial_space_or_tab::{
- space_or_tab, space_or_tab_with_options, Options as SpaceOrTabOptions,
-};
+use super::partial_space_or_tab::space_or_tab;
use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{
+ Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,
+};
+use crate::util::edit_map::EditMap;
/// Start of a heading (atx).
///
@@ -106,14 +107,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR
}
_ if rank > 0 => {
tokenizer.exit(TokenType::HeadingAtxSequence);
- tokenizer.go(
- space_or_tab_with_options(SpaceOrTabOptions {
- kind: TokenType::HeadingAtxSpaceOrTab,
- min: 1,
- max: usize::MAX,
- }),
- at_break,
- )(tokenizer, code)
+ tokenizer.go(space_or_tab(), at_break)(tokenizer, code)
}
_ => (State::Nok, None),
}
@@ -132,23 +126,18 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(TokenType::HeadingAtx);
+ tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
(State::Ok, Some(vec![code]))
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.go(
- space_or_tab_with_options(SpaceOrTabOptions {
- kind: TokenType::HeadingAtxSpaceOrTab,
- min: 1,
- max: usize::MAX,
- }),
- at_break,
- )(tokenizer, code),
+ Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ tokenizer.go(space_or_tab(), at_break)(tokenizer, code)
+ }
Code::Char('#') => {
tokenizer.enter(TokenType::HeadingAtxSequence);
further_sequence(tokenizer, code)
}
Code::Char(_) => {
- tokenizer.enter(TokenType::HeadingAtxText);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
data(tokenizer, code)
}
}
@@ -179,8 +168,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
// Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\t' | '\n' | '\r' | ' ') => {
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.exit(TokenType::HeadingAtxText);
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code)
}
_ => {
@@ -189,3 +177,72 @@ fn data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
}
}
+
+/// To do.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut edit_map = EditMap::new();
+ let mut index = 0;
+ let mut heading_start: Option<usize> = None;
+ let mut data_start: Option<usize> = None;
+ let mut data_end: Option<usize> = None;
+
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+
+ if event.token_type == TokenType::HeadingAtx {
+ if event.event_type == EventType::Enter {
+ heading_start = Some(index);
+ } else if let Some(start) = data_start {
+ // If `start` is some, `end` is too.
+ let end = data_end.unwrap();
+
+ edit_map.add(
+ start,
+ 0,
+ vec![Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::HeadingAtxText,
+ point: tokenizer.events[start].point.clone(),
+ index: tokenizer.events[start].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ }],
+ );
+
+ // Remove everything between the start and the end.
+ edit_map.add(start + 1, end - start - 1, vec![]);
+
+ edit_map.add(
+ end + 1,
+ 0,
+ vec![Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::HeadingAtxText,
+ point: tokenizer.events[end].point.clone(),
+ index: tokenizer.events[end].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ }],
+ );
+
+ heading_start = None;
+ data_start = None;
+ data_end = None;
+ }
+ } else if heading_start.is_some() && event.token_type == TokenType::Data {
+ if event.event_type == EventType::Enter {
+ if data_start.is_none() {
+ data_start = Some(index);
+ }
+ } else {
+ data_end = Some(index);
+ }
+ }
+
+ index += 1;
+ }
+
+ edit_map.consume(&mut tokenizer.events)
+}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 06ce481..63f3c30 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -56,9 +56,9 @@
//! [atx]: http://www.aaronsw.com/2002/atx/
use crate::constant::TAB_SIZE;
-use crate::construct::partial_space_or_tab::space_or_tab;
+use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_with_options, Options};
use crate::subtokenize::link;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
use crate::util::span::from_exit_event;
/// Kind of underline.
@@ -131,7 +131,7 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
_ => {
tokenizer.enter(TokenType::HeadingSetextText);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
text_inside(tokenizer, code)
}
}
@@ -148,7 +148,7 @@ fn text_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Nok, None),
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.exit(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::HeadingSetextText);
tokenizer.attempt(underline_before, |ok| {
Box::new(if ok { after } else { text_continue })
@@ -176,16 +176,23 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- tokenizer.enter(TokenType::LineEnding);
+ tokenizer.enter_with_content(TokenType::LineEnding, Some(ContentType::Text));
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
(
- State::Fn(Box::new(
- tokenizer.attempt_opt(space_or_tab(), text_line_start),
- )),
+ State::Fn(Box::new(tokenizer.attempt_opt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: Some(ContentType::Text),
+ connect: true,
+ }),
+ text_line_start,
+ ))),
None,
)
}
@@ -201,18 +208,11 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ==
/// ```
fn text_line_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let index = tokenizer.events.len() - 2;
-
- // Link the whitespace, if it exists.
- if tokenizer.events[index].token_type == TokenType::SpaceOrTab {
- link(&mut tokenizer.events, index);
- }
-
match code {
// Blank lines not allowed.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
_ => {
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
text_inside(tokenizer, code)
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 405858d..6e8e476 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -11,11 +11,10 @@ use crate::tokenizer::{
Code, Event, EventType, LabelStart, Media, State, StateFnResult, TokenType, Tokenizer,
};
use crate::util::{
+ edit_map::EditMap,
normalize_identifier::normalize_identifier,
span::{serialize, Span},
};
-/// To do: could we do without `HashMap`, so we don’t need `std`?
-use std::collections::HashMap;
#[derive(Debug)]
struct Info {
@@ -32,43 +31,45 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
let media: Vec<Media> = tokenizer.media_list.drain(..).collect();
left.append(&mut left_2);
- let mut map: HashMap<usize, (usize, Vec<Event>)> = HashMap::new();
+ let mut edit_map = EditMap::new();
let events = &tokenizer.events;
+ // Remove loose label starts.
let mut index = 0;
while index < left.len() {
let label_start = &left[index];
let data_enter_index = label_start.start.0;
let data_exit_index = label_start.start.1;
- map.insert(
+ edit_map.add(
data_enter_index,
- (
- data_exit_index - data_enter_index,
- vec![
- Event {
- event_type: EventType::Enter,
- token_type: TokenType::Data,
- point: events[data_enter_index].point.clone(),
- index: events[data_enter_index].index,
- previous: None,
- next: None,
- },
- Event {
- event_type: EventType::Exit,
- token_type: TokenType::Data,
- point: events[data_exit_index].point.clone(),
- index: events[data_exit_index].index,
- previous: None,
- next: None,
- },
- ],
- ),
+ data_exit_index - data_enter_index,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: TokenType::Data,
+ point: events[data_enter_index].point.clone(),
+ index: events[data_enter_index].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: TokenType::Data,
+ point: events[data_exit_index].point.clone(),
+ index: events[data_exit_index].index,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ ],
);
index += 1;
}
+ // Add grouping events.
let mut index = 0;
while index < media.len() {
let media = &media[index];
@@ -90,8 +91,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
let group_end_index = media.end.1;
// Insert a group enter and label enter.
- add(
- &mut map,
+ edit_map.add(
group_enter_index,
0,
vec![
@@ -106,6 +106,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: group_enter_event.index,
previous: None,
next: None,
+ content_type: None,
},
Event {
event_type: EventType::Enter,
@@ -114,6 +115,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: group_enter_event.index,
previous: None,
next: None,
+ content_type: None,
},
],
);
@@ -121,8 +123,7 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
// Empty events not allowed.
if text_enter_index != text_exit_index {
// Insert a text enter.
- add(
- &mut map,
+ edit_map.add(
text_enter_index,
0,
vec![Event {
@@ -132,12 +133,12 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[text_enter_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
// Insert a text exit.
- add(
- &mut map,
+ edit_map.add(
text_exit_index,
0,
vec![Event {
@@ -147,13 +148,13 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[text_exit_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
}
// Insert a label exit.
- add(
- &mut map,
+ edit_map.add(
label_exit_index + 1,
0,
vec![Event {
@@ -163,12 +164,12 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[label_exit_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
// Insert a group exit.
- add(
- &mut map,
+ edit_map.add(
group_end_index + 1,
0,
vec![Event {
@@ -178,81 +179,14 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) -> Vec<Event> {
index: events[group_end_index].index,
previous: None,
next: None,
+ content_type: None,
}],
);
index += 1;
}
- let mut indices: Vec<&usize> = map.keys().collect();
- indices.sort_unstable();
- let mut next_events: Vec<Event> = vec![];
- let mut index_into_indices = 0;
- let mut start = 0;
- let events = &mut tokenizer.events;
- let mut shift: i32 = 0;
-
- while index_into_indices < indices.len() {
- let index = *indices[index_into_indices];
-
- if start < index {
- let append = &mut events[start..index].to_vec();
- let mut index = 0;
-
- while index < append.len() {
- let ev = &mut append[index];
-
- if let Some(x) = ev.previous {
- let next = (x as i32 + shift) as usize;
- ev.previous = Some(next);
- println!("todo: y: previous {:?} {:?} {:?}", x, shift, start);
- }
-
- if let Some(x) = ev.next {
- let next = (x as i32 + shift) as usize;
- ev.next = Some(next);
- println!("todo: y: next {:?} {:?} {:?}", x, shift, start);
- }
-
- index += 1;
- }
-
- next_events.append(append);
- }
-
- let (remove, add) = map.get(&index).unwrap();
- shift += (add.len() as i32) - (*remove as i32);
-
- if !add.is_empty() {
- let append = &mut add.clone();
- let mut index = 0;
-
- while index < append.len() {
- let ev = &mut append[index];
-
- if let Some(x) = ev.previous {
- println!("todo: x: previous {:?} {:?} {:?}", x, shift, start);
- }
-
- if let Some(x) = ev.next {
- println!("todo: x: next {:?} {:?} {:?}", x, shift, start);
- }
-
- index += 1;
- }
-
- next_events.append(append);
- }
-
- start = index + remove;
- index_into_indices += 1;
- }
-
- if start < events.len() {
- next_events.append(&mut events[start..].to_vec());
- }
-
- next_events
+ edit_map.consume(&mut tokenizer.events)
}
/// Start of label end.
@@ -693,20 +627,3 @@ fn collapsed_reference_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes
_ => (State::Nok, None),
}
}
-
-pub fn add(
- map: &mut HashMap<usize, (usize, Vec<Event>)>,
- index: usize,
- mut remove: usize,
- mut add: Vec<Event>,
-) {
- let curr = map.remove(&index);
-
- if let Some((curr_rm, mut curr_add)) = curr {
- remove += curr_rm;
- curr_add.append(&mut add);
- add = curr_add;
- }
-
- map.insert(index, (remove, add));
-}
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 13bd5aa..fea7052 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -39,7 +39,7 @@ use crate::construct::{
partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,
};
use crate::subtokenize::link;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Before a paragraph.
///
@@ -53,7 +53,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
_ => {
tokenizer.enter(TokenType::Paragraph);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
inside(tokenizer, code)
}
}
@@ -86,8 +86,8 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkText);
- tokenizer.enter(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Data);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text));
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
(State::Fn(Box::new(inside)), None)
@@ -100,7 +100,7 @@ fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ***
/// ```
fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.exit(TokenType::ChunkText);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(TokenType::Paragraph);
(State::Ok, Some(vec![code]))
}
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 7887a44..05f5060 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -72,7 +72,7 @@
//!
//! <!-- To do: link label end. -->
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
///
@@ -134,7 +134,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn
tokenizer.enter(info.options.destination.clone());
tokenizer.enter(info.options.raw.clone());
tokenizer.enter(info.options.string.clone());
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
raw(tokenizer, code, info)
}
}
@@ -155,7 +155,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFn
(State::Ok, None)
} else {
tokenizer.enter(info.options.string.clone());
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
enclosed(tokenizer, code, info)
}
}
@@ -168,7 +168,7 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFn
fn enclosed(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('>') => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(info.options.string.clone());
enclosed_before(tokenizer, code, info)
}
@@ -222,7 +222,7 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
}
Code::Char(')') => {
if info.balance == 0 {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(info.options.string.clone());
tokenizer.exit(info.options.raw.clone());
tokenizer.exit(info.options.destination);
@@ -240,7 +240,7 @@ fn raw(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
if info.balance > 0 {
(State::Nok, None)
} else {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
tokenizer.exit(info.options.string.clone());
tokenizer.exit(info.options.raw.clone());
tokenizer.exit(info.options.destination);
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 1cb7d4b..dd8ee84 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -55,10 +55,12 @@
// To do: pass token types in.
+use super::partial_space_or_tab::{
+ space_or_tab_one_line_ending_with_options, OneLineEndingOptions,
+};
use crate::constant::LINK_REFERENCE_SIZE_MAX;
-use crate::construct::partial_space_or_tab::space_or_tab;
use crate::subtokenize::link;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
///
@@ -130,8 +132,18 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
tokenizer.exit(info.options.label);
(State::Ok, None)
}
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go(
+ space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ content_type: Some(ContentType::String),
+ connect: info.connect,
+ }),
+ |t, c| {
+ info.connect = true;
+ at_break(t, c, info)
+ },
+ )(tokenizer, code),
_ => {
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
if info.connect {
let index = tokenizer.events.len() - 1;
@@ -145,30 +157,6 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
}
}
-/// After a line ending.
-///
-/// ```markdown
-/// [a
-/// |b]
-/// ```
-fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
-}
-
-/// After a line ending, after optional whitespace.
-///
-/// ```markdown
-/// [a
-/// |b]
-/// ```
-fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- match code {
- // Blank line not allowed.
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
- _ => at_break(tokenizer, code, info),
- }
-}
-
/// In a label, in text.
///
/// ```markdown
@@ -176,20 +164,14 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResul
/// ```
fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::None | Code::Char('[' | ']') => {
- tokenizer.exit(TokenType::ChunkString);
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '[' | ']') => {
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
_ if info.size > LINK_REFERENCE_SIZE_MAX => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.consume(code);
- info.size += 1;
- tokenizer.exit(TokenType::ChunkString);
- (State::Fn(Box::new(|t, c| line_start(t, c, info))), None)
- }
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
info.size += 1;
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 43bdc53..8df7601 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -4,7 +4,8 @@
//!
//! * [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
-use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
+use crate::subtokenize::link;
+use crate::tokenizer::{Code, ContentType, State, StateFn, StateFnResult, TokenType, Tokenizer};
/// Options to parse whitespace.
#[derive(Debug)]
@@ -15,6 +16,25 @@ pub struct Options {
pub max: usize,
/// Token type to use for whitespace events.
pub kind: TokenType,
+ /// To do.
+ pub content_type: Option<ContentType>,
+ pub connect: bool,
+}
+
+#[derive(Debug)]
+pub struct OneLineEndingOptions {
+ /// To do.
+ pub content_type: Option<ContentType>,
+ pub connect: bool,
+}
+
+/// Options to parse whitespace.
+#[derive(Debug)]
+struct OneLineInfo {
+ /// Whether something was seen.
+ connect: bool,
+ /// Configuration.
+ options: OneLineEndingOptions,
}
/// Options to parse whitespace.
@@ -35,45 +55,6 @@ pub fn space_or_tab() -> Box<StateFn> {
space_or_tab_min_max(1, usize::MAX)
}
-pub fn space_or_tab_one_line_ending() -> Box<StateFn> {
- Box::new(|tokenizer, code| {
- tokenizer.attempt(space_or_tab(), move |ok| {
- Box::new(move |tokenizer, code| match code {
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (
- State::Fn(Box::new(tokenizer.attempt_opt(
- space_or_tab(),
- move |_t, code| {
- if !matches!(
- code,
- Code::None
- | Code::CarriageReturnLineFeed
- | Code::Char('\r' | '\n')
- ) {
- (State::Ok, Some(vec![code]))
- } else {
- (State::Nok, None)
- }
- },
- ))),
- None,
- )
- }
- _ => {
- if ok {
- (State::Ok, Some(vec![code]))
- } else {
- (State::Nok, None)
- }
- }
- })
- })(tokenizer, code)
- })
-}
-
/// Between `x` and `y` `space_or_tab`
///
/// ```bnf
@@ -84,6 +65,8 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
kind: TokenType::SpaceOrTab,
min,
max,
+ content_type: None,
+ connect: false,
})
}
@@ -104,7 +87,13 @@ pub fn space_or_tab_with_options(options: Options) -> Box<StateFn> {
fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::VirtualSpace | Code::Char('\t' | ' ') if info.options.max > 0 => {
- tokenizer.enter(info.options.kind.clone());
+ tokenizer.enter_with_content(info.options.kind.clone(), info.options.content_type);
+
+ if info.options.content_type.is_some() {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ }
+
tokenizer.consume(code);
info.size += 1;
(State::Fn(Box::new(|t, c| inside(t, c, info))), None)
@@ -146,3 +135,93 @@ fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResul
}
}
}
+
+pub fn space_or_tab_one_line_ending() -> Box<StateFn> {
+ space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ content_type: None,
+ connect: false,
+ })
+}
+
+pub fn space_or_tab_one_line_ending_with_options(options: OneLineEndingOptions) -> Box<StateFn> {
+ Box::new(move |tokenizer, code| {
+ let mut info = OneLineInfo {
+ connect: false,
+ options,
+ };
+
+ tokenizer.attempt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: info.options.content_type,
+ connect: info.options.connect,
+ }),
+ move |ok| {
+ if ok && info.options.content_type.is_some() {
+ info.connect = true;
+ }
+
+ Box::new(move |tokenizer, code| match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ at_eol(tokenizer, code, info)
+ }
+ _ => {
+ if ok {
+ (State::Ok, Some(vec![code]))
+ } else {
+ (State::Nok, None)
+ }
+ }
+ })
+ },
+ )(tokenizer, code)
+ })
+}
+
+fn at_eol(tokenizer: &mut Tokenizer, code: Code, mut info: OneLineInfo) -> StateFnResult {
+ match code {
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.enter_with_content(TokenType::LineEnding, info.options.content_type);
+
+ if info.options.content_type.is_some() {
+ if info.connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ } else {
+ info.connect = true;
+ }
+ }
+
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (
+ State::Fn(Box::new(tokenizer.attempt_opt(
+ space_or_tab_with_options(Options {
+ kind: TokenType::SpaceOrTab,
+ min: 1,
+ max: usize::MAX,
+ content_type: info.options.content_type,
+ connect: info.connect,
+ }),
+ after_eol,
+ ))),
+ None,
+ )
+ }
+ _ => unreachable!("expected eol"),
+ }
+}
+
+fn after_eol(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // Blank line not allowed.
+ if matches!(
+ code,
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+ ) {
+ (State::Nok, None)
+ } else {
+ (State::Ok, Some(vec![code]))
+ }
+}
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 78ae311..b102f7e 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -31,9 +31,11 @@
//!
//! <!-- To do: link label end. -->
-use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::subtokenize::link_to;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use super::partial_space_or_tab::{
+ space_or_tab_one_line_ending_with_options, OneLineEndingOptions,
+};
+use crate::subtokenize::link;
+use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer};
/// Configuration.
///
@@ -108,8 +110,8 @@ impl Kind {
/// State needed to parse titles.
#[derive(Debug)]
struct Info {
- /// Whether we’ve seen our first `ChunkString`.
- connect_index: Option<usize>,
+ /// Whether we’ve seen data.
+ connect: bool,
/// Kind of title.
kind: Kind,
/// Configuration.
@@ -127,7 +129,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code, options: Options) -> StateFn
match code {
Code::Char(char) if char == '"' || char == '\'' || char == '(' => {
let info = Info {
- connect_index: None,
+ connect: false,
kind: Kind::from_char(char),
options,
};
@@ -181,14 +183,24 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
begin(tokenizer, code, info)
}
Code::None => (State::Nok, None),
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => tokenizer.go(
+ space_or_tab_one_line_ending_with_options(OneLineEndingOptions {
+ content_type: Some(ContentType::String),
+ connect: info.connect,
+ }),
+ |t, c| {
+ info.connect = true;
+ at_break(t, c, info)
+ },
+ )(tokenizer, code),
_ => {
- tokenizer.enter(TokenType::ChunkString);
+ tokenizer.enter_with_content(TokenType::Data, Some(ContentType::String));
- if let Some(connect_index) = info.connect_index {
+ if info.connect {
let index = tokenizer.events.len() - 1;
- link_to(&mut tokenizer.events, connect_index, index);
+ link(&mut tokenizer.events, index);
} else {
- info.connect_index = Some(tokenizer.events.len() - 1);
+ info.connect = true;
}
title(tokenizer, code, info)
@@ -196,30 +208,6 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
}
}
-/// After a line ending.
-///
-/// ```markdown
-/// "a
-/// |b"
-/// ```
-fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- tokenizer.attempt_opt(space_or_tab(), |t, c| line_begin(t, c, info))(tokenizer, code)
-}
-
-/// After a line ending, after optional whitespace.
-///
-/// ```markdown
-/// "a
-/// |b"
-/// ```
-fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- match code {
- // Blank line not allowed.
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
- _ => at_break(tokenizer, code, info),
- }
-}
-
/// In title text.
///
/// ```markdown
@@ -228,18 +216,13 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResul
fn title(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char(char) if char == info.kind.as_char() => {
- tokenizer.exit(TokenType::ChunkString);
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
- Code::None => {
- tokenizer.exit(TokenType::ChunkString);
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.exit(TokenType::Data);
at_break(tokenizer, code, info)
}
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.consume(code);
- tokenizer.exit(TokenType::ChunkString);
- (State::Fn(Box::new(|t, c| line_start(t, c, info))), None)
- }
Code::Char('\\') => {
tokenizer.consume(code);
(State::Fn(Box::new(|t, c| escape(t, c, info))), None)