diff options
Diffstat (limited to 'src/construct')
-rw-r--r-- | src/construct/label_end.rs | 2 | ||||
-rw-r--r-- | src/construct/label_start_image.rs | 2 | ||||
-rw-r--r-- | src/construct/label_start_link.rs | 2 | ||||
-rw-r--r-- | src/construct/mod.rs | 2 | ||||
-rw-r--r-- | src/construct/partial_data.rs | 51 | ||||
-rw-r--r-- | src/construct/partial_whitespace.rs | 56 |
6 files changed, 110 insertions, 5 deletions
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index 888355b..0da12b8 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -510,7 +510,7 @@ fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { info.media.end.1 = tokenizer.events.len() - 1; tokenizer.media_list.push(info.media); - tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); (State::Ok, Some(vec![code])) } diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs index 7725334..a45205a 100644 --- a/src/construct/label_start_image.rs +++ b/src/construct/label_start_image.rs @@ -67,7 +67,7 @@ pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { balanced: false, inactive: false, }); - tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); (State::Ok, None) } _ => (State::Nok, None), diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs index 46d7c9c..6c4d7ae 100644 --- a/src/construct/label_start_link.rs +++ b/src/construct/label_start_link.rs @@ -49,7 +49,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { balanced: false, inactive: false, }); - tokenizer.register_resolver("media".to_string(), Box::new(resolve_media)); + tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media)); (State::Ok, None) } _ => (State::Nok, None), diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 8565b2f..9e3dfb0 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -44,6 +44,7 @@ //! * [label][partial_label] //! * [space or tab][partial_space_or_tab] //! * [title][partial_title] +//! * [whitespace][partial_whitespace] //! //! Each construct maintained here is explained with a BNF diagram. //! For example, the docs for [character escape][character_escape] contain: @@ -83,4 +84,5 @@ pub mod partial_destination; pub mod partial_label; pub mod partial_space_or_tab; pub mod partial_title; +pub mod partial_whitespace; pub mod thematic_break; diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs index d83787a..9f99570 100644 --- a/src/construct/partial_data.rs +++ b/src/construct/partial_data.rs @@ -8,7 +8,8 @@ // To do: pass token types in? -use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; +use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer}; +use crate::util::edit_map::EditMap; /// At the beginning of data. /// @@ -39,7 +40,10 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnRe tokenizer.exit(TokenType::LineEnding); (State::Fn(Box::new(|t, c| at_break(t, c, stop))), None) } - _ if stop.contains(&code) => (State::Ok, Some(vec![code])), + _ if stop.contains(&code) => { + tokenizer.register_resolver("data".to_string(), Box::new(resolve)); + (State::Ok, Some(vec![code])) + } _ => { tokenizer.enter(TokenType::Data); data(tokenizer, code, stop) @@ -67,3 +71,46 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult (State::Fn(Box::new(|t, c| data(t, c, stop))), None) } } + +/// Merge adjacent data events. +pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { + let mut edit_map = EditMap::new(); + let len = tokenizer.events.len(); + let mut index = 0; + + // Loop through events and merge adjacent data events. + while index < len { + let event = &tokenizer.events[index]; + + if event.event_type == EventType::Enter && event.token_type == TokenType::Data { + let exit_index = index + 1; + let mut exit_far_index = exit_index; + + // Find multiple `data` events. + while exit_far_index + 1 < len + && tokenizer.events[exit_far_index + 1].token_type == TokenType::Data + { + exit_far_index += 2; + } + + if exit_far_index > exit_index { + edit_map.add(exit_index, exit_far_index - exit_index, vec![]); + + // Change positional info. + let exit_far = &tokenizer.events[exit_far_index]; + let point_end = exit_far.point.clone(); + let index_end = exit_far.index; + let exit = &mut tokenizer.events[exit_index]; + exit.point = point_end; + exit.index = index_end; + index = exit_far_index; + + continue; + } + } + + index += 1; + } + + edit_map.consume(&mut tokenizer.events) +} diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs new file mode 100644 index 0000000..9a7a54d --- /dev/null +++ b/src/construct/partial_whitespace.rs @@ -0,0 +1,56 @@ +//! Trailing whitespace occurs in [string][] and [text][]. +//! +//! It occurs at the start or end of the whole, or around line endings. +//! This whitespace is ignored +//! +//! They’re formed with the following BNF: +//! +//! ```bnf +//! ; Restriction: the start and end here count as an eol. +//! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab +//! ``` +//! +//! ## References +//! +//! * [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js) +//! +//! [string]: crate::content::string +//! [text]: crate::content::text + +use super::partial_space_or_tab::space_or_tab; +use crate::tokenizer::{Code, State, StateFnResult, Tokenizer}; + +/// Parse initial or final whitespace. +pub fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.go( + // Nothing if there’s no whitespace. + space_or_tab(), + if matches!( + tokenizer.previous, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { + // If there’s whitespace, and we were at an eol/eof, `ok` + ok + } else { + // If there’s whitespace, and we were not at an eol/eof, there must be one here. + at_eol + }, + )(tokenizer, code) +} + +/// After whitespace, at an eol/eof. +fn at_eol(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + if matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') + ) { + ok(tokenizer, code) + } else { + (State::Nok, None) + } +} + +/// Fine. +fn ok(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + (State::Ok, Some(vec![code])) +} |