aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
Diffstat (limited to 'src/construct')
-rw-r--r--src/construct/label_end.rs2
-rw-r--r--src/construct/label_start_image.rs2
-rw-r--r--src/construct/label_start_link.rs2
-rw-r--r--src/construct/mod.rs2
-rw-r--r--src/construct/partial_data.rs51
-rw-r--r--src/construct/partial_whitespace.rs56
6 files changed, 110 insertions, 5 deletions
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 888355b..0da12b8 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -510,7 +510,7 @@ fn ok(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
info.media.end.1 = tokenizer.events.len() - 1;
tokenizer.media_list.push(info.media);
- tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
(State::Ok, Some(vec![code]))
}
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 7725334..a45205a 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -67,7 +67,7 @@ pub fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
balanced: false,
inactive: false,
});
- tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
(State::Ok, None)
}
_ => (State::Nok, None),
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index 46d7c9c..6c4d7ae 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -49,7 +49,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
balanced: false,
inactive: false,
});
- tokenizer.register_resolver("media".to_string(), Box::new(resolve_media));
+ tokenizer.register_resolver_before("media".to_string(), Box::new(resolve_media));
(State::Ok, None)
}
_ => (State::Nok, None),
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 8565b2f..9e3dfb0 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -44,6 +44,7 @@
//! * [label][partial_label]
//! * [space or tab][partial_space_or_tab]
//! * [title][partial_title]
+//! * [whitespace][partial_whitespace]
//!
//! Each construct maintained here is explained with a BNF diagram.
//! For example, the docs for [character escape][character_escape] contain:
@@ -83,4 +84,5 @@ pub mod partial_destination;
pub mod partial_label;
pub mod partial_space_or_tab;
pub mod partial_title;
+pub mod partial_whitespace;
pub mod thematic_break;
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index d83787a..9f99570 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -8,7 +8,8 @@
// To do: pass token types in?
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::edit_map::EditMap;
/// At the beginning of data.
///
@@ -39,7 +40,10 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnRe
tokenizer.exit(TokenType::LineEnding);
(State::Fn(Box::new(|t, c| at_break(t, c, stop))), None)
}
- _ if stop.contains(&code) => (State::Ok, Some(vec![code])),
+ _ if stop.contains(&code) => {
+ tokenizer.register_resolver("data".to_string(), Box::new(resolve));
+ (State::Ok, Some(vec![code]))
+ }
_ => {
tokenizer.enter(TokenType::Data);
data(tokenizer, code, stop)
@@ -67,3 +71,46 @@ fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult
(State::Fn(Box::new(|t, c| data(t, c, stop))), None)
}
}
+
+/// Merge adjacent data events.
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut edit_map = EditMap::new();
+ let len = tokenizer.events.len();
+ let mut index = 0;
+
+ // Loop through events and merge adjacent data events.
+ while index < len {
+ let event = &tokenizer.events[index];
+
+ if event.event_type == EventType::Enter && event.token_type == TokenType::Data {
+ let exit_index = index + 1;
+ let mut exit_far_index = exit_index;
+
+ // Find multiple `data` events.
+ while exit_far_index + 1 < len
+ && tokenizer.events[exit_far_index + 1].token_type == TokenType::Data
+ {
+ exit_far_index += 2;
+ }
+
+ if exit_far_index > exit_index {
+ edit_map.add(exit_index, exit_far_index - exit_index, vec![]);
+
+ // Change positional info.
+ let exit_far = &tokenizer.events[exit_far_index];
+ let point_end = exit_far.point.clone();
+ let index_end = exit_far.index;
+ let exit = &mut tokenizer.events[exit_index];
+ exit.point = point_end;
+ exit.index = index_end;
+ index = exit_far_index;
+
+ continue;
+ }
+ }
+
+ index += 1;
+ }
+
+ edit_map.consume(&mut tokenizer.events)
+}
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
new file mode 100644
index 0000000..9a7a54d
--- /dev/null
+++ b/src/construct/partial_whitespace.rs
@@ -0,0 +1,56 @@
+//! Trailing whitespace occurs in [string][] and [text][].
+//!
+//! It occurs at the start or end of the whole, or around line endings.
+//! This whitespace is ignored.
+//!
+//! It’s formed with the following BNF:
+//!
+//! ```bnf
+//! ; Restriction: the start and end here count as an eol.
+//! whitespace ::= 0.*space_or_tab eol 0.*space_or_tab
+//! ```
+//!
+//! ## References
+//!
+//! * [`initialize/text.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark/dev/lib/initialize/text.js)
+//!
+//! [string]: crate::content::string
+//! [text]: crate::content::text
+
+use super::partial_space_or_tab::space_or_tab;
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
+/// Parse initial or final whitespace.
+pub fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.go(
+ // Nothing if there’s no whitespace.
+ space_or_tab(),
+ if matches!(
+ tokenizer.previous,
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+ ) {
+ // If there’s whitespace, and we were at an eol/eof, `ok`.
+ ok
+ } else {
+ // If there’s whitespace, and we were not at an eol/eof, there must be one here.
+ at_eol
+ },
+ )(tokenizer, code)
+}
+
+/// After whitespace, at an eol/eof.
+fn at_eol(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ if matches!(
+ code,
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+ ) {
+ ok(tokenizer, code)
+ } else {
+ (State::Nok, None)
+ }
+}
+
+/// Fine.
+fn ok(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ (State::Ok, Some(vec![code]))
+}