path: root/src/construct/partial_whitespace.rs
author Titus Wormer <tituswormer@gmail.com> 2022-07-28 16:48:00 +0200
committer Titus Wormer <tituswormer@gmail.com> 2022-07-28 16:48:00 +0200
commit f7e5fb852dc9c416b9eeb1f0d4f2d51ba5b68456 (patch)
tree c1ac3f22473bd79566d835b2474d2ae9e00d6c55 /src/construct/partial_whitespace.rs
parent d729b07712ca9cc91e68af1776dac9d7008a90cb (diff)
Refactor to work on `char`s
Previously, a custom character implementation was used. That was easier to work with, as sometimes “virtual” characters are injected, or characters are ignored. This replaces it with actual `char`s, in the hope of eventually working on `u8`s. It simplifies the state machine somewhat: only `\n` is fed, regardless of whether the source contained a CRLF, CR, or LF, and `' '` is fed instead of virtual spaces. The BOM, if present, is now available as a `ByteOrderMark` event.
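
A minimal sketch of the line-ending normalization described above, assuming a free-standing `feed` function for illustration (the real crate feeds characters into its tokenizer state machine):

```rust
/// Sketch: collapse every CRLF, CR, or LF to a single '\n' before the
/// state machine sees it. (The commit also feeds ' ' for virtual spaces
/// and turns a leading BOM into a `ByteOrderMark` event; not shown here.)
fn feed(input: &str) -> Vec<char> {
    let mut out = Vec::new();
    let mut iter = input.chars().peekable();
    while let Some(c) = iter.next() {
        if c == '\r' {
            // CRLF counts as one line ending; a lone CR is normalized too.
            if iter.peek() == Some(&'\n') {
                iter.next();
            }
            out.push('\n');
        } else {
            out.push(c);
        }
    }
    out
}

fn main() {
    assert_eq!(feed("a\r\nb\rc\n"), vec!['a', '\n', 'b', '\n', 'c', '\n']);
}
```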
Diffstat (limited to 'src/construct/partial_whitespace.rs')
-rw-r--r-- src/construct/partial_whitespace.rs | 53
1 file changed, 24 insertions(+), 29 deletions(-)
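
The hunks below swap `span::codes` and the `Code` enum for a `Slice` over plain `char`s, where virtual spaces at the edges are tracked as counts rather than inline markers. A hedged sketch of that shape, inferred only from the field accesses in this diff (`slice.chars`, `slice.before`, `slice.after`); the method is illustrative, not the crate's API:

```rust
/// Sketch of the slice shape the diff relies on: `chars` is the event's
/// text, while `before`/`after` count virtual spaces (e.g. the remainder
/// of a partially consumed tab) at its edges.
struct Slice<'a> {
    chars: &'a [char],
    before: usize,
    after: usize,
}

impl<'a> Slice<'a> {
    /// Count trailing spaces and tabs, mirroring the `trim_end` loop below.
    fn trailing_whitespace(&self) -> usize {
        self.chars
            .iter()
            .rev()
            .take_while(|c| **c == ' ' || **c == '\t')
            .count()
    }
}

fn main() {
    let chars: Vec<char> = "text \t".chars().collect();
    let slice = Slice { chars: &chars, before: 0, after: 1 };
    // Two trailing whitespace chars in `chars`, plus one virtual space
    // recorded in `after`.
    assert_eq!(slice.trailing_whitespace(), 2);
    assert_eq!(slice.before + slice.after, 1);
}
```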
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index 4c94c7d..152824b 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -47,8 +47,8 @@
use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN;
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, Tokenizer};
-use crate::util::span;
+use crate::tokenizer::{Event, EventType, Tokenizer};
+use crate::util::slice::{Position, Slice};
/// To do.
pub fn create_resolve_whitespace(hard_break: bool, trim_whole: bool) -> impl Fn(&mut Tokenizer) {
@@ -85,30 +85,26 @@ fn trim_data(
trim_end: bool,
hard_break: bool,
) {
- let mut codes = span::codes(
- &tokenizer.parse_state.codes,
- &span::from_exit_event(&tokenizer.events, exit_index),
+ let mut slice = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position::from_exit_event(&tokenizer.events, exit_index),
);
if trim_end {
- let mut index = codes.len();
- let mut vs = 0;
- let mut spaces_only = true;
+ let mut index = slice.chars.len();
+ let vs = slice.after;
+ let mut spaces_only = vs == 0;
while index > 0 {
- match codes[index - 1] {
- Code::Char(' ') => {}
- Code::Char('\t') => spaces_only = false,
- Code::VirtualSpace => {
- vs += 1;
- spaces_only = false;
- }
+ match slice.chars[index - 1] {
+ ' ' => {}
+ '\t' => spaces_only = false,
_ => break,
}
index -= 1;
}
- let diff = codes.len() - index;
+ let diff = slice.chars.len() - index;
let token_type = if spaces_only
&& hard_break
&& exit_index + 1 < tokenizer.events.len()
@@ -127,12 +123,12 @@ fn trim_data(
return;
}
- if diff > 0 {
+ if diff > 0 || vs > 0 {
let exit_point = tokenizer.events[exit_index].point.clone();
let mut enter_point = exit_point.clone();
enter_point.index -= diff;
- enter_point.column -= diff - vs;
- enter_point.offset -= diff - vs;
+ enter_point.column -= diff;
+ enter_point.vs = 0;
tokenizer.map.add(
exit_index + 1,
@@ -154,17 +150,16 @@ fn trim_data(
);
tokenizer.events[exit_index].point = enter_point;
- codes = &codes[..index];
+ slice.chars = &slice.chars[..index];
}
}
if trim_start {
let mut index = 0;
- let mut vs = 0;
- while index < codes.len() {
- match codes[index] {
- Code::Char(' ' | '\t') => {}
- Code::VirtualSpace => vs += 1,
+ let vs = slice.before;
+ while index < slice.chars.len() {
+ match slice.chars[index] {
+ ' ' | '\t' => {}
_ => break,
}
@@ -173,18 +168,18 @@ fn trim_data(
// The whole data is whitespace.
// We can be very fast: we only change the token types.
- if index == codes.len() {
+ if index == slice.chars.len() {
tokenizer.events[exit_index - 1].token_type = Token::SpaceOrTab;
tokenizer.events[exit_index].token_type = Token::SpaceOrTab;
return;
}
- if index > 0 {
+ if index > 0 || vs > 0 {
let enter_point = tokenizer.events[exit_index - 1].point.clone();
let mut exit_point = enter_point.clone();
exit_point.index += index;
- exit_point.column += index - vs;
- exit_point.offset += index - vs;
+ exit_point.column += index;
+ exit_point.vs = 0;
tokenizer.map.add(
exit_index - 1,