From c51784f08024e9dc3dc940a2a6b0db171f56aaa7 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Fri, 15 Jul 2022 11:48:20 +0200 Subject: Fix annoying bug around virtual spaces in containers --- src/content/document.rs | 5 ++--- src/subtokenize.rs | 2 +- src/tokenizer.rs | 40 ++++++++++++++++++++++------------------ 3 files changed, 25 insertions(+), 22 deletions(-) (limited to 'src') diff --git a/src/content/document.rs b/src/content/document.rs index cd66909..e2c4f96 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -302,8 +302,7 @@ fn containers_after( tokenizer.interrupt = info.interrupt_before; // Define start. - let point = tokenizer.point.clone(); - tokenizer.define_skip(&point); + tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index); flow_start(tokenizer, code, info) } @@ -491,7 +490,7 @@ fn line_end( // So, we’re at the end of a line, but we need to close the *previous* line. if !eof { println!("closing previous flow"); - tokenizer.define_skip(&tokenizer.point.clone()); + tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index); let mut current_events = tokenizer.events.drain(info.index..).collect::>(); let next = info.next; info.next = Box::new(flow); // This is weird but Rust needs a function there. diff --git a/src/subtokenize.rs b/src/subtokenize.rs index dd0351d..ad9a745 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -113,7 +113,7 @@ pub fn subtokenize(mut events: Vec, parse_state: &ParseState) -> (Vec = match result.0 { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 34cfde3..788ba6f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -161,7 +161,7 @@ struct InternalState { /// A tokenizer itself. #[allow(clippy::struct_excessive_bools)] pub struct Tokenizer<'a> { - column_start: HashMap, + column_start: HashMap, /// Track whether a character is expected to be consumed, and whether it’s /// actually consumed /// @@ -269,10 +269,10 @@ impl<'a> Tokenizer<'a> { /// /// This defines how much columns are increased when consuming a line /// ending. - pub fn define_skip(&mut self, point: &Point) { - self.column_start.insert(point.line, point.column); + pub fn define_skip(&mut self, point: Point, index: usize) { + log::debug!("position: define skip: {:?}, {:?}", point, index); + self.column_start.insert(point.line, (point, index)); self.account_for_potential_skip(); - log::debug!("position: define skip: `{:?}`", point); } /// Increment the current positional info if we’re right after a line @@ -281,11 +281,10 @@ impl<'a> Tokenizer<'a> { if self.point.column == 1 { match self.column_start.get(&self.point.line) { None => {} - Some(next_column) => { - let col = *next_column; - self.point.column = col; - self.point.offset += col - 1; - self.index += col - 1; + Some((point, index)) => { + self.point.column = point.column; + self.point.offset = point.offset; + self.index = *index; } }; } @@ -302,6 +301,8 @@ impl<'a> Tokenizer<'a> { log::debug!("consume: `{:?}` ({:?})", code, self.point); assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned"); + self.index += 1; + match code { Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { self.point.line += 1; @@ -323,7 +324,6 @@ impl<'a> Tokenizer<'a> { } } - self.index += 1; self.previous = code; // Mark as consumed. self.consumed = true; @@ -335,7 +335,7 @@ impl<'a> Tokenizer<'a> { } pub fn enter_with_content(&mut self, token_type: Token, content_type: Option) { - log::debug!("enter `{:?}` ({:?})", token_type, self.point); + log::debug!("enter: `{:?}` ({:?})", token_type, self.point); self.events.push(Event { event_type: EventType::Enter, token_type: token_type.clone(), @@ -366,19 +366,23 @@ impl<'a> Tokenizer<'a> { "expected non-empty token" ); - // A bit weird, but if we exit right after a line ending, we *don’t* want to consider\ + // A bit weird, but if we exit right after a line ending, we *don’t* want to consider // potential skips. if matches!( self.previous, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') ) { - let shift = point.column - 1; - point.column -= shift; - point.offset -= shift; - index -= shift; + point.column = 1; + point.offset = previous.point.offset + + if self.previous == Code::CarriageReturnLineFeed { + 2 + } else { + 1 + }; + index = previous.index + 1; } - log::debug!("exit `{:?}` ({:?})", token_type, point); + log::debug!("exit: `{:?}` ({:?})", token_type, point); self.events.push(Event { event_type: EventType::Exit, token_type, @@ -683,7 +687,7 @@ fn feed_impl( break; } State::Fn(func) => { - log::debug!("main: passing `{:?}`", code); + log::debug!("main: passing: `{:?}`", code); tokenizer.expect(code, false); let (next, remainder) = check_statefn_result(func(tokenizer, code)); state = next; -- cgit