diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-15 11:48:20 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-15 11:48:23 +0200 |
commit | c51784f08024e9dc3dc940a2a6b0db171f56aaa7 (patch) | |
tree | 1ea2029dd90ee59584d7db780c5b2fa29cc58e88 /src/tokenizer.rs | |
parent | a7a4d19fe3bc9e5e0c3218d9af41d9cee80e9374 (diff) | |
download | markdown-rs-c51784f08024e9dc3dc940a2a6b0db171f56aaa7.tar.gz markdown-rs-c51784f08024e9dc3dc940a2a6b0db171f56aaa7.tar.bz2 markdown-rs-c51784f08024e9dc3dc940a2a6b0db171f56aaa7.zip |
Fix annoying bug around virtual spaces in containers
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- | src/tokenizer.rs | 40 |
1 files changed, 22 insertions, 18 deletions
```diff
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 34cfde3..788ba6f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -161,7 +161,7 @@ struct InternalState {
 /// A tokenizer itself.
 #[allow(clippy::struct_excessive_bools)]
 pub struct Tokenizer<'a> {
-    column_start: HashMap<usize, usize>,
+    column_start: HashMap<usize, (Point, usize)>,
     /// Track whether a character is expected to be consumed, and whether it’s
     /// actually consumed
     ///
@@ -269,10 +269,10 @@ impl<'a> Tokenizer<'a> {
     ///
     /// This defines how much columns are increased when consuming a line
     /// ending.
-    pub fn define_skip(&mut self, point: &Point) {
-        self.column_start.insert(point.line, point.column);
+    pub fn define_skip(&mut self, point: Point, index: usize) {
+        log::debug!("position: define skip: {:?}, {:?}", point, index);
+        self.column_start.insert(point.line, (point, index));
         self.account_for_potential_skip();
-        log::debug!("position: define skip: `{:?}`", point);
     }
 
     /// Increment the current positional info if we’re right after a line
@@ -281,11 +281,10 @@ impl<'a> Tokenizer<'a> {
         if self.point.column == 1 {
             match self.column_start.get(&self.point.line) {
                 None => {}
-                Some(next_column) => {
-                    let col = *next_column;
-                    self.point.column = col;
-                    self.point.offset += col - 1;
-                    self.index += col - 1;
+                Some((point, index)) => {
+                    self.point.column = point.column;
+                    self.point.offset = point.offset;
+                    self.index = *index;
                 }
             };
         }
@@ -302,6 +301,8 @@ impl<'a> Tokenizer<'a> {
         log::debug!("consume: `{:?}` ({:?})", code, self.point);
         assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned");
 
+        self.index += 1;
+
         match code {
             Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
                 self.point.line += 1;
@@ -323,7 +324,6 @@ impl<'a> Tokenizer<'a> {
             }
         }
 
-        self.index += 1;
         self.previous = code;
         // Mark as consumed.
         self.consumed = true;
@@ -335,7 +335,7 @@ impl<'a> Tokenizer<'a> {
     }
 
     pub fn enter_with_content(&mut self, token_type: Token, content_type: Option<ContentType>) {
-        log::debug!("enter `{:?}` ({:?})", token_type, self.point);
+        log::debug!("enter: `{:?}` ({:?})", token_type, self.point);
         self.events.push(Event {
             event_type: EventType::Enter,
             token_type: token_type.clone(),
@@ -366,19 +366,23 @@ impl<'a> Tokenizer<'a> {
             "expected non-empty token"
         );
 
-        // A bit weird, but if we exit right after a line ending, we *don’t* want to consider\
+        // A bit weird, but if we exit right after a line ending, we *don’t* want to consider
         // potential skips.
         if matches!(
             self.previous,
             Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
         ) {
-            let shift = point.column - 1;
-            point.column -= shift;
-            point.offset -= shift;
-            index -= shift;
+            point.column = 1;
+            point.offset = previous.point.offset
+                + if self.previous == Code::CarriageReturnLineFeed {
+                    2
+                } else {
+                    1
+                };
+            index = previous.index + 1;
         }
 
-        log::debug!("exit `{:?}` ({:?})", token_type, point);
+        log::debug!("exit: `{:?}` ({:?})", token_type, point);
         self.events.push(Event {
             event_type: EventType::Exit,
             token_type,
@@ -683,7 +687,7 @@ fn feed_impl(
                 break;
             }
             State::Fn(func) => {
-                log::debug!("main: passing `{:?}`", code);
+                log::debug!("main: passing: `{:?}`", code);
                 tokenizer.expect(code, false);
                 let (next, remainder) = check_statefn_result(func(tokenizer, code));
                 state = next;
```