diff options
-rw-r--r-- | src/content/document.rs | 5 | ||||
-rw-r--r-- | src/subtokenize.rs | 2 | ||||
-rw-r--r-- | src/tokenizer.rs | 40 | ||||
-rw-r--r-- | tests/list.rs | 11 |
4 files changed, 30 insertions, 28 deletions
diff --git a/src/content/document.rs b/src/content/document.rs index cd66909..e2c4f96 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -302,8 +302,7 @@ fn containers_after( tokenizer.interrupt = info.interrupt_before; // Define start. - let point = tokenizer.point.clone(); - tokenizer.define_skip(&point); + tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index); flow_start(tokenizer, code, info) } @@ -491,7 +490,7 @@ fn line_end( // So, we’re at the end of a line, but we need to close the *previous* line. if !eof { println!("closing previous flow"); - tokenizer.define_skip(&tokenizer.point.clone()); + tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index); let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>(); let next = info.next; info.next = Box::new(flow); // This is weird but Rust needs a function there. diff --git a/src/subtokenize.rs b/src/subtokenize.rs index dd0351d..ad9a745 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -113,7 +113,7 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve ends.push(span.end_index); if enter.previous != None { - tokenizer.define_skip(&enter.point); + tokenizer.define_skip(enter.point.clone(), enter.index); } let func: Box<StateFn> = match result.0 { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 34cfde3..788ba6f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -161,7 +161,7 @@ struct InternalState { /// A tokenizer itself. #[allow(clippy::struct_excessive_bools)] pub struct Tokenizer<'a> { - column_start: HashMap<usize, usize>, + column_start: HashMap<usize, (Point, usize)>, /// Track whether a character is expected to be consumed, and whether it’s /// actually consumed /// @@ -269,10 +269,10 @@ impl<'a> Tokenizer<'a> { /// /// This defines how much columns are increased when consuming a line /// ending. - pub fn define_skip(&mut self, point: &Point) { - self.column_start.insert(point.line, point.column); + pub fn define_skip(&mut self, point: Point, index: usize) { + log::debug!("position: define skip: {:?}, {:?}", point, index); + self.column_start.insert(point.line, (point, index)); self.account_for_potential_skip(); - log::debug!("position: define skip: `{:?}`", point); } /// Increment the current positional info if we’re right after a line @@ -281,11 +281,10 @@ impl<'a> Tokenizer<'a> { if self.point.column == 1 { match self.column_start.get(&self.point.line) { None => {} - Some(next_column) => { - let col = *next_column; - self.point.column = col; - self.point.offset += col - 1; - self.index += col - 1; + Some((point, index)) => { + self.point.column = point.column; + self.point.offset = point.offset; + self.index = *index; } }; } @@ -302,6 +301,8 @@ impl<'a> Tokenizer<'a> { log::debug!("consume: `{:?}` ({:?})", code, self.point); assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned"); + self.index += 1; + match code { Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { self.point.line += 1; @@ -323,7 +324,6 @@ impl<'a> Tokenizer<'a> { } } - self.index += 1; self.previous = code; // Mark as consumed. self.consumed = true; @@ -335,7 +335,7 @@ impl<'a> Tokenizer<'a> { } pub fn enter_with_content(&mut self, token_type: Token, content_type: Option<ContentType>) { - log::debug!("enter `{:?}` ({:?})", token_type, self.point); + log::debug!("enter: `{:?}` ({:?})", token_type, self.point); self.events.push(Event { event_type: EventType::Enter, token_type: token_type.clone(), @@ -366,19 +366,23 @@ impl<'a> Tokenizer<'a> { "expected non-empty token" ); - // A bit weird, but if we exit right after a line ending, we *don’t* want to consider\ + // A bit weird, but if we exit right after a line ending, we *don’t* want to consider // potential skips. if matches!( self.previous, Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') ) { - let shift = point.column - 1; - point.column -= shift; - point.offset -= shift; - index -= shift; + point.column = 1; + point.offset = previous.point.offset + + if self.previous == Code::CarriageReturnLineFeed { + 2 + } else { + 1 + }; + index = previous.index + 1; } - log::debug!("exit `{:?}` ({:?})", token_type, point); + log::debug!("exit: `{:?}` ({:?})", token_type, point); self.events.push(Event { event_type: EventType::Exit, token_type, @@ -683,7 +687,7 @@ fn feed_impl( break; } State::Fn(func) => { - log::debug!("main: passing `{:?}`", code); + log::debug!("main: passing: `{:?}`", code); tokenizer.expect(code, false); let (next, remainder) = check_statefn_result(func(tokenizer, code)); state = next; diff --git a/tests/list.rs b/tests/list.rs index 0360ee7..2ef349b 100644 --- a/tests/list.rs +++ b/tests/list.rs @@ -528,12 +528,11 @@ fn list() { "should remove indent of code (fenced) in list (4 spaces)" ); - // To do: list (something ugly with tabs and counts and skips?). - // assert_eq!( - // micromark("- ```\n\t\n ```"), - // "<ul>\n<li>\n<pre><code> \n</code></pre>\n</li>\n</ul>", - // "should remove indent of code (fenced) in list (1 tab)" - // ); + assert_eq!( + micromark("- ```\n\t\n ```"), + "<ul>\n<li>\n<pre><code> \n</code></pre>\n</li>\n</ul>", + "should remove indent of code (fenced) in list (1 tab)" + ); assert_eq!( micromark("- +\n-"), |