about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- src/content/document.rs 5
-rw-r--r-- src/subtokenize.rs 2
-rw-r--r-- src/tokenizer.rs 40
-rw-r--r-- tests/list.rs 11
4 files changed, 30 insertions, 28 deletions
diff --git a/src/content/document.rs b/src/content/document.rs
index cd66909..e2c4f96 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -302,8 +302,7 @@ fn containers_after(
tokenizer.interrupt = info.interrupt_before;
// Define start.
- let point = tokenizer.point.clone();
- tokenizer.define_skip(&point);
+ tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index);
flow_start(tokenizer, code, info)
}
@@ -491,7 +490,7 @@ fn line_end(
// So, we’re at the end of a line, but we need to close the *previous* line.
if !eof {
println!("closing previous flow");
- tokenizer.define_skip(&tokenizer.point.clone());
+ tokenizer.define_skip(tokenizer.point.clone(), tokenizer.index);
let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>();
let next = info.next;
info.next = Box::new(flow); // This is weird but Rust needs a function there.
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index dd0351d..ad9a745 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -113,7 +113,7 @@ pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Eve
ends.push(span.end_index);
if enter.previous != None {
- tokenizer.define_skip(&enter.point);
+ tokenizer.define_skip(enter.point.clone(), enter.index);
}
let func: Box<StateFn> = match result.0 {
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 34cfde3..788ba6f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -161,7 +161,7 @@ struct InternalState {
/// A tokenizer itself.
#[allow(clippy::struct_excessive_bools)]
pub struct Tokenizer<'a> {
- column_start: HashMap<usize, usize>,
+ column_start: HashMap<usize, (Point, usize)>,
/// Track whether a character is expected to be consumed, and whether it’s
/// actually consumed
///
@@ -269,10 +269,10 @@ impl<'a> Tokenizer<'a> {
///
/// This defines how much columns are increased when consuming a line
/// ending.
- pub fn define_skip(&mut self, point: &Point) {
- self.column_start.insert(point.line, point.column);
+ pub fn define_skip(&mut self, point: Point, index: usize) {
+ log::debug!("position: define skip: {:?}, {:?}", point, index);
+ self.column_start.insert(point.line, (point, index));
self.account_for_potential_skip();
- log::debug!("position: define skip: `{:?}`", point);
}
/// Increment the current positional info if we’re right after a line
@@ -281,11 +281,10 @@ impl<'a> Tokenizer<'a> {
if self.point.column == 1 {
match self.column_start.get(&self.point.line) {
None => {}
- Some(next_column) => {
- let col = *next_column;
- self.point.column = col;
- self.point.offset += col - 1;
- self.index += col - 1;
+ Some((point, index)) => {
+ self.point.column = point.column;
+ self.point.offset = point.offset;
+ self.index = *index;
}
};
}
@@ -302,6 +301,8 @@ impl<'a> Tokenizer<'a> {
log::debug!("consume: `{:?}` ({:?})", code, self.point);
assert!(!self.consumed, "expected code to not have been consumed: this might be because `x(code)` instead of `x` was returned");
+ self.index += 1;
+
match code {
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
self.point.line += 1;
@@ -323,7 +324,6 @@ impl<'a> Tokenizer<'a> {
}
}
- self.index += 1;
self.previous = code;
// Mark as consumed.
self.consumed = true;
@@ -335,7 +335,7 @@ impl<'a> Tokenizer<'a> {
}
pub fn enter_with_content(&mut self, token_type: Token, content_type: Option<ContentType>) {
- log::debug!("enter `{:?}` ({:?})", token_type, self.point);
+ log::debug!("enter: `{:?}` ({:?})", token_type, self.point);
self.events.push(Event {
event_type: EventType::Enter,
token_type: token_type.clone(),
@@ -366,19 +366,23 @@ impl<'a> Tokenizer<'a> {
"expected non-empty token"
);
- // A bit weird, but if we exit right after a line ending, we *don’t* want to consider\
+ // A bit weird, but if we exit right after a line ending, we *don’t* want to consider
// potential skips.
if matches!(
self.previous,
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
) {
- let shift = point.column - 1;
- point.column -= shift;
- point.offset -= shift;
- index -= shift;
+ point.column = 1;
+ point.offset = previous.point.offset
+ + if self.previous == Code::CarriageReturnLineFeed {
+ 2
+ } else {
+ 1
+ };
+ index = previous.index + 1;
}
- log::debug!("exit `{:?}` ({:?})", token_type, point);
+ log::debug!("exit: `{:?}` ({:?})", token_type, point);
self.events.push(Event {
event_type: EventType::Exit,
token_type,
@@ -683,7 +687,7 @@ fn feed_impl(
break;
}
State::Fn(func) => {
- log::debug!("main: passing `{:?}`", code);
+ log::debug!("main: passing: `{:?}`", code);
tokenizer.expect(code, false);
let (next, remainder) = check_statefn_result(func(tokenizer, code));
state = next;
diff --git a/tests/list.rs b/tests/list.rs
index 0360ee7..2ef349b 100644
--- a/tests/list.rs
+++ b/tests/list.rs
@@ -528,12 +528,11 @@ fn list() {
"should remove indent of code (fenced) in list (4 spaces)"
);
- // To do: list (something ugly with tabs and counts and skips?).
- // assert_eq!(
- // micromark("- ```\n\t\n ```"),
- // "<ul>\n<li>\n<pre><code> \n</code></pre>\n</li>\n</ul>",
- // "should remove indent of code (fenced) in list (1 tab)"
- // );
+ assert_eq!(
+ micromark("- ```\n\t\n ```"),
+ "<ul>\n<li>\n<pre><code> \n</code></pre>\n</li>\n</ul>",
+ "should remove indent of code (fenced) in list (1 tab)"
+ );
assert_eq!(
micromark("- +\n-"),