diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-22 15:03:15 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-22 15:03:15 +0200 |
commit | 41fc406af206e21014eaaba94bcf6b1854f892b3 (patch) | |
tree | 510f6e1d763643da9072f9cf7e097e777fdbd5b8 /src/tokenizer.rs | |
parent | 37fad739ba73d488d4c3652caee01f1ec5d0aaaa (diff) | |
download | markdown-rs-41fc406af206e21014eaaba94bcf6b1854f892b3.tar.gz markdown-rs-41fc406af206e21014eaaba94bcf6b1854f892b3.tar.bz2 markdown-rs-41fc406af206e21014eaaba94bcf6b1854f892b3.zip |
Refactor to pass ints instead of vecs around
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r-- | src/tokenizer.rs | 121 |
1 file changed, 47 insertions, 74 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs index f5ac1af..544e8b0 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -16,7 +16,7 @@ use crate::token::{Token, VOID_TOKENS}; use crate::util::edit_map::EditMap; /// Embedded content type. -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub enum ContentType { /// Represents [text content][crate::content::text]. Text, @@ -44,7 +44,7 @@ pub enum Code { /// /// The interface for the location in the document comes from unist `Point`: /// <https://github.com/syntax-tree/unist#point>. -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone)] pub struct Point { /// 1-indexed line number. pub line: usize, @@ -92,7 +92,7 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult; /// Each [`StateFn`][] yields something back: primarily the state. /// In certain cases, it can also yield back up parsed codes that were passed down. -pub type StateFnResult = (State, Option<Vec<Code>>); +pub type StateFnResult = (State, usize); /// Callback that can be registered and is called when the tokenizer is done. 
/// @@ -479,11 +479,11 @@ impl<'a> Tokenizer<'a> { state_fn, |_code| false, vec![], - |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer, _state| { - if ok { - feed_impl(tokenizer, &if ok { result.1 } else { result.0 }, after) + |result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { + if matches!(state, State::Ok) { + feed_impl(tokenizer, &result.1, after) } else { - (State::Nok, None) + (State::Nok, 0) } }, ) @@ -502,9 +502,9 @@ impl<'a> Tokenizer<'a> { state_fn, until, vec![], - |result: (Vec<Code>, Vec<Code>), _ok, tokenizer: &mut Tokenizer, state| { + |result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { tokenizer.consumed = true; - done(check_statefn_result((state, Some(result.1)))) + done((state, result.1.len())) }, ) } @@ -529,9 +529,10 @@ impl<'a> Tokenizer<'a> { state_fn, |_code| false, vec![], - |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer, _state| { + |mut result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { tokenizer.free(previous); - feed_impl(tokenizer, &result.0, done(ok)) + result.0.append(&mut result.1); + feed_impl(tokenizer, &result.0, done(matches!(state, State::Ok))) }, ) } @@ -558,12 +559,19 @@ impl<'a> Tokenizer<'a> { state_fn, |_code| false, vec![], - |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer, _state| { + |mut result: (Vec<Code>, Vec<Code>), tokenizer: &mut Tokenizer, state| { + let ok = matches!(state, State::Ok); + if !ok { tokenizer.free(previous); } - let codes = if ok { result.1 } else { result.0 }; + let codes = if ok { + result.1 + } else { + result.0.append(&mut result.1); + result.0 + }; log::debug!( "attempt: {:?}, codes: {:?}, at {:?}", @@ -571,6 +579,7 @@ impl<'a> Tokenizer<'a> { codes, tokenizer.point ); + feed_impl(tokenizer, &codes, done(ok)) }, ) @@ -670,19 +679,19 @@ fn attempt_impl( state: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, mut pause: impl FnMut(Code) -> bool + 'static, mut codes: Vec<Code>, - done: impl 
FnOnce((Vec<Code>, Vec<Code>), bool, &mut Tokenizer, State) -> StateFnResult + 'static, + done: impl FnOnce((Vec<Code>, Vec<Code>), &mut Tokenizer, State) -> StateFnResult + 'static, ) -> Box<StateFn> { Box::new(|tokenizer, code| { if !codes.is_empty() && pause(tokenizer.previous) { - return done( - (codes, vec![code]), - false, - tokenizer, - State::Fn(Box::new(state)), - ); + let after = if matches!(code, Code::None) { + vec![] + } else { + vec![code] + }; + return done((codes, after), tokenizer, State::Fn(Box::new(state))); } - let (next, remainder) = check_statefn_result(state(tokenizer, code)); + let (next, back) = state(tokenizer, code); match code { Code::None => {} @@ -691,22 +700,19 @@ fn attempt_impl( } } - if let Some(ref list) = remainder { - assert!( - list.len() <= codes.len(), - "`remainder` must be less than or equal to `codes`" - ); - } + assert!( + back <= codes.len(), + "`back` must be smaller than or equal to `codes.len()`" + ); match next { - State::Ok => { - let remaining = if let Some(x) = remainder { x } else { vec![] }; - check_statefn_result(done((codes, remaining), true, tokenizer, next)) + State::Ok | State::Nok => { + let remaining = codes.split_off(codes.len() - back); + done((codes, remaining), tokenizer, next) } - State::Nok => check_statefn_result(done((codes, vec![]), false, tokenizer, next)), State::Fn(func) => { - assert!(remainder.is_none(), "expected no remainder"); - check_statefn_result((State::Fn(attempt_impl(func, pause, codes, done)), None)) + assert_eq!(back, 0, "expected no remainder"); + (State::Fn(attempt_impl(func, pause, codes, done)), 0) } } }) @@ -727,27 +733,18 @@ fn feed_impl( let code = codes[index]; match state { - State::Nok | State::Ok => { - break; - } + State::Ok | State::Nok => break, State::Fn(func) => { - log::debug!("main: passing: `{:?}`", code); + log::debug!("main: passing: `{:?}` ({:?})", code, index); tokenizer.expect(code, false); - let (next, remainder) = check_statefn_result(func(tokenizer, 
code)); + let (next, back) = func(tokenizer, code); state = next; - index = index + 1 - - (if let Some(ref x) = remainder { - x.len() - } else { - 0 - }); + index = index + 1 - back; } } } - // Yield to a higher loop. - // To do: do not copy? - check_statefn_result((state, Some(codes[index..].to_vec()))) + (state, codes.len() - index) } /// Flush `start`: pass `eof`s to it until done. @@ -766,8 +763,8 @@ fn flush_impl( let code = Code::None; log::debug!("main: passing eof"); tokenizer.expect(code, false); - let (next, remainder) = check_statefn_result(func(tokenizer, code)); - assert!(remainder.is_none(), "expected no remainder"); + let (next, remainder) = func(tokenizer, code); + assert_eq!(remainder, 0, "expected no remainder"); state = next; } } @@ -778,7 +775,7 @@ fn flush_impl( _ => unreachable!("expected final state to be `State::Ok`"), } - check_statefn_result((state, None)) + (state, 0) } /// Define a jump between two places. @@ -798,27 +795,3 @@ fn define_skip_impl(tokenizer: &mut Tokenizer, line: usize, info: (usize, usize, tokenizer.account_for_potential_skip(); } - -/// Check a [`StateFnResult`][], make sure its valid (that there are no bugs), -/// and clean a final eof passed back in `remainder`. -fn check_statefn_result(result: StateFnResult) -> StateFnResult { - let (state, mut remainder) = result; - - // Remove an eof. - // For convencience, feeding back an eof is allowed, but cleaned here. - // Most states handle eof and eol in the same branch, and hence pass - // all back. - // This might not be needed, because if EOF is passed back, we’re at the EOF. - // But they’re not supposed to be in codes, so here we remove them. - if let Some(ref mut list) = remainder { - if Some(&Code::None) == list.last() { - list.pop(); - } - - if list.is_empty() { - return (state, None); - } - } - - (state, remainder) -} |