diff options
| -rw-r--r-- | readme.md | 3 | ||||
| -rw-r--r-- | src/tokenizer.rs | 174 | 
2 files changed, 98 insertions, 79 deletions
@@ -123,8 +123,6 @@ cargo doc --document-private-items  #### Docs  - [ ] (1) `space_or_tab_one_line_ending` -- [ ] (1) `ParseState` -- [ ] (1) Resolvers, push, feed, etc.  - [ ] (1) Go through all bnf  - [ ] (1) Go through all docs  - [ ] (1) Add overview docs on how everything works @@ -283,3 +281,4 @@ important.  - [x] (2) Refactor to externalize handlers of compiler  - [x] (1) Add support for compiling shared references and definitions  - [x] (1) Add docs to Image, Link, and other media tokens +- [x] (1) Add docs on resolver, clean feed diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 17cf392..fe69366 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1595,10 +1595,12 @@ pub enum TokenType {      ThematicBreakSequence,  } -/// To do +/// Embedded content type.  #[derive(Debug, Clone, Copy, PartialEq)]  pub enum ContentType { +    /// Represents [text content][crate::content::text].      Text, +    /// Represents [string content][crate::content::string].      String,  } @@ -1665,7 +1667,11 @@ pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult;  /// In certain cases, it can also yield back up parsed codes that were passed down.  pub type StateFnResult = (State, Option<Vec<Code>>); -/// To do. +/// Callback that can be registered and is called when the tokenizer is done. +/// +/// Resolvers are supposed to change the list of events, because parsing is +/// sometimes messy, and they help expose a cleaner interface of events to +/// the compiler and other users.  pub type Resolver = dyn FnOnce(&mut Tokenizer) -> Vec<Event>;  /// The result of a state. @@ -1678,10 +1684,10 @@ pub enum State {      Nok,  } -/// To do. +/// Loose label starts we found.  #[derive(Debug)]  pub struct LabelStart { -    /// To do. +    /// Indices of where the label starts and ends in `events`.      pub start: (usize, usize),      /// A boolean used internally to figure out if a label start link can’t be      /// used (because links in links are incorrect). @@ -1691,14 +1697,14 @@ pub struct LabelStart {      pub balanced: bool,  } -/// To do. +/// Media we found.  #[derive(Debug)]  pub struct Media { -    /// To do. +    /// Indices of where the media’s label start starts and ends in `events`.      pub start: (usize, usize), -    /// To do. +    /// Indices of where the media’s label end starts and ends in `events`.      pub end: (usize, usize), -    /// To do. +    /// Identifier      pub id: String,  } @@ -1731,6 +1737,8 @@ pub struct Tokenizer<'a> {      ///      /// Tracked to make sure everything’s valid.      consumed: bool, +    /// Track whether this tokenizer is done. +    drained: bool,      /// Semantic labels of one or more codes in `codes`.      pub events: Vec<Event>,      /// Hierarchy of semantic labels. @@ -1767,6 +1775,7 @@ impl<'a> Tokenizer<'a> {              column_start: HashMap::new(),              index,              consumed: true, +            drained: false,              point,              stack: vec![],              events: vec![], @@ -1955,7 +1964,12 @@ impl<'a> Tokenizer<'a> {              vec![],              |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {                  if ok { -                    tokenizer.feed(&if ok { result.1 } else { result.0 }, after, false) +                    feed_impl( +                        tokenizer, +                        &if ok { result.1 } else { result.0 }, +                        after, +                        false, +                    )                  } else {                      (State::Nok, None)                  } @@ -1984,7 +1998,7 @@ impl<'a> Tokenizer<'a> {              vec![],              |result: (Vec<Code>, Vec<Code>), ok, tokenizer: &mut Tokenizer| {                  tokenizer.free(previous); -                tokenizer.feed(&result.0, done(ok), false) +                feed_impl(tokenizer, &result.0, done(ok), false)              },          )      } @@ -2023,7 +2037,7 @@ impl<'a> Tokenizer<'a> {                      codes,                      tokenizer.point                  ); -                tokenizer.feed(&codes, done(ok), false) +                feed_impl(tokenizer, &codes, done(ok), false)              },          )      } @@ -2063,80 +2077,19 @@ impl<'a> Tokenizer<'a> {      /// This is set up to support repeatedly calling `feed`, and thus streaming      /// markdown into the state machine, and normally pauses after feeding.      /// When `done: true` is passed, the EOF is fed. -    // To do: call this `feed_impl`, and rename `push` to `feed`? -    fn feed( -        &mut self, -        codes: &[Code], -        start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, -        drain: bool, -    ) -> StateFnResult { -        let codes = codes; -        let mut state = State::Fn(Box::new(start)); -        let mut index = 0; - -        self.consumed = true; - -        while index < codes.len() { -            let code = codes[index]; - -            match state { -                State::Nok | State::Ok => { -                    break; -                } -                State::Fn(func) => { -                    log::debug!("main: passing `{:?}`", code); -                    self.expect(code); -                    let (next, remainder) = check_statefn_result(func(self, code)); -                    state = next; -                    index = index + 1 -                        - (if let Some(ref x) = remainder { -                            x.len() -                        } else { -                            0 -                        }); -                } -            } -        } - -        // Yield to a higher loop if we shouldn’t feed EOFs. -        if !drain { -            return check_statefn_result((state, Some(codes[index..].to_vec()))); -        } - -        loop { -            // Feed EOF. -            match state { -                State::Ok | State::Nok => break, -                State::Fn(func) => { -                    let code = Code::None; -                    log::debug!("main: passing eof"); -                    self.expect(code); -                    let (next, remainder) = check_statefn_result(func(self, code)); -                    assert!(remainder.is_none(), "expected no remainder"); -                    state = next; -                } -            } -        } - -        match state { -            State::Ok => {} -            _ => unreachable!("expected final state to be `State::Ok`"), -        } - -        check_statefn_result((state, None)) -    } - -    /// To do. -    // To do: set a `drained` to prevent passing after draining?      pub fn push(          &mut self,          codes: &[Code],          start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,          drain: bool,      ) -> StateFnResult { -        let result = self.feed(codes, start, drain); +        assert!(!self.drained, "cannot feed after drain"); + +        let result = feed_impl(self, codes, start, drain);          if drain { +            self.drained = true; +              while !self.resolvers.is_empty() {                  let resolver = self.resolvers.remove(0);                  self.events = resolver(self); @@ -2187,6 +2140,73 @@ fn attempt_impl(      })  } +/// Feed a list of `codes` into `start`. +/// +/// This is set up to support repeatedly calling `feed`, and thus streaming +/// markdown into the state machine, and normally pauses after feeding. +/// When `done: true` is passed, the EOF is fed. +fn feed_impl( +    tokenizer: &mut Tokenizer, +    codes: &[Code], +    start: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static, +    drain: bool, +) -> StateFnResult { +    let codes = codes; +    let mut state = State::Fn(Box::new(start)); +    let mut index = 0; + +    tokenizer.consumed = true; + +    while index < codes.len() { +        let code = codes[index]; + +        match state { +            State::Nok | State::Ok => { +                break; +            } +            State::Fn(func) => { +                log::debug!("main: passing `{:?}`", code); +                tokenizer.expect(code); +                let (next, remainder) = check_statefn_result(func(tokenizer, code)); +                state = next; +                index = index + 1 +                    - (if let Some(ref x) = remainder { +                        x.len() +                    } else { +                        0 +                    }); +            } +        } +    } + +    // Yield to a higher loop if we shouldn’t feed EOFs. +    if !drain { +        return check_statefn_result((state, Some(codes[index..].to_vec()))); +    } + +    loop { +        // Feed EOF. +        match state { +            State::Ok | State::Nok => break, +            State::Fn(func) => { +                let code = Code::None; +                log::debug!("main: passing eof"); +                tokenizer.expect(code); +                let (next, remainder) = check_statefn_result(func(tokenizer, code)); +                assert!(remainder.is_none(), "expected no remainder"); +                state = next; +            } +        } +    } + +    match state { +        State::Ok => {} +        _ => unreachable!("expected final state to be `State::Ok`"), +    } + +    check_statefn_result((state, None)) +} +  /// Turn a string into codes.  pub fn as_codes(value: &str) -> Vec<Code> {      let mut codes: Vec<Code> = vec![];  | 
