| field     | value                                    |
|-----------|------------------------------------------|
| author    | 2022-06-21 13:14:07 +0200                |
| committer | 2022-06-21 13:14:07 +0200                |
| commit    | bcc4676b84a06af5e38ebaa31f0217cae090be08 |
| tree      | 4b823b2bb9d5d298a2f7f06c40dccd55e193f10e |
| parent    | f99d131ec3ab60956344d001bcd40244343c241b |
**Update todo list**
Diffstat:

| mode       | file               | lines changed |
|------------|--------------------|---------------|
| -rw-r--r-- | readme.md          | 86            |
| -rw-r--r-- | src/subtokenize.rs | 9             |
| -rw-r--r-- | src/tokenizer.rs   | 39            |

3 files changed, 99 insertions, 35 deletions
diff --git a/readme.md b/readme.md
--- a/readme.md
+++ b/readme.md
@@ -64,37 +64,101 @@ cargo doc --document-private-items
 - [ ] (5) Figure out extensions
 - [ ] (1) Support turning off constructs
 
-### Small things
+### All the things
 
-- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
-- [ ] (1) Parse initial and final whitespace of paragraphs (in text)
-- [ ] (1) Add docs to subtokenize
+#### Docs
+
+- [ ] (1) Add docs for `default_line_ending`
+- [ ] (1) Add docs for virtual spaces
+- [ ] (1) Add docs to `subtokenize.rs`
+- [ ] (1) Add docs for `link.rs`
+- [ ] (1) Add docs for token types
+- [ ] (1) Add docs for tokenizer (`go`, `define_skip`,
+      `account_for_potential_skip`, `attempt_5`, `attempt_7`, `call_multiple`)
+- [ ] (1) Add docs for sanitation (autolink, definition, resource)
+- [ ] (1) Add docs for how references and definitions match (definition, reference)
+- [ ] (1) Go through all bnf
+- [ ] (1) Go through all docs
 - [ ] (1) Add module docs to parser
 - [ ] (1) Add overview docs on how everything works
+
+#### Refactor
+
 - [ ] (1) Move safe protocols to constants
-- [ ] (3) Clean compiler
+- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
+- [ ] (1) Make text data, string data constructs (document in
+      `construct/mod.rs`)
+- [ ] (1) Configurable tokens (destination, label, title)
+- [ ] (1) Configurable limit (destination)
+
+#### Parse
+
+- [ ] (1) Parse initial and final whitespace of paragraphs (in text)\
+       test (`code_indented`, `hard_break_escape`, `hard_break_trailing`,
+      `heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`,
+      `misc_tabs`, `thematic_break`)
+- [ ] (1) Get definition identifiers (definition)
+- [ ] (3) Interrupting (html flow complete)
+- [ ] (5) labels\
+       test (`character_escape`, `character_reference`, `definition`,
+      `misc_dangerous_protocol`, `misc_tabs`, `misc_url`, `thematic_break`)\
+       link link reference (definition)\
+       link label end (destination, label, title)\
+       link label start (label)
+- [ ] (5) attention\
+       test (`character_reference`, `hard_break_escape`, `hard_break_trailing`,
+      `heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\
+- [ ] (8) block quote\
+       test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`,
+      `html_flow`, `misc_default_line_ending`, `thematic_break`)
+- [ ] (8) list\
+       test (`character_reference`, `code_indented`, `heading_setext`,
+      `html_flow`, `thematic_break`)\
+       link (`blank line`, `thematic break`)
+- [ ] (3) Lazy lines (`code indented`, `html flow`)
+- [ ] (3) Concrete (`html flow`)
+- [ ] (3) Turn off things (enable every test for these)
+- [ ] (3) Make tokenizer tokens extendable
+
+#### Test
+
 - [ ] (1) Make sure positional info is perfect
-- [ ] (3) Figure out lifetimes of things (see `life time` in source)
 - [ ] (3) Use `commonmark` tests
 - [ ] (3) Share a bunch of tests with `micromark-js`
+
+#### Misc
+
+- [ ] (3) Check subtokenizer unraveling is ok
+- [ ] (3) Remove splicing and cloning in subtokenizer
+- [ ] (3) Pass more references around
+- [ ] (1) Remove todos in `span.rs` if not needed
+- [ ] (1) Get markers from constructs (`string`, `text`)
+- [ ] (1) Do not capture in `tokenizer.go`
+- [ ] (1) Clean attempts
+- [ ] (3) Clean compiler
+- [ ] (3) Figure out lifetimes of things (see `life time` in source)
 - [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept,
       how to integrate with streams or so?
 - [ ] (1) Go through clippy rules, and such, to add strict code styles
 - [ ] (1) Make sure that rust character groups match CM character groups (e.g., is
       `unicode_whitespace` or so the same?)
 - [ ] (1) Any special handling of surrogates?
-- [ ] (1) Make sure debugging is useful for other folks
+- [ ] (1) Make sure debugging, assertions are useful for other folks
 - [ ] (3) Add some benchmarks, do some perf testing
 - [ ] (3) Write comparison to other parsers
 - [ ] (3) Add node/etc bindings?
-- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
-      this with SWC to compile MDX
 - [ ] (3) Bunch of docs
 - [ ] (5) Site
 
+#### After
+
+- [ ] (8) Extensions!
+- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
+      this with SWC to compile MDX
+
 ### Constructs
 
-- [ ] (5) attention (strong, emphasis) (text)
+- [ ] (5) attention (strong, emphasis)
 - [x] autolink
 - [x] blank line
 - [ ] (5) block quote
@@ -132,7 +196,7 @@ cargo doc --document-private-items
   - [x] html (flow)
   - [x] paragraph
   - [x] thematic break
-- [ ] (5) text
+- [ ] (8) text
   - [ ] attention (strong, emphasis) (text)
   - [x] autolink
   - [x] character escape
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 0623a37..1188c61 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -66,14 +66,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
                 };
                 result = tokenizer.feed(span::codes(codes, &span), func, enter.next == None);
-
-                if let Some(ref x) = result.1 {
-                    if !x.is_empty() {
-                        // To do: handle?
-                        unreachable!("subtokenize:remainder {:?}", x);
-                    }
-                }
-
+                assert!(result.1.is_none(), "expected no remainder");
                 index_opt = enter.next;
             }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index ba9bcbb..909a1d1 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -222,12 +222,14 @@ impl Tokenizer {
         self.current = code;
     }
 
+    /// To do.
     pub fn define_skip(&mut self, point: &Point, index: usize) {
         self.column_start.insert(point.line, point.column);
         self.account_for_potential_skip();
         log::debug!("position: define skip: `{:?}` ({:?})", point, index);
     }
 
+    /// To do.
     fn account_for_potential_skip(&mut self) {
         match self.column_start.get(&self.point.line) {
             None => {}
@@ -462,6 +464,7 @@ impl Tokenizer {
     }
 
     // To do: lifetimes, boxes, lmao.
+    /// To do.
     pub fn attempt_2(
         &mut self,
         a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
@@ -481,6 +484,7 @@
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn attempt_5(
         &mut self,
@@ -504,6 +508,7 @@
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn attempt_7(
         &mut self,
@@ -529,6 +534,7 @@
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn call_multiple(
         &mut self,
@@ -606,7 +612,7 @@ impl Tokenizer {
 
         // Yield to a higher loop if we shouldn’t feed EOFs.
         if !drain {
-            return (state, Some(codes[index..].to_vec()));
+            return check_statefn_result((state, Some(codes[index..].to_vec())));
         }
 
         loop {
@@ -618,14 +624,7 @@
                     log::debug!("main: passing eof");
                     self.expect(code);
                     let (next, remainder) = check_statefn_result(func(self, code));
-
-                    if let Some(ref x) = remainder {
-                        if !x.is_empty() {
-                            // To do: handle?
-                            unreachable!("drain:remainder {:?}", x);
-                        }
-                    }
-
+                    assert!(remainder.is_none(), "expected no remainder");
                     state = next;
                 }
             }
@@ -661,8 +660,13 @@ fn attempt_impl(
             }
         }
 
-        // To do: `remainder` must never be bigger than codes I guess?
-        // To do: `remainder` probably has to be taken *from* `codes`, in a similar vain to the `Ok` handling below.
+        if let Some(ref list) = remainder {
+            assert!(
+                list.len() <= codes.len(),
+                "`remainder` must be less than or equal to `codes`"
+            );
+        }
+
         match next {
             State::Ok => {
                 let remaining = if let Some(x) = remainder { x } else { vec![] };
@@ -670,6 +674,7 @@
             }
             State::Nok => check_statefn_result(done((codes, vec![]), false, tokenizer)),
             State::Fn(func) => {
+                assert!(remainder.is_none(), "expected no remainder");
                 check_statefn_result((State::Fn(attempt_impl(func, codes, done)), None))
             }
         }
@@ -712,20 +717,18 @@ pub fn as_codes(value: &str) -> Vec<Code> {
                 }
                 // Send a tab and virtual spaces.
                 '\t' => {
-                    // To do: is this correct?
                     let remainder = column % TAB_SIZE;
-                    let virtual_spaces = if remainder == 0 {
+                    let mut virtual_spaces = if remainder == 0 {
                         0
                     } else {
                         TAB_SIZE - remainder
                     };
                     codes.push(Code::Char(char));
                     column += 1;
-                    let mut index = 0;
-                    while index < virtual_spaces {
+                    while virtual_spaces > 0 {
                         codes.push(Code::VirtualSpace);
                         column += 1;
-                        index += 1;
+                        virtual_spaces -= 1;
                     }
                 }
                 // Send an LF.
@@ -770,6 +773,10 @@ fn check_statefn_result(result: StateFnResult) -> StateFnResult {
         if Some(&Code::None) == list.last() {
             list.pop();
         }
+
+        if list.is_empty() {
+            return (state, None);
+        }
     }
 
     (state, remainder)
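For context on the `as_codes` hunk above: a tab is emitted as one `Code::Char('\t')` followed by however many `Code::VirtualSpace`s are needed to reach the next tab stop, and the change folds the old `index` counter into the `virtual_spaces` count itself. Below is a minimal, self-contained sketch of that logic; the trimmed-down `Code` enum and the `push_tab` helper are hypothetical stand-ins, assuming a `TAB_SIZE` of 4 and the 1-based columns the tokenizer appears to use.

```rust
const TAB_SIZE: usize = 4;

#[derive(Debug, PartialEq)]
enum Code {
    Char(char),
    VirtualSpace,
}

/// Expand one tab into the tab character itself plus enough virtual
/// spaces to reach the next tab stop (columns assumed 1-based).
fn push_tab(codes: &mut Vec<Code>, column: &mut usize) {
    let remainder = *column % TAB_SIZE;
    let mut virtual_spaces = if remainder == 0 { 0 } else { TAB_SIZE - remainder };
    codes.push(Code::Char('\t'));
    *column += 1;
    while virtual_spaces > 0 {
        codes.push(Code::VirtualSpace);
        *column += 1;
        virtual_spaces -= 1;
    }
}

fn main() {
    let mut codes = vec![];
    let mut column = 1;
    // A tab at column 1 spans columns 1–4: one `Char('\t')` plus three
    // `VirtualSpace`s, leaving the next character at column 5.
    push_tab(&mut codes, &mut column);
    assert_eq!(codes.len(), 4);
    assert_eq!(column, 5);
}
```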
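The final hunk is what makes the new `assert!` calls safe: `check_statefn_result` now normalizes remainders by trimming a trailing `Code::None` (the EOF marker) and collapsing a now-empty list into `None`, so `feed` and `subtokenize` can replace their old `unreachable!` checks with a plain `assert!(remainder.is_none())`. A sketch of that normalization, using simplified stand-ins for `State`, `Code`, and `StateFnResult`:

```rust
#[derive(Debug, PartialEq)]
enum Code {
    None,
    #[allow(dead_code)]
    Char(char),
}

enum State {
    Ok,
}

type StateFnResult = (State, Option<Vec<Code>>);

fn check_statefn_result(result: StateFnResult) -> StateFnResult {
    let (state, mut remainder) = result;

    if let Some(ref mut list) = remainder {
        // Trim a trailing EOF marker, if any.
        if Some(&Code::None) == list.last() {
            list.pop();
        }

        // Collapse a now-empty remainder into `None`, so callers can check
        // `remainder.is_none()` instead of also testing for emptiness.
        if list.is_empty() {
            return (state, None);
        }
    }

    (state, remainder)
}

fn main() {
    // A remainder holding only the EOF marker normalizes to `None`.
    let (_, remainder) = check_statefn_result((State::Ok, Some(vec![Code::None])));
    assert!(remainder.is_none());
}
```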
