From bcc4676b84a06af5e38ebaa31f0217cae090be08 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Tue, 21 Jun 2022 13:14:07 +0200
Subject: Update todo list

---
 readme.md          | 86 +++++++++++++++++++++++++++++++++++++++++++++++-------
 src/subtokenize.rs |  9 +-----
 src/tokenizer.rs   | 39 +++++++++++++++----------
 3 files changed, 99 insertions(+), 35 deletions(-)

diff --git a/readme.md b/readme.md
index d991a58..9986d3f 100644
--- a/readme.md
+++ b/readme.md
@@ -64,37 +64,101 @@ cargo doc --document-private-items
 - [ ] (5) Figure out extensions
 - [ ] (1) Support turning off constructs
 
-### Small things
+### All the things
 
-- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
-- [ ] (1) Parse initial and final whitespace of paragraphs (in text)
-- [ ] (1) Add docs to subtokenize
+#### Docs
+
+- [ ] (1) Add docs for `default_line_ending`
+- [ ] (1) Add docs for virtual spaces
+- [ ] (1) Add docs to `subtokenize.rs`
+- [ ] (1) Add docs for `link.rs`
+- [ ] (1) Add docs for token types
+- [ ] (1) Add docs for tokenizer (`go`, `define_skip`,
+  `account_for_potential_skip`, `attempt_5`, `attempt_7`, `call_multiple`)
+- [ ] (1) Add docs for sanitation (autolink, definition, resource)
+- [ ] (1) Add docs for how references and definitions match (definition,
+  reference)
+- [ ] (1) Go through all bnf
+- [ ] (1) Go through all docs
 - [ ] (1) Add module docs to parser
 - [ ] (1) Add overview docs on how everything works
+
+#### Refactor
+
 - [ ] (1) Move safe protocols to constants
-- [ ] (3) Clean compiler
+- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
+- [ ] (1) Make text data, string data constructs (document in
+  `construct/mod.rs`)
+- [ ] (1) Configurable tokens (destination, label, title)
+- [ ] (1) Configurable limit (destination)
+
+#### Parse
+
+- [ ] (1) Parse initial and final whitespace of paragraphs (in text)\
+  test (`code_indented`, `hard_break_escape`, `hard_break_trailing`,
+  `heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`,
+  `misc_tabs`, `thematic_break`)
+- [ ] (1) Get definition identifiers (definition)
+- [ ] (3) Interrupting (html flow complete)
+- [ ] (5) labels\
+  test (`character_escape`, `character_reference`, `definition`,
+  `misc_dangerous_protocol`, `misc_tabs`, `misc_url`, `thematic_break`)\
+  link link reference (definition)\
+  link label end (destination, label, title)\
+  link label start (label)
+- [ ] (5) attention\
+  test (`character_reference`, `hard_break_escape`, `hard_break_trailing`,
+  `heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\
+- [ ] (8) block quote\
+  test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`,
+  `html_flow`, `misc_default_line_ending`, `thematic_break`)
+- [ ] (8) list\
+  test (`character_reference`, `code_indented`, `heading_setext`,
+  `html_flow`, `thematic_break`)\
+  link (`blank line`, `thematic break`)
+- [ ] (3) Lazy lines (`code indented`, `html flow`)
+- [ ] (3) Concrete (`html flow`)
+- [ ] (3) Turn off things (enable every test for these)
+- [ ] (3) Make tokenizer tokens extendable
+
+#### Test
+
 - [ ] (1) Make sure positional info is perfect
-- [ ] (3) Figure out lifetimes of things (see `life time` in source)
 - [ ] (3) Use `commonmark` tests
 - [ ] (3) Share a bunch of tests with `micromark-js`
+
+#### Misc
+
+- [ ] (3) Check subtokenizer unraveling is ok
+- [ ] (3) Remove splicing and cloning in subtokenizer
+- [ ] (3) Pass more references around
+- [ ] (1) Remove todos in `span.rs` if not needed
+- [ ] (1) Get markers from constructs (`string`, `text`)
+- [ ] (1) Do not capture in `tokenizer.go`
+- [ ] (1) Clean attempts
+- [ ] (3) Clean compiler
+- [ ] (3) Figure out lifetimes of things (see `life time` in source)
 - [ ] (5) Do some research on rust best practices for APIs, e.g., what to
   accept, how to integrate with streams or so?
 - [ ] (1) Go through clippy rules, and such, to add strict code styles
 - [ ] (1) Make sure that rust character groups match CM character groups (e.g.,
   is `unicode_whitespace` or so the same?)
 - [ ] (1) Any special handling of surrogates?
-- [ ] (1) Make sure debugging is useful for other folks
+- [ ] (1) Make sure debugging, assertions are useful for other folks
 - [ ] (3) Add some benchmarks, do some perf testing
 - [ ] (3) Write comparison to other parsers
 - [ ] (3) Add node/etc bindings?
-- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
-  this with SWC to compile MDX
 - [ ] (3) Bunch of docs
 - [ ] (5) Site
 
+#### After
+
+- [ ] (8) Extensions!
+- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
+  this with SWC to compile MDX
+
 ### Constructs
 
-- [ ] (5) attention (strong, emphasis) (text)
+- [ ] (5) attention (strong, emphasis)
 - [x] autolink
 - [x] blank line
 - [ ] (5) block quote
@@ -132,7 +196,7 @@ cargo doc --document-private-items
 - [x] html (flow)
 - [x] paragraph
 - [x] thematic break
-- [ ] (5) text
+- [ ] (8) text
 - [ ] attention (strong, emphasis) (text)
 - [x] autolink
 - [x] character escape
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 0623a37..1188c61 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -66,14 +66,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
             };
 
             result = tokenizer.feed(span::codes(codes, &span), func, enter.next == None);
-
-            if let Some(ref x) = result.1 {
-                if !x.is_empty() {
-                    // To do: handle?
-                    unreachable!("subtokenize:remainder {:?}", x);
-                }
-            }
-
+            assert!(result.1.is_none(), "expected no remainder");
             index_opt = enter.next;
         }
 
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index ba9bcbb..909a1d1 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -222,12 +222,14 @@ impl Tokenizer {
         self.current = code;
     }
 
+    /// To do.
     pub fn define_skip(&mut self, point: &Point, index: usize) {
         self.column_start.insert(point.line, point.column);
         self.account_for_potential_skip();
         log::debug!("position: define skip: `{:?}` ({:?})", point, index);
     }
 
+    /// To do.
     fn account_for_potential_skip(&mut self) {
         match self.column_start.get(&self.point.line) {
             None => {}
@@ -462,6 +464,7 @@ impl Tokenizer {
     }
 
     // To do: lifetimes, boxes, lmao.
+    /// To do.
     pub fn attempt_2(
         &mut self,
         a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
@@ -481,6 +484,7 @@ impl Tokenizer {
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn attempt_5(
         &mut self,
@@ -504,6 +508,7 @@ impl Tokenizer {
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn attempt_7(
         &mut self,
@@ -529,6 +534,7 @@ impl Tokenizer {
         )
     }
 
+    /// To do.
     #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
     pub fn call_multiple(
         &mut self,
@@ -606,7 +612,7 @@ impl Tokenizer {
 
         // Yield to a higher loop if we shouldn’t feed EOFs.
         if !drain {
-            return (state, Some(codes[index..].to_vec()));
+            return check_statefn_result((state, Some(codes[index..].to_vec())));
         }
 
         loop {
@@ -618,14 +624,7 @@ impl Tokenizer {
                     log::debug!("main: passing eof");
                     self.expect(code);
                     let (next, remainder) = check_statefn_result(func(self, code));
-
-                    if let Some(ref x) = remainder {
-                        if !x.is_empty() {
-                            // To do: handle?
-                            unreachable!("drain:remainder {:?}", x);
-                        }
-                    }
-
+                    assert!(remainder.is_none(), "expected no remainder");
                     state = next;
                 }
             }
@@ -661,8 +660,13 @@ fn attempt_impl(
             }
         }
 
-        // To do: `remainder` must never be bigger than codes I guess?
-        // To do: `remainder` probably has to be taken *from* `codes`, in a similar vain to the `Ok` handling below.
+        if let Some(ref list) = remainder {
+            assert!(
+                list.len() <= codes.len(),
+                "`remainder` must be less than or equal to `codes`"
+            );
+        }
+
         match next {
            State::Ok => {
                let remaining = if let Some(x) = remainder { x } else { vec![] };
@@ -670,6 +674,7 @@ fn attempt_impl(
            }
            State::Nok => check_statefn_result(done((codes, vec![]), false, tokenizer)),
            State::Fn(func) => {
+               assert!(remainder.is_none(), "expected no remainder");
                check_statefn_result((State::Fn(attempt_impl(func, codes, done)), None))
            }
        }
@@ -712,20 +717,18 @@ pub fn as_codes(value: &str) -> Vec<Code> {
            }
            // Send a tab and virtual spaces.
            '\t' => {
-                // To do: is this correct?
                let remainder = column % TAB_SIZE;
-                let virtual_spaces = if remainder == 0 {
+                let mut virtual_spaces = if remainder == 0 {
                    0
                } else {
                    TAB_SIZE - remainder
                };
                codes.push(Code::Char(char));
                column += 1;
-                let mut index = 0;
-                while index < virtual_spaces {
+                while virtual_spaces > 0 {
                    codes.push(Code::VirtualSpace);
                    column += 1;
-                    index += 1;
+                    virtual_spaces -= 1;
                }
            }
            // Send an LF.
@@ -770,6 +773,10 @@ fn check_statefn_result(result: StateFnResult) -> StateFnResult {
        if Some(&Code::None) == list.last() {
            list.pop();
        }
+
+        if list.is_empty() {
+            return (state, None);
+        }
    }
 
    (state, remainder)
-- 
cgit
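
Two editorial notes on the changes above, each with a small self-contained sketch. The standalone framing, function names, and `main` drivers below are illustrative assumptions, not code from this repository.

First, the `as_codes` hunk reworks how a tab is expanded: the tab itself is pushed as a `Code::Char`, followed by enough `Code::VirtualSpace`s to reach the next tab stop. A minimal sketch of that arithmetic, assuming a tab stop of 4 (`TAB_SIZE`) and returning a plain count instead of pushing the crate's `Code` values:

    // Sketch only: mirrors the `column % TAB_SIZE` arithmetic in `as_codes`.
    const TAB_SIZE: usize = 4;

    /// Number of virtual spaces to emit after a tab found at 1-based `column`.
    fn virtual_spaces_after_tab(column: usize) -> usize {
        let remainder = column % TAB_SIZE;
        if remainder == 0 {
            // The tab sits exactly on a tab stop; it advances one column on its own.
            0
        } else {
            TAB_SIZE - remainder
        }
    }

    fn main() {
        // A tab at column 1 plus three virtual spaces fill columns 1 through 4.
        assert_eq!(virtual_spaces_after_tab(1), 3);
        // A tab already on a tab stop needs no virtual spaces.
        assert_eq!(virtual_spaces_after_tab(4), 0);
    }

The patch itself only swaps the separate `index` counter for counting `virtual_spaces` down; the number of virtual spaces produced is unchanged.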
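
Second, the new `assert!(remainder.is_none(), ...)` calls lean on `check_statefn_result` now normalizing remainders: a trailing `Code::None` is dropped, and an empty list collapses to `None`. A sketch of that normalization idea, assuming a plain `Vec<char>` with `'\0'` standing in for `Code::None` rather than the crate's actual types:

    // Sketch only: the "empty remainder becomes None" normalization added to
    // `check_statefn_result`, written against plain chars.
    fn normalize_remainder(mut remainder: Option<Vec<char>>) -> Option<Vec<char>> {
        if let Some(ref mut list) = remainder {
            // Drop a trailing end-of-input marker ('\0' stands in for `Code::None`).
            if list.last() == Some(&'\0') {
                list.pop();
            }
            // Nothing left means there is no remainder at all.
            if list.is_empty() {
                return None;
            }
        }
        remainder
    }

    fn main() {
        assert_eq!(normalize_remainder(Some(vec!['\0'])), None);
        assert_eq!(normalize_remainder(Some(vec!['a', '\0'])), Some(vec!['a']));
    }

With that guarantee, callers such as `subtokenize` and the EOF drain loop can assert `remainder.is_none()` instead of checking for a non-empty list and calling `unreachable!`.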