about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- readme.md 86
-rw-r--r-- src/subtokenize.rs 9
-rw-r--r-- src/tokenizer.rs 39
3 files changed, 99 insertions, 35 deletions
diff --git a/readme.md b/readme.md
index d991a58..9986d3f 100644
--- a/readme.md
+++ b/readme.md
@@ -64,37 +64,101 @@ cargo doc --document-private-items
- [ ] (5) Figure out extensions
- [ ] (1) Support turning off constructs
-### Small things
+### All the things
-- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
-- [ ] (1) Parse initial and final whitespace of paragraphs (in text)
-- [ ] (1) Add docs to subtokenize
+#### Docs
+
+- [ ] (1) Add docs for `default_line_ending`
+- [ ] (1) Add docs for virtual spaces
+- [ ] (1) Add docs to `subtokenize.rs`
+- [ ] (1) Add docs for `link.rs`
+- [ ] (1) Add docs for token types
+- [ ] (1) Add docs for tokenizer (`go`, `define_skip`,
+ `account_for_potential_skip`, `attempt_5`, `attempt_7`, `call_multiple`)
+- [ ] (1) Add docs for sanitation (autolink, definition, resource)
+- [ ] (1) Add docs for how references and definitions match (definition, reference)
+- [ ] (1) Go through all bnf
+- [ ] (1) Go through all docs
- [ ] (1) Add module docs to parser
- [ ] (1) Add overview docs on how everything works
+
+#### Refactor
+
- [ ] (1) Move safe protocols to constants
-- [ ] (3) Clean compiler
+- [ ] (1) Use `impl fmt::Display for x` for a bunch of enums, e.g., markers
+- [ ] (1) Make text data, string data constructs (document in
+ `construct/mod.rs`)
+- [ ] (1) Configurable tokens (destination, label, title)
+- [ ] (1) Configurable limit (destination)
+
+#### Parse
+
+- [ ] (1) Parse initial and final whitespace of paragraphs (in text)\
+ test (`code_indented`, `hard_break_escape`, `hard_break_trailing`,
+ `heading_atx`, `heading_setext`, `html_flow`, `misc_soft_break`,
+ `misc_tabs`, `thematic_break`)
+- [ ] (1) Get definition identifiers (definition)
+- [ ] (3) Interrupting (html flow complete)
+- [ ] (5) labels\
+ test (`character_escape`, `character_reference`, `definition`,
+ `misc_dangerous_protocol`, `misc_tabs`, `misc_url`, `thematic_break`)\
+ link link reference (definition)\
+ link label end (destination, label, title)\
+ link label start (label)
+- [ ] (5) attention\
+ test (`character_reference`, `hard_break_escape`, `hard_break_trailing`,
+ `heading_atx`, `heading_setext`, `html_flow`, `thematic_break`)\
+- [ ] (8) block quote\
+ test (`code_fenced`, `code_indented`, `heading_atx`, `heading_setext`,
+ `html_flow`, `misc_default_line_ending`, `thematic_break`)
+- [ ] (8) list\
+ test (`character_reference`, `code_indented`, `heading_setext`,
+ `html_flow`, `thematic_break`)\
+ link (`blank line`, `thematic break`)
+- [ ] (3) Lazy lines (`code indented`, `html flow`)
+- [ ] (3) Concrete (`html flow`)
+- [ ] (3) Turn off things (enable every test for these)
+- [ ] (3) Make tokenizer tokens extendable
+
+#### Test
+
- [ ] (1) Make sure positional info is perfect
-- [ ] (3) Figure out lifetimes of things (see `life time` in source)
- [ ] (3) Use `commonmark` tests
- [ ] (3) Share a bunch of tests with `micromark-js`
+
+#### Misc
+
+- [ ] (3) Check subtokenizer unraveling is ok
+- [ ] (3) Remove splicing and cloning in subtokenizer
+- [ ] (3) Pass more references around
+- [ ] (1) Remove todos in `span.rs` if not needed
+- [ ] (1) Get markers from constructs (`string`, `text`)
+- [ ] (1) Do not capture in `tokenizer.go`
+- [ ] (1) Clean attempts
+- [ ] (3) Clean compiler
+- [ ] (3) Figure out lifetimes of things (see `life time` in source)
- [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept,
how to integrate with streams or so?
- [ ] (1) Go through clippy rules, and such, to add strict code styles
- [ ] (1) Make sure that rust character groups match CM character groups (e.g., is
`unicode_whitespace` or so the same?)
- [ ] (1) Any special handling of surrogates?
-- [ ] (1) Make sure debugging is useful for other folks
+- [ ] (1) Make sure debugging, assertions are useful for other folks
- [ ] (3) Add some benchmarks, do some perf testing
- [ ] (3) Write comparison to other parsers
- [ ] (3) Add node/etc bindings?
-- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
- this with SWC to compile MDX
- [ ] (3) Bunch of docs
- [ ] (5) Site
+#### After
+
+- [ ] (8) Extensions!
+- [ ] (8) After all extensions, including MDX, are done, see if we can integrate
+ this with SWC to compile MDX
+
### Constructs
-- [ ] (5) attention (strong, emphasis) (text)
+- [ ] (5) attention (strong, emphasis)
- [x] autolink
- [x] blank line
- [ ] (5) block quote
@@ -132,7 +196,7 @@ cargo doc --document-private-items
- [x] html (flow)
- [x] paragraph
- [x] thematic break
-- [ ] (5) text
+- [ ] (8) text
- [ ] attention (strong, emphasis) (text)
- [x] autolink
- [x] character escape
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 0623a37..1188c61 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -66,14 +66,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
};
result = tokenizer.feed(span::codes(codes, &span), func, enter.next == None);
-
- if let Some(ref x) = result.1 {
- if !x.is_empty() {
- // To do: handle?
- unreachable!("subtokenize:remainder {:?}", x);
- }
- }
-
+ assert!(result.1.is_none(), "expected no remainder");
index_opt = enter.next;
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index ba9bcbb..909a1d1 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -222,12 +222,14 @@ impl Tokenizer {
self.current = code;
}
+ /// To do.
pub fn define_skip(&mut self, point: &Point, index: usize) {
self.column_start.insert(point.line, point.column);
self.account_for_potential_skip();
log::debug!("position: define skip: `{:?}` ({:?})", point, index);
}
+ /// To do.
fn account_for_potential_skip(&mut self) {
match self.column_start.get(&self.point.line) {
None => {}
@@ -462,6 +464,7 @@ impl Tokenizer {
}
// To do: lifetimes, boxes, lmao.
+ /// To do.
pub fn attempt_2(
&mut self,
a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
@@ -481,6 +484,7 @@ impl Tokenizer {
)
}
+ /// To do.
#[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
pub fn attempt_5(
&mut self,
@@ -504,6 +508,7 @@ impl Tokenizer {
)
}
+ /// To do.
#[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
pub fn attempt_7(
&mut self,
@@ -529,6 +534,7 @@ impl Tokenizer {
)
}
+ /// To do.
#[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
pub fn call_multiple(
&mut self,
@@ -606,7 +612,7 @@ impl Tokenizer {
// Yield to a higher loop if we shouldn’t feed EOFs.
if !drain {
- return (state, Some(codes[index..].to_vec()));
+ return check_statefn_result((state, Some(codes[index..].to_vec())));
}
loop {
@@ -618,14 +624,7 @@ impl Tokenizer {
log::debug!("main: passing eof");
self.expect(code);
let (next, remainder) = check_statefn_result(func(self, code));
-
- if let Some(ref x) = remainder {
- if !x.is_empty() {
- // To do: handle?
- unreachable!("drain:remainder {:?}", x);
- }
- }
-
+ assert!(remainder.is_none(), "expected no remainder");
state = next;
}
}
@@ -661,8 +660,13 @@ fn attempt_impl(
}
}
- // To do: `remainder` must never be bigger than codes I guess?
- // To do: `remainder` probably has to be taken *from* `codes`, in a similar vain to the `Ok` handling below.
+ if let Some(ref list) = remainder {
+ assert!(
+ list.len() <= codes.len(),
+ "`remainder` must be less than or equal to `codes`"
+ );
+ }
+
match next {
State::Ok => {
let remaining = if let Some(x) = remainder { x } else { vec![] };
@@ -670,6 +674,7 @@ fn attempt_impl(
}
State::Nok => check_statefn_result(done((codes, vec![]), false, tokenizer)),
State::Fn(func) => {
+ assert!(remainder.is_none(), "expected no remainder");
check_statefn_result((State::Fn(attempt_impl(func, codes, done)), None))
}
}
@@ -712,20 +717,18 @@ pub fn as_codes(value: &str) -> Vec<Code> {
}
// Send a tab and virtual spaces.
'\t' => {
- // To do: is this correct?
let remainder = column % TAB_SIZE;
- let virtual_spaces = if remainder == 0 {
+ let mut virtual_spaces = if remainder == 0 {
0
} else {
TAB_SIZE - remainder
};
codes.push(Code::Char(char));
column += 1;
- let mut index = 0;
- while index < virtual_spaces {
+ while virtual_spaces > 0 {
codes.push(Code::VirtualSpace);
column += 1;
- index += 1;
+ virtual_spaces -= 1;
}
}
// Send an LF.
@@ -770,6 +773,10 @@ fn check_statefn_result(result: StateFnResult) -> StateFnResult {
if Some(&Code::None) == list.last() {
list.pop();
}
+
+ if list.is_empty() {
+ return (state, None);
+ }
}
(state, remainder)