From db5a491e6c2223d1db9b458307431a54db3c40f2 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Thu, 11 Aug 2022 17:54:10 +0200 Subject: Refactor `readme.md` --- Untitled.txt | 28 +++++++++ readme.md | 192 +++++++++++++++-------------------------------------------- 2 files changed, 77 insertions(+), 143 deletions(-) diff --git a/Untitled.txt b/Untitled.txt index e796b86..8238cf7 100644 --- a/Untitled.txt +++ b/Untitled.txt @@ -2,3 +2,31 @@ micromark.js: unquoted: is `completeAttributeValueUnquoted`s case for `completeA micromark.js: `]` case in cdata_end does not need to consume, it can defer to `cdata_close`, which should save 1 line micromark.js: should `tagOpenAttributeValueUnquoted` also support a slash? micromark.js: `atLineEnding` in html (text) should always eat arbitrary whitespace? code (indented) has no effect on html (text)? + +```rs +// --------------------- +// Useful helper: + +let mut index = 0; +let mut balance = 0; +println!("before: {:?}", events.len()); +while index < events.len() { + let event = &events[index]; + if event.event_type == EventType::Exit { + balance -= 1; + } + let prefix = String::from_utf8(vec![b' '; balance * 2]).unwrap(); + println!( + "ev: {}{:?}:{:?} ({:?}): {:?}", + prefix, + event.kind, + event.name, + index, + event.link, + ); + if event.event_type == EventType::Enter { + balance += 1; + } + index += 1; +} +``` diff --git a/readme.md b/readme.md index 4ab2730..2de5b63 100644 --- a/readme.md +++ b/readme.md @@ -3,84 +3,41 @@ Crate docs are currently at [`wooorm.com/micromark-rs/micromark/`](https://wooorm.com/micromark-rs/micromark/). 
-There’s still a lot to do, but, already: **100%** CommonMark 🥳 - -## Some useful scripts for now - -Run examples: - -```sh -RUST_BACKTRACE=1 RUST_LOG=debug cargo run --example lib -``` - -Format: - -```sh -cargo fmt --all -``` - -Lint: - -```sh -cargo fmt --all -- --check && cargo clippy -- -W clippy::pedantic -``` - -Tests: - -```sh -RUST_BACKTRACE=1 cargo test -``` - -Docs: - -```sh -cargo doc --document-private-items -``` - -(add `--open` to open them in a browser) - ## To do -### Some major obstacles - -- [ ] (5) There’s a lot of rust-related choosing whether to pass (mutable) - references or whatever around that should be refactored -- [ ] (5) Figure out extensions - -### All the things +### Docs -#### Docs - -- [ ] (1) Go through all bnf +- [ ] (1) Go through all functions, add docs - [ ] (1) Go through all docs +- [ ] (1) Go through all bnf docs - [ ] (1) Add overview docs on how everything works +- [ ] (1) Add more examples -#### Refactor +### Refactor +- [ ] (1) Move `content` to `construct` - [ ] (1) Improve `interrupt`, `concrete`, `lazy` fields somehow? +- [ ] (?) Remove last box: the one around the child tokenizer? +- [ ] (1) Add helper to get byte at, get char before/after, etc. +- [ ] (?) Use smaller things than usizes? -#### Parse - -- [ ] (3) Make tokens extendable for extensions? - -#### Test +### Test - [ ] (1) Make sure positional info is perfect -- [ ] (3) Share a bunch of tests with `micromark-js` +- [ ] (3) Share tests with `micromark-js` +- [ ] (3) Add tests for a zillion attention markers, tons of lists, tons of labels, etc? -#### Misc +### Misc - [ ] (3) `no_std`? -- [ ] (3) Pass more references around -- [ ] (1) Get markers from constructs (`string`, `text`) +- [ ] (?) Improve document performance (potential 50%) +- [ ] (?) Improve paragraph performance (potential 15%) +- [ ] (?) 
Improve label (link, image) performance (potential 7%) - [ ] (3) Read through rust docs to figure out what useful functions there are, and fix stuff I’m doing manually now - [ ] (5) Do some research on rust best practices for APIs, e.g., what to accept, how to integrate with streams or so? - [ ] (1) Go through clippy rules, and such, to add strict code styles -- [ ] (1) Any special handling of surrogates? -- [ ] (1) Make sure debugging, assertions are useful for other folks -- [ ] (3) Add some benchmarks (against comrak, pulldown-cmark, kramdown?), do some perf testing - [ ] (3) Write comparison to other parsers - [ ] (3) Add node/etc bindings? - [ ] (3) Bunch of docs @@ -125,87 +82,36 @@ important. - [ ] (8) After all extensions, including MDX, are done, see if we can integrate this with SWC to compile MDX -### Done - -- [x] (8) Subtokenization: figure out a good, fast way to deal with constructs in - one content type that also are another content type -- [x] (3) Encode urls -- [x] (1) Optionally remove dangerous protocols when compiling -- [x] (1) Add docs to html (text) -- [x] (1) Add docs on bnf -- [x] (1) Reorganize to split util -- [x] (1) Add examples to `Options` docs -- [x] (3) Fix deep subtokenization -- [x] (1) text in heading -- [x] (1) Setext headings, solved in flow -- [x] (1) Add docs to partials -- [x] (1) Remove all `pub fn`s from constructs, except for start -- [x] (1) Remove `content` content type, as it is no longer needed -- [x] (1) Paragraph -- [x] (1) Parse whitespace in each flow construct -- [x] (1) Connect `ChunkString` in label, destination, title -- [x] (1) Add support for line endings in `string` -- [x] (1) Handle BOM at start -- [x] (1) Make sure tabs are handled properly -- [x] (1) Add tests for `default-line-ending`, `line-ending` -- [x] (1) Use preferred line ending style in markdown -- [x] (1) Make sure crlf/cr/lf are working perfectly -- [x] (1) Figure out lifetimes of things (see `life time` in source) -- [x] (1) Use traits for a 
bunch of enums, e.g., markers -- [x] (1) Move safe protocols to constants -- [x] (1) Make text data, string data constructs (document in - `construct/mod.rs`) -- [x] (1) Configurable tokens (destination, label, title) -- [x] (1) Configurable limit (destination) -- [x] (1) Add docs for `default_line_ending` -- [x] (1) Add docs for virtual spaces -- [x] (1) Add docs to `subtokenize.rs` -- [x] (1) Add docs for `link.rs` -- [x] (1) Add docs for token types -- [x] (1) Do not capture in `tokenizer.go` -- [x] (1) Clean attempts -- [x] (1) Add docs for tokenizer -- [x] (1) Add docs for sanitation -- [x] (1) Get definition identifiers (definition) -- [x] (1) Add docs to `normalize_identifier` -- [x] (1) Add docs for how references and definitions match -- [x] (1) Add module docs to parser -- [x] (1) Add improved docs in compiler -- [x] (1) Add docs for `RESOURCE_DESTINATION_BALANCE_MAX` -- [x] (1) Add docs for `label_start_image`, `label_start_link` -- [x] (1) Add docs for `label_end` -- [x] (1) Move map handling from `resolve_media` -- [x] (5) Add support for sharing identifiers, references before definitions -- [x] (2) Refactor to externalize handlers of compiler -- [x] (1) Add support for compiling shared references and definitions -- [x] (1) Add docs to Image, Link, and other media tokens -- [x] (1) Add docs on resolver, clean feed -- [x] (3) Clean compiler -- [x] (1) Parse initial and final space_or_tab of paragraphs (in string, text) -- [x] (1) Refactor to clean and document `space_or_tab` -- [x] (1) Refactor to clean and document `edit_map` -- [x] (8) Make paragraphs fast by merging them at the end, not checking whether - things interrupt them each line -- [x] (3) Add support for interrupting (or not) -- [x] (5) attention -- [x] (3) Unicode punctuation -- [x] (1) Use rust to crawl unicode -- [x] (1) Document attention -- [x] (1) Remove todos in `span.rs` if not needed -- [x] (2) Fix resizing attention bug -- [x] (2) Fix interleaving of attention/label -- [x] (8) Add 
basic support for block quotes -- [x] (1) Use `char::REPLACEMENT_CHARACTER`? -- [x] (3) Add support for concrete constructs - (html (flow) or code (fenced) cannot be “pierced” into by containers) -- [x] (1) Make sure that rust character groups match CM character groups -- [x] (3) Fix block quote bug -- [x] (3) Add support for lazy lines -- [x] (5) Containers! -- [x] (3) Check subtokenizer unraveling is ok -- [x] (1) Add list of void tokens, check that they’re void -- [x] (3) Use `commonmark` tests -- [x] (3) Add support for turning off constructs -- [x] (1) Use `edit_map` in `subtokenize` -- [x] (3) Remove all `HashMap`s -- [x] (3) Remove splicing and cloning in subtokenizer +## Scripts + +Run examples: + +```sh +RUST_BACKTRACE=1 RUST_LOG=debug cargo run --example lib +``` + +Format: + +```sh +cargo fmt --all +``` + +Lint: + +```sh +cargo fmt --all -- --check && cargo clippy -- -W clippy::pedantic +``` + +Tests: + +```sh +RUST_BACKTRACE=1 cargo test +``` + +Docs: + +```sh +cargo doc --document-private-items +``` + +(add `--open` to open them in a browser) -- cgit