From 262aec96cece3e9dd55828397b8ec859e7cff606 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 20 Jun 2022 12:59:06 +0200 Subject: Remove unneeded `content` content type --- readme.md | 25 +++++++--------- src/compiler.rs | 2 -- src/constant.rs | 5 ++-- src/construct/code_fenced.rs | 2 +- src/construct/html_text.rs | 6 ++-- src/construct/mod.rs | 1 - src/content/content.rs | 70 -------------------------------------------- src/content/flow.rs | 54 +++++++++++++++++----------------- src/content/mod.rs | 1 - src/content/string.rs | 4 ++- src/subtokenize.rs | 9 ++---- src/tokenizer.rs | 2 -- 12 files changed, 50 insertions(+), 131 deletions(-) delete mode 100644 src/content/content.rs diff --git a/readme.md b/readme.md index 0cd5bd2..082dd4c 100644 --- a/readme.md +++ b/readme.md @@ -46,9 +46,9 @@ cargo doc --document-private-items ### Some major obstacles -- [ ] (8) Can content (and to a lesser extent string and text) operate more - performantly than checking whether other flow constructs start a line, - before exiting and actually attempting flow constructs? +- [ ] (8) Can paragraphs (and to a lesser extent string data and text data) + operate more performantly than checking whether other flow constructs + start a line, before exiting and actually attempting flow constructs? - [ ] (5) Figure out sharing definition and identifiers, and references before definitions - [ ] (3) Interrupting: sometimes flow can or cannot start depending on the @@ -57,8 +57,8 @@ cargo doc --document-private-items subtokenization is solved - [ ] (3) Concrete constructs: HTML or code (fenced) cannot be “pierced” into by containers -- [ ] (3) Lazy lines, in containers, in flow and content in a paragraph, a line - does not need to be indented +- [ ] (3) Lazy lines, in containers, in flow in a paragraph, a line does not + need to be indented - [ ] (5) There’s a lot of rust-related choosing whether to pass (mutable) references or whatever around that should be refactored - [ ] (5) Figure out extensions @@ -66,11 +66,9 @@ cargo doc --document-private-items ### Small things -- [ ] (1) Remove `content` content type, as it is no longer needed - [ ] (1) Connect `ChunkString` in label, destination, title - [ ] (1) Add support for line endings in `string` - [ ] (1) Add docs to subtokenize -- [ ] (1) Add module docs to content - [ ] (1) Add module docs to parser - [ ] (1) Add overview docs on how everything works - [ ] (1) Move safe protocols to constants @@ -109,8 +107,7 @@ cargo doc --document-private-items - [x] character reference - [x] code (fenced) - [x] code (indented) -- [x] (1) code (text) -- [ ] (3) content +- [x] code (text) - [x] definition - [x] hard break (escape) - [x] hard break (trailing) @@ -134,14 +131,12 @@ cargo doc --document-private-items - [x] blank line - [x] code (fenced) - [x] code (indented) - - [x] content - [x] definition - [x] heading (atx) - [x] heading (setext) - [x] html (flow) - - [x] thematic break -- [x] content - [x] paragraph + - [x] thematic break - [ ] (5) text - [ ] attention (strong, emphasis) (text) - [x] autolink @@ -170,10 +165,10 @@ cargo doc --document-private-items - [x] (1) Add examples to `CompileOptions` docs - [x] (3) Fix deep subtokenization - [x] (1) text in heading -- [x] (1) Setext headings: can they be solved in content, or do they have to be - solved in flow somehow +- [x] (1) Setext headings, solved in flow - [x] (1) Add docs to partials - [x] (1) Remove all `pub fn`s from constructs, except for start +- [x] (1) Remove `content` content type, as it is no longer needed ### Extensions @@ -188,7 +183,7 @@ important. — [`micromark-extension-frontmatter`](https://github.com/micromark/micromark-extension-frontmatter) - [ ] (3) autolink literal (GFM) (text) — [`micromark-extension-gfm-autolink-literal`](https://github.com/micromark/micromark-extension-gfm-autolink-literal) -- [ ] (3) footnote (GFM) (content, text) +- [ ] (3) footnote (GFM) (flow, text) — [`micromark-extension-gfm-footnote`](https://github.com/micromark/micromark-extension-gfm-footnote) - [ ] (3) strikethrough (GFM) (text) — [`micromark-extension-gfm-strikethrough`](https://github.com/micromark/micromark-extension-gfm-strikethrough) diff --git a/src/compiler.rs b/src/compiler.rs index be5d0fe..59fcd22 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -126,7 +126,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CodeTextData | TokenType::CodeTextLineEnding | TokenType::CodeTextSequence - | TokenType::Content | TokenType::Data | TokenType::DefinitionLabel | TokenType::DefinitionLabelMarker @@ -213,7 +212,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CodeFencedFenceWhitespace | TokenType::CodeIndentedPrefixWhitespace | TokenType::CodeTextSequence - | TokenType::Content | TokenType::DefinitionLabel | TokenType::DefinitionLabelMarker | TokenType::DefinitionLabelData diff --git a/src/constant.rs b/src/constant.rs index 1f833c2..e7594b9 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -103,8 +103,9 @@ pub const HTML_RAW_SIZE_MAX: usize = 8; /// List of HTML tag names that form the **basic** production of /// [HTML (flow)][html_flow]. /// -/// The **basic** production allows interleaving HTML and markdown with blank lines -/// and allows flow (block) elements to interrupt content. +/// The **basic** production allows interleaving HTML and markdown with blank +/// lines and allows flow (block) elements to interrupt definitions, paragraphs, +/// and heading (setext). /// Tag name matching must be performed insensitive to case, and thus this list /// includes lowercase tag names. /// diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 12c8bd6..28ac20b 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -27,7 +27,7 @@ //! The above grammar does not show how whitespace is handled. //! To parse code (fenced), let `X` be the number of whitespace characters //! before the opening fence sequence. -//! Each line of content is then allowed (not required) to be indented with up +//! Each line of text is then allowed (not required) to be indented with up //! to `X` spaces or tabs, which are then ignored as an indent instead of being //! considered as part of the code. //! This indent does not affect the closing fence. diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index d50a8ce..93b4b62 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -632,7 +632,7 @@ fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// At an allowed line ending. /// -/// > **Note**: we can’t have blank lines in content, so no need to worry about +/// > **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. /// /// ```markdown @@ -661,7 +661,7 @@ fn at_line_ending( /// After a line ending. /// -/// > **Note**: we can’t have blank lines in content, so no need to worry about +/// > **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. /// /// ```markdown @@ -681,7 +681,7 @@ fn after_line_ending( /// After a line ending, after indent. /// -/// > **Note**: we can’t have blank lines in content, so no need to worry about +/// > **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. /// /// ```markdown diff --git a/src/construct/mod.rs b/src/construct/mod.rs index a5e95bc..3195205 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -24,7 +24,6 @@ //! * [code (fenced)][code_fenced] //! * [code (indented)][code_indented] //! * [code (text)][code_text] -//! * content //! * [definition][] //! * [hard break (escape)][hard_break_escape] //! * [hard break (trailing)][hard_break_trailing] diff --git a/src/content/content.rs b/src/content/content.rs deleted file mode 100644 index 86bc290..0000000 --- a/src/content/content.rs +++ /dev/null @@ -1,70 +0,0 @@ -//! The `content`, ahum, content type. -//! -//! **Content** is zero or more definitions, and then zero or one paragraph. -//! It’s a weird one, and needed to make certain edge cases around definitions -//! spec compliant. -//! Definitions are unlike other things in markdown, in that they behave like -//! **text** in that they can contain arbitrary line endings, but *have* to end -//! at a line ending. -//! If they end in something else, the whole definition instead is seen as a -//! paragraph. -//! -//! The constructs found in content are: -//! -//! * Definition -//! * Paragraph - -use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; - -/// Before a paragraph. -/// -/// ```markdown -/// |asd -/// ``` -pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - unreachable!("expected non-eol/eof"); - } - _ => { - tokenizer.enter(TokenType::Paragraph); - tokenizer.enter(TokenType::ChunkText); - inside(tokenizer, code, tokenizer.events.len() - 1) - } - } -} - -/// In a line in a paragraph. -/// -/// ```markdown -/// |\& -/// |qwe -/// ``` -fn inside(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult { - match code { - Code::None => { - tokenizer.exit(TokenType::ChunkText); - tokenizer.exit(TokenType::Paragraph); - (State::Ok, None) - } - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.consume(code); - tokenizer.exit(TokenType::ChunkText); - tokenizer.enter(TokenType::ChunkText); - let next_index = tokenizer.events.len() - 1; - tokenizer.events[previous_index].next = Some(next_index); - tokenizer.events[next_index].previous = Some(previous_index); - ( - State::Fn(Box::new(move |t, c| inside(t, c, next_index))), - None, - ) - } - _ => { - tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| inside(t, c, previous_index))), - None, - ) - } - } -} diff --git a/src/content/flow.rs b/src/content/flow.rs index 3fab523..58be61d 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -1,7 +1,7 @@ //! The flow content type. //! -//! **Flow** represents the sections, such as headings, code, and content, which -//! is parsed per line. +//! **Flow** represents the sections, such as headings and code, which are +//! parsed per line. //! An example is HTML, which has a certain starting condition (such as //! `