From 262aec96cece3e9dd55828397b8ec859e7cff606 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 20 Jun 2022 12:59:06 +0200 Subject: Remove unneeded `content` content type --- src/compiler.rs | 2 -- src/constant.rs | 5 ++-- src/construct/code_fenced.rs | 2 +- src/construct/html_text.rs | 6 ++-- src/construct/mod.rs | 1 - src/content/content.rs | 70 -------------------------------------------- src/content/flow.rs | 54 +++++++++++++++++----------------- src/content/mod.rs | 1 - src/content/string.rs | 4 ++- src/subtokenize.rs | 9 ++---- src/tokenizer.rs | 2 -- 11 files changed, 40 insertions(+), 116 deletions(-) delete mode 100644 src/content/content.rs (limited to 'src') diff --git a/src/compiler.rs b/src/compiler.rs index be5d0fe..59fcd22 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -126,7 +126,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CodeTextData | TokenType::CodeTextLineEnding | TokenType::CodeTextSequence - | TokenType::Content | TokenType::Data | TokenType::DefinitionLabel | TokenType::DefinitionLabelMarker @@ -213,7 +212,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St | TokenType::CodeFencedFenceWhitespace | TokenType::CodeIndentedPrefixWhitespace | TokenType::CodeTextSequence - | TokenType::Content | TokenType::DefinitionLabel | TokenType::DefinitionLabelMarker | TokenType::DefinitionLabelData diff --git a/src/constant.rs b/src/constant.rs index 1f833c2..e7594b9 100644 --- a/src/constant.rs +++ b/src/constant.rs @@ -103,8 +103,9 @@ pub const HTML_RAW_SIZE_MAX: usize = 8; /// List of HTML tag names that form the **basic** production of /// [HTML (flow)][html_flow]. /// -/// The **basic** production allows interleaving HTML and markdown with blank lines -/// and allows flow (block) elements to interrupt content. +/// The **basic** production allows interleaving HTML and markdown with blank +/// lines and allows flow (block) elements to interrupt definitions, paragraphs, +/// and heading (setext). /// Tag name matching must be performed insensitive to case, and thus this list /// includes lowercase tag names. /// diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index 12c8bd6..28ac20b 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -27,7 +27,7 @@ //! The above grammar does not show how whitespace is handled. //! To parse code (fenced), let `X` be the number of whitespace characters //! before the opening fence sequence. -//! Each line of content is then allowed (not required) to be indented with up +//! Each line of text is then allowed (not required) to be indented with up //! to `X` spaces or tabs, which are then ignored as an indent instead of being //! considered as part of the code. //! This indent does not affect the closing fence. diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index d50a8ce..93b4b62 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -632,7 +632,7 @@ fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// At an allowed line ending. /// -/// > **Note**: we can’t have blank lines in content, so no need to worry about +/// > **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. /// /// ```markdown @@ -661,7 +661,7 @@ fn at_line_ending( /// After a line ending. /// -/// > **Note**: we can’t have blank lines in content, so no need to worry about +/// > **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. /// /// ```markdown @@ -681,7 +681,7 @@ fn after_line_ending( /// After a line ending, after indent. /// -/// > **Note**: we can’t have blank lines in content, so no need to worry about +/// > **Note**: we can’t have blank lines in text, so no need to worry about /// > empty tokens. /// /// ```markdown diff --git a/src/construct/mod.rs b/src/construct/mod.rs index a5e95bc..3195205 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -24,7 +24,6 @@ //! * [code (fenced)][code_fenced] //! * [code (indented)][code_indented] //! * [code (text)][code_text] -//! * content //! * [definition][] //! * [hard break (escape)][hard_break_escape] //! * [hard break (trailing)][hard_break_trailing] diff --git a/src/content/content.rs b/src/content/content.rs deleted file mode 100644 index 86bc290..0000000 --- a/src/content/content.rs +++ /dev/null @@ -1,70 +0,0 @@ -//! The `content`, ahum, content type. -//! -//! **Content** is zero or more definitions, and then zero or one paragraph. -//! It’s a weird one, and needed to make certain edge cases around definitions -//! spec compliant. -//! Definitions are unlike other things in markdown, in that they behave like -//! **text** in that they can contain arbitrary line endings, but *have* to end -//! at a line ending. -//! If they end in something else, the whole definition instead is seen as a -//! paragraph. -//! -//! The constructs found in content are: -//! -//! * Definition -//! * Paragraph - -use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; - -/// Before a paragraph. -/// -/// ```markdown -/// |asd -/// ``` -pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - unreachable!("expected non-eol/eof"); - } - _ => { - tokenizer.enter(TokenType::Paragraph); - tokenizer.enter(TokenType::ChunkText); - inside(tokenizer, code, tokenizer.events.len() - 1) - } - } -} - -/// In a line in a paragraph. -/// -/// ```markdown -/// |\& -/// |qwe -/// ``` -fn inside(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult { - match code { - Code::None => { - tokenizer.exit(TokenType::ChunkText); - tokenizer.exit(TokenType::Paragraph); - (State::Ok, None) - } - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.consume(code); - tokenizer.exit(TokenType::ChunkText); - tokenizer.enter(TokenType::ChunkText); - let next_index = tokenizer.events.len() - 1; - tokenizer.events[previous_index].next = Some(next_index); - tokenizer.events[next_index].previous = Some(previous_index); - ( - State::Fn(Box::new(move |t, c| inside(t, c, next_index))), - None, - ) - } - _ => { - tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| inside(t, c, previous_index))), - None, - ) - } - } -} diff --git a/src/content/flow.rs b/src/content/flow.rs index 3fab523..58be61d 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -1,7 +1,7 @@ //! The flow content type. //! -//! **Flow** represents the sections, such as headings, code, and content, which -//! is parsed per line. +//! **Flow** represents the sections, such as headings and code, which are +//! parsed per line. //! An example is HTML, which has a certain starting condition (such as //! `