diff options
Diffstat (limited to '')
-rw-r--r-- | src/content/content.rs | 70 | ||||
-rw-r--r-- | src/content/flow.rs | 54 | ||||
-rw-r--r-- | src/content/mod.rs | 1 | ||||
-rw-r--r-- | src/content/string.rs | 4 |
4 files changed, 30 insertions, 99 deletions
diff --git a/src/content/content.rs b/src/content/content.rs deleted file mode 100644 index 86bc290..0000000 --- a/src/content/content.rs +++ /dev/null @@ -1,70 +0,0 @@ -//! The `content`, ahum, content type. -//! -//! **Content** is zero or more definitions, and then zero or one paragraph. -//! It’s a weird one, and needed to make certain edge cases around definitions -//! spec compliant. -//! Definitions are unlike other things in markdown, in that they behave like -//! **text** in that they can contain arbitrary line endings, but *have* to end -//! at a line ending. -//! If they end in something else, the whole definition instead is seen as a -//! paragraph. -//! -//! The constructs found in content are: -//! -//! * Definition -//! * Paragraph - -use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; - -/// Before a paragraph. -/// -/// ```markdown -/// |asd -/// ``` -pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - match code { - Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - unreachable!("expected non-eol/eof"); - } - _ => { - tokenizer.enter(TokenType::Paragraph); - tokenizer.enter(TokenType::ChunkText); - inside(tokenizer, code, tokenizer.events.len() - 1) - } - } -} - -/// In a line in a paragraph. -/// -/// ```markdown -/// |\& -/// |qwe -/// ``` -fn inside(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult { - match code { - Code::None => { - tokenizer.exit(TokenType::ChunkText); - tokenizer.exit(TokenType::Paragraph); - (State::Ok, None) - } - Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { - tokenizer.consume(code); - tokenizer.exit(TokenType::ChunkText); - tokenizer.enter(TokenType::ChunkText); - let next_index = tokenizer.events.len() - 1; - tokenizer.events[previous_index].next = Some(next_index); - tokenizer.events[next_index].previous = Some(previous_index); - ( - State::Fn(Box::new(move |t, c| inside(t, c, next_index))), - None, - ) - } - _ => { - tokenizer.consume(code); - ( - State::Fn(Box::new(move |t, c| inside(t, c, previous_index))), - None, - ) - } - } -} diff --git a/src/content/flow.rs b/src/content/flow.rs index 3fab523..58be61d 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -1,7 +1,7 @@ //! The flow content type. //! -//! **Flow** represents the sections, such as headings, code, and content, which -//! is parsed per line. +//! **Flow** represents the sections, such as headings and code, which are +//! parsed per line. //! An example is HTML, which has a certain starting condition (such as //! `<script>` on its own line), then continues for a while, until an end //! condition is found (such as `</style>`). @@ -18,8 +18,6 @@ //! * [Heading (setext)][crate::construct::heading_setext] //! * [HTML (flow)][crate::construct::html_flow] //! * [Thematic break][crate::construct::thematic_break] -//! -//! <!-- To do: Link to content. --> use crate::constant::TAB_SIZE; use crate::construct::{ @@ -153,45 +151,43 @@ pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResu thematic_break, definition, heading_setext, - |ok| Box::new(if ok { after } else { content_before }), + |ok| Box::new(if ok { after } else { paragraph_before }), )(tokenizer, code) } -/// Before content. +/// Before a paragraph. /// /// ```markdown /// |qwe /// ``` -/// -// To do: we don’t need content anymore in `micromark-rs` it seems? -fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +fn paragraph_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { after(tokenizer, code) } _ => { - tokenizer.enter(TokenType::Content); - tokenizer.enter(TokenType::ChunkContent); - content(tokenizer, code, tokenizer.events.len() - 1) + tokenizer.enter(TokenType::Paragraph); + tokenizer.enter(TokenType::ChunkText); + paragraph_inside(tokenizer, code, tokenizer.events.len() - 1) } } } -/// In content. +/// In a paragraph. /// /// ```markdown /// al|pha /// ``` -fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult { +fn paragraph_inside(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnResult { match code { - Code::None => content_end(tokenizer, code), + Code::None => paragraph_end(tokenizer, code), Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { tokenizer.check(continuation_construct, move |ok| { Box::new(move |t, c| { if ok { - content_continue(t, c, previous) + paragraph_continue(t, c, previous) } else { - content_end(t, c) + paragraph_end(t, c) } }) })(tokenizer, code) @@ -199,7 +195,7 @@ fn content(tokenizer: &mut Tokenizer, code: Code, previous: usize) -> StateFnRes _ => { tokenizer.consume(code); ( - State::Fn(Box::new(move |t, c| content(t, c, previous))), + State::Fn(Box::new(move |t, c| paragraph_inside(t, c, previous))), None, ) } @@ -248,9 +244,9 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> } match code { - // Blank lines are not allowed in content. + // Blank lines are not allowed in paragraph. Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None), - // To do: If code is disabled, indented lines are part of the content. + // To do: If code is disabled, indented lines are part of the paragraph. _ if prefix >= TAB_SIZE => (State::Ok, None), // To do: definitions, setext headings, etc? _ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| { @@ -264,21 +260,25 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> } } -fn content_continue(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult { +fn paragraph_continue( + tokenizer: &mut Tokenizer, + code: Code, + previous_index: usize, +) -> StateFnResult { tokenizer.consume(code); - tokenizer.exit(TokenType::ChunkContent); - tokenizer.enter(TokenType::ChunkContent); + tokenizer.exit(TokenType::ChunkText); + tokenizer.enter(TokenType::ChunkText); let next_index = tokenizer.events.len() - 1; tokenizer.events[previous_index].next = Some(next_index); tokenizer.events[next_index].previous = Some(previous_index); ( - State::Fn(Box::new(move |t, c| content(t, c, next_index))), + State::Fn(Box::new(move |t, c| paragraph_inside(t, c, next_index))), None, ) } -fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { - tokenizer.exit(TokenType::ChunkContent); - tokenizer.exit(TokenType::Content); +fn paragraph_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + tokenizer.exit(TokenType::ChunkText); + tokenizer.exit(TokenType::Paragraph); after(tokenizer, code) } diff --git a/src/content/mod.rs b/src/content/mod.rs index d13df79..395e41b 100644 --- a/src/content/mod.rs +++ b/src/content/mod.rs @@ -1,7 +1,6 @@ //! Content types found in markdown. #[allow(clippy::module_inception)] -pub mod content; pub mod flow; pub mod string; pub mod text; diff --git a/src/content/string.rs b/src/content/string.rs index e8134c4..f591cd7 100644 --- a/src/content/string.rs +++ b/src/content/string.rs @@ -1,6 +1,6 @@ //! The string content type. //! -//! **String** is a limited **text** like content type which only allows +//! **String** is a limited [text][] like content type which only allows //! character escapes and character references. //! It exists in things such as identifiers (media references, definitions), //! titles, URLs, code (fenced) info and meta parts. @@ -9,6 +9,8 @@ //! //! * [Character escape][crate::construct::character_escape] //! * [Character reference][crate::construct::character_reference] +//! +//! [text]: crate::content::text use crate::construct::{ character_escape::start as character_escape, character_reference::start as character_reference, |