Diffstat (limited to 'src/construct/content.rs')
-rw-r--r--  src/construct/content.rs  188
1 file changed, 188 insertions, 0 deletions
diff --git a/src/construct/content.rs b/src/construct/content.rs
new file mode 100644
index 0000000..6c10cea
--- /dev/null
+++ b/src/construct/content.rs
@@ -0,0 +1,188 @@
+//! Content occurs in the [flow][] content type.
+//!
+//! Content contains zero or more [definition][definition]s, followed by zero
+//! or one [paragraph][].
+//!
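+//! For instance, the following is one content block: a definition
+//! followed by a paragraph (any number of definitions may come first):
+//!
+//! ```markdown
+//! [a]: b
+//! A paragraph that references [a].
+//! ```
+//!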
+//! The constructs found in content are:
+//!
+//! * [Definition][crate::construct::definition]
+//! * [Paragraph][crate::construct::paragraph]
+//!
+//! ## Tokens
+//!
+//! * [`Content`][Name::Content]
+//!
+//! > 👉 **Note**: while parsing, [`Content`][Name::Content]
+//! > is used, which is later compiled away.
+//!
+//! ## References
+//!
+//! * [`content.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-core-commonmark/dev/lib/content.js)
+//!
+//! [flow]: crate::construct::flow
+//! [definition]: crate::construct::definition
+//! [paragraph]: crate::construct::paragraph
+
+use crate::event::{Content, Kind, Link, Name};
+use crate::resolve::Name as ResolveName;
+use crate::state::{Name as StateName, State};
+use crate::subtokenize::{subtokenize, Subresult};
+use crate::tokenizer::Tokenizer;
+use alloc::{string::String, vec};
+
+/// Before a content chunk.
+///
+/// ```markdown
+/// > | abc
+///     ^
+/// ```
+pub fn chunk_start(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None | Some(b'\n') => unreachable!("unexpected eol/eof"),
+        _ => {
+            tokenizer.enter_link(
+                Name::Content,
+                Link {
+                    previous: None,
+                    next: None,
+                    content: Content::Content,
+                },
+            );
+            State::Retry(StateName::ContentChunkInside)
+        }
+    }
+}
+
+/// In a content chunk.
+///
+/// ```markdown
+/// > | abc
+///     ^^^
+/// ```
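+///
+/// Each line becomes its own `Content` chunk; `resolve` later merges
+/// them. Roughly, the events for `a\nb` (a sketch; details omitted):
+///
+/// ```text
+/// Enter:Content "a"   Exit:Content
+/// Enter:LineEnding    Exit:LineEnding
+/// Enter:Content "b"   Exit:Content
+/// ```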
+pub fn chunk_inside(tokenizer: &mut Tokenizer) -> State {
+    match tokenizer.current {
+        None | Some(b'\n') => {
+            tokenizer.exit(Name::Content);
+            tokenizer.register_resolver_before(ResolveName::Content);
+            // You’d be interrupting.
+            tokenizer.interrupt = true;
+            State::Ok
+        }
+        _ => {
+            tokenizer.consume();
+            State::Next(StateName::ContentChunkInside)
+        }
+    }
+}
+
+/// Before a definition.
+///
+/// ```markdown
+/// > | [a]: b
+///     ^
+/// ```
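+///
+/// Roughly, the attempt set up below has two outcomes (a sketch):
+///
+/// ```text
+/// DefinitionStart ok  -> ContentDefinitionAfter
+/// DefinitionStart nok -> ParagraphStart
+/// ```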
+pub fn definition_before(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.attempt(
+        State::Next(StateName::ContentDefinitionAfter),
+        State::Next(StateName::ParagraphStart),
+    );
+    State::Retry(StateName::DefinitionStart)
+}
+
+/// After a definition.
+///
+/// ```markdown
+/// > | [a]: b
+///          ^
+///   | c
+/// ```
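+///
+/// At each line ending this loops back to `definition_before`, so
+/// several definitions can stack up before a final paragraph:
+///
+/// ```markdown
+/// [a]: b
+/// [c]: d
+/// e
+/// ```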
+pub fn definition_after(tokenizer: &mut Tokenizer) -> State {
+    debug_assert!(matches!(tokenizer.current, None | Some(b'\n')));
+    if tokenizer.current.is_none() {
+        State::Ok
+    } else {
+        tokenizer.enter(Name::LineEnding);
+        tokenizer.consume();
+        tokenizer.exit(Name::LineEnding);
+        State::Next(StateName::ContentDefinitionBefore)
+    }
+}
+
+/// Merge `Content` chunks, which currently span a single line, into actual
+/// `Content`s that span multiple lines.
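+///
+/// Roughly what happens for `a\nb` (a sketch; point and index
+/// bookkeeping omitted): the line ending events are removed, the first
+/// chunk’s exit point is stretched over the line ending, and the chunks
+/// are linked so that `subtokenize` sees one continuous content:
+///
+/// ```text
+/// before: Content "a", LineEnding "\n", Content "b"
+/// after:  Content "a\n" <-> Content "b"
+/// ```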
+pub fn resolve(tokenizer: &mut Tokenizer) -> Result<Option<Subresult>, String> {
+    let mut index = 0;
+
+    while index < tokenizer.events.len() {
+        let event = &tokenizer.events[index];
+
+        if event.kind == Kind::Enter && event.name == Name::Content {
+            // `exit_index` points at this chunk’s `Exit:Content`.
+            let mut exit_index = index + 1;
+
+            loop {
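+                // Presumed `Enter:LineEnding` right after the `Exit:Content`.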
+                let mut enter_index = exit_index + 1;
+
+                if enter_index == tokenizer.events.len()
+                    || tokenizer.events[enter_index].name != Name::LineEnding
+                {
+                    break;
+                }
+
+                // Skip past line ending.
+                enter_index += 2;
+
+                // Skip past prefix.
+                while enter_index < tokenizer.events.len() {
+                    let event = &tokenizer.events[enter_index];
+
+                    if event.name != Name::SpaceOrTab
+                        && event.name != Name::BlockQuotePrefix
+                        && event.name != Name::BlockQuoteMarker
+                    {
+                        break;
+                    }
+
+                    enter_index += 1;
+                }
+
+                if enter_index == tokenizer.events.len()
+                    || tokenizer.events[enter_index].name != Name::Content
+                {
+                    break;
+                }
+
+                // Set `Exit:Content` point to `Exit:LineEnding`.
+                tokenizer.events[exit_index].point = tokenizer.events[exit_index + 2].point.clone();
+                // Remove `Enter:LineEnding`, `Exit:LineEnding`.
+                tokenizer.map.add(exit_index + 1, 2, vec![]);
+
+                // Link `Enter:Content` to the `Enter:Content` on this line, and vice versa.
+                tokenizer.events[exit_index - 1].link.as_mut().unwrap().next = Some(enter_index);
+                tokenizer.events[enter_index]
+                    .link
+                    .as_mut()
+                    .unwrap()
+                    .previous = Some(exit_index - 1);
+
+                // Potential next start.
+                exit_index = enter_index + 1;
+            }
+
+            // Move to the `Exit:Content`.
+            index = exit_index;
+        }
+
+        index += 1;
+    }
+
+    tokenizer.map.consume(&mut tokenizer.events);
+
+    let result = subtokenize(
+        &mut tokenizer.events,
+        tokenizer.parse_state,
+        &Some(Content::Content),
+    )?;
+
+    Ok(Some(result))
+}