From 128058ea948909c21a9cfd0b58cbd3a13e238e57 Mon Sep 17 00:00:00 2001 From: Héctor Ramón Jiménez Date: Fri, 31 Jan 2025 17:35:38 +0100 Subject: Draft incremental `markdown` parsing Specially useful when dealing with long Markdown streams, like LLMs. --- widget/src/markdown.rs | 99 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 17 deletions(-) (limited to 'widget/src/markdown.rs') diff --git a/widget/src/markdown.rs b/widget/src/markdown.rs index 2b7bc0fc..0365dee8 100644 --- a/widget/src/markdown.rs +++ b/widget/src/markdown.rs @@ -47,6 +47,7 @@ //! } //! } //! ``` +#![allow(missing_docs)] use crate::core::border; use crate::core::font::{self, Font}; use crate::core::padding; @@ -57,12 +58,47 @@ use crate::core::{ use crate::{column, container, rich_text, row, scrollable, span, text}; use std::cell::{Cell, RefCell}; +use std::ops::Range; use std::sync::Arc; pub use core::text::Highlight; pub use pulldown_cmark::HeadingLevel; pub use url::Url; +#[derive(Debug, Clone)] +pub struct Content { + items: Vec, + state: State, +} + +impl Content { + pub fn parse(markdown: &str) -> Self { + let mut state = State::default(); + let items = parse_with(&mut state, markdown).collect(); + + Self { items, state } + } + + pub fn push_str(&mut self, markdown: &str) { + // Append to last leftover text + let mut leftover = std::mem::take(&mut self.state.leftover); + leftover.push_str(markdown); + + // Pop the last item + let _ = self.items.pop(); + + // Re-parse last item and new text + let new_items = parse_with(&mut self.state, &leftover); + self.items.extend(new_items); + + dbg!(&self.state); + } + + pub fn items(&self) -> &[Item] { + &self.items + } +} + /// A Markdown item. #[derive(Debug, Clone)] pub enum Item { @@ -232,6 +268,24 @@ impl Span { /// } /// ``` pub fn parse(markdown: &str) -> impl Iterator + '_ { + parse_with(State::default(), markdown) +} + +#[derive(Debug, Clone, Default)] +pub struct State { + leftover: String, +} + +impl AsMut for State { + fn as_mut(&mut self) -> &mut Self { + self + } +} + +fn parse_with<'a>( + mut state: impl AsMut + 'a, + markdown: &'a str, +) -> impl Iterator + 'a { struct List { start: Option, items: Vec>, @@ -255,27 +309,31 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { | pulldown_cmark::Options::ENABLE_PLUSES_DELIMITED_METADATA_BLOCKS | pulldown_cmark::Options::ENABLE_TABLES | pulldown_cmark::Options::ENABLE_STRIKETHROUGH, - ); - - let produce = |lists: &mut Vec, item| { - if lists.is_empty() { - Some(item) - } else { - lists - .last_mut() - .expect("list context") - .items - .last_mut() - .expect("item context") - .push(item); + ) + .into_offset_iter(); - None - } - }; + let mut produce = + move |lists: &mut Vec, item, source: Range| { + if lists.is_empty() { + state.as_mut().leftover = markdown[source.start..].to_owned(); + + Some(item) + } else { + lists + .last_mut() + .expect("list context") + .items + .last_mut() + .expect("item context") + .push(item); + + None + } + }; // We want to keep the `spans` capacity #[allow(clippy::drain_collect)] - parser.filter_map(move |event| match event { + parser.filter_map(move |(event, source)| match event { pulldown_cmark::Event::Start(tag) => match tag { pulldown_cmark::Tag::Strong if !metadata && !table => { strong = true; @@ -311,6 +369,7 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { produce( &mut lists, Item::Paragraph(Text::new(spans.drain(..).collect())), + source, ) }; @@ -350,6 +409,7 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { produce( &mut lists, Item::Paragraph(Text::new(spans.drain(..).collect())), + source, ) }; @@ -370,6 +430,7 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { produce( &mut lists, Item::Heading(level, Text::new(spans.drain(..).collect())), + source, ) } pulldown_cmark::TagEnd::Strong if !metadata && !table => { @@ -392,6 +453,7 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { produce( &mut lists, Item::Paragraph(Text::new(spans.drain(..).collect())), + source, ) } pulldown_cmark::TagEnd::Item if !metadata && !table => { @@ -401,6 +463,7 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { produce( &mut lists, Item::Paragraph(Text::new(spans.drain(..).collect())), + source, ) } } @@ -413,6 +476,7 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { start: list.start, items: list.items, }, + source, ) } pulldown_cmark::TagEnd::CodeBlock if !metadata && !table => { @@ -424,6 +488,7 @@ pub fn parse(markdown: &str) -> impl Iterator + '_ { produce( &mut lists, Item::CodeBlock(Text::new(spans.drain(..).collect())), + source, ) } pulldown_cmark::TagEnd::MetadataBlock(_) => { -- cgit