diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-09-14 16:21:42 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-09-14 16:26:24 +0200 |
commit | 74d2688aa329f0a41c2a92034c3454ed9299e71a (patch) | |
tree | 9ec8fdc6e40ff7cd40a14408afcc47716990134e /src/subtokenize.rs | |
parent | 65d4b46c2a3bdecb0493e484473d2de3d124f839 (diff) | |
download | markdown-rs-74d2688aa329f0a41c2a92034c3454ed9299e71a.tar.gz markdown-rs-74d2688aa329f0a41c2a92034c3454ed9299e71a.tar.bz2 markdown-rs-74d2688aa329f0a41c2a92034c3454ed9299e71a.zip |
Fix to prefer flow over definitions, setext headings
An undocumented part of CommonMark is how to deal with things in definition
labels or definition titles (which both can span multiple lines).
Can flow (or containers?) interrupt them?
They can according to the `cmark` reference parser, so this was implemented here.
This adds a new `Content` content type, which houses zero or more definitions,
and then zero or one paragraph.
Content can be followed by a setext heading underline, which either turns
into a setext heading when the content ends in a paragraph, or turns into
the start of the following paragraph when it is followed by content that
starts with a paragraph, or turns into a stray paragraph.
Diffstat (limited to 'src/subtokenize.rs')
-rw-r--r-- | src/subtokenize.rs | 61 |
1 file changed, 45 insertions, 16 deletions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs index 12f91cf..5bb7e98 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -24,6 +24,13 @@ use crate::tokenizer::Tokenizer; use crate::util::{edit_map::EditMap, skip}; use alloc::{string::String, vec, vec::Vec}; +#[derive(Debug)] +pub struct Subresult { + pub done: bool, + pub gfm_footnote_definitions: Vec<String>, + pub definitions: Vec<String>, +} + /// Link two [`Event`][]s. /// /// Arbitrary (void) events can be linked together. @@ -69,10 +76,19 @@ pub fn link_to(events: &mut [Event], previous: usize, next: usize) { /// Parse linked events. /// /// Supposed to be called repeatedly, returns `true` when done. -pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result<bool, String> { +pub fn subtokenize( + events: &mut Vec<Event>, + parse_state: &ParseState, + filter: &Option<Content>, +) -> Result<Subresult, String> { let mut map = EditMap::new(); - let mut done = true; let mut index = 0; + let mut value = Subresult { + done: true, + gfm_footnote_definitions: vec![], + definitions: vec![], + }; + let mut acc = (0, 0); while index < events.len() { let event = &events[index]; @@ -82,16 +98,19 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result< debug_assert_eq!(event.kind, Kind::Enter); // No need to enter linked events again. - if link.previous == None { + if link.previous == None + && (filter.is_none() || &link.content == filter.as_ref().unwrap()) + { // Index into `events` pointing to a chunk. let mut link_index = Some(index); // Subtokenizer. let mut tokenizer = Tokenizer::new(event.point.clone(), parse_state); // Substate. 
- let mut state = State::Next(if link.content == Content::String { - StateName::StringStart - } else { - StateName::TextStart + let mut state = State::Next(match link.content { + Content::Flow => unreachable!("flow subcontent not implemented yet"), + Content::Content => StateName::ContentDefinitionBefore, + Content::String => StateName::StringStart, + Content::Text => StateName::TextStart, }); // Check if this is the first paragraph, after zero or more @@ -143,11 +162,14 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result< link_index = link_curr.next; } - tokenizer.flush(state, true)?; + let mut result = tokenizer.flush(state, true)?; + value + .gfm_footnote_definitions + .append(&mut result.gfm_footnote_definitions); + value.definitions.append(&mut result.definitions); + value.done = false; - divide_events(&mut map, events, index, &mut tokenizer.events); - - done = false; + acc = divide_events(&mut map, events, index, &mut tokenizer.events, acc); } } @@ -156,7 +178,7 @@ pub fn subtokenize(events: &mut Vec<Event>, parse_state: &ParseState) -> Result< map.consume(events); - Ok(done) + Ok(value) } /// Divide `child_events` over links in `events`, the first of which is at @@ -166,15 +188,17 @@ pub fn divide_events( events: &[Event], mut link_index: usize, child_events: &mut Vec<Event>, -) { + acc_before: (usize, usize), +) -> (usize, usize) { // Loop through `child_events` to figure out which parts belong where and // fix deep links. 
let mut child_index = 0; let mut slices = vec![]; let mut slice_start = 0; let mut old_prev: Option<usize> = None; + let len = child_events.len(); - while child_index < child_events.len() { + while child_index < len { let current = &child_events[child_index].point; let end = &events[link_index + 1].point; @@ -200,7 +224,8 @@ pub fn divide_events( } else { old_prev + link_index - (slices.len() - 1) * 2 }; - prev_event.link.as_mut().unwrap().next = Some(new_link); + prev_event.link.as_mut().unwrap().next = + Some(new_link + acc_before.1 - acc_before.0); } } @@ -219,7 +244,9 @@ pub fn divide_events( // The `index` in `events` where the current link is, // minus 2 events (the enter and exit) for each removed // link. - .map(|previous| previous + link_index - (slices.len() * 2)); + .map(|previous| { + previous + link_index - (slices.len() * 2) + acc_before.1 - acc_before.0 + }); } } @@ -245,4 +272,6 @@ pub fn divide_events( child_events.split_off(slices[index].1), ); } + + (acc_before.0 + (slices.len() * 2), acc_before.1 + len) } |