From ef644f4def7d5cad3fb5307ec5e00fc7b0b025ff Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 13 Jun 2022 18:42:36 +0200 Subject: Add basic html (text) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add all states for html (text) * Fix to link paragraph tokens together * Add note about uncovered bug where linking paragraph tokens together doesn’t work 😅 --- src/subtokenize.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/subtokenize.rs') diff --git a/src/subtokenize.rs b/src/subtokenize.rs index d72eb69..ee826b8 100644 --- a/src/subtokenize.rs +++ b/src/subtokenize.rs @@ -36,10 +36,10 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) { let mut result: StateFnResult = ( State::Fn(Box::new(if event.token_type == TokenType::ChunkContent { content - } else if event.token_type == TokenType::ChunkText { - text - } else { + } else if event.token_type == TokenType::ChunkString { string + } else { + text })), None, ); @@ -49,6 +49,7 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) { // Loop through chunks to pass them in order to the subtokenizer. while let Some(index_ptr) = index_opt { let enter = &events[index_ptr]; + assert_eq!(enter.event_type, EventType::Enter); let span = Span { start_index: enter.index, end_index: events[index_ptr + 1].index, @@ -119,6 +120,11 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) { // from each slice and slices from events? let mut index = events.len() - 1; + // To do: this is broken, because it can inject linked events, which point + // to their links through indices, and this messes with all indices. + // We should try walking front to end instead, keep a count of the shifted + // index. + // It’s a bit complex but should work? while index > 0 { let slice_opt = link_to_info.get(&index); -- cgit