From a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Fri, 24 Jun 2022 17:57:10 +0200
Subject: Add link, images (resource)

This is still some messy code that needs cleaning up, but it adds support for
links and images, of the resource kind (`[a](b)`).
References (`[a][b]`) are parsed and will soon be supported, but need matching.

* Fix bug to pad percent-encoded bytes when normalizing urls
* Fix bug with escapes counting as balancing in destination
* Add `space_or_tab_one_line_ending`, to parse whitespace including up to one
  line ending (but not a blank line)
* Add `ParseState` to share codes, definitions, etc
---
 src/subtokenize.rs | 38 ++++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

(limited to 'src/subtokenize.rs')

diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 4ee2242..58db3c6 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -28,9 +28,8 @@
 use std::collections::HashMap;
 
 use crate::content::{string::start as string, text::start as text};
-use crate::tokenizer::{
-    Code, Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer,
-};
+use crate::parser::ParseState;
+use crate::tokenizer::{Event, EventType, State, StateFn, StateFnResult, TokenType, Tokenizer};
 use crate::util::span;
 
 /// Create a link between two [`Event`][]s.
@@ -39,25 +38,36 @@ use crate::util::span;
 /// This optimizes for the common case where the token at `index` is connected
 /// to the previous void token.
 pub fn link(events: &mut [Event], index: usize) {
-    let prev = &mut events[index - 2];
+    link_to(events, index - 2, index);
+}
+
+/// To do
+pub fn link_to(events: &mut [Event], pevious: usize, next: usize) {
+    let prev = &mut events[pevious];
+    // To do: force chunks?
+    // assert!(
+    //     prev.token_type == TokenType::ChunkString || prev.token_type == TokenType::ChunkText,
+    //     "{:?}",
+    //     prev.token_type.to_owned()
+    // );
     assert_eq!(prev.event_type, EventType::Enter);
-    prev.next = Some(index);
+    prev.next = Some(next);
 
-    let prev_ref = &events[index - 2];
-    let prev_exit_ref = &events[index - 1];
+    let prev_ref = &events[pevious];
+    let prev_exit_ref = &events[pevious + 1];
     assert_eq!(prev_exit_ref.event_type, EventType::Exit);
     assert_eq!(prev_exit_ref.token_type, prev_ref.token_type);
 
-    let curr = &mut events[index];
+    let curr = &mut events[next];
     assert_eq!(curr.event_type, EventType::Enter);
-    curr.previous = Some(index - 2);
+    curr.previous = Some(pevious);
     // Note: the exit of this event may not exist, so don’t check for that.
 }
 
 /// Parse linked events.
 ///
 /// Supposed to be called repeatedly, returns `1: true` when done.
-pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
+pub fn subtokenize(mut events: Vec<Event>, parse_state: &ParseState) -> (Vec<Event>, bool) {
     let mut index = 0;
     // Map of first chunks to their tokenizer.
     let mut head_to_tokenizer: HashMap<usize, Tokenizer> = HashMap::new();
@@ -83,7 +93,7 @@ pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool)
             // Index into `events` pointing to a chunk.
             let mut index_opt: Option<usize> = Some(index);
             // Subtokenizer.
-            let mut tokenizer = Tokenizer::new(event.point.clone(), event.index);
+            let mut tokenizer = Tokenizer::new(event.point.clone(), event.index, parse_state);
             // Substate.
             let mut result: StateFnResult = (
                 State::Fn(Box::new(if event.token_type == TokenType::ChunkString {
@@ -115,7 +125,11 @@ pub fn subtokenize(mut events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool)
                     _ => unreachable!("cannot be ok/nok"),
                 };
 
-                result = tokenizer.feed(span::codes(codes, &span), func, enter.next == None);
+                result = tokenizer.push(
+                    span::codes(&parse_state.codes, &span),
+                    func,
+                    enter.next == None,
+                );
                 assert!(result.1.is_none(), "expected no remainder");
                 index_opt = enter.next;
             }
-- 
cgit