From a3dd207e3b1ebcbcb6cec0f703a695e51ae4ece0 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Fri, 24 Jun 2022 17:57:10 +0200
Subject: Add links, images (resource)

This is still some messy code that needs cleaning up, but it adds
support for links and images of the resource kind (`[a](b)`).
References (`[a][b]`) are parsed and will be supported soon, but they
still need to be matched against definitions.

* Fix bug to pad percent-encoded bytes when normalizing URLs (see the
  sketch after the patch)
* Fix bug where escapes counted as balancing in destinations
* Add `space_or_tab_one_line_ending`, to parse whitespace including up
  to one line ending (but not a blank line)
* Add `ParseState` to share codes, definitions, etc.
---
 src/content/flow.rs | 20 ++++++++++++++------
 src/content/text.rs | 17 ++++++++++++-----
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'src/content')

diff --git a/src/content/flow.rs b/src/content/flow.rs
index e71d25a..546712f 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -26,6 +26,7 @@ use crate::construct::{
     html_flow::start as html_flow, paragraph::start as paragraph,
     thematic_break::start as thematic_break,
 };
+use crate::parser::ParseState;
 use crate::subtokenize::subtokenize;
 use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
 use crate::util::{
@@ -34,9 +35,10 @@ use crate::util::{
 };
 
 /// Turn `codes` as the flow content type into events.
-pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
-    let mut tokenizer = Tokenizer::new(point, index);
-    tokenizer.feed(codes, Box::new(start), true);
+pub fn flow(parse_state: &ParseState, point: Point, index: usize) -> Vec<Event> {
+    let mut tokenizer = Tokenizer::new(point, index, parse_state);
+
+    tokenizer.push(&parse_state.codes, Box::new(start), true);
 
     let mut index = 0;
 
@@ -47,9 +49,14 @@ pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
             && event.token_type == TokenType::DefinitionLabelString
         {
             let id = normalize_identifier(
-                serialize(codes, &from_exit_event(&tokenizer.events, index), false).as_str(),
+                serialize(
+                    &parse_state.codes,
+                    &from_exit_event(&tokenizer.events, index),
+                    false,
+                )
+                .as_str(),
             );
-            println!("to do: use identifier {:?}", id);
+            println!("to do: use definition identifier {:?}", id);
         }
 
         index += 1;
@@ -58,8 +65,9 @@ pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
 
     let mut result = (tokenizer.events, false);
 
     while !result.1 {
-        result = subtokenize(result.0, codes);
+        result = subtokenize(result.0, parse_state);
     }
+
     result.0
 }
 
diff --git a/src/content/text.rs b/src/content/text.rs
index 1224064..5718617 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -21,15 +21,19 @@ use crate::construct::{
     character_reference::start as character_reference, code_text::start as code_text,
     hard_break_escape::start as hard_break_escape,
     hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
-    partial_data::start as data,
+    label_end::start as label_end, label_start_image::start as label_start_image,
+    label_start_link::start as label_start_link, partial_data::start as data,
 };
 use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
 
-const MARKERS: [Code; 5] = [
+const MARKERS: [Code; 8] = [
     Code::Char(' '), // `hard_break_trailing`
+    Code::Char('!'), // `label_start_image`
     Code::Char('&'), // `character_reference`
     Code::Char('<'), // `autolink`, `html_text`
+    Code::Char('['), // `label_start_link`
     Code::Char('\\'), // `character_escape`, `hard_break_escape`
+    Code::Char(']'), // `label_end`
     Code::Char('`'), // `code_text`
 ];
 
@@ -47,13 +51,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         Code::None => (State::Ok, None),
         _ => tokenizer.attempt_n(
             vec![
-                Box::new(character_reference),
+                Box::new(autolink),
                 Box::new(character_escape),
+                Box::new(character_reference),
+                Box::new(code_text),
                 Box::new(hard_break_escape),
                 Box::new(hard_break_trailing),
-                Box::new(autolink),
                 Box::new(html_text),
-                Box::new(code_text),
+                Box::new(label_end),
+                Box::new(label_start_image),
+                Box::new(label_start_link),
             ],
             |ok| Box::new(if ok { start } else { before_data }),
         )(tokenizer, code),
-- 
cgit
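
Editor's note: the first bullet in the commit message (zero-padding
percent-encoded bytes during URL normalization) is easier to see with a
tiny example. The sketch below is illustrative only; it assumes the
normalizer percent-encodes one raw byte at a time, and
`percent_encode_byte` is a hypothetical helper name, not code from this
patch.

```rust
// Minimal sketch of the padding fix: a percent escape must always be
// two uppercase hex digits. Plain `{:X}` formatting drops the leading
// zero for bytes below 0x10, producing an invalid escape like `%9`.
fn percent_encode_byte(byte: u8) -> String {
    // `{:02X}` zero-pads the hex representation to two digits.
    format!("%{:02X}", byte)
}

fn main() {
    assert_eq!(percent_encode_byte(0x09), "%09"); // needs the padding
    assert_eq!(percent_encode_byte(0xE9), "%E9"); // unaffected by it
    println!("ok");
}
```

Without the padding, normalizing a URL that contains such a byte would
emit an escape other tools reject, which is the bug the bullet
describes fixing.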