From e97ad954e1468b90722cf91996d7dfc069fedf78 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Mon, 1 Aug 2022 11:01:00 +0200 Subject: Refactor to pass more `&str`s, work on more bytes --- src/compiler.rs | 155 +++++++++++++++++++++++------------------------------- src/util/slice.rs | 5 ++ 2 files changed, 70 insertions(+), 90 deletions(-) diff --git a/src/compiler.rs b/src/compiler.rs index 12a0585..b86fd82 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -11,6 +11,7 @@ use crate::util::{ slice::{Position, Slice}, }; use crate::{LineEnding, Options}; +use std::str; /// Representation of a link or image, resource or reference. /// Reused for temporary definitions as well, in the first pass. @@ -23,7 +24,7 @@ struct Media { /// identifier, meaning that the original source characters are used /// instead of interpreting them. /// Not interpreted. - label_id: Option, + label_id: Option<(usize, usize)>, /// The text between the brackets (`x` in `![x]()` and `[x]()`), as /// interpreted content. /// When this is a link, it can contain further text content and thus HTML @@ -34,7 +35,7 @@ struct Media { /// The text between the explicit brackets of the reference (`y` in /// `[x][y]`), as content. /// Not interpreted. - reference_id: Option, + reference_id: Option<(usize, usize)>, /// The destination (url). /// Interpreted string content. destination: Option, @@ -138,6 +139,7 @@ impl<'a> CompileContext<'a> { self.buffers.pop().expect("Cannot resume w/o buffer") } + /// Push a str to the last buffer. pub fn push(&mut self, value: &str) { self.buffers .last_mut() @@ -145,17 +147,6 @@ impl<'a> CompileContext<'a> { .push_str(value); } - pub fn push_raw(&mut self, value: &str) { - self.push(&encode(value, self.encode_html)); - } - - /// Get the current buffer. - pub fn buf_tail(&self) -> &String { - self.buffers - .last() - .expect("at least one buffer should exist") - } - /// Add a line ending. pub fn line_ending(&mut self) { let eol = self.line_ending_default.as_str().to_string(); @@ -164,19 +155,14 @@ impl<'a> CompileContext<'a> { /// Add a line ending if needed (as in, there’s no eol/eof already). pub fn line_ending_if_needed(&mut self) { - // To do: fix to use bytes. - let last_char = self.buf_tail().chars().last(); - let mut add = true; - - if let Some(x) = last_char { - if x == '\n' || x == '\r' { - add = false; - } - } else { - add = false; - } + let tail = self + .buffers + .last() + .expect("at least one buffer should exist") + .as_bytes() + .last(); - if add { + if !matches!(tail, None | Some(b'\n' | b'\r')) { self.line_ending(); } } @@ -212,19 +198,6 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { options.default_line_ending.clone() }; - // Handle one event. - let handle = |context: &mut CompileContext, index: usize| { - let event = &events[index]; - - context.index = index; - - if event.event_type == EventType::Enter { - enter(context); - } else { - exit(context); - } - }; - let mut context = CompileContext::new(events, bytes, options, line_ending_default); let mut definition_indices = vec![]; let mut index = 0; @@ -287,6 +260,17 @@ pub fn compile(events: &[Event], bytes: &[u8], options: &Options) -> String { .to_string() } +// Handle the event at `index`. +fn handle(context: &mut CompileContext, index: usize) { + context.index = index; + + if context.events[index].event_type == EventType::Enter { + enter(context); + } else { + exit(context); + } +} + /// Handle [`Enter`][EventType::Enter]. fn enter(context: &mut CompileContext) { match context.events[context.index].token_type { @@ -607,7 +591,7 @@ fn on_exit_autolink_email(context: &mut CompileContext) { context.push("\">"); } - context.push_raw(value); + context.push(&encode(value, context.encode_html)); if !context.in_image_alt { context.push(""); @@ -628,7 +612,7 @@ fn on_exit_autolink_protocol(context: &mut CompileContext) { context.push("\">"); } - context.push_raw(value); + context.push(&encode(value, context.encode_html)); if !context.in_image_alt { context.push(""); @@ -691,20 +675,21 @@ fn on_exit_character_reference_value(context: &mut CompileContext) { _ => panic!("impossible"), }; - context.push_raw(&value); + context.push(&encode(&value, context.encode_html)); } /// Handle [`Exit`][EventType::Exit]:[`CodeFlowChunk`][Token::CodeFlowChunk]. fn on_exit_code_flow_chunk(context: &mut CompileContext) { context.code_flow_seen_data = Some(true); - context.push_raw( + context.push(&encode( &Slice::from_position( context.bytes, &Position::from_exit_event(context.events, context.index), ) // Must serialize to get virtual spaces. .serialize(), - ); + context.encode_html, + )); } /// Handle [`Exit`][EventType::Exit]:[`CodeFencedFence`][Token::CodeFencedFence]. @@ -775,26 +760,29 @@ fn on_exit_code_flow(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`CodeText`][Token::CodeText]. fn on_exit_code_text(context: &mut CompileContext) { let result = context.resume(); - // To do: use bytes. - let mut chars = result.chars(); + let mut bytes = result.as_bytes(); let mut trim = false; + let mut index = 0; + let mut end = bytes.len(); - if Some(' ') == chars.next() && Some(' ') == chars.next_back() { - let mut next = chars.next(); - while next != None && !trim { - if Some(' ') != next { + if end > 2 && bytes[index] == b' ' && bytes[end - 1] == b' ' { + index += 1; + end -= 1; + while index < end && !trim { + if bytes[index] != b' ' { trim = true; + break; } - next = chars.next(); + index += 1; } } + if trim { + bytes = &bytes[1..end]; + } + context.code_text_inside = false; - context.push(&if trim { - result[1..(result.len() - 1)].to_string() - } else { - result - }); + context.push(str::from_utf8(bytes).unwrap()); if !context.in_image_alt { context.push(""); @@ -810,20 +798,23 @@ fn on_exit_drop(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:{[`CodeTextData`][Token::CodeTextData],[`Data`][Token::Data],[`CharacterEscapeValue`][Token::CharacterEscapeValue]}. fn on_exit_data(context: &mut CompileContext) { - context.push_raw( + context.push(&encode( Slice::from_position( context.bytes, &Position::from_exit_event(context.events, context.index), ) .as_str(), - ); + context.encode_html, + )); } /// Handle [`Exit`][EventType::Exit]:[`Definition`][Token::Definition]. fn on_exit_definition(context: &mut CompileContext) { context.resume(); let media = context.media_stack.pop().unwrap(); - let id = normalize_identifier(&media.reference_id.unwrap()); + let indices = media.reference_id.unwrap(); + let id = + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()); context.definitions.push(( id, @@ -845,14 +836,8 @@ fn on_exit_definition_destination_string(context: &mut CompileContext) { fn on_exit_definition_label_string(context: &mut CompileContext) { // Discard label, use the source content instead. context.resume(); - context.media_stack.last_mut().unwrap().reference_id = Some( - // To do: lifetimes, reference bytes? - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .serialize(), - ); + context.media_stack.last_mut().unwrap().reference_id = + Some(Position::from_exit_event(context.events, context.index).to_indices()); } /// Handle [`Exit`][EventType::Exit]:[`DefinitionTitleString`][Token::DefinitionTitleString]. @@ -940,13 +925,14 @@ fn on_exit_html(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:{[`HtmlFlowData`][Token::HtmlFlowData],[`HtmlTextData`][Token::HtmlTextData]}. fn on_exit_html_data(context: &mut CompileContext) { - context.push_raw( + context.push(&encode( Slice::from_position( context.bytes, &Position::from_exit_event(context.events, context.index), ) .as_str(), - ); + context.encode_html, + )); } /// Handle [`Exit`][EventType::Exit]:[`Label`][Token::Label]. @@ -957,14 +943,8 @@ fn on_exit_label(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`LabelText`][Token::LabelText]. fn on_exit_label_text(context: &mut CompileContext) { - context.media_stack.last_mut().unwrap().label_id = Some( - // To do: lifetimes, reference bytes? - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .serialize(), - ); + context.media_stack.last_mut().unwrap().label_id = + Some(Position::from_exit_event(context.events, context.index).to_indices()); } /// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding]. @@ -974,13 +954,14 @@ fn on_exit_line_ending(context: &mut CompileContext) { } else if context.slurp_one_line_ending { context.slurp_one_line_ending = false; } else { - context.push_raw( + context.push(&encode( Slice::from_position( context.bytes, &Position::from_exit_event(context.events, context.index), ) .as_str(), - ); + context.encode_html, + )); } } @@ -1062,10 +1043,9 @@ fn on_exit_media(context: &mut CompileContext) { let media = context.media_stack.pop().unwrap(); let label = media.label.unwrap(); let in_image_alt = context.in_image_alt; - let id = media - .reference_id - .or(media.label_id) - .map(|id| normalize_identifier(&id)); + let id = media.reference_id.or(media.label_id).map(|indices| { + normalize_identifier(Slice::from_indices(context.bytes, indices.0, indices.1).as_str()) + }); let definition_index = if media.destination.is_none() { id.and_then(|id| { @@ -1164,14 +1144,9 @@ fn on_exit_paragraph(context: &mut CompileContext) { fn on_exit_reference_string(context: &mut CompileContext) { // Drop stuff. context.resume(); - // To do: lifetimes, reference bytes. - context.media_stack.last_mut().unwrap().reference_id = Some( - Slice::from_position( - context.bytes, - &Position::from_exit_event(context.events, context.index), - ) - .serialize(), - ); + + context.media_stack.last_mut().unwrap().reference_id = + Some(Position::from_exit_event(context.events, context.index).to_indices()); } /// Handle [`Exit`][EventType::Exit]:[`ResourceDestinationString`][Token::ResourceDestinationString]. diff --git a/src/util/slice.rs b/src/util/slice.rs index d899dac..13b664d 100644 --- a/src/util/slice.rs +++ b/src/util/slice.rs @@ -42,6 +42,11 @@ impl<'a> Position<'a> { enter_index -= 1; } } + + /// To do. + pub fn to_indices(&self) -> (usize, usize) { + (self.start.index, self.end.index) + } } /// Chars belonging to a range. -- cgit