From a4b56e7b971fa81c56a59b465f90c8016f01320d Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Tue, 11 Oct 2022 09:54:56 +0200 Subject: Add support for proper positional info in swc tree * Fix some positional info in SWC error messages * Add positional info in `to_document` on duplicate layouts * Add support for `path` on `Program` (`to_swc`, `to_document`, `jsx_rewrite`), for the path of a file on disk * Add support for `development` to `jsx-rewrite`, which when defined will embed info on where tags were written into the runtime code when they are not passed * Refactor to move some utilities to `micromark_swc_utils.rs`, `swc_utils.rs` --- src/construct/mdx_esm.rs | 19 ++-- src/construct/partial_mdx_expression.rs | 24 +++-- src/lib.rs | 11 ++- src/mdast.rs | 17 +++- src/parser.rs | 16 +++- src/to_mdast.rs | 153 +++++++++++++++++++------------- src/util/location.rs | 111 +++++++++++++++++++++++ src/util/mdx_collect.rs | 81 +++++++---------- src/util/mod.rs | 1 + 9 files changed, 298 insertions(+), 135 deletions(-) create mode 100644 src/util/location.rs (limited to 'src') diff --git a/src/construct/mdx_esm.rs b/src/construct/mdx_esm.rs index 53f8beb..4fb6b50 100644 --- a/src/construct/mdx_esm.rs +++ b/src/construct/mdx_esm.rs @@ -31,10 +31,7 @@ use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -use crate::util::{ - mdx_collect::{collect, place_to_point}, - slice::Slice, -}; +use crate::util::{mdx_collect::collect, slice::Slice}; use crate::MdxSignal; use alloc::format; @@ -197,16 +194,24 @@ fn parse_esm(tokenizer: &mut Tokenizer) -> State { // Collect the body of the ESM and positional info for each run of it. let result = collect( - tokenizer, + &tokenizer.events, + tokenizer.parse_state.bytes, tokenizer.tokenize_state.start, &[Name::MdxEsmData, Name::LineEnding], + &[], ); // Parse and handle what was signaled back. match parse(&result.value) { MdxSignal::Ok => State::Ok, - MdxSignal::Error(message, place) => { - let point = place_to_point(&result, place); + MdxSignal::Error(message, relative) => { + let point = tokenizer + .parse_state + .location + .as_ref() + .expect("expected location index if aware mdx is on") + .relative_to_point(&result.stops, relative) + .expect("expected non-empty string"); State::Error(format!("{}:{}: {}", point.line, point.column, message)) } MdxSignal::Eof(message) => { diff --git a/src/construct/partial_mdx_expression.rs b/src/construct/partial_mdx_expression.rs index 789443e..fbb13e0 100644 --- a/src/construct/partial_mdx_expression.rs +++ b/src/construct/partial_mdx_expression.rs @@ -60,10 +60,7 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max; use crate::event::Name; use crate::state::{Name as StateName, State}; use crate::tokenizer::Tokenizer; -use crate::util::{ - constant::TAB_SIZE, - mdx_collect::{collect, place_to_point}, -}; +use crate::util::{constant::TAB_SIZE, mdx_collect::collect}; use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal}; use alloc::{format, string::ToString}; @@ -205,9 +202,11 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State { fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State { // Collect the body of the expression and positional info for each run of it. let result = collect( - tokenizer, + &tokenizer.events, + tokenizer.parse_state.bytes, tokenizer.tokenize_state.start, &[Name::MdxExpressionData, Name::LineEnding], + &[], ); // Turn the name of the expression into a kind. @@ -221,9 +220,18 @@ fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> St // Parse and handle what was signaled back. match parse(&result.value, &kind) { MdxSignal::Ok => State::Ok, - MdxSignal::Error(message, place) => { - let point = place_to_point(&result, place); - State::Error(format!("{}:{}: {}", point.line, point.column, message)) + MdxSignal::Error(message, relative) => { + let point = tokenizer + .parse_state + .location + .as_ref() + .expect("expected location index if aware mdx is on") + .relative_to_point(&result.stops, relative) + .map_or((tokenizer.point.line, tokenizer.point.column), |d| { + (d.line, d.column) + }); + + State::Error(format!("{}:{}: {}", point.0, point.1, message)) } MdxSignal::Eof(message) => { tokenizer.tokenize_state.mdx_last_parse_error = Some(message); diff --git a/src/lib.rs b/src/lib.rs index 02fb5f5..fd0580a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -41,6 +41,9 @@ use util::{ sanitize_uri::sanitize, }; +#[doc(hidden)] +pub use util::location::Location; + /// Type of line endings in markdown. #[derive(Clone, Debug, Default, Eq, PartialEq)] pub enum LineEnding { @@ -1252,8 +1255,8 @@ pub fn micromark(value: &str) -> String { /// # } /// ``` pub fn micromark_with_options(value: &str, options: &Options) -> Result { - let (events, bytes) = parse(value, &options.parse)?; - Ok(to_html(&events, bytes, &options.compile)) + let (events, parse_state) = parse(value, &options.parse)?; + Ok(to_html(&events, parse_state.bytes, &options.compile)) } /// Turn markdown into a syntax tree. @@ -1279,8 +1282,8 @@ pub fn micromark_with_options(value: &str, options: &Options) -> Result Result { - let (events, bytes) = parse(value, options)?; - let node = to_mdast(&events, bytes)?; + let (events, parse_state) = parse(value, options)?; + let node = to_mdast(&events, parse_state.bytes)?; Ok(node) } diff --git a/src/mdast.rs b/src/mdast.rs index 8b5b74d..de53532 100644 --- a/src/mdast.rs +++ b/src/mdast.rs @@ -9,6 +9,10 @@ use alloc::{ vec::Vec, }; +/// Relative byte index into a string, to an absolute byte index into the +/// whole document. +pub type Stop = (usize, usize); + /// Explicitness of a reference. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum ReferenceKind { @@ -429,7 +433,7 @@ pub enum AttributeContent { /// > | /// ^^^^^^ /// ``` - Expression(String), + Expression(String, Vec), /// JSX property. /// /// ```markdown @@ -448,7 +452,7 @@ pub enum AttributeValue { /// > | /// ^^^ /// ``` - Expression(String), + Expression(String, Vec), /// Static value. /// /// ```markdown @@ -1040,6 +1044,9 @@ pub struct MdxjsEsm { pub value: String, /// Positional info. pub position: Option, + + // Custom data on where each slice of `value` came from. + pub stops: Vec, } /// MDX: expression (flow). @@ -1055,6 +1062,9 @@ pub struct MdxFlowExpression { pub value: String, /// Positional info. pub position: Option, + + // Custom data on where each slice of `value` came from. + pub stops: Vec, } /// MDX: expression (text). @@ -1070,6 +1080,9 @@ pub struct MdxTextExpression { pub value: String, /// Positional info. pub position: Option, + + // Custom data on where each slice of `value` came from. + pub stops: Vec, } /// MDX: JSX element (container). diff --git a/src/parser.rs b/src/parser.rs index b694bc5..a7962d0 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,6 +4,7 @@ use crate::event::{Event, Point}; use crate::state::{Name as StateName, State}; use crate::subtokenize::subtokenize; use crate::tokenizer::Tokenizer; +use crate::util::location::Location; use crate::ParseOptions; use alloc::{string::String, vec, vec::Vec}; @@ -13,6 +14,8 @@ use alloc::{string::String, vec, vec::Vec}; /// It also references the input value as bytes (`u8`). #[derive(Debug)] pub struct ParseState<'a> { + /// Configuration. + pub location: Option, /// Configuration. pub options: &'a ParseOptions, /// List of chars. @@ -29,10 +32,17 @@ pub struct ParseState<'a> { pub fn parse<'a>( value: &'a str, options: &'a ParseOptions, -) -> Result<(Vec, &'a [u8]), String> { +) -> Result<(Vec, ParseState<'a>), String> { + let bytes = value.as_bytes(); + let mut parse_state = ParseState { options, - bytes: value.as_bytes(), + bytes, + location: if options.mdx_esm_parse.is_some() || options.mdx_expression_parse.is_some() { + Some(Location::new(bytes)) + } else { + None + }, definitions: vec![], gfm_footnote_definitions: vec![], }; @@ -72,5 +82,5 @@ pub fn parse<'a>( } } - Ok((events, parse_state.bytes)) + Ok((events, parse_state)) } diff --git a/src/to_mdast.rs b/src/to_mdast.rs index 4db76e6..f2b3c30 100644 --- a/src/to_mdast.rs +++ b/src/to_mdast.rs @@ -1,6 +1,6 @@ //! Turn events into a syntax tree. -use crate::event::{Event, Kind, Name}; +use crate::event::{Event, Kind, Name, Point as EventPoint}; use crate::mdast::{ AttributeContent, AttributeValue, BlockQuote, Break, Code, Definition, Delete, Emphasis, FootnoteDefinition, FootnoteReference, Heading, Html, Image, ImageReference, InlineCode, @@ -14,6 +14,7 @@ use crate::util::{ decode as decode_character_reference, parse as parse_character_reference, }, infer::{gfm_table_align, list_item_loose, list_loose}, + mdx_collect::collect, normalize_identifier::normalize_identifier, slice::{Position as SlicePosition, Slice}, }; @@ -255,8 +256,6 @@ fn enter(context: &mut CompileContext) -> Result<(), String> { | Name::HtmlTextData | Name::MathFlowChunk | Name::MathTextData - | Name::MdxExpressionData - | Name::MdxEsmData | Name::MdxJsxTagAttributeValueLiteralValue => on_enter_data(context), Name::CodeFencedFenceInfo | Name::CodeFencedFenceMeta @@ -267,7 +266,6 @@ fn enter(context: &mut CompileContext) -> Result<(), String> { | Name::LabelText | Name::MathFlowFenceMeta | Name::MdxJsxTagAttributeValueLiteral - | Name::MdxJsxTagAttributeValueExpression | Name::ReferenceString | Name::ResourceDestinationString | Name::ResourceTitleString => on_enter_buffer(context), @@ -306,6 +304,9 @@ fn enter(context: &mut CompileContext) -> Result<(), String> { Name::MdxJsxTagClosingMarker => on_enter_mdx_jsx_tag_closing_marker(context)?, Name::MdxJsxTagAttribute => on_enter_mdx_jsx_tag_attribute(context)?, Name::MdxJsxTagAttributeExpression => on_enter_mdx_jsx_tag_attribute_expression(context)?, + Name::MdxJsxTagAttributeValueExpression => { + on_enter_mdx_jsx_tag_attribute_value_expression(context); + } Name::MdxJsxTagSelfClosingMarker => on_enter_mdx_jsx_tag_self_closing_marker(context)?, Name::Paragraph => on_enter_paragraph(context), Name::Reference => on_enter_reference(context), @@ -347,11 +348,12 @@ fn exit(context: &mut CompileContext) -> Result<(), String> { | Name::HtmlTextData | Name::MathFlowChunk | Name::MathTextData - | Name::MdxExpressionData - | Name::MdxEsmData | Name::MdxJsxTagAttributeValueLiteralValue => { on_exit_data(context)?; } + Name::MdxJsxTagAttributeExpression | Name::MdxJsxTagAttributeValueExpression => { + on_exit_drop(context); + } Name::AutolinkProtocol => on_exit_autolink_protocol(context)?, Name::AutolinkEmail => on_exit_autolink_email(context)?, Name::CharacterReferenceMarker => on_exit_character_reference_marker(context), @@ -391,28 +393,23 @@ fn exit(context: &mut CompileContext) -> Result<(), String> { Name::HeadingSetext => on_exit_heading_setext(context)?, Name::HeadingSetextUnderlineSequence => on_exit_heading_setext_underline_sequence(context), Name::HeadingSetextText => on_exit_heading_setext_text(context), - Name::HtmlFlow - | Name::HtmlText - | Name::MdxEsm - | Name::MdxFlowExpression - | Name::MdxTextExpression => on_exit_literal(context)?, + Name::HtmlFlow | Name::HtmlText => on_exit_html(context)?, Name::LabelText => on_exit_label_text(context), Name::LineEnding => on_exit_line_ending(context)?, Name::ListItemValue => on_exit_list_item_value(context), + Name::MdxEsm | Name::MdxFlowExpression | Name::MdxTextExpression => { + on_exit_mdx_esm_or_expression(context)?; + } Name::MdxJsxFlowTag | Name::MdxJsxTextTag => on_exit_mdx_jsx_tag(context)?, Name::MdxJsxTagClosingMarker => on_exit_mdx_jsx_tag_closing_marker(context), Name::MdxJsxTagNamePrimary => on_exit_mdx_jsx_tag_name_primary(context), Name::MdxJsxTagNameMember => on_exit_mdx_jsx_tag_name_member(context), Name::MdxJsxTagNameLocal => on_exit_mdx_jsx_tag_name_local(context), - Name::MdxJsxTagAttributeExpression => on_exit_mdx_jsx_tag_attribute_expression(context), Name::MdxJsxTagAttributePrimaryName => on_exit_mdx_jsx_tag_attribute_primary_name(context), Name::MdxJsxTagAttributeNameLocal => on_exit_mdx_jsx_tag_attribute_name_local(context), Name::MdxJsxTagAttributeValueLiteral => { on_exit_mdx_jsx_tag_attribute_value_literal(context); } - Name::MdxJsxTagAttributeValueExpression => { - on_exit_mdx_jsx_tag_attribute_value_expression(context); - } Name::MdxJsxTagSelfClosingMarker => on_exit_mdx_jsx_tag_self_closing_marker(context), Name::ReferenceString => on_exit_reference_string(context), @@ -499,27 +496,51 @@ fn on_enter_math_text(context: &mut CompileContext) { /// Handle [`Enter`][Kind::Enter]:[`MdxEsm`][Name::MdxEsm]. fn on_enter_mdx_esm(context: &mut CompileContext) { + let result = collect( + context.events, + context.bytes, + context.index, + &[Name::MdxEsmData, Name::LineEnding], + &[Name::MdxEsm], + ); context.tail_push(Node::MdxjsEsm(MdxjsEsm { - value: String::new(), + value: result.value, position: None, + stops: result.stops, })); context.buffer(); } /// Handle [`Enter`][Kind::Enter]:[`MdxFlowExpression`][Name::MdxFlowExpression]. fn on_enter_mdx_flow_expression(context: &mut CompileContext) { + let result = collect( + context.events, + context.bytes, + context.index, + &[Name::MdxExpressionData, Name::LineEnding], + &[Name::MdxFlowExpression], + ); context.tail_push(Node::MdxFlowExpression(MdxFlowExpression { - value: String::new(), + value: result.value, position: None, + stops: result.stops, })); context.buffer(); } /// Handle [`Enter`][Kind::Enter]:[`MdxTextExpression`][Name::MdxTextExpression]. fn on_enter_mdx_text_expression(context: &mut CompileContext) { + let result = collect( + context.events, + context.bytes, + context.index, + &[Name::MdxExpressionData, Name::LineEnding], + &[Name::MdxTextExpression], + ); context.tail_push(Node::MdxTextExpression(MdxTextExpression { - value: String::new(), + value: result.value, position: None, + stops: result.stops, })); context.buffer(); } @@ -801,18 +822,50 @@ fn on_enter_mdx_jsx_tag_attribute(context: &mut CompileContext) -> Result<(), St fn on_enter_mdx_jsx_tag_attribute_expression(context: &mut CompileContext) -> Result<(), String> { on_enter_mdx_jsx_tag_any_attribute(context)?; + let result = collect( + context.events, + context.bytes, + context.index, + &[Name::MdxExpressionData, Name::LineEnding], + &[Name::MdxJsxTagAttributeExpression], + ); context .jsx_tag .as_mut() .expect("expected tag") .attributes - .push(AttributeContent::Expression(String::new())); + .push(AttributeContent::Expression(result.value, result.stops)); context.buffer(); Ok(()) } +/// Handle [`Enter`][Kind::Enter]:[`MdxJsxTagAttributeValueExpression`][Name::MdxJsxTagAttributeValueExpression]. +fn on_enter_mdx_jsx_tag_attribute_value_expression(context: &mut CompileContext) { + let result = collect( + context.events, + context.bytes, + context.index, + &[Name::MdxExpressionData, Name::LineEnding], + &[Name::MdxJsxTagAttributeValueExpression], + ); + + if let Some(AttributeContent::Property(node)) = context + .jsx_tag + .as_mut() + .expect("expected tag") + .attributes + .last_mut() + { + node.value = Some(AttributeValue::Expression(result.value, result.stops)); + } else { + unreachable!("expected property") + } + + context.buffer(); +} + /// Handle [`Enter`][Kind::Enter]:[`MdxJsxTagSelfClosingMarker`][Name::MdxJsxTagSelfClosingMarker]. fn on_enter_mdx_jsx_tag_self_closing_marker(context: &mut CompileContext) -> Result<(), String> { let tag = context.jsx_tag.as_ref().expect("expected tag"); @@ -1086,6 +1139,11 @@ fn on_exit_definition_title_string(context: &mut CompileContext) { } } +/// Handle [`Exit`][Kind::Exit]:*, by dropping the current buffer. +fn on_exit_drop(context: &mut CompileContext) { + context.resume(); +} + /// Handle [`Exit`][Kind::Exit]:[`Frontmatter`][Name::Frontmatter]. fn on_exit_frontmatter(context: &mut CompileContext) -> Result<(), String> { let value = trim_eol(context.resume().to_string(), true, true); @@ -1280,20 +1338,16 @@ fn on_exit_line_ending(context: &mut CompileContext) -> Result<(), String> { Ok(()) } -/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlow`][Name::HtmlFlow],[`MdxFlowExpression`][Name::MdxFlowExpression],etc}. -fn on_exit_literal(context: &mut CompileContext) -> Result<(), String> { +/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlow`][Name::HtmlFlow],[`HtmlText`][Name::HtmlText]}. +fn on_exit_html(context: &mut CompileContext) -> Result<(), String> { let value = context.resume().to_string(); match context.tail_mut() { Node::Html(node) => node.value = value, - Node::MdxFlowExpression(node) => node.value = value, - Node::MdxTextExpression(node) => node.value = value, - Node::MdxjsEsm(node) => node.value = value, - _ => unreachable!("expected html, mdx expression, etc on stack for value"), + _ => unreachable!("expected html on stack for value"), } on_exit(context)?; - Ok(()) } @@ -1483,26 +1537,13 @@ fn on_exit_mdx_jsx_tag_name_local(context: &mut CompileContext) { name.push_str(slice.as_str()); } -/// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagAttributeExpression`][Name::MdxJsxTagAttributeExpression]. -fn on_exit_mdx_jsx_tag_attribute_expression(context: &mut CompileContext) { - let value = context.resume(); - - if let Some(AttributeContent::Expression(expression)) = context - .jsx_tag - .as_mut() - .expect("expected tag") - .attributes - .last_mut() - { - expression.push_str(value.to_string().as_str()); - } else { - unreachable!("expected expression") - } +/// Handle [`Exit`][Kind::Exit]:{[`MdxEsm`][Name::MdxEsm],[`MdxFlowExpression`][Name::MdxFlowExpression],[`MdxTextExpression`][Name::MdxTextExpression]}. +fn on_exit_mdx_esm_or_expression(context: &mut CompileContext) -> Result<(), String> { + on_exit_drop(context); + context.tail_pop()?; + Ok(()) } -// Name:: => (context), -// Name:: => (context), - /// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagAttributePrimaryName`][Name::MdxJsxTagAttributePrimaryName]. fn on_exit_mdx_jsx_tag_attribute_primary_name(context: &mut CompileContext) { let slice = Slice::from_position( @@ -1563,23 +1604,6 @@ fn on_exit_mdx_jsx_tag_attribute_value_literal(context: &mut CompileContext) { } } -/// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagAttributeValueExpression`][Name::MdxJsxTagAttributeValueExpression]. -fn on_exit_mdx_jsx_tag_attribute_value_expression(context: &mut CompileContext) { - let value = context.resume(); - - if let Some(AttributeContent::Property(node)) = context - .jsx_tag - .as_mut() - .expect("expected tag") - .attributes - .last_mut() - { - node.value = Some(AttributeValue::Expression(value.to_string())); - } else { - unreachable!("expected property") - } -} - /// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagSelfClosingMarker`][Name::MdxJsxTagSelfClosingMarker]. fn on_exit_mdx_jsx_tag_self_closing_marker(context: &mut CompileContext) { context.jsx_tag.as_mut().expect("expected tag").self_closing = true; @@ -1624,9 +1648,14 @@ fn on_exit_resource_title_string(context: &mut CompileContext) { } } +// Create a point from an event. +fn point_from_event_point(point: &EventPoint) -> Point { + Point::new(point.line, point.column, point.index) +} + // Create a point from an event. fn point_from_event(event: &Event) -> Point { - Point::new(event.point.line, event.point.column, event.point.index) + point_from_event_point(&event.point) } // Create a position from an event. diff --git a/src/util/location.rs b/src/util/location.rs new file mode 100644 index 0000000..0c9c426 --- /dev/null +++ b/src/util/location.rs @@ -0,0 +1,111 @@ +//! Deal with positions in a file. +//! +//! * Convert between byte indices and unist points. +//! * Convert between byte indices into a string which is built up of several +//! slices in a whole document, and byte indices into that whole document. + +use crate::unist::Point; +use alloc::{vec, vec::Vec}; + +/// Each stop represents a new slice, which contains the byte index into the +/// corresponding string where the slice starts (`0`), and the byte index into +/// the whole document where that slice starts (`1`). +pub type Stop = (usize, usize); + +#[derive(Debug)] +pub struct Location { + /// List, where each index is a line number (0-based), and each value is + /// the byte index *after* where the line ends. + indices: Vec, +} + +impl Location { + /// Get an index for the given `bytes`. + /// + /// Port of + #[must_use] + pub fn new(bytes: &[u8]) -> Self { + let mut index = 0; + let mut location_index = Self { indices: vec![] }; + + while index < bytes.len() { + if bytes[index] == b'\r' { + if index + 1 < bytes.len() && bytes[index + 1] == b'\n' { + location_index.indices.push(index + 2); + } else { + location_index.indices.push(index + 1); + } + } else if bytes[index] == b'\n' { + location_index.indices.push(index + 1); + } + + index += 1; + } + + location_index.indices.push(index + 1); + location_index + } + + /// Get the line and column-based `point` for `offset` in the bound indices. + /// + /// Returns `None` when given out of bounds input. + /// + /// Port of + #[must_use] + pub fn to_point(&self, offset: usize) -> Option { + let mut index = 0; + + if let Some(end) = self.indices.last() { + if offset < *end { + while index < self.indices.len() { + if self.indices[index] > offset { + break; + } + + index += 1; + } + + let previous = if index > 0 { + self.indices[index - 1] + } else { + 0 + }; + return Some(Point { + line: index + 1, + column: offset + 1 - previous, + offset, + }); + } + } + + None + } + + /// Like `to_point`, but takes a relative offset from a certain string + /// instead of an absolute offset into the whole document. + /// + /// The relative offset is made absolute based on `stops`, which represent + /// where that certain string is in the whole document. + #[must_use] + pub fn relative_to_point(&self, stops: &[Stop], relative: usize) -> Option { + Location::relative_to_absolute(stops, relative).and_then(|absolute| self.to_point(absolute)) + } + + /// Turn a relative offset into an absolute offset. + #[must_use] + pub fn relative_to_absolute(stops: &[Stop], relative: usize) -> Option { + let mut index = 0; + + while index < stops.len() && stops[index].0 <= relative { + index += 1; + } + + // There are no points: that only occurs if there was an empty string. + if index == 0 { + None + } else { + let (stop_relative, stop_absolute) = &stops[index - 1]; + Some(stop_absolute + (relative - stop_relative)) + } + } +} diff --git a/src/util/mdx_collect.rs b/src/util/mdx_collect.rs index 73ead51..02921a4 100644 --- a/src/util/mdx_collect.rs +++ b/src/util/mdx_collect.rs @@ -1,70 +1,53 @@ //! Collect info for MDX. -use crate::event::{Kind, Name, Point}; -use crate::tokenizer::Tokenizer; +use crate::event::{Event, Kind, Name}; use crate::util::slice::{Position, Slice}; use alloc::{string::String, vec, vec::Vec}; -pub type Location<'a> = (usize, &'a Point); +pub type Stop = (usize, usize); -pub struct Result<'a> { - pub start: &'a Point, +#[derive(Debug)] +pub struct Result { pub value: String, - pub locations: Vec>, + pub stops: Vec, } -pub fn collect<'a>(tokenizer: &'a Tokenizer, from: usize, names: &[Name]) -> Result<'a> { +pub fn collect( + events: &[Event], + bytes: &[u8], + from: usize, + names: &[Name], + stop: &[Name], +) -> Result { let mut result = Result { - start: &tokenizer.events[from].point, value: String::new(), - locations: vec![], + stops: vec![], }; let mut index = from; - let mut acc = 0; - while index < tokenizer.events.len() { - if tokenizer.events[index].kind == Kind::Enter - && names.contains(&tokenizer.events[index].name) - { - // Include virtual spaces. - let value = Slice::from_position( - tokenizer.parse_state.bytes, - &Position { - start: &tokenizer.events[index].point, - end: &tokenizer.events[index + 1].point, - }, - ) - .serialize(); - acc += value.len(); - result.locations.push((acc, &tokenizer.events[index].point)); - result.value.push_str(&value); - } - - index += 1; - } - - result -} - -// Turn an index of `result.value` into a point in the whole document. -pub fn place_to_point(result: &Result, place: usize) -> Point { - let mut index = 0; - let mut point = result.start; - let mut rest = place; - - while index < result.locations.len() { - point = result.locations[index].1; - - if result.locations[index].0 > place { + while index < events.len() { + if events[index].kind == Kind::Enter { + if names.contains(&events[index].name) { + // Include virtual spaces, and assume void. + let value = Slice::from_position( + bytes, + &Position { + start: &events[index].point, + end: &events[index + 1].point, + }, + ) + .serialize(); + result + .stops + .push((result.value.len(), events[index].point.index)); + result.value.push_str(&value); + } + } else if stop.contains(&events[index].name) { break; } - rest = place - result.locations[index].0; index += 1; } - let mut point = point.clone(); - point.column += rest; - point.index += rest; - point + result } diff --git a/src/util/mod.rs b/src/util/mod.rs index ac93be0..f44e183 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -8,6 +8,7 @@ pub mod encode; pub mod gfm_tagfilter; pub mod identifier; pub mod infer; +pub mod location; pub mod mdx_collect; pub mod normalize_identifier; pub mod sanitize_uri; -- cgit