aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-10-11 09:54:56 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-10-11 09:55:16 +0200
commita4b56e7b971fa81c56a59b465f90c8016f01320d (patch)
tree7002a44087e57c8158a51dd30b6eb89eb260af2b /src
parent1fd94f512834aa7bd70f22a60229ce01edfc754e (diff)
downloadmarkdown-rs-a4b56e7b971fa81c56a59b465f90c8016f01320d.tar.gz
markdown-rs-a4b56e7b971fa81c56a59b465f90c8016f01320d.tar.bz2
markdown-rs-a4b56e7b971fa81c56a59b465f90c8016f01320d.zip
Add support for proper positional info in swc tree
* Fix some positional info in SWC error messages
* Add positional info in `to_document` on duplicate layouts
* Add support for `path` on `Program` (`to_swc`, `to_document`, `jsx_rewrite`), for the path of a file on disk
* Add support for `development` to `jsx-rewrite`, which when defined will embed info on where tags were written into the runtime code when they are not passed
* Refactor to move some utilities to `micromark_swc_utils.rs`, `swc_utils.rs`
Diffstat (limited to '')
-rw-r--r--src/construct/mdx_esm.rs19
-rw-r--r--src/construct/partial_mdx_expression.rs24
-rw-r--r--src/lib.rs11
-rw-r--r--src/mdast.rs17
-rw-r--r--src/parser.rs16
-rw-r--r--src/to_mdast.rs153
-rw-r--r--src/util/location.rs111
-rw-r--r--src/util/mdx_collect.rs81
-rw-r--r--src/util/mod.rs1
9 files changed, 298 insertions, 135 deletions
diff --git a/src/construct/mdx_esm.rs b/src/construct/mdx_esm.rs
index 53f8beb..4fb6b50 100644
--- a/src/construct/mdx_esm.rs
+++ b/src/construct/mdx_esm.rs
@@ -31,10 +31,7 @@
use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-use crate::util::{
- mdx_collect::{collect, place_to_point},
- slice::Slice,
-};
+use crate::util::{mdx_collect::collect, slice::Slice};
use crate::MdxSignal;
use alloc::format;
@@ -197,16 +194,24 @@ fn parse_esm(tokenizer: &mut Tokenizer) -> State {
// Collect the body of the ESM and positional info for each run of it.
let result = collect(
- tokenizer,
+ &tokenizer.events,
+ tokenizer.parse_state.bytes,
tokenizer.tokenize_state.start,
&[Name::MdxEsmData, Name::LineEnding],
+ &[],
);
// Parse and handle what was signaled back.
match parse(&result.value) {
MdxSignal::Ok => State::Ok,
- MdxSignal::Error(message, place) => {
- let point = place_to_point(&result, place);
+ MdxSignal::Error(message, relative) => {
+ let point = tokenizer
+ .parse_state
+ .location
+ .as_ref()
+ .expect("expected location index if aware mdx is on")
+ .relative_to_point(&result.stops, relative)
+ .expect("expected non-empty string");
State::Error(format!("{}:{}: {}", point.line, point.column, message))
}
MdxSignal::Eof(message) => {
diff --git a/src/construct/partial_mdx_expression.rs b/src/construct/partial_mdx_expression.rs
index 789443e..fbb13e0 100644
--- a/src/construct/partial_mdx_expression.rs
+++ b/src/construct/partial_mdx_expression.rs
@@ -60,10 +60,7 @@ use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::event::Name;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
-use crate::util::{
- constant::TAB_SIZE,
- mdx_collect::{collect, place_to_point},
-};
+use crate::util::{constant::TAB_SIZE, mdx_collect::collect};
use crate::{MdxExpressionKind, MdxExpressionParse, MdxSignal};
use alloc::{format, string::ToString};
@@ -205,9 +202,11 @@ pub fn eol_after(tokenizer: &mut Tokenizer) -> State {
fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> State {
// Collect the body of the expression and positional info for each run of it.
let result = collect(
- tokenizer,
+ &tokenizer.events,
+ tokenizer.parse_state.bytes,
tokenizer.tokenize_state.start,
&[Name::MdxExpressionData, Name::LineEnding],
+ &[],
);
// Turn the name of the expression into a kind.
@@ -221,9 +220,18 @@ fn parse_expression(tokenizer: &mut Tokenizer, parse: &MdxExpressionParse) -> St
// Parse and handle what was signaled back.
match parse(&result.value, &kind) {
MdxSignal::Ok => State::Ok,
- MdxSignal::Error(message, place) => {
- let point = place_to_point(&result, place);
- State::Error(format!("{}:{}: {}", point.line, point.column, message))
+ MdxSignal::Error(message, relative) => {
+ let point = tokenizer
+ .parse_state
+ .location
+ .as_ref()
+ .expect("expected location index if aware mdx is on")
+ .relative_to_point(&result.stops, relative)
+ .map_or((tokenizer.point.line, tokenizer.point.column), |d| {
+ (d.line, d.column)
+ });
+
+ State::Error(format!("{}:{}: {}", point.0, point.1, message))
}
MdxSignal::Eof(message) => {
tokenizer.tokenize_state.mdx_last_parse_error = Some(message);
diff --git a/src/lib.rs b/src/lib.rs
index 02fb5f5..fd0580a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -41,6 +41,9 @@ use util::{
sanitize_uri::sanitize,
};
+#[doc(hidden)]
+pub use util::location::Location;
+
/// Type of line endings in markdown.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub enum LineEnding {
@@ -1252,8 +1255,8 @@ pub fn micromark(value: &str) -> String {
/// # }
/// ```
pub fn micromark_with_options(value: &str, options: &Options) -> Result<String, String> {
- let (events, bytes) = parse(value, &options.parse)?;
- Ok(to_html(&events, bytes, &options.compile))
+ let (events, parse_state) = parse(value, &options.parse)?;
+ Ok(to_html(&events, parse_state.bytes, &options.compile))
}
/// Turn markdown into a syntax tree.
@@ -1279,8 +1282,8 @@ pub fn micromark_with_options(value: &str, options: &Options) -> Result<String,
/// # }
/// ```
pub fn micromark_to_mdast(value: &str, options: &ParseOptions) -> Result<Node, String> {
- let (events, bytes) = parse(value, options)?;
- let node = to_mdast(&events, bytes)?;
+ let (events, parse_state) = parse(value, options)?;
+ let node = to_mdast(&events, parse_state.bytes)?;
Ok(node)
}
diff --git a/src/mdast.rs b/src/mdast.rs
index 8b5b74d..de53532 100644
--- a/src/mdast.rs
+++ b/src/mdast.rs
@@ -9,6 +9,10 @@ use alloc::{
vec::Vec,
};
+/// Relative byte index into a string, to an absolute byte index into the
+/// whole document.
+pub type Stop = (usize, usize);
+
/// Explicitness of a reference.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum ReferenceKind {
@@ -429,7 +433,7 @@ pub enum AttributeContent {
/// > | <a {...b} />
/// ^^^^^^
/// ```
- Expression(String),
+ Expression(String, Vec<Stop>),
/// JSX property.
///
/// ```markdown
@@ -448,7 +452,7 @@ pub enum AttributeValue {
/// > | <a b={c} />
/// ^^^
/// ```
- Expression(String),
+ Expression(String, Vec<Stop>),
/// Static value.
///
/// ```markdown
@@ -1040,6 +1044,9 @@ pub struct MdxjsEsm {
pub value: String,
/// Positional info.
pub position: Option<Position>,
+
+ // Custom data on where each slice of `value` came from.
+ pub stops: Vec<Stop>,
}
/// MDX: expression (flow).
@@ -1055,6 +1062,9 @@ pub struct MdxFlowExpression {
pub value: String,
/// Positional info.
pub position: Option<Position>,
+
+ // Custom data on where each slice of `value` came from.
+ pub stops: Vec<Stop>,
}
/// MDX: expression (text).
@@ -1070,6 +1080,9 @@ pub struct MdxTextExpression {
pub value: String,
/// Positional info.
pub position: Option<Position>,
+
+ // Custom data on where each slice of `value` came from.
+ pub stops: Vec<Stop>,
}
/// MDX: JSX element (container).
diff --git a/src/parser.rs b/src/parser.rs
index b694bc5..a7962d0 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4,6 +4,7 @@ use crate::event::{Event, Point};
use crate::state::{Name as StateName, State};
use crate::subtokenize::subtokenize;
use crate::tokenizer::Tokenizer;
+use crate::util::location::Location;
use crate::ParseOptions;
use alloc::{string::String, vec, vec::Vec};
@@ -14,6 +15,8 @@ use alloc::{string::String, vec, vec::Vec};
#[derive(Debug)]
pub struct ParseState<'a> {
/// Configuration.
+ pub location: Option<Location>,
+ /// Configuration.
pub options: &'a ParseOptions,
/// List of chars.
pub bytes: &'a [u8],
@@ -29,10 +32,17 @@ pub struct ParseState<'a> {
pub fn parse<'a>(
value: &'a str,
options: &'a ParseOptions,
-) -> Result<(Vec<Event>, &'a [u8]), String> {
+) -> Result<(Vec<Event>, ParseState<'a>), String> {
+ let bytes = value.as_bytes();
+
let mut parse_state = ParseState {
options,
- bytes: value.as_bytes(),
+ bytes,
+ location: if options.mdx_esm_parse.is_some() || options.mdx_expression_parse.is_some() {
+ Some(Location::new(bytes))
+ } else {
+ None
+ },
definitions: vec![],
gfm_footnote_definitions: vec![],
};
@@ -72,5 +82,5 @@ pub fn parse<'a>(
}
}
- Ok((events, parse_state.bytes))
+ Ok((events, parse_state))
}
diff --git a/src/to_mdast.rs b/src/to_mdast.rs
index 4db76e6..f2b3c30 100644
--- a/src/to_mdast.rs
+++ b/src/to_mdast.rs
@@ -1,6 +1,6 @@
//! Turn events into a syntax tree.
-use crate::event::{Event, Kind, Name};
+use crate::event::{Event, Kind, Name, Point as EventPoint};
use crate::mdast::{
AttributeContent, AttributeValue, BlockQuote, Break, Code, Definition, Delete, Emphasis,
FootnoteDefinition, FootnoteReference, Heading, Html, Image, ImageReference, InlineCode,
@@ -14,6 +14,7 @@ use crate::util::{
decode as decode_character_reference, parse as parse_character_reference,
},
infer::{gfm_table_align, list_item_loose, list_loose},
+ mdx_collect::collect,
normalize_identifier::normalize_identifier,
slice::{Position as SlicePosition, Slice},
};
@@ -255,8 +256,6 @@ fn enter(context: &mut CompileContext) -> Result<(), String> {
| Name::HtmlTextData
| Name::MathFlowChunk
| Name::MathTextData
- | Name::MdxExpressionData
- | Name::MdxEsmData
| Name::MdxJsxTagAttributeValueLiteralValue => on_enter_data(context),
Name::CodeFencedFenceInfo
| Name::CodeFencedFenceMeta
@@ -267,7 +266,6 @@ fn enter(context: &mut CompileContext) -> Result<(), String> {
| Name::LabelText
| Name::MathFlowFenceMeta
| Name::MdxJsxTagAttributeValueLiteral
- | Name::MdxJsxTagAttributeValueExpression
| Name::ReferenceString
| Name::ResourceDestinationString
| Name::ResourceTitleString => on_enter_buffer(context),
@@ -306,6 +304,9 @@ fn enter(context: &mut CompileContext) -> Result<(), String> {
Name::MdxJsxTagClosingMarker => on_enter_mdx_jsx_tag_closing_marker(context)?,
Name::MdxJsxTagAttribute => on_enter_mdx_jsx_tag_attribute(context)?,
Name::MdxJsxTagAttributeExpression => on_enter_mdx_jsx_tag_attribute_expression(context)?,
+ Name::MdxJsxTagAttributeValueExpression => {
+ on_enter_mdx_jsx_tag_attribute_value_expression(context);
+ }
Name::MdxJsxTagSelfClosingMarker => on_enter_mdx_jsx_tag_self_closing_marker(context)?,
Name::Paragraph => on_enter_paragraph(context),
Name::Reference => on_enter_reference(context),
@@ -347,11 +348,12 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
| Name::HtmlTextData
| Name::MathFlowChunk
| Name::MathTextData
- | Name::MdxExpressionData
- | Name::MdxEsmData
| Name::MdxJsxTagAttributeValueLiteralValue => {
on_exit_data(context)?;
}
+ Name::MdxJsxTagAttributeExpression | Name::MdxJsxTagAttributeValueExpression => {
+ on_exit_drop(context);
+ }
Name::AutolinkProtocol => on_exit_autolink_protocol(context)?,
Name::AutolinkEmail => on_exit_autolink_email(context)?,
Name::CharacterReferenceMarker => on_exit_character_reference_marker(context),
@@ -391,28 +393,23 @@ fn exit(context: &mut CompileContext) -> Result<(), String> {
Name::HeadingSetext => on_exit_heading_setext(context)?,
Name::HeadingSetextUnderlineSequence => on_exit_heading_setext_underline_sequence(context),
Name::HeadingSetextText => on_exit_heading_setext_text(context),
- Name::HtmlFlow
- | Name::HtmlText
- | Name::MdxEsm
- | Name::MdxFlowExpression
- | Name::MdxTextExpression => on_exit_literal(context)?,
+ Name::HtmlFlow | Name::HtmlText => on_exit_html(context)?,
Name::LabelText => on_exit_label_text(context),
Name::LineEnding => on_exit_line_ending(context)?,
Name::ListItemValue => on_exit_list_item_value(context),
+ Name::MdxEsm | Name::MdxFlowExpression | Name::MdxTextExpression => {
+ on_exit_mdx_esm_or_expression(context)?;
+ }
Name::MdxJsxFlowTag | Name::MdxJsxTextTag => on_exit_mdx_jsx_tag(context)?,
Name::MdxJsxTagClosingMarker => on_exit_mdx_jsx_tag_closing_marker(context),
Name::MdxJsxTagNamePrimary => on_exit_mdx_jsx_tag_name_primary(context),
Name::MdxJsxTagNameMember => on_exit_mdx_jsx_tag_name_member(context),
Name::MdxJsxTagNameLocal => on_exit_mdx_jsx_tag_name_local(context),
- Name::MdxJsxTagAttributeExpression => on_exit_mdx_jsx_tag_attribute_expression(context),
Name::MdxJsxTagAttributePrimaryName => on_exit_mdx_jsx_tag_attribute_primary_name(context),
Name::MdxJsxTagAttributeNameLocal => on_exit_mdx_jsx_tag_attribute_name_local(context),
Name::MdxJsxTagAttributeValueLiteral => {
on_exit_mdx_jsx_tag_attribute_value_literal(context);
}
- Name::MdxJsxTagAttributeValueExpression => {
- on_exit_mdx_jsx_tag_attribute_value_expression(context);
- }
Name::MdxJsxTagSelfClosingMarker => on_exit_mdx_jsx_tag_self_closing_marker(context),
Name::ReferenceString => on_exit_reference_string(context),
@@ -499,27 +496,51 @@ fn on_enter_math_text(context: &mut CompileContext) {
/// Handle [`Enter`][Kind::Enter]:[`MdxEsm`][Name::MdxEsm].
fn on_enter_mdx_esm(context: &mut CompileContext) {
+ let result = collect(
+ context.events,
+ context.bytes,
+ context.index,
+ &[Name::MdxEsmData, Name::LineEnding],
+ &[Name::MdxEsm],
+ );
context.tail_push(Node::MdxjsEsm(MdxjsEsm {
- value: String::new(),
+ value: result.value,
position: None,
+ stops: result.stops,
}));
context.buffer();
}
/// Handle [`Enter`][Kind::Enter]:[`MdxFlowExpression`][Name::MdxFlowExpression].
fn on_enter_mdx_flow_expression(context: &mut CompileContext) {
+ let result = collect(
+ context.events,
+ context.bytes,
+ context.index,
+ &[Name::MdxExpressionData, Name::LineEnding],
+ &[Name::MdxFlowExpression],
+ );
context.tail_push(Node::MdxFlowExpression(MdxFlowExpression {
- value: String::new(),
+ value: result.value,
position: None,
+ stops: result.stops,
}));
context.buffer();
}
/// Handle [`Enter`][Kind::Enter]:[`MdxTextExpression`][Name::MdxTextExpression].
fn on_enter_mdx_text_expression(context: &mut CompileContext) {
+ let result = collect(
+ context.events,
+ context.bytes,
+ context.index,
+ &[Name::MdxExpressionData, Name::LineEnding],
+ &[Name::MdxTextExpression],
+ );
context.tail_push(Node::MdxTextExpression(MdxTextExpression {
- value: String::new(),
+ value: result.value,
position: None,
+ stops: result.stops,
}));
context.buffer();
}
@@ -801,18 +822,50 @@ fn on_enter_mdx_jsx_tag_attribute(context: &mut CompileContext) -> Result<(), St
fn on_enter_mdx_jsx_tag_attribute_expression(context: &mut CompileContext) -> Result<(), String> {
on_enter_mdx_jsx_tag_any_attribute(context)?;
+ let result = collect(
+ context.events,
+ context.bytes,
+ context.index,
+ &[Name::MdxExpressionData, Name::LineEnding],
+ &[Name::MdxJsxTagAttributeExpression],
+ );
context
.jsx_tag
.as_mut()
.expect("expected tag")
.attributes
- .push(AttributeContent::Expression(String::new()));
+ .push(AttributeContent::Expression(result.value, result.stops));
context.buffer();
Ok(())
}
+/// Handle [`Enter`][Kind::Enter]:[`MdxJsxTagAttributeValueExpression`][Name::MdxJsxTagAttributeValueExpression].
+///
+/// Collects the expression (data and line endings) up front, stores it with
+/// its stops as the value of the last attribute, and then starts a buffer.
+fn on_enter_mdx_jsx_tag_attribute_value_expression(context: &mut CompileContext) {
+    // Gather the source text and the stops that map it back into the document.
+    let collected = collect(
+        context.events,
+        context.bytes,
+        context.index,
+        &[Name::MdxExpressionData, Name::LineEnding],
+        &[Name::MdxJsxTagAttributeValueExpression],
+    );
+    let tag = context.jsx_tag.as_mut().expect("expected tag");
+
+    // A value expression can only follow a property, so anything else is a bug.
+    match tag.attributes.last_mut() {
+        Some(AttributeContent::Property(property)) => {
+            property.value = Some(AttributeValue::Expression(collected.value, collected.stops));
+        }
+        _ => unreachable!("expected property"),
+    }
+
+    context.buffer();
+}
+
/// Handle [`Enter`][Kind::Enter]:[`MdxJsxTagSelfClosingMarker`][Name::MdxJsxTagSelfClosingMarker].
fn on_enter_mdx_jsx_tag_self_closing_marker(context: &mut CompileContext) -> Result<(), String> {
let tag = context.jsx_tag.as_ref().expect("expected tag");
@@ -1086,6 +1139,11 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {
}
}
+/// Handle [`Exit`][Kind::Exit]:*, by resuming from the current buffer and discarding the result of `context.resume()`.
+fn on_exit_drop(context: &mut CompileContext) {
+ context.resume(); // Return value intentionally unused: the buffered content is thrown away.
+}
+
/// Handle [`Exit`][Kind::Exit]:[`Frontmatter`][Name::Frontmatter].
fn on_exit_frontmatter(context: &mut CompileContext) -> Result<(), String> {
let value = trim_eol(context.resume().to_string(), true, true);
@@ -1280,20 +1338,16 @@ fn on_exit_line_ending(context: &mut CompileContext) -> Result<(), String> {
Ok(())
}
-/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlow`][Name::HtmlFlow],[`MdxFlowExpression`][Name::MdxFlowExpression],etc}.
-fn on_exit_literal(context: &mut CompileContext) -> Result<(), String> {
+/// Handle [`Exit`][Kind::Exit]:{[`HtmlFlow`][Name::HtmlFlow],[`HtmlText`][Name::HtmlText]}.
+fn on_exit_html(context: &mut CompileContext) -> Result<(), String> {
let value = context.resume().to_string();
match context.tail_mut() {
Node::Html(node) => node.value = value,
- Node::MdxFlowExpression(node) => node.value = value,
- Node::MdxTextExpression(node) => node.value = value,
- Node::MdxjsEsm(node) => node.value = value,
- _ => unreachable!("expected html, mdx expression, etc on stack for value"),
+ _ => unreachable!("expected html on stack for value"),
}
on_exit(context)?;
-
Ok(())
}
@@ -1483,26 +1537,13 @@ fn on_exit_mdx_jsx_tag_name_local(context: &mut CompileContext) {
name.push_str(slice.as_str());
}
-/// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagAttributeExpression`][Name::MdxJsxTagAttributeExpression].
-fn on_exit_mdx_jsx_tag_attribute_expression(context: &mut CompileContext) {
- let value = context.resume();
-
- if let Some(AttributeContent::Expression(expression)) = context
- .jsx_tag
- .as_mut()
- .expect("expected tag")
- .attributes
- .last_mut()
- {
- expression.push_str(value.to_string().as_str());
- } else {
- unreachable!("expected expression")
- }
+/// Handle [`Exit`][Kind::Exit]:{[`MdxEsm`][Name::MdxEsm],[`MdxFlowExpression`][Name::MdxFlowExpression],[`MdxTextExpression`][Name::MdxTextExpression]}.
+fn on_exit_mdx_esm_or_expression(context: &mut CompileContext) -> Result<(), String> {
+ on_exit_drop(context); // Discard the buffer: the enter handlers already set `value` and `stops` from `collect`.
+ context.tail_pop()?; // Pop the node that the enter handler pushed; an error from `tail_pop` is propagated.
+ Ok(())
}
-// Name:: => (context),
-// Name:: => (context),
-
/// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagAttributePrimaryName`][Name::MdxJsxTagAttributePrimaryName].
fn on_exit_mdx_jsx_tag_attribute_primary_name(context: &mut CompileContext) {
let slice = Slice::from_position(
@@ -1563,23 +1604,6 @@ fn on_exit_mdx_jsx_tag_attribute_value_literal(context: &mut CompileContext) {
}
}
-/// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagAttributeValueExpression`][Name::MdxJsxTagAttributeValueExpression].
-fn on_exit_mdx_jsx_tag_attribute_value_expression(context: &mut CompileContext) {
- let value = context.resume();
-
- if let Some(AttributeContent::Property(node)) = context
- .jsx_tag
- .as_mut()
- .expect("expected tag")
- .attributes
- .last_mut()
- {
- node.value = Some(AttributeValue::Expression(value.to_string()));
- } else {
- unreachable!("expected property")
- }
-}
-
/// Handle [`Exit`][Kind::Exit]:[`MdxJsxTagSelfClosingMarker`][Name::MdxJsxTagSelfClosingMarker].
fn on_exit_mdx_jsx_tag_self_closing_marker(context: &mut CompileContext) {
context.jsx_tag.as_mut().expect("expected tag").self_closing = true;
@@ -1625,8 +1649,13 @@ fn on_exit_resource_title_string(context: &mut CompileContext) {
}
// Create a point from an event point.
+fn point_from_event_point(point: &EventPoint) -> Point {
+ Point::new(point.line, point.column, point.index)
+}
+
+// Create a point from an event.
fn point_from_event(event: &Event) -> Point {
- Point::new(event.point.line, event.point.column, event.point.index)
+ point_from_event_point(&event.point)
}
// Create a position from an event.
diff --git a/src/util/location.rs b/src/util/location.rs
new file mode 100644
index 0000000..0c9c426
--- /dev/null
+++ b/src/util/location.rs
@@ -0,0 +1,111 @@
+//! Deal with positions in a file.
+//!
+//! * Convert between byte indices and unist points.
+//! * Convert between byte indices into a string which is built up of several
+//! slices in a whole document, and byte indices into that whole document.
+
+use crate::unist::Point;
+use alloc::{vec, vec::Vec};
+
+/// Each stop represents a new slice, which contains the byte index into the
+/// corresponding string where the slice starts (`0`), and the byte index into
+/// the whole document where that slice starts (`1`).
+pub type Stop = (usize, usize);
+
+/// Index of where lines end in a document, used to turn byte offsets into
+/// line- and column-based points.
+#[derive(Debug)]
+pub struct Location {
+    /// List, where each index is a line number (0-based), and each value is
+    /// the byte index *after* where the line ends.
+    indices: Vec<usize>,
+}
+
+impl Location {
+    /// Get an index for the given `bytes`.
+    ///
+    /// Line endings are `\n`, `\r\n`, and a lone `\r`.
+    /// A `\r\n` pair counts as a single line ending.
+    ///
+    /// Port of <https://github.com/vfile/vfile-location/blob/main/index.js>
+    #[must_use]
+    pub fn new(bytes: &[u8]) -> Self {
+        let mut index = 0;
+        let mut location_index = Self { indices: vec![] };
+
+        while index < bytes.len() {
+            if bytes[index] == b'\r' {
+                if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
+                    location_index.indices.push(index + 2);
+                    // Skip the `\n` of this pair, otherwise it would be
+                    // recorded as a second line ending and every CRLF line
+                    // would be counted twice.
+                    index += 1;
+                } else {
+                    location_index.indices.push(index + 1);
+                }
+            } else if bytes[index] == b'\n' {
+                location_index.indices.push(index + 1);
+            }
+
+            index += 1;
+        }
+
+        // Entry for the final line: one past EOF, so that an offset equal to
+        // `bytes.len()` is still in bounds.
+        location_index.indices.push(index + 1);
+        location_index
+    }
+
+    /// Get the line and column-based `point` for `offset` in the bound indices.
+    ///
+    /// Lines and columns are 1-based; `offset` is kept as given.
+    ///
+    /// Returns `None` when given out of bounds input.
+    ///
+    /// Port of <https://github.com/vfile/vfile-location/blob/main/index.js>
+    #[must_use]
+    pub fn to_point(&self, offset: usize) -> Option<Point> {
+        let end = *self.indices.last()?;
+
+        if offset >= end {
+            return None;
+        }
+
+        // Number of lines that end at or before `offset`, which is the
+        // 0-based line that `offset` is on.
+        let line = self
+            .indices
+            .iter()
+            .take_while(|&&line_end| line_end <= offset)
+            .count();
+        // Byte index at which the current line starts.
+        let previous = if line > 0 { self.indices[line - 1] } else { 0 };
+
+        Some(Point {
+            line: line + 1,
+            column: offset + 1 - previous,
+            offset,
+        })
+    }
+
+    /// Like `to_point`, but takes a relative offset from a certain string
+    /// instead of an absolute offset into the whole document.
+    ///
+    /// The relative offset is made absolute based on `stops`, which represent
+    /// where that certain string is in the whole document.
+    ///
+    /// Returns `None` when `stops` has no stop at or before `relative`, or
+    /// when the resulting absolute offset is out of bounds.
+    #[must_use]
+    pub fn relative_to_point(&self, stops: &[Stop], relative: usize) -> Option<Point> {
+        Location::relative_to_absolute(stops, relative).and_then(|absolute| self.to_point(absolute))
+    }
+
+    /// Turn a relative offset into an absolute offset.
+    ///
+    /// Uses the last stop of the leading run of stops that start at or
+    /// before `relative`.
+    ///
+    /// Returns `None` if there is no such stop: that only occurs if there
+    /// was an empty string.
+    #[must_use]
+    pub fn relative_to_absolute(stops: &[Stop], relative: usize) -> Option<usize> {
+        stops
+            .iter()
+            .take_while(|stop| stop.0 <= relative)
+            .last()
+            .map(|(stop_relative, stop_absolute)| stop_absolute + (relative - stop_relative))
+    }
+}
diff --git a/src/util/mdx_collect.rs b/src/util/mdx_collect.rs
index 73ead51..02921a4 100644
--- a/src/util/mdx_collect.rs
+++ b/src/util/mdx_collect.rs
@@ -1,70 +1,53 @@
//! Collect info for MDX.
-use crate::event::{Kind, Name, Point};
-use crate::tokenizer::Tokenizer;
+use crate::event::{Event, Kind, Name};
use crate::util::slice::{Position, Slice};
use alloc::{string::String, vec, vec::Vec};
-pub type Location<'a> = (usize, &'a Point);
+pub type Stop = (usize, usize);
-pub struct Result<'a> {
- pub start: &'a Point,
+#[derive(Debug)]
+pub struct Result {
pub value: String,
- pub locations: Vec<Location<'a>>,
+ pub stops: Vec<Stop>,
}
-pub fn collect<'a>(tokenizer: &'a Tokenizer, from: usize, names: &[Name]) -> Result<'a> {
+pub fn collect(
+ events: &[Event],
+ bytes: &[u8],
+ from: usize,
+ names: &[Name],
+ stop: &[Name],
+) -> Result {
let mut result = Result {
- start: &tokenizer.events[from].point,
value: String::new(),
- locations: vec![],
+ stops: vec![],
};
let mut index = from;
- let mut acc = 0;
- while index < tokenizer.events.len() {
- if tokenizer.events[index].kind == Kind::Enter
- && names.contains(&tokenizer.events[index].name)
- {
- // Include virtual spaces.
- let value = Slice::from_position(
- tokenizer.parse_state.bytes,
- &Position {
- start: &tokenizer.events[index].point,
- end: &tokenizer.events[index + 1].point,
- },
- )
- .serialize();
- acc += value.len();
- result.locations.push((acc, &tokenizer.events[index].point));
- result.value.push_str(&value);
- }
-
- index += 1;
- }
-
- result
-}
-
-// Turn an index of `result.value` into a point in the whole document.
-pub fn place_to_point(result: &Result, place: usize) -> Point {
- let mut index = 0;
- let mut point = result.start;
- let mut rest = place;
-
- while index < result.locations.len() {
- point = result.locations[index].1;
-
- if result.locations[index].0 > place {
+ while index < events.len() {
+ if events[index].kind == Kind::Enter {
+ if names.contains(&events[index].name) {
+ // Include virtual spaces, and assume void.
+ let value = Slice::from_position(
+ bytes,
+ &Position {
+ start: &events[index].point,
+ end: &events[index + 1].point,
+ },
+ )
+ .serialize();
+ result
+ .stops
+ .push((result.value.len(), events[index].point.index));
+ result.value.push_str(&value);
+ }
+ } else if stop.contains(&events[index].name) {
break;
}
- rest = place - result.locations[index].0;
index += 1;
}
- let mut point = point.clone();
- point.column += rest;
- point.index += rest;
- point
+ result
}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index ac93be0..f44e183 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -8,6 +8,7 @@ pub mod encode;
pub mod gfm_tagfilter;
pub mod identifier;
pub mod infer;
+pub mod location;
pub mod mdx_collect;
pub mod normalize_identifier;
pub mod sanitize_uri;