aboutsummaryrefslogtreecommitdiffstats
path: root/src/util
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-10-11 09:54:56 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-10-11 09:55:16 +0200
commita4b56e7b971fa81c56a59b465f90c8016f01320d (patch)
tree7002a44087e57c8158a51dd30b6eb89eb260af2b /src/util
parent1fd94f512834aa7bd70f22a60229ce01edfc754e (diff)
downloadmarkdown-rs-a4b56e7b971fa81c56a59b465f90c8016f01320d.tar.gz
markdown-rs-a4b56e7b971fa81c56a59b465f90c8016f01320d.tar.bz2
markdown-rs-a4b56e7b971fa81c56a59b465f90c8016f01320d.zip
Add support for proper positional info in swc tree
* Fix some positional info in SWC error messages
* Add positional info in `to_document` on duplicate layouts
* Add support for `path` on `Program` (`to_swc`, `to_document`, `jsx_rewrite`), for the path of a file on disk
* Add support for `development` to `jsx-rewrite`, which when defined will embed info on where tags were written into the runtime code when they are not passed
* Refactor to move some utilities to `micromark_swc_utils.rs`, `swc_utils.rs`
Diffstat (limited to '')
-rw-r--r--src/util/location.rs111
-rw-r--r--src/util/mdx_collect.rs81
-rw-r--r--src/util/mod.rs1
3 files changed, 144 insertions, 49 deletions
diff --git a/src/util/location.rs b/src/util/location.rs
new file mode 100644
index 0000000..0c9c426
--- /dev/null
+++ b/src/util/location.rs
@@ -0,0 +1,111 @@
+//! Deal with positions in a file.
+//!
+//! * Convert between byte indices and unist points.
+//! * Convert between byte indices into a string which is built up of several
+//! slices in a whole document, and byte indices into that whole document.
+
+use crate::unist::Point;
+use alloc::{vec, vec::Vec};
+
+/// Each stop represents a new slice, which contains the byte index into the
+/// corresponding string where the slice starts (`0`), and the byte index into
+/// the whole document where that slice starts (`1`).
+pub type Stop = (usize, usize);
+
+#[derive(Debug)]
+pub struct Location {
+ /// List, where each index is a line number (0-based), and each value is
+ /// the byte index *after* where the line ends.
+ indices: Vec<usize>,
+}
+
+impl Location {
+ /// Get an index for the given `bytes`.
+ ///
+ /// Port of <https://github.com/vfile/vfile-location/blob/main/index.js>
+ #[must_use]
+ pub fn new(bytes: &[u8]) -> Self {
+ let mut index = 0;
+ let mut location_index = Self { indices: vec![] };
+
+ while index < bytes.len() {
+ if bytes[index] == b'\r' {
+ if index + 1 < bytes.len() && bytes[index + 1] == b'\n' {
+ location_index.indices.push(index + 2);
+ } else {
+ location_index.indices.push(index + 1);
+ }
+ } else if bytes[index] == b'\n' {
+ location_index.indices.push(index + 1);
+ }
+
+ index += 1;
+ }
+
+ location_index.indices.push(index + 1);
+ location_index
+ }
+
+ /// Get the line and column-based `point` for `offset` in the bound indices.
+ ///
+ /// Returns `None` when given out of bounds input.
+ ///
+ /// Port of <https://github.com/vfile/vfile-location/blob/main/index.js>
+ #[must_use]
+ pub fn to_point(&self, offset: usize) -> Option<Point> {
+ let mut index = 0;
+
+ if let Some(end) = self.indices.last() {
+ if offset < *end {
+ while index < self.indices.len() {
+ if self.indices[index] > offset {
+ break;
+ }
+
+ index += 1;
+ }
+
+ let previous = if index > 0 {
+ self.indices[index - 1]
+ } else {
+ 0
+ };
+ return Some(Point {
+ line: index + 1,
+ column: offset + 1 - previous,
+ offset,
+ });
+ }
+ }
+
+ None
+ }
+
+ /// Like `to_point`, but takes a relative offset from a certain string
+ /// instead of an absolute offset into the whole document.
+ ///
+ /// The relative offset is made absolute based on `stops`, which represent
+ /// where that certain string is in the whole document.
+ #[must_use]
+ pub fn relative_to_point(&self, stops: &[Stop], relative: usize) -> Option<Point> {
+ Location::relative_to_absolute(stops, relative).and_then(|absolute| self.to_point(absolute))
+ }
+
+ /// Turn a relative offset into an absolute offset.
+ #[must_use]
+ pub fn relative_to_absolute(stops: &[Stop], relative: usize) -> Option<usize> {
+ let mut index = 0;
+
+ while index < stops.len() && stops[index].0 <= relative {
+ index += 1;
+ }
+
+ // There are no points: that only occurs if there was an empty string.
+ if index == 0 {
+ None
+ } else {
+ let (stop_relative, stop_absolute) = &stops[index - 1];
+ Some(stop_absolute + (relative - stop_relative))
+ }
+ }
+}
diff --git a/src/util/mdx_collect.rs b/src/util/mdx_collect.rs
index 73ead51..02921a4 100644
--- a/src/util/mdx_collect.rs
+++ b/src/util/mdx_collect.rs
@@ -1,70 +1,53 @@
//! Collect info for MDX.
-use crate::event::{Kind, Name, Point};
-use crate::tokenizer::Tokenizer;
+use crate::event::{Event, Kind, Name};
use crate::util::slice::{Position, Slice};
use alloc::{string::String, vec, vec::Vec};
-pub type Location<'a> = (usize, &'a Point);
+pub type Stop = (usize, usize);
-pub struct Result<'a> {
- pub start: &'a Point,
+#[derive(Debug)]
+pub struct Result {
pub value: String,
- pub locations: Vec<Location<'a>>,
+ pub stops: Vec<Stop>,
}
-pub fn collect<'a>(tokenizer: &'a Tokenizer, from: usize, names: &[Name]) -> Result<'a> {
+pub fn collect(
+ events: &[Event],
+ bytes: &[u8],
+ from: usize,
+ names: &[Name],
+ stop: &[Name],
+) -> Result {
let mut result = Result {
- start: &tokenizer.events[from].point,
value: String::new(),
- locations: vec![],
+ stops: vec![],
};
let mut index = from;
- let mut acc = 0;
- while index < tokenizer.events.len() {
- if tokenizer.events[index].kind == Kind::Enter
- && names.contains(&tokenizer.events[index].name)
- {
- // Include virtual spaces.
- let value = Slice::from_position(
- tokenizer.parse_state.bytes,
- &Position {
- start: &tokenizer.events[index].point,
- end: &tokenizer.events[index + 1].point,
- },
- )
- .serialize();
- acc += value.len();
- result.locations.push((acc, &tokenizer.events[index].point));
- result.value.push_str(&value);
- }
-
- index += 1;
- }
-
- result
-}
-
-// Turn an index of `result.value` into a point in the whole document.
-pub fn place_to_point(result: &Result, place: usize) -> Point {
- let mut index = 0;
- let mut point = result.start;
- let mut rest = place;
-
- while index < result.locations.len() {
- point = result.locations[index].1;
-
- if result.locations[index].0 > place {
+ while index < events.len() {
+ if events[index].kind == Kind::Enter {
+ if names.contains(&events[index].name) {
+ // Include virtual spaces, and assume void.
+ let value = Slice::from_position(
+ bytes,
+ &Position {
+ start: &events[index].point,
+ end: &events[index + 1].point,
+ },
+ )
+ .serialize();
+ result
+ .stops
+ .push((result.value.len(), events[index].point.index));
+ result.value.push_str(&value);
+ }
+ } else if stop.contains(&events[index].name) {
break;
}
- rest = place - result.locations[index].0;
index += 1;
}
- let mut point = point.clone();
- point.column += rest;
- point.index += rest;
- point
+ result
}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index ac93be0..f44e183 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -8,6 +8,7 @@ pub mod encode;
pub mod gfm_tagfilter;
pub mod identifier;
pub mod infer;
+pub mod location;
pub mod mdx_collect;
pub mod normalize_identifier;
pub mod sanitize_uri;