diff options
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/location.rs | 111 | ||||
-rw-r--r-- | src/util/mdx_collect.rs | 81 | ||||
-rw-r--r-- | src/util/mod.rs | 1 |
3 files changed, 144 insertions, 49 deletions
diff --git a/src/util/location.rs b/src/util/location.rs new file mode 100644 index 0000000..0c9c426 --- /dev/null +++ b/src/util/location.rs @@ -0,0 +1,111 @@ +//! Deal with positions in a file. +//! +//! * Convert between byte indices and unist points. +//! * Convert between byte indices into a string which is built up of several +//! slices in a whole document, and byte indices into that whole document. + +use crate::unist::Point; +use alloc::{vec, vec::Vec}; + +/// Each stop represents a new slice, which contains the byte index into the +/// corresponding string where the slice starts (`0`), and the byte index into +/// the whole document where that slice starts (`1`). +pub type Stop = (usize, usize); + +#[derive(Debug)] +pub struct Location { + /// List, where each index is a line number (0-based), and each value is + /// the byte index *after* where the line ends. + indices: Vec<usize>, +} + +impl Location { + /// Get an index for the given `bytes`. + /// + /// Port of <https://github.com/vfile/vfile-location/blob/main/index.js> + #[must_use] + pub fn new(bytes: &[u8]) -> Self { + let mut index = 0; + let mut location_index = Self { indices: vec![] }; + + while index < bytes.len() { + if bytes[index] == b'\r' { + if index + 1 < bytes.len() && bytes[index + 1] == b'\n' { + location_index.indices.push(index + 2); + } else { + location_index.indices.push(index + 1); + } + } else if bytes[index] == b'\n' { + location_index.indices.push(index + 1); + } + + index += 1; + } + + location_index.indices.push(index + 1); + location_index + } + + /// Get the line and column-based `point` for `offset` in the bound indices. + /// + /// Returns `None` when given out of bounds input. + /// + /// Port of <https://github.com/vfile/vfile-location/blob/main/index.js> + #[must_use] + pub fn to_point(&self, offset: usize) -> Option<Point> { + let mut index = 0; + + if let Some(end) = self.indices.last() { + if offset < *end { + while index < self.indices.len() { + if self.indices[index] > offset { + break; + } + + index += 1; + } + + let previous = if index > 0 { + self.indices[index - 1] + } else { + 0 + }; + return Some(Point { + line: index + 1, + column: offset + 1 - previous, + offset, + }); + } + } + + None + } + + /// Like `to_point`, but takes a relative offset from a certain string + /// instead of an absolute offset into the whole document. + /// + /// The relative offset is made absolute based on `stops`, which represent + /// where that certain string is in the whole document. + #[must_use] + pub fn relative_to_point(&self, stops: &[Stop], relative: usize) -> Option<Point> { + Location::relative_to_absolute(stops, relative).and_then(|absolute| self.to_point(absolute)) + } + + /// Turn a relative offset into an absolute offset. + #[must_use] + pub fn relative_to_absolute(stops: &[Stop], relative: usize) -> Option<usize> { + let mut index = 0; + + while index < stops.len() && stops[index].0 <= relative { + index += 1; + } + + // There are no points: that only occurs if there was an empty string. + if index == 0 { + None + } else { + let (stop_relative, stop_absolute) = &stops[index - 1]; + Some(stop_absolute + (relative - stop_relative)) + } + } +} diff --git a/src/util/mdx_collect.rs b/src/util/mdx_collect.rs index 73ead51..02921a4 100644 --- a/src/util/mdx_collect.rs +++ b/src/util/mdx_collect.rs @@ -1,70 +1,53 @@ //! Collect info for MDX. -use crate::event::{Kind, Name, Point}; -use crate::tokenizer::Tokenizer; +use crate::event::{Event, Kind, Name}; use crate::util::slice::{Position, Slice}; use alloc::{string::String, vec, vec::Vec}; -pub type Location<'a> = (usize, &'a Point); +pub type Stop = (usize, usize); -pub struct Result<'a> { - pub start: &'a Point, +#[derive(Debug)] +pub struct Result { pub value: String, - pub locations: Vec<Location<'a>>, + pub stops: Vec<Stop>, } -pub fn collect<'a>(tokenizer: &'a Tokenizer, from: usize, names: &[Name]) -> Result<'a> { +pub fn collect( + events: &[Event], + bytes: &[u8], + from: usize, + names: &[Name], + stop: &[Name], +) -> Result { let mut result = Result { - start: &tokenizer.events[from].point, value: String::new(), - locations: vec![], + stops: vec![], }; let mut index = from; - let mut acc = 0; - while index < tokenizer.events.len() { - if tokenizer.events[index].kind == Kind::Enter - && names.contains(&tokenizer.events[index].name) - { - // Include virtual spaces. - let value = Slice::from_position( - tokenizer.parse_state.bytes, - &Position { - start: &tokenizer.events[index].point, - end: &tokenizer.events[index + 1].point, - }, - ) - .serialize(); - acc += value.len(); - result.locations.push((acc, &tokenizer.events[index].point)); - result.value.push_str(&value); - } - - index += 1; - } - - result -} - -// Turn an index of `result.value` into a point in the whole document. -pub fn place_to_point(result: &Result, place: usize) -> Point { - let mut index = 0; - let mut point = result.start; - let mut rest = place; - - while index < result.locations.len() { - point = result.locations[index].1; - - if result.locations[index].0 > place { + while index < events.len() { + if events[index].kind == Kind::Enter { + if names.contains(&events[index].name) { + // Include virtual spaces, and assume void. + let value = Slice::from_position( + bytes, + &Position { + start: &events[index].point, + end: &events[index + 1].point, + }, + ) + .serialize(); + result + .stops + .push((result.value.len(), events[index].point.index)); + result.value.push_str(&value); + } + } else if stop.contains(&events[index].name) { break; } - rest = place - result.locations[index].0; index += 1; } - let mut point = point.clone(); - point.column += rest; - point.index += rest; - point + result } diff --git a/src/util/mod.rs b/src/util/mod.rs index ac93be0..f44e183 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -8,6 +8,7 @@ pub mod encode; pub mod gfm_tagfilter; pub mod identifier; pub mod infer; +pub mod location; pub mod mdx_collect; pub mod normalize_identifier; pub mod sanitize_uri; |