diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-14 13:47:32 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-14 13:55:03 +0200 |
commit | a1ce43e428754084474a7ecf88ae6debf88b9164 (patch) | |
tree | a1df0ec515f624431d3e398f7d24e7b411c18e6e /src/util/span.rs | |
parent | c587aee9512119e61918bfbe81c3cca3de7e70aa (diff) | |
download | markdown-rs-a1ce43e428754084474a7ecf88ae6debf88b9164.tar.gz markdown-rs-a1ce43e428754084474a7ecf88ae6debf88b9164.tar.bz2 markdown-rs-a1ce43e428754084474a7ecf88ae6debf88b9164.zip |
Reorganize to split util
Diffstat (limited to 'src/util/span.rs')
-rw-r--r-- | src/util/span.rs | 112 |
1 files changed, 112 insertions, 0 deletions
diff --git a/src/util/span.rs b/src/util/span.rs new file mode 100644 index 0000000..c48549b --- /dev/null +++ b/src/util/span.rs @@ -0,0 +1,112 @@ +//! Utilities to deal with semantic labels. + +use crate::tokenizer::{Code, Event, EventType}; + +/// A struct representing the span of an opening and closing event of a token. +#[derive(Debug)] +pub struct Span { + // To do: probably needed in the future. + // start: Point, + /// Absolute offset (and `index` in `codes`) of where this span starts. + pub start_index: usize, + // To do: probably needed in the future. + // end: Point, + /// Absolute offset (and `index` in `codes`) of where this span ends. + pub end_index: usize, + // To do: probably needed in the future. + // token_type: TokenType, +} + +/// Get a span from an event. +/// +/// Get the span of an `exit` event, by looking backwards through the events to +/// find the corresponding `enter` event. +/// This assumes that tokens with the same are not nested. +/// +/// ## Panics +/// +/// This function panics if an enter event is given. +/// When `micromark` is used, this function never panics. +pub fn from_exit_event(events: &[Event], index: usize) -> Span { + let exit = &events[index]; + // let end = exit.point.clone(); + let end_index = exit.index; + let token_type = exit.token_type.clone(); + // To do: support `enter` events if needed and walk forwards? + assert_eq!( + exit.event_type, + EventType::Exit, + "expected `get_span` to be called on `exit` event" + ); + let mut enter_index = index - 1; + + loop { + let enter = &events[enter_index]; + if enter.event_type == EventType::Enter && enter.token_type == token_type { + return Span { + // start: enter.point.clone(), + start_index: enter.index, + // end, + end_index, + // token_type, + }; + } + + enter_index -= 1; + } +} + +/// Serialize a span, optionally expanding tabs. +pub fn serialize(all_codes: &[Code], span: &Span, expand_tabs: bool) -> String { + serialize_codes(codes(all_codes, span), expand_tabs) +} + +/// Get a slice of codes from a span. +pub fn codes<'a>(codes: &'a [Code], span: &Span) -> &'a [Code] { + &codes[span.start_index..span.end_index] +} + +/// Serialize a slice of codes, optionally expanding tabs. +fn serialize_codes(codes: &[Code], expand_tabs: bool) -> String { + let mut at_tab = false; + let mut index = 0; + let mut value: Vec<char> = vec![]; + + while index < codes.len() { + let code = codes[index]; + let mut at_tab_next = false; + + match code { + Code::CarriageReturnLineFeed => { + value.push('\r'); + value.push('\n'); + } + Code::Char(char) if char == '\n' || char == '\r' => { + value.push(char); + } + Code::Char(char) if char == '\t' => { + at_tab_next = true; + value.push(if expand_tabs { ' ' } else { char }); + } + Code::VirtualSpace => { + if !expand_tabs && at_tab { + index += 1; + continue; + } + value.push(' '); + } + Code::Char(char) => { + value.push(char); + } + Code::None => { + unreachable!("unexpected EOF code in codes"); + } + } + + at_tab = at_tab_next; + + index += 1; + } + + value.into_iter().collect() +} |