//! Utilities to deal with semantic labels. use crate::tokenizer::{Code, Event, EventType}; /// A struct representing the span of an opening and closing event of a token. #[derive(Debug)] pub struct Span { // To do: probably needed in the future. // start: Point, /// Absolute offset (and `index` in `codes`) of where this span starts. pub start_index: usize, // To do: probably needed in the future. // end: Point, /// Absolute offset (and `index` in `codes`) of where this span ends. pub end_index: usize, // To do: probably needed in the future. // token_type: TokenType, } /// Get a span from an event. /// /// Get the span of an `exit` event, by looking backwards through the events to /// find the corresponding `enter` event. /// This assumes that tokens with the same are not nested. /// /// ## Panics /// /// This function panics if an enter event is given. /// When `micromark` is used, this function never panics. pub fn from_exit_event(events: &[Event], index: usize) -> Span { let exit = &events[index]; // let end = exit.point.clone(); let end_index = exit.index; let token_type = exit.token_type.clone(); // To do: support `enter` events if needed and walk forwards? assert_eq!( exit.event_type, EventType::Exit, "expected `get_span` to be called on `exit` event" ); let mut enter_index = index - 1; loop { let enter = &events[enter_index]; if enter.event_type == EventType::Enter && enter.token_type == token_type { return Span { // start: enter.point.clone(), start_index: enter.index, // end, end_index, // token_type, }; } enter_index -= 1; } } /// Serialize a span, optionally expanding tabs. pub fn serialize(all_codes: &[Code], span: &Span, expand_tabs: bool) -> String { serialize_codes(codes(all_codes, span), expand_tabs) } /// Get a slice of codes from a span. pub fn codes<'a>(codes: &'a [Code], span: &Span) -> &'a [Code] { &codes[span.start_index..span.end_index] } /// Serialize a slice of codes, optionally expanding tabs. fn serialize_codes(codes: &[Code], expand_tabs: bool) -> String { let mut at_tab = false; let mut index = 0; let mut value: Vec = vec![]; while index < codes.len() { let code = codes[index]; let mut at_tab_next = false; match code { Code::CarriageReturnLineFeed => { value.push('\r'); value.push('\n'); } Code::Char(char) if char == '\n' || char == '\r' => { value.push(char); } Code::Char(char) if char == '\t' => { at_tab_next = true; value.push(if expand_tabs { ' ' } else { char }); } Code::VirtualSpace => { if !expand_tabs && at_tab { index += 1; continue; } value.push(' '); } Code::Char(char) => { value.push(char); } Code::None => { unreachable!("unexpected EOF code in codes"); } } at_tab = at_tab_next; index += 1; } value.into_iter().collect() }