aboutsummaryrefslogblamecommitdiffstats
path: root/src/util/span.rs
blob: c48549bb70bc46a460093eb9b5e00ebd3495f26e (plain) (tree)















































































































                                                                                   
//! Utilities to deal with semantic labels.

use crate::tokenizer::{Code, Event, EventType};

/// A struct representing the span of an opening and closing event of a token.
#[derive(Debug)]
pub struct Span {
    // To do: probably needed in the future.
    // start: Point,
    /// Absolute offset (and `index` in `codes`) of where this span starts.
    pub start_index: usize,
    // To do: probably needed in the future.
    // end: Point,
    /// Absolute offset (and `index` in `codes`) of where this span ends.
    pub end_index: usize,
    // To do: probably needed in the future.
    // token_type: TokenType,
}

/// Get a span from an event.
///
/// Get the span of an `exit` event, by looking backwards through the events to
/// find the corresponding `enter` event.
/// This assumes that tokens with the same are not nested.
///
/// ## Panics
///
/// This function panics if an enter event is given.
/// When `micromark` is used, this function never panics.
pub fn from_exit_event(events: &[Event], index: usize) -> Span {
    let exit = &events[index];
    // let end = exit.point.clone();
    let end_index = exit.index;
    let token_type = exit.token_type.clone();
    // To do: support `enter` events if needed and walk forwards?
    assert_eq!(
        exit.event_type,
        EventType::Exit,
        "expected `get_span` to be called on `exit` event"
    );
    let mut enter_index = index - 1;

    loop {
        let enter = &events[enter_index];
        if enter.event_type == EventType::Enter && enter.token_type == token_type {
            return Span {
                // start: enter.point.clone(),
                start_index: enter.index,
                // end,
                end_index,
                // token_type,
            };
        }

        enter_index -= 1;
    }
}

/// Serialize a span, optionally expanding tabs.
pub fn serialize(all_codes: &[Code], span: &Span, expand_tabs: bool) -> String {
    serialize_codes(codes(all_codes, span), expand_tabs)
}

/// Get a slice of codes from a span.
pub fn codes<'a>(codes: &'a [Code], span: &Span) -> &'a [Code] {
    &codes[span.start_index..span.end_index]
}

/// Serialize a slice of codes, optionally expanding tabs.
fn serialize_codes(codes: &[Code], expand_tabs: bool) -> String {
    let mut at_tab = false;
    let mut index = 0;
    let mut value: Vec<char> = vec![];

    while index < codes.len() {
        let code = codes[index];
        let mut at_tab_next = false;

        match code {
            Code::CarriageReturnLineFeed => {
                value.push('\r');
                value.push('\n');
            }
            Code::Char(char) if char == '\n' || char == '\r' => {
                value.push(char);
            }
            Code::Char(char) if char == '\t' => {
                at_tab_next = true;
                value.push(if expand_tabs { ' ' } else { char });
            }
            Code::VirtualSpace => {
                if !expand_tabs && at_tab {
                    index += 1;
                    continue;
                }
                value.push(' ');
            }
            Code::Char(char) => {
                value.push(char);
            }
            Code::None => {
                unreachable!("unexpected EOF code in codes");
            }
        }

        at_tab = at_tab_next;

        index += 1;
    }

    value.into_iter().collect()
}