aboutsummaryrefslogblamecommitdiffstats
path: root/src/util/slice.rs
blob: d02a526162d80c441e2826d5d1991cb376d7cb33 (plain) (tree)
1
2
3
4
5
6
7
                    
 
                                       
                                    

                          
 





















                                                                           
                               

                         
                    
                         
                  














                                                                               
                         

                       





                                                                     
                                                                     








                                        
 




                                                                          


                                                

 
                               
   
                                                               

                      
              
                        
                                                  
                      
                                                 



                     
                                       
                                                                    

                                    




                                          








                                                                            
                                                                      




                     
                                       

                                                                 
                                                                   
                                                    

     
                                   
                                                                             



















                                                                                
                                      




                   
                                    

                                                                 





                                                                                 

     
                                                             


                                                   
 

                                                                           
                                      
                            

                                         

                
                               


         

                                     
                                                         



                                           


                                       
                                       
                                                           




                                    
                                       








                             
//! Deal with bytes.

use crate::event::{Event, Kind, Point};
use crate::util::constant::TAB_SIZE;
use alloc::string::String;
use core::str;

/// Get a [`char`][] right before `index` in bytes (`&[u8]`).
///
/// In most cases, markdown operates on ASCII bytes.
/// In a few cases, it is unicode aware, so we need to find an actual char.
pub fn char_before_index(bytes: &[u8], index: usize) -> Option<char> {
    let start = if index < 4 { 0 } else { index - 4 };
    String::from_utf8_lossy(&bytes[start..index]).chars().last()
}

/// Get a [`char`][] right at `index` in bytes (`&[u8]`).
///
/// In most cases, markdown operates on ASCII bytes.
/// In a few cases, it is unicode aware, so we need to find an actual char.
pub fn char_after_index(bytes: &[u8], index: usize) -> Option<char> {
    let end = if index + 4 > bytes.len() {
        bytes.len()
    } else {
        index + 4
    };
    String::from_utf8_lossy(&bytes[index..end]).chars().next()
}

/// A range between two points.
#[derive(Debug)]
pub struct Position<'a> {
    /// Start point.
    pub start: &'a Point,
    /// End point.
    pub end: &'a Point,
}

impl<'a> Position<'a> {
    /// Get a position from an exit event.
    ///
    /// Looks backwards for the corresponding `enter` event.
    /// This does not support nested events (such as lists in lists).
    ///
    /// ## Panics
    ///
    /// This function panics if an enter event is given.
    /// When `micromark` is used, this function never panics.
    pub fn from_exit_event(events: &'a [Event], index: usize) -> Position<'a> {
        let exit = &events[index];
        debug_assert_eq!(
            exit.kind,
            Kind::Exit,
            "expected `from_exit_event` to be called on `exit` event"
        );
        let mut enter_index = index - 1;

        loop {
            let enter = &events[enter_index];
            if enter.kind == Kind::Enter && enter.name == exit.name {
                return Position {
                    start: &enter.point,
                    end: &exit.point,
                };
            }

            enter_index -= 1;
        }
    }

    /// Turn a position into indices.
    ///
    /// Indices are places in `bytes` where this position starts and ends.
    ///
    /// > 👉 **Note**: indices cannot represent virtual spaces.
    pub fn to_indices(&self) -> (usize, usize) {
        (self.start.index, self.end.index)
    }
}

/// Bytes belonging to a range.
///
/// Includes info on virtual spaces before and after the bytes.
#[derive(Debug)]
pub struct Slice<'a> {
    /// Bytes.
    pub bytes: &'a [u8],
    /// Number of virtual spaces before the bytes.
    pub before: usize,
    /// Number of virtual spaces after the bytes.
    pub after: usize,
}

impl<'a> Slice<'a> {
    /// Get a slice for a single point.
    pub fn from_point(bytes: &'a [u8], point: &Point) -> Slice<'a> {
        let mut before = point.vs;
        let mut start = point.index;
        let end = if start < bytes.len() {
            start + 1
        } else {
            start
        };

        // If we have virtual spaces before, it means we are past the actual
        // character at that index, and those virtual spaces.
        if before > 0 {
            before = TAB_SIZE - before;
            start += 1;
        };

        Slice {
            bytes: if start < end { &bytes[start..end] } else { &[] },
            before,
            after: 0,
        }
    }

    /// Get a slice for a single index.
    ///
    /// > 👉 **Note**: indices cannot represent virtual spaces.
    pub fn from_index(bytes: &'a [u8], index: usize) -> Slice<'a> {
        Slice::from_indices(bytes, index, index + 1)
    }

    /// Get a slice for a position.
    pub fn from_position(bytes: &'a [u8], position: &Position) -> Slice<'a> {
        let mut before = position.start.vs;
        let mut after = position.end.vs;
        let mut start = position.start.index;
        let mut end = position.end.index;

        // If we have virtual spaces before, it means we are past the actual
        // character at that index, and those virtual spaces.
        if before > 0 {
            before = TAB_SIZE - before;
            start += 1;
        };

        // If we have virtual spaces after, it means that character is included,
        // and one less virtual space.
        if after > 0 {
            after -= 1;
            end += 1;
        }

        Slice {
            bytes: &bytes[start..end],
            before,
            after,
        }
    }

    /// Get a slice for two indices.
    ///
    /// > 👉 **Note**: indices cannot represent virtual spaces.
    pub fn from_indices(bytes: &'a [u8], start: usize, end: usize) -> Slice<'a> {
        Slice {
            bytes: &bytes[start..end],
            before: 0,
            after: 0,
        }
    }

    /// Get the size of this slice, including virtual spaces.
    pub fn len(&self) -> usize {
        self.bytes.len() + self.before + self.after
    }

    /// Get the first byte in this slice, representing a virtual space as a
    /// space.
    pub fn head(&self) -> Option<u8> {
        if self.before > 0 {
            Some(b' ')
        } else if self.bytes.is_empty() {
            None
        } else {
            Some(self.bytes[0])
        }
    }

    /// Turn the slice into a `&str`.
    ///
    /// > 👉 **Note**: cannot represent virtual spaces.
    pub fn as_str(&self) -> &str {
        str::from_utf8(self.bytes).unwrap()
    }

    /// Turn the slice into a `String`.
    ///
    /// Support virtual spaces.
    pub fn serialize(&self) -> String {
        let mut string = String::with_capacity(self.len());
        let mut index = self.before;
        while index > 0 {
            string.push(' ');
            index -= 1;
        }
        string.push_str(self.as_str());
        index = self.after;
        while index > 0 {
            string.push(' ');
            index -= 1;
        }

        string
    }
}