// Origin: commit 16de10fe2395002644d685fdfcf76823346d1cc4
// ("Add support for getting `char`s from bytes"), file `src/util/slice.rs`.
//
// NOTE(review): the surrounding file imports `alloc::string::String`; in a
// `no_std` build that import must stay in scope for the functions below.

/// Get the [`char`] right before `index` in bytes (`&[u8]`).
///
/// In most cases, markdown operates on ASCII bytes.
/// In a few cases, it is unicode aware, so we need to find an actual char.
///
/// Only the (up to) four bytes that end at `index` are inspected, because a
/// UTF-8 encoded scalar value is at most four bytes long.
///
/// Returns `None` when there is nothing before `index` (`index == 0`) or when
/// `index` lies past the end of `bytes`.
/// Bytes that do not form valid UTF-8 are decoded lossily, so the result can
/// be `U+FFFD REPLACEMENT CHARACTER`.
pub fn char_before_index(bytes: &[u8], index: usize) -> Option<char> {
    // Clamp at the start of the input instead of underflowing.
    let start = index.saturating_sub(4);
    // `get` yields `None` (instead of panicking) for an out-of-range `index`.
    let window = bytes.get(start..index)?;
    // The window may begin in the middle of an earlier character; lossy
    // decoding turns that stray prefix into U+FFFD, and the last decoded char
    // is still the one that ends at `index`.
    String::from_utf8_lossy(window).chars().next_back()
}

/// Get the [`char`] right at `index` in bytes (`&[u8]`).
///
/// In most cases, markdown operates on ASCII bytes.
/// In a few cases, it is unicode aware, so we need to find an actual char.
///
/// Only the (up to) four bytes that start at `index` are inspected, because a
/// UTF-8 encoded scalar value is at most four bytes long.
///
/// Returns `None` when `index` is at or past the end of `bytes`.
/// Bytes that do not form valid UTF-8 are decoded lossily, so the result can
/// be `U+FFFD REPLACEMENT CHARACTER`.
pub fn char_after_index(bytes: &[u8], index: usize) -> Option<char> {
    // Saturate so a huge `index` cannot overflow, then clamp to the input.
    let end = index.saturating_add(4).min(bytes.len());
    // `get` yields `None` (instead of panicking) for an out-of-range `index`.
    let window = bytes.get(index..end)?;
    // The window may cut a later character short; only the first decoded char
    // matters, and it is complete whenever the input is valid UTF-8.
    String::from_utf8_lossy(window).chars().next()
}