aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-05 12:00:33 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-09-05 12:00:33 +0200
commit 16de10fe2395002644d685fdfcf76823346d1cc4 (patch)
tree f2f01942d8aa218ba9bdc8ce47990d1da6cbc1b9
parent 4aa3e7fb4cf0e42fff25d836ce99a82d00cba120 (diff)
download markdown-rs-16de10fe2395002644d685fdfcf76823346d1cc4.tar.gz
markdown-rs-16de10fe2395002644d685fdfcf76823346d1cc4.tar.bz2
markdown-rs-16de10fe2395002644d685fdfcf76823346d1cc4.zip
Add support for getting `char`s from bytes
Diffstat (limited to '')
-rw-r--r-- src/construct/attention.rs | 31
-rw-r--r-- src/util/slice.rs | 22
2 files changed, 32 insertions, 21 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 526f58c..947a79b 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -81,8 +81,8 @@ use crate::resolve::Name as ResolveName;
use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::classify_character::{classify_opt, Kind as CharacterKind};
-use crate::util::slice::Slice;
-use alloc::{string::String, vec, vec::Vec};
+use crate::util::slice::{char_after_index, char_before_index, Slice};
+use alloc::{vec, vec::Vec};
/// Attentention sequence that we can take markers from.
#[derive(Debug)]
@@ -234,28 +234,17 @@ fn get_sequences(tokenizer: &mut Tokenizer) -> Vec<Sequence> {
let end = index + 1;
let exit = &tokenizer.events[end];
- let before_end = enter.point.index;
- let before_start = if before_end < 4 { 0 } else { before_end - 4 };
- let after_start = exit.point.index;
- let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
- tokenizer.parse_state.bytes.len()
- } else {
- after_start + 4
- };
-
let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
.head()
.unwrap();
- let before = classify_opt(
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end])
- .chars()
- .last(),
- );
- let after = classify_opt(
- String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end])
- .chars()
- .next(),
- );
+ let before = classify_opt(char_before_index(
+ tokenizer.parse_state.bytes,
+ enter.point.index,
+ ));
+ let after = classify_opt(char_after_index(
+ tokenizer.parse_state.bytes,
+ exit.point.index,
+ ));
let open = after == CharacterKind::Other
|| (after == CharacterKind::Punctuation && before != CharacterKind::Other);
let close = before == CharacterKind::Other
diff --git a/src/util/slice.rs b/src/util/slice.rs
index 0734d78..d02a526 100644
--- a/src/util/slice.rs
+++ b/src/util/slice.rs
@@ -5,6 +5,28 @@ use crate::util::constant::TAB_SIZE;
use alloc::string::String;
use core::str;
+/// Get the [`char`][] that ends right before `index` in bytes (`&[u8]`).
+///
+/// Markdown mostly works on ASCII bytes but is unicode aware in a few places,
+/// so the (up to) 4 bytes before `index` are decoded lossily; `None` at `0`.
+pub fn char_before_index(bytes: &[u8], index: usize) -> Option<char> {
+ let start = if index < 4 { 0 } else { index - 4 };
+ String::from_utf8_lossy(&bytes[start..index]).chars().last()
+}
+
+/// Get the [`char`][] that starts right at `index` in bytes (`&[u8]`).
+///
+/// Markdown mostly works on ASCII bytes but is unicode aware in a few places,
+/// so the (up to) 4 bytes from `index` are decoded lossily; `None` at the end.
+pub fn char_after_index(bytes: &[u8], index: usize) -> Option<char> {
+ let end = if index + 4 > bytes.len() {
+ bytes.len()
+ } else {
+ index + 4
+ };
+ String::from_utf8_lossy(&bytes[index..end]).chars().next()
+}
+
/// A range between two points.
#[derive(Debug)]
pub struct Position<'a> {