aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/list.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 18:22:59 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 18:22:59 +0200
commit0eeff9148e327183e532752f46421a75506dd7a6 (patch)
tree4f0aed04f90aa759ce96a2e87aa719e7fa95c450 /src/construct/list.rs
parent148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (diff)
downloadmarkdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.gz
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.bz2
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.zip
Refactor to improve states
* Remove custom kind wrappers, use plain bytes instead * Remove `Into`s, use the explicit expected types instead * Refactor to use `slice.as_str` in most places * Remove unneeded unique check before adding a definition * Use a shared CDATA prefix in constants * Inline byte checks into matches * Pass bytes back from parser instead of whole parse state * Refactor to work more often on bytes * Rename custom `size` to `len`
Diffstat (limited to 'src/construct/list.rs')
-rw-r--r--src/construct/list.rs135
1 files changed, 44 insertions, 91 deletions
diff --git a/src/construct/list.rs b/src/construct/list.rs
index 9b59130..d5a9899 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -56,69 +56,6 @@ use crate::util::{
slice::{Position, Slice},
};
-/// Type of list.
-#[derive(Debug, PartialEq)]
-enum Kind {
- /// In a dot (`.`) list item.
- ///
- /// ## Example
- ///
- /// ```markdown
- /// 1. a
- /// ```
- Dot,
- /// In a paren (`)`) list item.
- ///
- /// ## Example
- ///
- /// ```markdown
- /// 1) a
- /// ```
- Paren,
- /// In an asterisk (`*`) list item.
- ///
- /// ## Example
- ///
- /// ```markdown
- /// * a
- /// ```
- Asterisk,
- /// In a plus (`+`) list item.
- ///
- /// ## Example
- ///
- /// ```markdown
- /// + a
- /// ```
- Plus,
- /// In a dash (`-`) list item.
- ///
- /// ## Example
- ///
- /// ```markdown
- /// - a
- /// ```
- Dash,
-}
-
-impl Kind {
- /// Turn a byte ([u8]) into a kind.
- ///
- /// ## Panics
- ///
- /// Panics if `byte` is not `.`, `)`, `*`, `+`, or `-`.
- fn from_byte(byte: u8) -> Kind {
- match byte {
- b'.' => Kind::Dot,
- b')' => Kind::Paren,
- b'*' => Kind::Asterisk,
- b'+' => Kind::Plus,
- b'-' => Kind::Dash,
- _ => unreachable!("invalid byte"),
- }
- }
-}
-
/// Start of list item.
///
/// ```markdown
@@ -126,15 +63,19 @@ impl Kind {
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- let max = if tokenizer.parse_state.constructs.code_indented {
- TAB_SIZE - 1
- } else {
- usize::MAX
- };
-
if tokenizer.parse_state.constructs.list {
tokenizer.enter(Token::ListItem);
- tokenizer.go(space_or_tab_min_max(0, max), before)(tokenizer)
+ tokenizer.go(
+ space_or_tab_min_max(
+ 0,
+ if tokenizer.parse_state.constructs.code_indented {
+ TAB_SIZE - 1
+ } else {
+ usize::MAX
+ },
+ ),
+ before,
+ )(tokenizer)
} else {
State::Nok
}
@@ -149,15 +90,13 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Unordered.
- Some(b'*' | b'+' | b'-') => tokenizer.check(thematic_break, |ok| {
+ Some(b'*' | b'-') => tokenizer.check(thematic_break, |ok| {
Box::new(if ok { nok } else { before_unordered })
})(tokenizer),
+ Some(b'+') => before_unordered(tokenizer),
// Ordered.
- Some(byte) if byte.is_ascii_digit() && (!tokenizer.interrupt || byte == b'1') => {
- tokenizer.enter(Token::ListItemPrefix);
- tokenizer.enter(Token::ListItemValue);
- inside(tokenizer, 0)
- }
+ Some(b'0'..=b'9') if !tokenizer.interrupt => before_ordered(tokenizer),
+ Some(b'1') => before_ordered(tokenizer),
_ => State::Nok,
}
}
@@ -175,6 +114,18 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {
marker(tokenizer)
}
+/// Start of an ordered list item.
+///
+/// ```markdown
+/// > | * a
+/// ^
+/// ```
+fn before_ordered(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.enter(Token::ListItemPrefix);
+ tokenizer.enter(Token::ListItemValue);
+ inside(tokenizer, 0)
+}
+
/// In an ordered list item value.
///
/// ```markdown
@@ -183,14 +134,14 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some(byte) if byte.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
- tokenizer.consume();
- State::Fn(Box::new(move |t| inside(t, size + 1)))
- }
Some(b'.' | b')') if !tokenizer.interrupt || size < 2 => {
tokenizer.exit(Token::ListItemValue);
marker(tokenizer)
}
+ Some(b'0'..=b'9') if size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
+ tokenizer.consume();
+ State::Fn(Box::new(move |t| inside(t, size + 1)))
+ }
_ => State::Nok,
}
}
@@ -262,7 +213,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
- if matches!(tokenizer.current, Some(b'\t' | b' ')) {
+ if let Some(b'\t' | b' ') = tokenizer.current {
State::Nok
} else {
State::Ok
@@ -309,7 +260,7 @@ fn after(tokenizer: &mut Tokenizer, blank: bool) -> State {
end: &tokenizer.point,
},
)
- .size();
+ .len();
if blank {
prefix += 1;
@@ -389,8 +340,8 @@ fn nok(_tokenizer: &mut Tokenizer) -> State {
pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
let mut index = 0;
let mut balance = 0;
- let mut lists_wip: Vec<(Kind, usize, usize, usize)> = vec![];
- let mut lists: Vec<(Kind, usize, usize, usize)> = vec![];
+ let mut lists_wip: Vec<(u8, usize, usize, usize)> = vec![];
+ let mut lists: Vec<(u8, usize, usize, usize)> = vec![];
// Merge list items.
while index < tokenizer.events.len() {
@@ -400,12 +351,14 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
if event.event_type == EventType::Enter {
let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1;
let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]);
- let kind = Kind::from_byte(
- Slice::from_point(tokenizer.parse_state.bytes, &tokenizer.events[marker].point)
- .head()
- .unwrap(),
- );
- let current = (kind, balance, index, end);
+ // Guaranteed to be a valid ASCII byte.
+ let marker = Slice::from_index(
+ tokenizer.parse_state.bytes,
+ tokenizer.events[marker].point.index,
+ )
+ .head()
+ .unwrap();
+ let current = (marker, balance, index, end);
let mut list_index = lists_wip.len();
let mut matched = false;
@@ -475,7 +428,7 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
let mut list_start = tokenizer.events[list_item.2].clone();
let mut list_end = tokenizer.events[list_item.3].clone();
let token_type = match list_item.0 {
- Kind::Paren | Kind::Dot => Token::ListOrdered,
+ b'.' | b')' => Token::ListOrdered,
_ => Token::ListUnordered,
};
list_start.token_type = token_type.clone();