Refactor to improve docs of each function

author: Titus Wormer <tituswormer@gmail.com> 2022-08-12 14:21:53 +0200
committer: Titus Wormer <tituswormer@gmail.com> 2022-08-12 14:21:53 +0200
commit: 504729a4a0c8f3e0d8fc9159e0273150b169e184 (patch)
tree: a6bf291322decccd6011580337b1feed6151b554 /src/construct/attention.rs
parent: db5a491e6c2223d1db9b458307431a54db3c40f2 (diff)
download: markdown-rs-504729a4a0c8f3e0d8fc9159e0273150b169e184.tar.gz
markdown-rs-504729a4a0c8f3e0d8fc9159e0273150b169e184.tar.bz2
markdown-rs-504729a4a0c8f3e0d8fc9159e0273150b169e184.zip
1 file changed, 183 insertions, 205 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index e974fae..3a29d06 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -60,7 +60,7 @@ use crate::util::slice::Slice;
 
 /// Character code kinds.
 #[derive(Debug, PartialEq)]
-enum GroupKind {
+enum CharacterKind {
     /// Whitespace.
     ///
     /// ## Example
@@ -98,7 +98,7 @@ struct Sequence {
     /// The depth in events where this sequence resides.
     balance: usize,
     /// The index into events where this sequence’s `Enter` currently resides.
-    event_index: usize,
+    index: usize,
     /// The (shifted) point where this sequence starts.
     start_point: Point,
     /// The (shifted) point where this sequence end.
@@ -111,7 +111,7 @@ struct Sequence {
     close: bool,
 }
 
-/// Before a sequence.
+/// At start of attention.
 ///
 /// ```markdown
 /// > | **
@@ -128,7 +128,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// In a sequence.
+/// In sequence.
 ///
 /// ```markdown
 /// > | **
@@ -136,7 +136,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn inside(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(b'*' | b'_') if tokenizer.current.unwrap() == tokenizer.tokenize_state.marker => {
+        Some(b'*' | b'_') if tokenizer.current == Some(tokenizer.tokenize_state.marker) => {
             tokenizer.consume();
             State::Next(StateName::AttentionInside)
         }
@@ -150,28 +150,28 @@ pub fn inside(tokenizer: &mut Tokenizer) -> State {
 }
 
 /// Resolve attention sequences.
-#[allow(clippy::too_many_lines)]
 pub fn resolve(tokenizer: &mut Tokenizer) {
-    let mut start = 0;
+    let mut index = 0;
     let mut balance = 0;
     let mut sequences = vec![];
 
-    // Find sequences of sequences and information about them.
-    while start < tokenizer.events.len() {
-        let enter = &tokenizer.events[start];
+    // Find all sequences, gather info about them.
+    while index < tokenizer.events.len() {
+        let enter = &tokenizer.events[index];
 
         if enter.kind == Kind::Enter {
             balance += 1;
 
             if enter.name == Name::AttentionSequence {
-                let end = start + 1;
+                let end = index + 1;
                 let exit = &tokenizer.events[end];
 
                 let before_end = enter.point.index;
                 let before_start = if before_end < 4 { 0 } else { before_end - 4 };
-                let string_before =
-                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end]);
-                let char_before = string_before.chars().last();
+                let char_before =
+                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[before_start..before_end])
+                        .chars()
+                        .last();
 
                 let after_start = exit.point.index;
                 let after_end = if after_start + 4 > tokenizer.parse_state.bytes.len() {
@@ -179,26 +179,27 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                 } else {
                     after_start + 4
                 };
-                let string_after =
-                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]);
-                let char_after = string_after.chars().next();
+                let char_after =
+                    String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end])
+                        .chars()
+                        .next();
 
                 let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point)
                     .head()
                     .unwrap();
                 let before = classify_character(char_before);
                 let after = classify_character(char_after);
-                let open = after == GroupKind::Other
-                    || (after == GroupKind::Punctuation && before != GroupKind::Other);
+                let open = after == CharacterKind::Other
+                    || (after == CharacterKind::Punctuation && before != CharacterKind::Other);
                 // To do: GFM strikethrough?
-                // || attentionMarkers.includes(code)
-                let close = before == GroupKind::Other
-                    || (before == GroupKind::Punctuation && after != GroupKind::Other);
+                // || char_after == '~'
+                let close = before == CharacterKind::Other
+                    || (before == CharacterKind::Punctuation && after != CharacterKind::Other);
                 // To do: GFM strikethrough?
-                // || attentionMarkers.includes(previous)
+                // || char_before == '~'
 
                 sequences.push(Sequence {
-                    event_index: start,
+                    index,
                     balance,
                     start_point: enter.point.clone(),
                     end_point: exit.point.clone(),
@@ -206,12 +207,12 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                     open: if marker == b'*' {
                         open
                     } else {
-                        open && (before != GroupKind::Other || !close)
+                        open && (before != CharacterKind::Other || !close)
                     },
                     close: if marker == b'*' {
                         close
                     } else {
-                        close && (after != GroupKind::Other || !open)
+                        close && (after != CharacterKind::Other || !open)
                     },
                     marker,
                 });
@@ -220,10 +221,10 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
             balance -= 1;
         }
 
-        start += 1;
+        index += 1;
     }
 
-    // Walk through sequences and match them.
+    // Now walk through them and match them.
     let mut close = 0;
 
     while close < sequences.len() {
@@ -240,7 +241,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
 
                 let sequence_open = &sequences[open];
 
-                // We found a sequence that can open the closer we found.
+                // An opener matching our closer:
                 if sequence_open.open
                     && sequence_close.marker == sequence_open.marker
                     && sequence_close.balance == sequence_open.balance
@@ -257,175 +258,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
                     }
 
                     // We’ve found a match!
-
-                    // Number of markers to use from the sequence.
-                    let take = if sequence_open.size > 1 && sequence_close.size > 1 {
-                        2
-                    } else {
-                        1
-                    };
-
-                    // We’re *on* a closing sequence, with a matching opening
-                    // sequence.
-                    // Now we make sure that we can’t have misnested attention:
-                    //
-                    // ```html
-                    // <em>a <strong>b</em> c</strong>
-                    // ```
-                    //
-                    // Do that by marking everything between it as no longer
-                    // possible to open anything.
-                    // Theoretically we could mark non-closing as well, but we
-                    // don’t look for closers backwards.
-                    let mut between = open + 1;
-
-                    while between < close {
-                        sequences[between].open = false;
-                        between += 1;
-                    }
-
-                    let sequence_close = &mut sequences[close];
-                    let close_event_index = sequence_close.event_index;
-                    let seq_close_enter = sequence_close.start_point.clone();
-                    // No need to worry about `VS`, because sequences are only actual characters.
-                    sequence_close.size -= take;
-                    sequence_close.start_point.column += take;
-                    sequence_close.start_point.index += take;
-                    let seq_close_exit = sequence_close.start_point.clone();
-
-                    // Stay on this closing sequence for the next iteration: it
-                    // might close more things.
-                    next_index -= 1;
-
-                    // Remove closing sequence if fully used.
-                    if sequence_close.size == 0 {
-                        sequences.remove(close);
-                        tokenizer.map.add(close_event_index, 2, vec![]);
-                    } else {
-                        // Shift remaining closing sequence forward.
-                        // Do it here because a sequence can open and close different
-                        // other sequences, and the remainder can be on any side or
-                        // somewhere in the middle.
-                        let mut enter = &mut tokenizer.events[close_event_index];
-                        enter.point = seq_close_exit.clone();
-                    }
-
-                    let sequence_open = &mut sequences[open];
-                    let open_event_index = sequence_open.event_index;
-                    let seq_open_exit = sequence_open.end_point.clone();
-                    // No need to worry about `VS`, because sequences are only actual characters.
-                    sequence_open.size -= take;
-                    sequence_open.end_point.column -= take;
-                    sequence_open.end_point.index -= take;
-                    let seq_open_enter = sequence_open.end_point.clone();
-
-                    // Remove opening sequence if fully used.
-                    if sequence_open.size == 0 {
-                        sequences.remove(open);
-                        tokenizer.map.add(open_event_index, 2, vec![]);
-                        next_index -= 1;
-                    } else {
-                        // Shift remaining opening sequence backwards.
-                        // See note above for why that happens here.
-                        let mut exit = &mut tokenizer.events[open_event_index + 1];
-                        exit.point = seq_open_enter.clone();
-                    }
-
-                    // Opening.
-                    tokenizer.map.add_before(
-                        // Add after the current sequence (it might remain).
-                        open_event_index + 2,
-                        0,
-                        vec![
-                            Event {
-                                kind: Kind::Enter,
-                                name: if take == 1 {
-                                    Name::Emphasis
-                                } else {
-                                    Name::Strong
-                                },
-                                point: seq_open_enter.clone(),
-                                link: None,
-                            },
-                            Event {
-                                kind: Kind::Enter,
-                                name: if take == 1 {
-                                    Name::EmphasisSequence
-                                } else {
-                                    Name::StrongSequence
-                                },
-                                point: seq_open_enter.clone(),
-                                link: None,
-                            },
-                            Event {
-                                kind: Kind::Exit,
-                                name: if take == 1 {
-                                    Name::EmphasisSequence
-                                } else {
-                                    Name::StrongSequence
-                                },
-                                point: seq_open_exit.clone(),
-                                link: None,
-                            },
-                            Event {
-                                kind: Kind::Enter,
-                                name: if take == 1 {
-                                    Name::EmphasisText
-                                } else {
-                                    Name::StrongText
-                                },
-                                point: seq_open_exit.clone(),
-                                link: None,
-                            },
-                        ],
-                    );
-                    // Closing.
-                    tokenizer.map.add(
-                        close_event_index,
-                        0,
-                        vec![
-                            Event {
-                                kind: Kind::Exit,
-                                name: if take == 1 {
-                                    Name::EmphasisText
-                                } else {
-                                    Name::StrongText
-                                },
-                                point: seq_close_enter.clone(),
-                                link: None,
-                            },
-                            Event {
-                                kind: Kind::Enter,
-                                name: if take == 1 {
-                                    Name::EmphasisSequence
-                                } else {
-                                    Name::StrongSequence
-                                },
-                                point: seq_close_enter.clone(),
-                                link: None,
-                            },
-                            Event {
-                                kind: Kind::Exit,
-                                name: if take == 1 {
-                                    Name::EmphasisSequence
-                                } else {
-                                    Name::StrongSequence
-                                },
-                                point: seq_close_exit.clone(),
-                                link: None,
-                            },
-                            Event {
-                                kind: Kind::Exit,
-                                name: if take == 1 {
-                                    Name::Emphasis
-                                } else {
-                                    Name::Strong
-                                },
-                                point: seq_close_exit.clone(),
-                                link: None,
-                            },
-                        ],
-                    );
+                    next_index = match_sequences(tokenizer, &mut sequences, open, close);
 
                     break;
                 }
@@ -439,14 +272,159 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
     let mut index = 0;
     while index < sequences.len() {
         let sequence = &sequences[index];
-        tokenizer.events[sequence.event_index].name = Name::Data;
-        tokenizer.events[sequence.event_index + 1].name = Name::Data;
+        tokenizer.events[sequence.index].name = Name::Data;
+        tokenizer.events[sequence.index + 1].name = Name::Data;
         index += 1;
     }
 
     tokenizer.map.consume(&mut tokenizer.events);
 }
 
+/// Match the opening sequence at `open` with the closing sequence at `close`; returns the index to visit next.
+fn match_sequences(
+    tokenizer: &mut Tokenizer,
+    sequences: &mut Vec<Sequence>,
+    open: usize,
+    close: usize,
+) -> usize {
+    // Index into `sequences` that the caller should continue from.
+    // Start on this closing sequence: what is left of it (or, if it is
+    // removed, whatever slides into its slot) might close more things.
+    // It’s only decremented when the opening sequence is removed.
+    let mut next = close;
+
+    // Number of markers to take from each sequence: 2 (strong) if both have more than one, else 1.
+    let take = if sequences[open].size > 1 && sequences[close].size > 1 {
+        2
+    } else {
+        1
+    };
+
+    // We’re *on* a closing sequence, with a matching opening
+    // sequence.
+    // Now we make sure that we can’t have misnested attention:
+    //
+    // ```html
+    // <em>a <strong>b</em> c</strong>
+    // ```
+    //
+    // Do that by marking every sequence between the two as no
+    // longer able to open anything.
+    // Theoretically we should mark them as `close: false` too, but
+    // we don’t look for closers backwards, so it’s not needed.
+    let mut between = open + 1;
+
+    while between < close {
+        sequences[between].open = false;
+        between += 1;
+    }
+
+    let (group_name, seq_name, text_name) = if take == 1 {
+        (Name::Emphasis, Name::EmphasisSequence, Name::EmphasisText)
+    } else {
+        (Name::Strong, Name::StrongSequence, Name::StrongText)
+    };
+    let open_index = sequences[open].index;
+    let close_index = sequences[close].index;
+    let open_exit = sequences[open].end_point.clone();
+    let close_enter = sequences[close].start_point.clone();
+
+    // No need to worry about `VS`, because sequences are only actual characters.
+    sequences[open].size -= take;
+    sequences[close].size -= take;
+    sequences[open].end_point.column -= take;
+    sequences[open].end_point.index -= take;
+    sequences[close].start_point.column += take;
+    sequences[close].start_point.index += take;
+
+    // Opening: enter group, enter and exit sequence, enter text.
+    tokenizer.map.add_before(
+        // Add after the opening sequence’s own enter and exit (the sequence might remain).
+        open_index + 2,
+        0,
+        vec![
+            Event {
+                kind: Kind::Enter,
+                name: group_name.clone(),
+                point: sequences[open].end_point.clone(),
+                link: None,
+            },
+            Event {
+                kind: Kind::Enter,
+                name: seq_name.clone(),
+                point: sequences[open].end_point.clone(),
+                link: None,
+            },
+            Event {
+                kind: Kind::Exit,
+                name: seq_name.clone(),
+                point: open_exit.clone(),
+                link: None,
+            },
+            Event {
+                kind: Kind::Enter,
+                name: text_name.clone(),
+                point: open_exit,
+                link: None,
+            },
+        ],
+    );
+    // Closing: exit text, enter and exit sequence, exit group.
+    tokenizer.map.add(
+        close_index,
+        0,
+        vec![
+            Event {
+                kind: Kind::Exit,
+                name: text_name,
+                point: close_enter.clone(),
+                link: None,
+            },
+            Event {
+                kind: Kind::Enter,
+                name: seq_name.clone(),
+                point: close_enter,
+                link: None,
+            },
+            Event {
+                kind: Kind::Exit,
+                name: seq_name,
+                point: sequences[close].start_point.clone(),
+                link: None,
+            },
+            Event {
+                kind: Kind::Exit,
+                name: group_name,
+                point: sequences[close].start_point.clone(),
+                link: None,
+            },
+        ],
+    );
+
+    // Remove closing sequence if fully used.
+    if sequences[close].size == 0 {
+        sequences.remove(close);
+        tokenizer.map.add(close_index, 2, vec![]);
+    } else {
+        // Shift the remaining closing sequence forward.
+        // Do it here because a sequence can open and close different
+        // other sequences, and the remainder can be on either side or
+        // somewhere in the middle.
+        tokenizer.events[close_index].point = sequences[close].start_point.clone();
+    }
+
+    if sequences[open].size == 0 {
+        sequences.remove(open);
+        tokenizer.map.add(open_index, 2, vec![]);
+        // Everything after it shifts one to the left: account for that in the next iteration.
+        next -= 1;
+    } else {
+        tokenizer.events[open_index + 1].point = sequences[open].end_point.clone();
+    }
+
+    next
+}
+
 /// Classify whether a character code represents whitespace, punctuation, or
 /// something else.
 ///
@@ -458,15 +436,15 @@ pub fn resolve(tokenizer: &mut Tokenizer) {
 /// ## References
 ///
 /// *   [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
-fn classify_character(char: Option<char>) -> GroupKind {
+fn classify_character(char: Option<char>) -> CharacterKind {
     match char {
         // EOF.
-        None => GroupKind::Whitespace,
+        None => CharacterKind::Whitespace,
         // Unicode whitespace.
-        Some(char) if char.is_whitespace() => GroupKind::Whitespace,
+        Some(char) if char.is_whitespace() => CharacterKind::Whitespace,
         // Unicode punctuation.
-        Some(char) if PUNCTUATION.contains(&char) => GroupKind::Punctuation,
+        Some(char) if PUNCTUATION.contains(&char) => CharacterKind::Punctuation,
         // Everything else.
-        Some(_) => GroupKind::Other,
+        Some(_) => CharacterKind::Other,
     }
 }
author	Titus Wormer <tituswormer@gmail.com>	2022-08-12 14:21:53 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-08-12 14:21:53 +0200
commit	504729a4a0c8f3e0d8fc9159e0273150b169e184 (patch)
tree	a6bf291322decccd6011580337b1feed6151b554 /src/construct/attention.rs
parent	db5a491e6c2223d1db9b458307431a54db3c40f2 (diff)
download	markdown-rs-504729a4a0c8f3e0d8fc9159e0273150b169e184.tar.gz markdown-rs-504729a4a0c8f3e0d8fc9159e0273150b169e184.tar.bz2 markdown-rs-504729a4a0c8f3e0d8fc9159e0273150b169e184.zip