diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-29 18:22:59 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-29 18:22:59 +0200 |
commit | 0eeff9148e327183e532752f46421a75506dd7a6 (patch) | |
tree | 4f0aed04f90aa759ce96a2e87aa719e7fa95c450 /src/construct/attention.rs | |
parent | 148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (diff) | |
download | markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.gz markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.bz2 markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.zip |
Refactor to improve states
* Remove custom kind wrappers, use plain bytes instead
* Remove `Into`s, use the explicit expected types instead
* Refactor to use `slice.as_str` in most places
* Remove unneeded unique check before adding a definition
* Use a shared CDATA prefix in constants
* Inline byte checks into matches
* Pass bytes back from parser instead of whole parse state
* Refactor to work more often on bytes
* Rename custom `size` to `len`
Diffstat (limited to '')
-rw-r--r-- | src/construct/attention.rs | 88 |
1 file changed, 21 insertions, 67 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs index b042645..583fde2 100644 --- a/src/construct/attention.rs +++ b/src/construct/attention.rs @@ -88,54 +88,11 @@ enum GroupKind { Other, } -/// Type of sequence. -#[derive(Debug, PartialEq)] -enum MarkerKind { - /// In a run with asterisks. - /// - /// ## Example - /// - /// ```markdown - /// *a* - /// ``` - Asterisk, - /// In a run with underscores. - /// - /// ## Example - /// - /// ```markdown - /// _a_ - /// ``` - Underscore, -} - -impl MarkerKind { - /// Turn the kind into a byte ([u8]). - fn as_byte(&self) -> u8 { - match self { - MarkerKind::Asterisk => b'*', - MarkerKind::Underscore => b'_', - } - } - /// Turn a byte ([u8]) into a kind. - /// - /// ## Panics - /// - /// Panics if `byte` is not `*` or `_`. - fn from_byte(byte: u8) -> MarkerKind { - match byte { - b'*' => MarkerKind::Asterisk, - b'_' => MarkerKind::Underscore, - _ => unreachable!("invalid byte"), - } - } -} - /// Attentention sequence that we can take markers from. #[derive(Debug)] struct Sequence { - /// Marker used in this sequence. - marker: MarkerKind, + /// Marker as a byte (`u8`) used in this sequence. + marker: u8, /// The depth in events where this sequence resides. balance: usize, /// The index into events where this sequence’s `Enter` currently resides. 
@@ -160,9 +117,9 @@ struct Sequence { /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - Some(byte) if tokenizer.parse_state.constructs.attention && matches!(byte, b'*' | b'_') => { + Some(b'*' | b'_') if tokenizer.parse_state.constructs.attention => { tokenizer.enter(Token::AttentionSequence); - inside(tokenizer, MarkerKind::from_byte(byte)) + inside(tokenizer, tokenizer.current.unwrap()) } _ => State::Nok, } @@ -174,14 +131,17 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// > | ** /// ^^ /// ``` -fn inside(tokenizer: &mut Tokenizer, marker: MarkerKind) -> State { - if tokenizer.current == Some(marker.as_byte()) { - tokenizer.consume(); - State::Fn(Box::new(move |t| inside(t, marker))) - } else { - tokenizer.exit(Token::AttentionSequence); - tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention)); - State::Ok +fn inside(tokenizer: &mut Tokenizer, marker: u8) -> State { + match tokenizer.current { + Some(b'*' | b'_') if tokenizer.current.unwrap() == marker => { + tokenizer.consume(); + State::Fn(Box::new(move |t| inside(t, marker))) + } + _ => { + tokenizer.exit(Token::AttentionSequence); + tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention)); + State::Ok + } } } @@ -219,16 +179,10 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { String::from_utf8_lossy(&tokenizer.parse_state.bytes[after_start..after_end]); let char_after = string_after.chars().next(); - let marker = MarkerKind::from_byte( - Slice::from_point(tokenizer.parse_state.bytes, &enter.point) - .head() - .unwrap(), - ); - let before = classify_character(if enter.point.index > 0 { - char_before - } else { - None - }); + let marker = Slice::from_point(tokenizer.parse_state.bytes, &enter.point) + .head() + .unwrap(); + let before = classify_character(char_before); let after = classify_character(char_after); let open = after == GroupKind::Other || (after == GroupKind::Punctuation && before != 
GroupKind::Other); @@ -245,12 +199,12 @@ fn resolve_attention(tokenizer: &mut Tokenizer) { start_point: enter.point.clone(), end_point: exit.point.clone(), size: exit.point.index - enter.point.index, - open: if marker == MarkerKind::Asterisk { + open: if marker == b'*' { open } else { open && (before != GroupKind::Other || !close) }, - close: if marker == MarkerKind::Asterisk { + close: if marker == b'*' { close } else { close && (after != GroupKind::Other || !open) |