aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/autolink.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 18:22:59 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-29 18:22:59 +0200
commit0eeff9148e327183e532752f46421a75506dd7a6 (patch)
tree4f0aed04f90aa759ce96a2e87aa719e7fa95c450 /src/construct/autolink.rs
parent148ede7f0f42f0ccb1620b13d91f35d0c7d04c2f (diff)
downloadmarkdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.gz
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.tar.bz2
markdown-rs-0eeff9148e327183e532752f46421a75506dd7a6.zip
Refactor to improve states
* Remove custom kind wrappers, use plain bytes instead * Remove `Into`s, use the explicit expected types instead * Refactor to use `slice.as_str` in most places * Remove unneeded unique check before adding a definition * Use a shared CDATA prefix in constants * Inline byte checks into matches * Pass bytes back from parser instead of whole parse state * Refactor to work more often on bytes * Rename custom `size` to `len`
Diffstat (limited to 'src/construct/autolink.rs')
-rw-r--r--src/construct/autolink.rs57
1 files changed, 29 insertions, 28 deletions
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index b843af8..c0514ae 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -137,12 +137,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(byte) if byte.is_ascii_alphabetic() => {
+ // ASCII alphabetic.
+ Some(b'A'..=b'Z' | b'a'..=b'z') => {
tokenizer.consume();
State::Fn(Box::new(scheme_or_email_atext))
}
- Some(byte) if is_ascii_atext(byte) => email_atext(tokenizer),
- _ => State::Nok,
+ _ => email_atext(tokenizer),
}
}
@@ -199,8 +199,8 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::AutolinkProtocol);
end(tokenizer)
}
- Some(byte) if byte.is_ascii_control() => State::Nok,
- None | Some(b' ') => State::Nok,
+ // ASCII control or space.
+ None | Some(b'\0'..=0x1F | b' ' | 0x7F) => State::Nok,
Some(_) => {
tokenizer.consume();
State::Fn(Box::new(url_inside))
@@ -220,7 +220,26 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
}
- Some(byte) if is_ascii_atext(byte) => {
+ // ASCII atext.
+ //
+ // atext is an ASCII alphanumeric (see [`is_ascii_alphanumeric`][]), or
+ // a byte in the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027
+ // APOSTROPHE (`'`), U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`),
+ // U+002D DASH (`-`), U+002F SLASH (`/`), U+003D EQUALS TO (`=`),
+ // U+003F QUESTION MARK (`?`), U+005E CARET (`^`) to U+0060 GRAVE
+ // ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE (`{`) to U+007E TILDE
+ // (`~`).
+ //
+ // See:
+ // **\[RFC5322]**:
+ // [Internet Message Format](https://tools.ietf.org/html/rfc5322).
+ // P. Resnick.
+ // IETF.
+ //
+ // [`is_ascii_alphanumeric`]: char::is_ascii_alphanumeric
+ Some(
+ b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~',
+ ) => {
tokenizer.consume();
State::Fn(Box::new(email_atext))
}
@@ -236,7 +255,8 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
/// ```
fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some(byte) if byte.is_ascii_alphanumeric() => email_value(tokenizer, size),
+ // ASCII alphanumeric.
+ Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => email_value(tokenizer, size),
_ => State::Nok,
}
}
@@ -279,7 +299,8 @@ fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
tokenizer.consume();
State::Fn(Box::new(move |t| email_value(t, size + 1)))
}
- Some(byte) if byte.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ // ASCII alphanumeric.
+ Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |t| email_label(t, size + 1)))
}
@@ -307,23 +328,3 @@ fn end(tokenizer: &mut Tokenizer) -> State {
_ => unreachable!("expected `>`"),
}
}
-
-/// Check whether the character code represents an ASCII atext.
-///
-/// atext is an ASCII alphanumeric (see [`is_ascii_alphanumeric`][]), or a character in
-/// the inclusive ranges U+0023 NUMBER SIGN (`#`) to U+0027 APOSTROPHE (`'`),
-/// U+002A ASTERISK (`*`), U+002B PLUS SIGN (`+`), U+002D DASH (`-`), U+002F
-/// SLASH (`/`), U+003D EQUALS TO (`=`), U+003F QUESTION MARK (`?`), U+005E
-/// CARET (`^`) to U+0060 GRAVE ACCENT (`` ` ``), or U+007B LEFT CURLY BRACE
-/// (`{`) to U+007E TILDE (`~`).
-///
-/// See:
-/// **\[RFC5322]**:
-/// [Internet Message Format](https://tools.ietf.org/html/rfc5322).
-/// P. Resnick.
-/// IETF.
-///
-/// [`is_ascii_alphanumeric`]: char::is_ascii_alphanumeric
-fn is_ascii_atext(byte: u8) -> bool {
- matches!(byte, b'#'..=b'\'' | b'*' | b'+' | b'-'..=b'9' | b'=' | b'?' | b'A'..=b'Z' | b'^'..=b'~')
-}