about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Titus Wormer <tituswormer@gmail.com>, 2022-08-01 10:17:25 +0200
committer: Titus Wormer <tituswormer@gmail.com>, 2022-08-01 10:17:25 +0200
commit: 82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf (patch)
tree: 5a9fe3ad27ab001e613d6e50c1dd67041862ce3a /src
parent: 0eeff9148e327183e532752f46421a75506dd7a6 (diff)
download: markdown-rs-82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf.tar.gz
markdown-rs-82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf.tar.bz2
markdown-rs-82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf.zip
Refactor some states
Diffstat (limited to 'src')
-rw-r--r-- src/construct/autolink.rs 21
-rw-r--r-- src/construct/blank_line.rs 8
-rw-r--r-- src/construct/character_reference.rs 1
-rw-r--r-- src/construct/code_text.rs 23
-rw-r--r-- src/construct/html_text.rs 4
-rw-r--r-- src/construct/label_end.rs 3
6 files changed, 32 insertions, 28 deletions
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index c0514ae..bac291e 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -156,6 +156,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
+ // ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => {
scheme_inside_or_email_atext(tokenizer, 1)
}
@@ -177,6 +178,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State
tokenizer.consume();
State::Fn(Box::new(url_inside))
}
+ // ASCII alphanumeric and `+`, `-`, and `.`.
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
if size < AUTOLINK_SCHEME_SIZE_MAX =>
{
@@ -199,8 +201,8 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
tokenizer.exit(Token::AutolinkProtocol);
end(tokenizer)
}
- // ASCII control or space.
- None | Some(b'\0'..=0x1F | b' ' | 0x7F) => State::Nok,
+ // ASCII control, space, or `<`.
+ None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok,
Some(_) => {
tokenizer.consume();
State::Fn(Box::new(url_inside))
@@ -295,14 +297,15 @@ fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Some(b'-') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ // ASCII alphanumeric or `-`.
+ Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ let func = if matches!(tokenizer.current, Some(b'-')) {
+ email_value
+ } else {
+ email_label
+ };
tokenizer.consume();
- State::Fn(Box::new(move |t| email_value(t, size + 1)))
- }
- // ASCII alphanumeric.
- Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
- tokenizer.consume();
- State::Fn(Box::new(move |t| email_label(t, size + 1)))
+ State::Fn(Box::new(move |t| func(t, size + 1)))
}
_ => State::Nok,
}
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index f397a48..c4eacf5 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -40,9 +40,9 @@ use crate::tokenizer::{State, Tokenizer};
/// > 👉 **Note**: `␠` represents a space character.
///
/// ```markdown
-/// > | ␠␠
+/// > | ␠␠␊
/// ^
-/// > |
+/// > | ␊
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
@@ -52,9 +52,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// After zero or more spaces or tabs, before a line ending or EOF.
///
/// ```markdown
-/// > | ␠␠
+/// > | ␠␠␊
/// ^
-/// > |
+/// > | ␊
/// ^
/// ```
fn after(tokenizer: &mut Tokenizer) -> State {
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index cd489a4..9393691 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -167,6 +167,7 @@ fn numeric(tokenizer: &mut Tokenizer) -> State {
/// Inside a character reference value, after the markers (`&#x`, `&#`, or
/// `&`) that define its kind, but before the `;`.
+///
/// The character reference kind defines what and how many characters are
/// allowed.
///
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index d70fbc2..3f9e5e5 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -184,18 +184,19 @@ fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> S
tokenizer.consume();
State::Fn(Box::new(move |t| sequence_close(t, size_open, size + 1)))
}
- _ if size_open == size => {
- tokenizer.exit(Token::CodeTextSequence);
- tokenizer.exit(Token::CodeText);
- State::Ok
- }
_ => {
- let index = tokenizer.events.len();
- tokenizer.exit(Token::CodeTextSequence);
- // Change the token type.
- tokenizer.events[index - 1].token_type = Token::CodeTextData;
- tokenizer.events[index].token_type = Token::CodeTextData;
- between(tokenizer, size_open)
+ if size_open == size {
+ tokenizer.exit(Token::CodeTextSequence);
+ tokenizer.exit(Token::CodeText);
+ State::Ok
+ } else {
+ let index = tokenizer.events.len();
+ tokenizer.exit(Token::CodeTextSequence);
+ // More or less accents: mark as data.
+ tokenizer.events[index - 1].token_type = Token::CodeTextData;
+ tokenizer.events[index].token_type = Token::CodeTextData;
+ between(tokenizer, size_open)
+ }
}
}
}
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 51beda5..8a44c29 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -169,7 +169,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// [html_flow]: crate::construct::html_flow
fn comment_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'>') => State::Nok,
+ Some(b'>') => State::Nok,
Some(b'-') => {
tokenizer.consume();
State::Fn(Box::new(comment_start_dash))
@@ -193,7 +193,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State {
/// [html_flow]: crate::construct::html_flow
fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- None | Some(b'>') => State::Nok,
+ Some(b'>') => State::Nok,
_ => comment(tokenizer),
}
}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index a1ec8d9..d3191a8 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -231,7 +231,6 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Token::LabelMarker);
tokenizer.exit(Token::LabelEnd);
-
return State::Fn(Box::new(move |t| after(t, info)));
}
}
@@ -278,7 +277,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
}
})
})(tokenizer),
- // Shortcut reference: `[asd]`?
+ // Shortcut (`[asd]`) reference?
_ => {
if defined {
ok(tokenizer, info)