diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-08-01 10:17:25 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-08-01 10:17:25 +0200 |
commit | 82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf (patch) | |
tree | 5a9fe3ad27ab001e613d6e50c1dd67041862ce3a | |
parent | 0eeff9148e327183e532752f46421a75506dd7a6 (diff) | |
download | markdown-rs-82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf.tar.gz markdown-rs-82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf.tar.bz2 markdown-rs-82221ba13d2a4ccd119f280aaa3cea9ca2c8fdaf.zip |
Refactor some states
-rw-r--r-- | src/construct/autolink.rs | 21 | ||||
-rw-r--r-- | src/construct/blank_line.rs | 8 | ||||
-rw-r--r-- | src/construct/character_reference.rs | 1 | ||||
-rw-r--r-- | src/construct/code_text.rs | 23 | ||||
-rw-r--r-- | src/construct/html_text.rs | 4 | ||||
-rw-r--r-- | src/construct/label_end.rs | 3 |
6 files changed, 32 insertions, 28 deletions
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs index c0514ae..bac291e 100644 --- a/src/construct/autolink.rs +++ b/src/construct/autolink.rs @@ -156,6 +156,7 @@ fn open(tokenizer: &mut Tokenizer) -> State { /// ``` fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { + // ASCII alphanumeric and `+`, `-`, and `.`. Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') => { scheme_inside_or_email_atext(tokenizer, 1) } @@ -177,6 +178,7 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State tokenizer.consume(); State::Fn(Box::new(url_inside)) } + // ASCII alphanumeric and `+`, `-`, and `.`. Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if size < AUTOLINK_SCHEME_SIZE_MAX => { @@ -199,8 +201,8 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State { tokenizer.exit(Token::AutolinkProtocol); end(tokenizer) } - // ASCII control or space. - None | Some(b'\0'..=0x1F | b' ' | 0x7F) => State::Nok, + // ASCII control, space, or `<`. + None | Some(b'\0'..=0x1F | b' ' | b'<' | 0x7F) => State::Nok, Some(_) => { tokenizer.consume(); State::Fn(Box::new(url_inside)) @@ -295,14 +297,15 @@ fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State { /// ``` fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State { match tokenizer.current { - Some(b'-') if size < AUTOLINK_DOMAIN_SIZE_MAX => { + // ASCII alphanumeric or `-`. + Some(b'-' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if size < AUTOLINK_DOMAIN_SIZE_MAX => { + let func = if matches!(tokenizer.current, Some(b'-')) { + email_value + } else { + email_label + }; tokenizer.consume(); - State::Fn(Box::new(move |t| email_value(t, size + 1))) - } - // ASCII alphanumeric. - Some(b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z') if size < AUTOLINK_DOMAIN_SIZE_MAX => { - tokenizer.consume(); - State::Fn(Box::new(move |t| email_label(t, size + 1))) + State::Fn(Box::new(move |t| func(t, size + 1))) } _ => State::Nok, } diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs index f397a48..c4eacf5 100644 --- a/src/construct/blank_line.rs +++ b/src/construct/blank_line.rs @@ -40,9 +40,9 @@ use crate::tokenizer::{State, Tokenizer}; /// > 👉 **Note**: `␠` represents a space character. /// /// ```markdown -/// > | ␠␠ +/// > | ␠␠␊ /// ^ -/// > | +/// > | ␊ /// ^ /// ``` pub fn start(tokenizer: &mut Tokenizer) -> State { @@ -52,9 +52,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { /// After zero or more spaces or tabs, before a line ending or EOF. /// /// ```markdown -/// > | ␠␠ +/// > | ␠␠␊ /// ^ -/// > | +/// > | ␊ /// ^ /// ``` fn after(tokenizer: &mut Tokenizer) -> State { diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index cd489a4..9393691 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -167,6 +167,7 @@ fn numeric(tokenizer: &mut Tokenizer) -> State { /// Inside a character reference value, after the markers (`&#x`, `&#`, or /// `&`) that define its kind, but before the `;`. +/// /// The character reference kind defines what and how many characters are /// allowed. /// diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs index d70fbc2..3f9e5e5 100644 --- a/src/construct/code_text.rs +++ b/src/construct/code_text.rs @@ -184,18 +184,19 @@ fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> S tokenizer.consume(); State::Fn(Box::new(move |t| sequence_close(t, size_open, size + 1))) } - _ if size_open == size => { - tokenizer.exit(Token::CodeTextSequence); - tokenizer.exit(Token::CodeText); - State::Ok - } _ => { - let index = tokenizer.events.len(); - tokenizer.exit(Token::CodeTextSequence); - // Change the token type. - tokenizer.events[index - 1].token_type = Token::CodeTextData; - tokenizer.events[index].token_type = Token::CodeTextData; - between(tokenizer, size_open) + if size_open == size { + tokenizer.exit(Token::CodeTextSequence); + tokenizer.exit(Token::CodeText); + State::Ok + } else { + let index = tokenizer.events.len(); + tokenizer.exit(Token::CodeTextSequence); + // More or less accents: mark as data. + tokenizer.events[index - 1].token_type = Token::CodeTextData; + tokenizer.events[index].token_type = Token::CodeTextData; + between(tokenizer, size_open) + } } } } diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs index 51beda5..8a44c29 100644 --- a/src/construct/html_text.rs +++ b/src/construct/html_text.rs @@ -169,7 +169,7 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State { /// [html_flow]: crate::construct::html_flow fn comment_start(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'>') => State::Nok, + Some(b'>') => State::Nok, Some(b'-') => { tokenizer.consume(); State::Fn(Box::new(comment_start_dash)) @@ -193,7 +193,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State { /// [html_flow]: crate::construct::html_flow fn comment_start_dash(tokenizer: &mut Tokenizer) -> State { match tokenizer.current { - None | Some(b'>') => State::Nok, + Some(b'>') => State::Nok, _ => comment(tokenizer), } } diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs index a1ec8d9..d3191a8 100644 --- a/src/construct/label_end.rs +++ b/src/construct/label_end.rs @@ -231,7 +231,6 @@ pub fn start(tokenizer: &mut Tokenizer) -> State { tokenizer.consume(); tokenizer.exit(Token::LabelMarker); tokenizer.exit(Token::LabelEnd); - return State::Fn(Box::new(move |t| after(t, info))); } } @@ -278,7 +277,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State { } }) })(tokenizer), - // Shortcut reference: `[asd]`? + // Shortcut (`[asd]`) reference? _ => { if defined { ok(tokenizer, info) |