-rw-r--r-- | src/construct/autolink.rs          |  8
-rw-r--r-- | src/construct/content.rs           |  2
-rw-r--r-- | src/construct/html_flow.rs         |  9
-rw-r--r-- | src/construct/html_text.rs         |  4
-rw-r--r-- | src/construct/label_end.rs         | 17
-rw-r--r-- | src/construct/label_start_image.rs |  2
-rw-r--r-- | src/construct/raw_flow.rs          | 10
-rw-r--r-- | src/construct/raw_text.rs          |  2
-rw-r--r-- | src/state.rs                       |  2
-rw-r--r-- | src/util/character_reference.rs    |  8
10 files changed, 39 insertions, 25 deletions
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 0819f3a..e8f30fb 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -126,7 +126,7 @@ use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
 
-/// Start of autolink.
+/// Start of an autolink.
 ///
 /// ```markdown
 /// > | a<https://example.com>b
@@ -205,8 +205,8 @@
         Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
             if tokenizer.tokenize_state.size < AUTOLINK_SCHEME_SIZE_MAX =>
         {
-            tokenizer.tokenize_state.size += 1;
             tokenizer.consume();
+            tokenizer.tokenize_state.size += 1;
             State::Next(StateName::AutolinkSchemeInsideOrEmailAtext)
         }
         _ => {
@@ -305,12 +305,11 @@ pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
 pub fn email_label(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'.') => {
-            tokenizer.tokenize_state.size = 0;
             tokenizer.consume();
+            tokenizer.tokenize_state.size = 0;
             State::Next(StateName::AutolinkEmailAtSignOrDot)
         }
         Some(b'>') => {
-            tokenizer.tokenize_state.size = 0;
             let index = tokenizer.events.len();
             tokenizer.exit(Name::AutolinkProtocol);
             // Change the event name.
@@ -320,6 +319,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             tokenizer.exit(Name::AutolinkMarker);
             tokenizer.exit(Name::Autolink);
+            tokenizer.tokenize_state.size = 0;
             State::Ok
         }
         _ => State::Retry(StateName::AutolinkEmailValue),
diff --git a/src/construct/content.rs b/src/construct/content.rs
index 6c10cea..e29dac9 100644
--- a/src/construct/content.rs
+++ b/src/construct/content.rs
@@ -30,7 +30,7 @@ use crate::subtokenize::{subtokenize, Subresult};
 use crate::tokenizer::Tokenizer;
 use alloc::{string::String, vec};
 
-/// Before a content content.
+/// Before a content chunk.
 ///
 /// ```markdown
 /// > | abc
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 3de70de..c994ec7 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -197,8 +197,8 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::HtmlFlowTagCloseStart)
         }
         Some(b'?') => {
-            tokenizer.tokenize_state.marker = INSTRUCTION;
             tokenizer.consume();
+            tokenizer.tokenize_state.marker = INSTRUCTION;
             // Do not form containers.
             tokenizer.concrete = true;
             // While we’re in an instruction instead of a declaration, we’re on a `?`
@@ -273,8 +273,8 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
-        tokenizer.tokenize_state.size += 1;
         tokenizer.consume();
+        tokenizer.tokenize_state.size += 1;
 
         if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {
             tokenizer.tokenize_state.size = 0;
@@ -531,8 +531,8 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.current == Some(tokenizer.tokenize_state.marker_b) {
-        tokenizer.tokenize_state.marker_b = 0;
         tokenizer.consume();
+        tokenizer.tokenize_state.marker_b = 0;
         State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
     } else if matches!(tokenizer.current, None | Some(b'\n')) {
         tokenizer.tokenize_state.marker = 0;
@@ -640,8 +640,7 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State {
     } else if tokenizer.tokenize_state.marker == CDATA && tokenizer.current == Some(b']') {
         tokenizer.consume();
         State::Next(StateName::HtmlFlowContinuationCdataInside)
-    } else if (tokenizer.tokenize_state.marker == BASIC
-        || tokenizer.tokenize_state.marker == COMPLETE)
+    } else if matches!(tokenizer.tokenize_state.marker, BASIC | COMPLETE)
         && tokenizer.current == Some(b'\n')
     {
         tokenizer.exit(Name::HtmlFlowData);
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 7812da6..025d498 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -57,7 +57,7 @@ use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::constant::HTML_CDATA_PREFIX;
 
-/// Start of HTML (text)
+/// Start of HTML (text).
 ///
 /// ```markdown
 /// > | a <b> c
@@ -606,7 +606,7 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => State::Retry(StateName::HtmlTextTagOpenBetween),
+        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(StateName::HtmlTextTagOpenBetween),
         _ => State::Nok,
     }
 }
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index f47920c..1c5e0bd 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -529,7 +529,7 @@
 pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
     if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
         tokenizer.attempt(
-            State::Next(StateName::LabelEndResourceBetween),
+            State::Next(StateName::LabelEndResourceEnd),
             State::Next(StateName::LabelEndResourceEnd),
         );
         State::Retry(space_or_tab_eol(tokenizer))
@@ -571,7 +571,7 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State {
     tokenizer.tokenize_state.token_3 = Name::ReferenceString;
     tokenizer.attempt(
         State::Next(StateName::LabelEndReferenceFullAfter),
-        State::Nok,
+        State::Next(StateName::LabelEndReferenceFullMissing)
     );
     State::Retry(StateName::LabelStart)
 }
@@ -615,6 +615,19 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
+/// In reference (full) that was missing.
+///
+/// ```markdown
+/// > | [a][b d
+///         ^
+/// ```
+pub fn reference_full_missing(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.token_1 = Name::Data;
+    tokenizer.tokenize_state.token_2 = Name::Data;
+    tokenizer.tokenize_state.token_3 = Name::Data;
+    State::Nok
+}
+
 /// In reference (collapsed), at `[`.
 ///
 /// > 👉 **Note**: we only get here if the label is defined.
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 4511794..784c264 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -68,6 +68,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
             tokenizer.enter(Name::LabelMarker);
             tokenizer.consume();
             tokenizer.exit(Name::LabelMarker);
+            tokenizer.exit(Name::LabelImage);
             State::Next(StateName::LabelStartImageAfter)
         }
         _ => State::Nok,
@@ -107,7 +108,6 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
     {
         State::Nok
     } else {
-        tokenizer.exit(Name::LabelImage);
         tokenizer.tokenize_state.label_starts.push(LabelStart {
             kind: LabelKind::Image,
             start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1),
diff --git a/src/construct/raw_flow.rs b/src/construct/raw_flow.rs
index 549c40d..15c6865 100644
--- a/src/construct/raw_flow.rs
+++ b/src/construct/raw_flow.rs
@@ -434,7 +434,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// At eol/eof in code, before a non-lazy closing fence or content.
+/// At eol/eof in raw, before a non-lazy closing fence or content.
 ///
 /// ```markdown
 /// > | ~~~js
@@ -552,7 +552,7 @@ pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// Before closing fence, at eol.
+/// Before raw content, not a closing fence, at eol.
 ///
 /// ```markdown
 ///   | ~~~js
@@ -567,7 +567,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State {
     State::Next(StateName::RawFlowContentStart)
 }
 
-/// Before code content, definitely not before a closing fence.
+/// Before raw content, not a closing fence.
 ///
 /// ```markdown
 ///   | ~~~js
@@ -591,7 +591,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// Before code content, after optional prefix.
+/// Before raw content, after optional prefix.
 ///
 /// ```markdown
 ///   | ~~~js
@@ -615,7 +615,7 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// In code content.
+/// In raw content.
 ///
 /// ```markdown
 ///   | ~~~js
diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs
index 9d5fdea..f8ee1c1 100644
--- a/src/construct/raw_text.rs
+++ b/src/construct/raw_text.rs
@@ -185,7 +185,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// Between something and something else
+/// Between something and something else.
 ///
 /// ```markdown
 /// > | `a`
diff --git a/src/state.rs b/src/state.rs
index b013c39..427a388 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -316,6 +316,7 @@ pub enum Name {
     LabelEndNok,
     LabelEndReferenceFull,
     LabelEndReferenceFullAfter,
+    LabelEndReferenceFullMissing,
     LabelEndReferenceNotFull,
     LabelEndReferenceCollapsed,
     LabelEndReferenceCollapsedOpen,
@@ -801,6 +802,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
         Name::LabelEndNok => construct::label_end::nok,
         Name::LabelEndReferenceFull => construct::label_end::reference_full,
         Name::LabelEndReferenceFullAfter => construct::label_end::reference_full_after,
+        Name::LabelEndReferenceFullMissing => construct::label_end::reference_full_missing,
         Name::LabelEndReferenceNotFull => construct::label_end::reference_not_full,
         Name::LabelEndReferenceCollapsed => construct::label_end::reference_collapsed,
         Name::LabelEndReferenceCollapsedOpen => construct::label_end::reference_collapsed_open,
diff --git a/src/util/character_reference.rs b/src/util/character_reference.rs
index f28c135..c6d1797 100644
--- a/src/util/character_reference.rs
+++ b/src/util/character_reference.rs
@@ -15,7 +15,7 @@ use core::str;
 /// [`CHARACTER_REFERENCES`][] (or [`CHARACTER_REFERENCES_HTML_4`][]) and then
 /// takes the corresponding value from `1`.
 ///
-/// The `html5` boolean us used for named character references, and specifier
+/// The `html5` boolean is used for named character references, and specifier
 /// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
 /// supported.
 ///
@@ -82,9 +82,9 @@ pub fn decode_named(value: &str, html5: bool) -> Option<String> {
 pub fn decode_numeric(value: &str, radix: u32) -> String {
     if let Some(char) = char::from_u32(u32::from_str_radix(value, radix).unwrap()) {
         if !matches!(char,
-            // C0 except for HT, LF, FF, CR, space
+            // C0 except for HT, LF, FF, CR, space.
            '\0'..='\u{08}' | '\u{0B}' | '\u{0E}'..='\u{1F}' |
-            // Control character (DEL) of c0, and C1 controls.
+            // Control character (DEL) of C0, and C1 controls.
             '\u{7F}'..='\u{9F}'
             // Lone surrogates, noncharacters, and out of range are handled by
             // Rust.
@@ -104,7 +104,7 @@
 /// The marker specifies the format: `#` for hexadecimal, `x` for decimal, and
 /// `&` for named.
 ///
-/// The `html5` boolean us used for named character references, and specifier
+/// The `html5` boolean is used for named character references, and specifier
 /// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
 /// supported.
 ///
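Note on the label_end change above: the new `reference_full_missing` state only resets `token_1`, `token_2`, and `token_3` to `Name::Data` before returning `State::Nok`, so a full reference whose label is never closed no longer leaves the `Reference*` token names behind when the attempt fails. A minimal sketch to exercise that input path from the outside, assuming the crate's public `to_html` function (the exact package name, `markdown` versus the older `micromark`, is an assumption):

    fn main() {
        // "[a]" is defined; "[b d" is a full reference whose label never closes,
        // which is the case the new LabelEndReferenceFullMissing state handles:
        // "[a]" still resolves as a reference and "[b d" stays literal text.
        let input = "[a][b d\n\n[a]: /url\n";
        println!("{}", markdown::to_html(input));
    }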