Diffstat
-rw-r--r--  src/construct/autolink.rs            8
-rw-r--r--  src/construct/content.rs             2
-rw-r--r--  src/construct/html_flow.rs           9
-rw-r--r--  src/construct/html_text.rs           4
-rw-r--r--  src/construct/label_end.rs          17
-rw-r--r--  src/construct/label_start_image.rs   2
-rw-r--r--  src/construct/raw_flow.rs           10
-rw-r--r--  src/construct/raw_text.rs            2
-rw-r--r--  src/state.rs                         2
-rw-r--r--  src/util/character_reference.rs      8
10 files changed, 39 insertions, 25 deletions
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 0819f3a..e8f30fb 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -126,7 +126,7 @@ use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
 
-/// Start of autolink.
+/// Start of an autolink.
 ///
 /// ```markdown
 /// > | a<https://example.com>b
@@ -205,8 +205,8 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
         Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
             if tokenizer.tokenize_state.size < AUTOLINK_SCHEME_SIZE_MAX =>
         {
-            tokenizer.tokenize_state.size += 1;
             tokenizer.consume();
+            tokenizer.tokenize_state.size += 1;
             State::Next(StateName::AutolinkSchemeInsideOrEmailAtext)
         }
         _ => {
@@ -305,12 +305,11 @@ pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
 pub fn email_label(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
         Some(b'.') => {
-            tokenizer.tokenize_state.size = 0;
             tokenizer.consume();
+            tokenizer.tokenize_state.size = 0;
             State::Next(StateName::AutolinkEmailAtSignOrDot)
         }
         Some(b'>') => {
-            tokenizer.tokenize_state.size = 0;
             let index = tokenizer.events.len();
             tokenizer.exit(Name::AutolinkProtocol);
             // Change the event name.
@@ -320,6 +319,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State {
             tokenizer.consume();
             tokenizer.exit(Name::AutolinkMarker);
             tokenizer.exit(Name::Autolink);
+            tokenizer.tokenize_state.size = 0;
             State::Ok
         }
         _ => State::Retry(StateName::AutolinkEmailValue),
diff --git a/src/construct/content.rs b/src/construct/content.rs
index 6c10cea..e29dac9 100644
--- a/src/construct/content.rs
+++ b/src/construct/content.rs
@@ -30,7 +30,7 @@ use crate::subtokenize::{subtokenize, Subresult};
 use crate::tokenizer::Tokenizer;
 use alloc::{string::String, vec};
 
-/// Before a content content.
+/// Before a content chunk.
 ///
 /// ```markdown
 /// > | abc
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 3de70de..c994ec7 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -197,8 +197,8 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
             State::Next(StateName::HtmlFlowTagCloseStart)
         }
         Some(b'?') => {
-            tokenizer.tokenize_state.marker = INSTRUCTION;
             tokenizer.consume();
+            tokenizer.tokenize_state.marker = INSTRUCTION;
             // Do not form containers.
             tokenizer.concrete = true;
             // While we’re in an instruction instead of a declaration, we’re on a `?`
@@ -273,8 +273,8 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
-        tokenizer.tokenize_state.size += 1;
         tokenizer.consume();
+        tokenizer.tokenize_state.size += 1;
 
         if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {
             tokenizer.tokenize_state.size = 0;
@@ -531,8 +531,8 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
     if tokenizer.current == Some(tokenizer.tokenize_state.marker_b) {
-        tokenizer.tokenize_state.marker_b = 0;
         tokenizer.consume();
+        tokenizer.tokenize_state.marker_b = 0;
         State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
     } else if matches!(tokenizer.current, None | Some(b'\n')) {
         tokenizer.tokenize_state.marker = 0;
@@ -640,8 +640,7 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State {
     } else if tokenizer.tokenize_state.marker == CDATA && tokenizer.current == Some(b']') {
         tokenizer.consume();
         State::Next(StateName::HtmlFlowContinuationCdataInside)
-    } else if (tokenizer.tokenize_state.marker == BASIC
-        || tokenizer.tokenize_state.marker == COMPLETE)
+    } else if matches!(tokenizer.tokenize_state.marker, BASIC | COMPLETE)
         && tokenizer.current == Some(b'\n')
     {
         tokenizer.exit(Name::HtmlFlowData);
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 7812da6..025d498 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -57,7 +57,7 @@ use crate::state::{Name as StateName, State};
 use crate::tokenizer::Tokenizer;
 use crate::util::constant::HTML_CDATA_PREFIX;
 
-/// Start of HTML (text)
+/// Start of HTML (text).
 ///
 /// ```markdown
 /// > | a <b> c
@@ -606,7 +606,7 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
 /// ```
 pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
     match tokenizer.current {
-        Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => State::Retry(StateName::HtmlTextTagOpenBetween),
+        Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(StateName::HtmlTextTagOpenBetween),
         _ => State::Nok,
     }
 }
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index f47920c..1c5e0bd 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -529,7 +529,7 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
 
     if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
         tokenizer.attempt(
-            State::Next(StateName::LabelEndResourceBetween),
+            State::Next(StateName::LabelEndResourceEnd),
             State::Next(StateName::LabelEndResourceEnd),
         );
         State::Retry(space_or_tab_eol(tokenizer))
@@ -571,7 +571,7 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State {
             tokenizer.tokenize_state.token_3 = Name::ReferenceString;
             tokenizer.attempt(
                 State::Next(StateName::LabelEndReferenceFullAfter),
-                State::Nok,
+                State::Next(StateName::LabelEndReferenceFullMissing),
             );
             State::Retry(StateName::LabelStart)
         }
@@ -615,6 +615,19 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
+/// In reference (full) that was missing.
+///
+/// ```markdown
+/// > | [a][b d
+///        ^
+/// ```
+pub fn reference_full_missing(tokenizer: &mut Tokenizer) -> State {
+    tokenizer.tokenize_state.token_1 = Name::Data;
+    tokenizer.tokenize_state.token_2 = Name::Data;
+    tokenizer.tokenize_state.token_3 = Name::Data;
+    State::Nok
+}
+
 /// In reference (collapsed), at `[`.
 ///
 /// > 👉 **Note**: we only get here if the label is defined.
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 4511794..784c264 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -68,6 +68,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
             tokenizer.enter(Name::LabelMarker);
             tokenizer.consume();
             tokenizer.exit(Name::LabelMarker);
+            tokenizer.exit(Name::LabelImage);
             State::Next(StateName::LabelStartImageAfter)
         }
         _ => State::Nok,
@@ -107,7 +108,6 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
     {
         State::Nok
     } else {
-        tokenizer.exit(Name::LabelImage);
         tokenizer.tokenize_state.label_starts.push(LabelStart {
             kind: LabelKind::Image,
             start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1),
diff --git a/src/construct/raw_flow.rs b/src/construct/raw_flow.rs
index 549c40d..15c6865 100644
--- a/src/construct/raw_flow.rs
+++ b/src/construct/raw_flow.rs
@@ -434,7 +434,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// At eol/eof in code, before a non-lazy closing fence or content.
+/// At eol/eof in raw, before a non-lazy closing fence or content.
 ///
 /// ```markdown
 /// > | ~~~js
@@ -552,7 +552,7 @@ pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// Before closing fence, at eol.
+/// Before raw content, not a closing fence, at eol.
 ///
 /// ```markdown
 ///   | ~~~js
@@ -567,7 +567,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State {
     State::Next(StateName::RawFlowContentStart)
 }
 
-/// Before code content, definitely not before a closing fence.
+/// Before raw content, not a closing fence.
 ///
 /// ```markdown
 ///   | ~~~js
@@ -591,7 +591,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// Before code content, after optional prefix.
+/// Before raw content, after optional prefix.
 ///
 /// ```markdown
 ///   | ~~~js
@@ -615,7 +615,7 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// In code content.
+/// In raw content.
 ///
 /// ```markdown
 ///   | ~~~js
diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs
index 9d5fdea..f8ee1c1 100644
--- a/src/construct/raw_text.rs
+++ b/src/construct/raw_text.rs
@@ -185,7 +185,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
     }
 }
 
-/// Between something and something else
+/// Between something and something else.
 ///
 /// ```markdown
 /// > | `a`
diff --git a/src/state.rs b/src/state.rs
index b013c39..427a388 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -316,6 +316,7 @@ pub enum Name {
     LabelEndNok,
     LabelEndReferenceFull,
     LabelEndReferenceFullAfter,
+    LabelEndReferenceFullMissing,
     LabelEndReferenceNotFull,
     LabelEndReferenceCollapsed,
     LabelEndReferenceCollapsedOpen,
@@ -801,6 +802,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
         Name::LabelEndNok => construct::label_end::nok,
         Name::LabelEndReferenceFull => construct::label_end::reference_full,
         Name::LabelEndReferenceFullAfter => construct::label_end::reference_full_after,
+        Name::LabelEndReferenceFullMissing => construct::label_end::reference_full_missing,
         Name::LabelEndReferenceNotFull => construct::label_end::reference_not_full,
         Name::LabelEndReferenceCollapsed => construct::label_end::reference_collapsed,
         Name::LabelEndReferenceCollapsedOpen => construct::label_end::reference_collapsed_open,
diff --git a/src/util/character_reference.rs b/src/util/character_reference.rs
index f28c135..c6d1797 100644
--- a/src/util/character_reference.rs
+++ b/src/util/character_reference.rs
@@ -15,7 +15,7 @@ use core::str;
 /// [`CHARACTER_REFERENCES`][] (or [`CHARACTER_REFERENCES_HTML_4`][]) and then
 /// takes the corresponding value from `1`.
 ///
-/// The `html5` boolean us used for named character references, and specifier
+/// The `html5` boolean is used for named character references, and specifier
 /// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
 /// supported.
 ///
@@ -82,9 +82,9 @@ pub fn decode_named(value: &str, html5: bool) -> Option<String> {
 pub fn decode_numeric(value: &str, radix: u32) -> String {
     if let Some(char) = char::from_u32(u32::from_str_radix(value, radix).unwrap()) {
         if !matches!(char,
-            // C0 except for HT, LF, FF, CR, space
+            // C0 except for HT, LF, FF, CR, space.
             '\0'..='\u{08}' | '\u{0B}' | '\u{0E}'..='\u{1F}' |
-            // Control character (DEL) of c0, and C1 controls.
+            // Control character (DEL) of C0, and C1 controls.
             '\u{7F}'..='\u{9F}'
             // Lone surrogates, noncharacters, and out of range are handled by
             // Rust.
@@ -104,7 +104,7 @@ pub fn decode_numeric(value: &str, radix: u32) -> String {
 /// The marker specifies the format: `#` for hexadecimal, `x` for decimal, and
 /// `&` for named.
 ///
-/// The `html5` boolean us used for named character references, and specifier
+/// The `html5` boolean is used for named character references, and specifier
 /// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
 /// supported.
 ///
