-rw-r--r--  src/construct/autolink.rs           |  8
-rw-r--r--  src/construct/content.rs            |  2
-rw-r--r--  src/construct/html_flow.rs          |  9
-rw-r--r--  src/construct/html_text.rs          |  4
-rw-r--r--  src/construct/label_end.rs          | 17
-rw-r--r--  src/construct/label_start_image.rs  |  2
-rw-r--r--  src/construct/raw_flow.rs           | 10
-rw-r--r--  src/construct/raw_text.rs           |  2
-rw-r--r--  src/state.rs                        |  2
-rw-r--r--  src/util/character_reference.rs     |  8
10 files changed, 39 insertions, 25 deletions
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 0819f3a..e8f30fb 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -126,7 +126,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};

-/// Start of autolink.
+/// Start of an autolink.
///
/// ```markdown
/// > | a<https://example.com>b
@@ -205,8 +205,8 @@ pub fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer) -> State {
Some(b'+' | b'-' | b'.' | b'0'..=b'9' | b'A'..=b'Z' | b'a'..=b'z')
if tokenizer.tokenize_state.size < AUTOLINK_SCHEME_SIZE_MAX =>
{
- tokenizer.tokenize_state.size += 1;
tokenizer.consume();
+ tokenizer.tokenize_state.size += 1;
State::Next(StateName::AutolinkSchemeInsideOrEmailAtext)
}
_ => {
@@ -305,12 +305,11 @@ pub fn email_at_sign_or_dot(tokenizer: &mut Tokenizer) -> State {
pub fn email_label(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
Some(b'.') => {
- tokenizer.tokenize_state.size = 0;
tokenizer.consume();
+ tokenizer.tokenize_state.size = 0;
State::Next(StateName::AutolinkEmailAtSignOrDot)
}
Some(b'>') => {
- tokenizer.tokenize_state.size = 0;
let index = tokenizer.events.len();
tokenizer.exit(Name::AutolinkProtocol);
// Change the event name.
@@ -320,6 +319,7 @@ pub fn email_label(tokenizer: &mut Tokenizer) -> State {
tokenizer.consume();
tokenizer.exit(Name::AutolinkMarker);
tokenizer.exit(Name::Autolink);
+ tokenizer.tokenize_state.size = 0;
State::Ok
}
_ => State::Retry(StateName::AutolinkEmailValue),
diff --git a/src/construct/content.rs b/src/construct/content.rs
index 6c10cea..e29dac9 100644
--- a/src/construct/content.rs
+++ b/src/construct/content.rs
@@ -30,7 +30,7 @@ use crate::subtokenize::{subtokenize, Subresult};
use crate::tokenizer::Tokenizer;
use alloc::{string::String, vec};

-/// Before a content content.
+/// Before a content chunk.
///
/// ```markdown
/// > | abc
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 3de70de..c994ec7 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -197,8 +197,8 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
State::Next(StateName::HtmlFlowTagCloseStart)
}
Some(b'?') => {
- tokenizer.tokenize_state.marker = INSTRUCTION;
tokenizer.consume();
+ tokenizer.tokenize_state.marker = INSTRUCTION;
// Do not form containers.
tokenizer.concrete = true;
// While we’re in an instruction instead of a declaration, we’re on a `?`
@@ -273,8 +273,8 @@ pub fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn cdata_open_inside(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(HTML_CDATA_PREFIX[tokenizer.tokenize_state.size]) {
- tokenizer.tokenize_state.size += 1;
tokenizer.consume();
+ tokenizer.tokenize_state.size += 1;

if tokenizer.tokenize_state.size == HTML_CDATA_PREFIX.len() {
tokenizer.tokenize_state.size = 0;
@@ -531,8 +531,8 @@ pub fn complete_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer) -> State {
if tokenizer.current == Some(tokenizer.tokenize_state.marker_b) {
- tokenizer.tokenize_state.marker_b = 0;
tokenizer.consume();
+ tokenizer.tokenize_state.marker_b = 0;
State::Next(StateName::HtmlFlowCompleteAttributeValueQuotedAfter)
} else if matches!(tokenizer.current, None | Some(b'\n')) {
tokenizer.tokenize_state.marker = 0;
@@ -640,8 +640,7 @@ pub fn continuation(tokenizer: &mut Tokenizer) -> State {
} else if tokenizer.tokenize_state.marker == CDATA && tokenizer.current == Some(b']') {
tokenizer.consume();
State::Next(StateName::HtmlFlowContinuationCdataInside)
- } else if (tokenizer.tokenize_state.marker == BASIC
- || tokenizer.tokenize_state.marker == COMPLETE)
+ } else if matches!(tokenizer.tokenize_state.marker, BASIC | COMPLETE)
&& tokenizer.current == Some(b'\n')
{
tokenizer.exit(Name::HtmlFlowData);
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 7812da6..025d498 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -57,7 +57,7 @@ use crate::state::{Name as StateName, State};
use crate::tokenizer::Tokenizer;
use crate::util::constant::HTML_CDATA_PREFIX;

-/// Start of HTML (text)
+/// Start of HTML (text).
///
/// ```markdown
/// > | a <b> c
@@ -606,7 +606,7 @@ pub fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Some(b'\t' | b'\n' | b' ' | b'>' | b'/') => State::Retry(StateName::HtmlTextTagOpenBetween),
+ Some(b'\t' | b'\n' | b' ' | b'/' | b'>') => State::Retry(StateName::HtmlTextTagOpenBetween),
_ => State::Nok,
}
}
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index f47920c..1c5e0bd 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -529,7 +529,7 @@ pub fn resource_title_after(tokenizer: &mut Tokenizer) -> State {
if matches!(tokenizer.current, Some(b'\t' | b'\n' | b' ')) {
tokenizer.attempt(
- State::Next(StateName::LabelEndResourceBetween),
+ State::Next(StateName::LabelEndResourceEnd),
State::Next(StateName::LabelEndResourceEnd),
);
State::Retry(space_or_tab_eol(tokenizer))
@@ -571,7 +571,7 @@ pub fn reference_full(tokenizer: &mut Tokenizer) -> State {
tokenizer.tokenize_state.token_3 = Name::ReferenceString;
tokenizer.attempt(
State::Next(StateName::LabelEndReferenceFullAfter),
- State::Nok,
+ State::Next(StateName::LabelEndReferenceFullMissing),
);
State::Retry(StateName::LabelStart)
}
@@ -615,6 +615,19 @@ pub fn reference_full_after(tokenizer: &mut Tokenizer) -> State {
}
}

+/// In reference (full) that was missing.
+///
+/// ```markdown
+/// > | [a][b d
+/// ^
+/// ```
+pub fn reference_full_missing(tokenizer: &mut Tokenizer) -> State {
+ tokenizer.tokenize_state.token_1 = Name::Data;
+ tokenizer.tokenize_state.token_2 = Name::Data;
+ tokenizer.tokenize_state.token_3 = Name::Data;
+ State::Nok
+}
+
/// In reference (collapsed), at `[`.
///
/// > 👉 **Note**: we only get here if the label is defined.
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 4511794..784c264 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -68,6 +68,7 @@ pub fn open(tokenizer: &mut Tokenizer) -> State {
tokenizer.enter(Name::LabelMarker);
tokenizer.consume();
tokenizer.exit(Name::LabelMarker);
+ tokenizer.exit(Name::LabelImage);
State::Next(StateName::LabelStartImageAfter)
}
_ => State::Nok,
@@ -107,7 +108,6 @@ pub fn after(tokenizer: &mut Tokenizer) -> State {
{
State::Nok
} else {
- tokenizer.exit(Name::LabelImage);
tokenizer.tokenize_state.label_starts.push(LabelStart {
kind: LabelKind::Image,
start: (tokenizer.events.len() - 6, tokenizer.events.len() - 1),
diff --git a/src/construct/raw_flow.rs b/src/construct/raw_flow.rs
index 549c40d..15c6865 100644
--- a/src/construct/raw_flow.rs
+++ b/src/construct/raw_flow.rs
@@ -434,7 +434,7 @@ pub fn meta(tokenizer: &mut Tokenizer) -> State {
}
}

-/// At eol/eof in code, before a non-lazy closing fence or content.
+/// At eol/eof in raw, before a non-lazy closing fence or content.
///
/// ```markdown
/// > | ~~~js
@@ -552,7 +552,7 @@ pub fn sequence_close_after(tokenizer: &mut Tokenizer) -> State {
}
}

-/// Before closing fence, at eol.
+/// Before raw content, not a closing fence, at eol.
///
/// ```markdown
/// | ~~~js
@@ -567,7 +567,7 @@ pub fn content_before(tokenizer: &mut Tokenizer) -> State {
State::Next(StateName::RawFlowContentStart)
}

-/// Before code content, definitely not before a closing fence.
+/// Before raw content, not a closing fence.
///
/// ```markdown
/// | ~~~js
@@ -591,7 +591,7 @@ pub fn content_start(tokenizer: &mut Tokenizer) -> State {
}
}

-/// Before code content, after optional prefix.
+/// Before raw content, after optional prefix.
///
/// ```markdown
/// | ~~~js
@@ -615,7 +615,7 @@ pub fn before_content_chunk(tokenizer: &mut Tokenizer) -> State {
}
}

-/// In code content.
+/// In raw content.
///
/// ```markdown
/// | ~~~js
diff --git a/src/construct/raw_text.rs b/src/construct/raw_text.rs
index 9d5fdea..f8ee1c1 100644
--- a/src/construct/raw_text.rs
+++ b/src/construct/raw_text.rs
@@ -185,7 +185,7 @@ pub fn sequence_open(tokenizer: &mut Tokenizer) -> State {
}
}

-/// Between something and something else
+/// Between something and something else.
///
/// ```markdown
/// > | `a`
diff --git a/src/state.rs b/src/state.rs
index b013c39..427a388 100644
--- a/src/state.rs
+++ b/src/state.rs
@@ -316,6 +316,7 @@ pub enum Name {
LabelEndNok,
LabelEndReferenceFull,
LabelEndReferenceFullAfter,
+ LabelEndReferenceFullMissing,
LabelEndReferenceNotFull,
LabelEndReferenceCollapsed,
LabelEndReferenceCollapsedOpen,
@@ -801,6 +802,7 @@ pub fn call(tokenizer: &mut Tokenizer, name: Name) -> State {
Name::LabelEndNok => construct::label_end::nok,
Name::LabelEndReferenceFull => construct::label_end::reference_full,
Name::LabelEndReferenceFullAfter => construct::label_end::reference_full_after,
+ Name::LabelEndReferenceFullMissing => construct::label_end::reference_full_missing,
Name::LabelEndReferenceNotFull => construct::label_end::reference_not_full,
Name::LabelEndReferenceCollapsed => construct::label_end::reference_collapsed,
Name::LabelEndReferenceCollapsedOpen => construct::label_end::reference_collapsed_open,
diff --git a/src/util/character_reference.rs b/src/util/character_reference.rs
index f28c135..c6d1797 100644
--- a/src/util/character_reference.rs
+++ b/src/util/character_reference.rs
@@ -15,7 +15,7 @@ use core::str;
/// [`CHARACTER_REFERENCES`][] (or [`CHARACTER_REFERENCES_HTML_4`][]) and then
/// takes the corresponding value from `1`.
///
-/// The `html5` boolean us used for named character references, and specifier
+/// The `html5` boolean is used for named character references, and specifier
/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
/// supported.
///
@@ -82,9 +82,9 @@ pub fn decode_named(value: &str, html5: bool) -> Option<String> {
pub fn decode_numeric(value: &str, radix: u32) -> String {
if let Some(char) = char::from_u32(u32::from_str_radix(value, radix).unwrap()) {
if !matches!(char,
- // C0 except for HT, LF, FF, CR, space
+ // C0 except for HT, LF, FF, CR, space.
'\0'..='\u{08}' | '\u{0B}' | '\u{0E}'..='\u{1F}' |
- // Control character (DEL) of c0, and C1 controls.
+ // Control character (DEL) of C0, and C1 controls.
'\u{7F}'..='\u{9F}'
// Lone surrogates, noncharacters, and out of range are handled by
// Rust.
@@ -104,7 +104,7 @@ pub fn decode_numeric(value: &str, radix: u32) -> String {
/// The marker specifies the format: `#` for hexadecimal, `x` for decimal, and
/// `&` for named.
///
-/// The `html5` boolean us used for named character references, and specifier
+/// The `html5` boolean is used for named character references, and specifier
/// whether the 2125 names from HTML 5 or the 252 names from HTML 4 are
/// supported.
///