Diffstat:

 src/compiler.rs                        |  58
 src/construct/autolink.rs              |   7
 src/construct/character_reference.rs   |  64
 src/construct/code_fenced.rs           |  68
 src/construct/code_text.rs             |  26
 src/construct/definition.rs            |  12
 src/construct/heading_setext.rs        |  56
 src/construct/html_flow.rs             | 462
 src/construct/html_text.rs             |  20
 src/construct/paragraph.rs             |  46
 src/construct/partial_label.rs         | 111
 src/construct/partial_space_or_tab.rs  |  70
 src/construct/partial_title.rs         |  91
 src/construct/thematic_break.rs        |  97
 src/content/string.rs                  |   8
 src/tokenizer.rs                       |   1
 16 files changed, 605 insertions(+), 592 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs
index 5c7f6d8..9bc2488 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -9,13 +9,37 @@ use crate::util::{
};
/// To do.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
pub enum LineEnding {
CarriageReturnLineFeed,
CarriageReturn,
LineFeed,
}
+impl LineEnding {
+ /// Turn the line ending into a [str].
+ fn as_str(&self) -> &str {
+ match self {
+ LineEnding::CarriageReturnLineFeed => "\r\n",
+ LineEnding::CarriageReturn => "\r",
+ LineEnding::LineFeed => "\n",
+ }
+ }
+ /// Turn a [Code] into a line ending.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `code` is not `\r\n`, `\r`, or `\n`.
+ fn from_code(code: Code) -> LineEnding {
+ match code {
+ Code::CarriageReturnLineFeed => LineEnding::CarriageReturnLineFeed,
+ Code::Char('\r') => LineEnding::CarriageReturn,
+ Code::Char('\n') => LineEnding::LineFeed,
+ _ => unreachable!("invalid code"),
+ }
+ }
+}
+
/// Configuration (optional).
#[derive(Default, Debug)]
pub struct Options {
@@ -120,29 +144,20 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
|| event.token_type == TokenType::LineEnding)
{
let codes = codes_from_span(codes, &from_exit_event(events, index));
- let code = *codes.first().unwrap();
- line_ending_inferred = Some(if code == Code::CarriageReturnLineFeed {
- LineEnding::CarriageReturnLineFeed
- } else if code == Code::Char('\r') {
- LineEnding::CarriageReturn
- } else {
- LineEnding::LineFeed
- });
+ line_ending_inferred = Some(LineEnding::from_code(*codes.first().unwrap()));
break;
}
index += 1;
}
- let line_ending_default: LineEnding;
-
- if let Some(value) = line_ending_inferred {
- line_ending_default = value;
+ let line_ending_default = if let Some(value) = line_ending_inferred {
+ value
} else if let Some(value) = &options.default_line_ending {
- line_ending_default = value.clone();
+ value.clone()
} else {
- line_ending_default = LineEnding::LineFeed;
- }
+ LineEnding::LineFeed
+ };
index = 0;
@@ -557,17 +572,8 @@ fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> {
/// Add a line ending.
fn line_ending(buffers: &mut [Vec<String>], default: &LineEnding) {
let tail = buf_tail_mut(buffers);
-
- println!("xxx: {:?}", default);
-
- let line_ending = match default {
- LineEnding::CarriageReturnLineFeed => "\r\n",
- LineEnding::CarriageReturn => "\r",
- LineEnding::LineFeed => "\n",
- };
-
// lastWasTag = false
- tail.push(line_ending.to_string());
+ tail.push(default.as_str().to_string());
}
/// Add a line ending if needed (as in, there’s no eol/eof already).
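The new `as_str`/`from_code` helpers centralize the mapping between eol codes and their serialized strings, which also removes the leftover debug `println!`. As a standalone illustration of that round-trip — using simplified stand-in types, not the crate's real `Code` and `LineEnding`, which carry more variants:

```rust
/// Simplified stand-ins for the crate's `Code` and `LineEnding` types.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Code {
    CarriageReturnLineFeed,
    Char(char),
}

#[derive(Debug, Clone, PartialEq)]
enum LineEnding {
    CarriageReturnLineFeed,
    CarriageReturn,
    LineFeed,
}

impl LineEnding {
    /// Turn the line ending into the string it serializes as.
    fn as_str(&self) -> &str {
        match self {
            LineEnding::CarriageReturnLineFeed => "\r\n",
            LineEnding::CarriageReturn => "\r",
            LineEnding::LineFeed => "\n",
        }
    }

    /// Classify an eol code; anything else is a caller bug.
    fn from_code(code: Code) -> LineEnding {
        match code {
            Code::CarriageReturnLineFeed => LineEnding::CarriageReturnLineFeed,
            Code::Char('\r') => LineEnding::CarriageReturn,
            Code::Char('\n') => LineEnding::LineFeed,
            _ => unreachable!("invalid code"),
        }
    }
}

fn main() {
    // Round-trip: a code is classified once, then serialized wherever needed.
    let eol = LineEnding::from_code(Code::Char('\r'));
    assert_eq!(eol, LineEnding::CarriageReturn);
    assert_eq!(eol.as_str(), "\r");
    assert_eq!(LineEnding::from_code(Code::CarriageReturnLineFeed).as_str(), "\r\n");
}
```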
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index c9596a6..8376b98 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -266,12 +266,11 @@ fn email_label(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnRes
)
}
Code::Char('>') => {
- let tail_index = tokenizer.events.len();
- let head_index = tokenizer.events.len() - 1;
+ let index = tokenizer.events.len();
tokenizer.exit(TokenType::AutolinkProtocol);
// Change the token type.
- tokenizer.events[head_index].token_type = TokenType::AutolinkEmail;
- tokenizer.events[tail_index].token_type = TokenType::AutolinkEmail;
+ tokenizer.events[index - 1].token_type = TokenType::AutolinkEmail;
+ tokenizer.events[index].token_type = TokenType::AutolinkEmail;
end(tokenizer, code)
}
_ => email_value(tokenizer, code, size),
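The change above relies on the enter event being the last event in the list right before `exit` pushes its counterpart, so a single length snapshot addresses both ends of the pair. A toy model of that retargeting, with a simplified `Event` standing in for the tokenizer's:

```rust
/// Toy event model, not the crate's: an enter/exit pair sharing a token type.
#[derive(Debug, Clone, Copy, PartialEq)]
enum Kind {
    Enter,
    Exit,
}

#[derive(Debug)]
struct Event {
    kind: Kind,
    token_type: &'static str,
}

fn main() {
    // The protocol was entered earlier; scanning has now proven it is an
    // email autolink instead.
    let mut events = vec![Event {
        kind: Kind::Enter,
        token_type: "AutolinkProtocol",
    }];

    // Snapshot the length *before* pushing the exit: the exit lands at
    // `index`, and (in this construct) the matching enter sits right before
    // it at `index - 1`, so one snapshot addresses both ends of the pair.
    let index = events.len();
    events.push(Event {
        kind: Kind::Exit,
        token_type: "AutolinkProtocol",
    });

    events[index - 1].token_type = "AutolinkEmail";
    events[index].token_type = "AutolinkEmail";

    assert_eq!(events[index].kind, Kind::Exit);
    assert!(events.iter().all(|e| e.token_type == "AutolinkEmail"));
}
```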
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index c946dae..decf852 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -59,7 +59,7 @@ use crate::constant::{
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Kind of a character reference.
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
pub enum Kind {
/// Numeric decimal character reference (`&#x9;`).
Decimal,
@@ -69,6 +69,28 @@ pub enum Kind {
Named,
}
+impl Kind {
+ /// Get the maximum size of characters allowed in a character reference.
+ fn max(&self) -> usize {
+ match self {
+ Kind::Hexadecimal => CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX,
+ Kind::Decimal => CHARACTER_REFERENCE_DECIMAL_SIZE_MAX,
+ Kind::Named => CHARACTER_REFERENCE_NAMED_SIZE_MAX,
+ }
+ }
+
+ /// Check if a char is allowed.
+ fn allowed(&self, char: char) -> bool {
+ let check = match self {
+ Kind::Hexadecimal => char::is_ascii_hexdigit,
+ Kind::Decimal => char::is_ascii_digit,
+ Kind::Named => char::is_ascii_alphanumeric,
+ };
+
+ check(&char)
+ }
+}
+
/// State needed to parse character references.
#[derive(Debug, Clone)]
struct Info {
@@ -141,10 +163,10 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::CharacterReferenceValue);
(
- State::Fn(Box::new(|tokenizer, code| {
+ State::Fn(Box::new(|t, c| {
value(
- tokenizer,
- code,
+ t,
+ c,
Info {
buffer: vec![],
kind: Kind::Hexadecimal,
@@ -179,7 +201,7 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// a&#1|23;b
/// a&#x|9;b
/// ```
-fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+fn value(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::Char(';') if !info.buffer.is_empty() => {
tokenizer.exit(TokenType::CharacterReferenceValue);
@@ -198,36 +220,10 @@ fn value(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
(State::Ok, None)
}
Code::Char(char) => {
- let len = info.buffer.len();
-
- let cont = match info.kind {
- Kind::Hexadecimal
- if char.is_ascii_hexdigit()
- && len < CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX =>
- {
- true
- }
- Kind::Decimal
- if char.is_ascii_digit() && len < CHARACTER_REFERENCE_DECIMAL_SIZE_MAX =>
- {
- true
- }
- Kind::Named
- if char.is_ascii_alphanumeric() && len < CHARACTER_REFERENCE_NAMED_SIZE_MAX =>
- {
- true
- }
- _ => false,
- };
-
- if cont {
- let mut clone = info;
- clone.buffer.push(char);
+ if info.buffer.len() < info.kind.max() && info.kind.allowed(char) {
+ info.buffer.push(char);
tokenizer.consume(code);
- (
- State::Fn(Box::new(|tokenizer, code| value(tokenizer, code, clone))),
- None,
- )
+ (State::Fn(Box::new(|t, c| value(t, c, info))), None)
} else {
(State::Nok, None)
}
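The `max`/`allowed` helpers fold three near-identical guard arms into data on `Kind`; the match arms in `allowed` evaluate to function values, so the check itself is a single call. A self-contained sketch with stand-in size limits rather than the crate's real constants:

```rust
/// Stand-in limits; the crate uses its own CHARACTER_REFERENCE_*_SIZE_MAX constants.
const HEX_MAX: usize = 6;
const DECIMAL_MAX: usize = 7;
const NAMED_MAX: usize = 31;

#[derive(Debug, Clone, PartialEq)]
enum Kind {
    Decimal,
    Hexadecimal,
    Named,
}

impl Kind {
    /// Maximum number of value characters for this kind of reference.
    fn max(&self) -> usize {
        match self {
            Kind::Hexadecimal => HEX_MAX,
            Kind::Decimal => DECIMAL_MAX,
            Kind::Named => NAMED_MAX,
        }
    }

    /// Whether `char` may appear in the value of this kind of reference.
    /// Each arm is a function value (`fn(&char) -> bool`), so the check
    /// reduces to one call on the selected function.
    fn allowed(&self, char: char) -> bool {
        let check = match self {
            Kind::Hexadecimal => char::is_ascii_hexdigit,
            Kind::Decimal => char::is_ascii_digit,
            Kind::Named => char::is_ascii_alphanumeric,
        };

        check(&char)
    }
}

fn main() {
    let kind = Kind::Hexadecimal;
    assert!(kind.allowed('f'));
    assert!(!kind.allowed('g'));
    assert!(Kind::Named.allowed('z') && !Kind::Decimal.allowed('a'));

    // The caller combines both checks, as the patched `value` state does.
    let buffer = vec!['1', 'f'];
    assert!(buffer.len() < kind.max() && kind.allowed('9'));
}
```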
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 30ec911..f79705c 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -99,11 +99,49 @@ use crate::util::span::from_exit_event;
#[derive(Debug, Clone, PartialEq)]
pub enum Kind {
/// Grave accent (tick) code.
+ ///
+ /// ## Example
+ ///
+ /// ````markdown
+ /// ```rust
+ /// println!("I <3 🦀");
+ /// ```
+ /// ````
GraveAccent,
/// Tilde code.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// ~~~rust
+ /// println!("I <3 🦀");
+ /// ~~~
+ /// ```
Tilde,
}
+impl Kind {
+ /// Turn the kind into a [char].
+ fn as_char(&self) -> char {
+ match self {
+ Kind::GraveAccent => '`',
+ Kind::Tilde => '~',
+ }
+ }
+ /// Turn a [char] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `char` is not `~` or `` ` ``.
+ fn from_char(char: char) -> Kind {
+ match char {
+ '`' => Kind::GraveAccent,
+ '~' => Kind::Tilde,
+ _ => unreachable!("invalid char"),
+ }
+ }
+}
+
/// State needed to parse code (fenced).
#[derive(Debug, Clone)]
struct Info {
@@ -160,11 +198,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
Info {
prefix,
size: 0,
- kind: if char == '`' {
- Kind::GraveAccent
- } else {
- Kind::Tilde
- },
+ kind: Kind::from_char(char),
},
)
}
@@ -180,14 +214,8 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
/// ~~~
/// ```
fn sequence_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- let marker = if info.kind == Kind::GraveAccent {
- '`'
- } else {
- '~'
- };
-
match code {
- Code::Char(char) if char == marker => {
+ Code::Char(char) if char == info.kind.as_char() => {
tokenizer.consume(code);
(
State::Fn(Box::new(|t, c| {
@@ -375,14 +403,8 @@ fn close_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResu
/// |~~~
/// ```
fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
- let marker = if info.kind == Kind::GraveAccent {
- '`'
- } else {
- '~'
- };
-
match code {
- Code::Char(char) if char == marker => {
+ Code::Char(char) if char == info.kind.as_char() => {
tokenizer.enter(TokenType::CodeFencedFenceSequence);
close_sequence(tokenizer, code, info, 0)
}
@@ -398,14 +420,8 @@ fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes
/// ~|~~
/// ```
fn close_sequence(tokenizer: &mut Tokenizer, code: Code, info: Info, size: usize) -> StateFnResult {
- let marker = if info.kind == Kind::GraveAccent {
- '`'
- } else {
- '~'
- };
-
match code {
- Code::Char(char) if char == marker => {
+ Code::Char(char) if char == info.kind.as_char() => {
tokenizer.consume(code);
(
State::Fn(Box::new(move |t, c| close_sequence(t, c, info, size + 1))),
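This `as_char`/`from_char` pairing is the template the rest of the commit reuses for setext underlines, title quotes, and thematic-break markers: classify the opening marker once, then ask the kind for its character instead of re-deriving it in every state. A minimal standalone version of the pattern:

```rust
/// Simplified sketch of the fence-marker kind used above.
#[derive(Debug, Clone, PartialEq)]
enum Kind {
    GraveAccent,
    Tilde,
}

impl Kind {
    /// The marker character this kind corresponds to.
    fn as_char(&self) -> char {
        match self {
            Kind::GraveAccent => '`',
            Kind::Tilde => '~',
        }
    }

    /// Classify a marker character; anything else is a caller bug.
    fn from_char(char: char) -> Kind {
        match char {
            '`' => Kind::GraveAccent,
            '~' => Kind::Tilde,
            _ => unreachable!("invalid char"),
        }
    }
}

fn main() {
    // The opening marker decides the kind once...
    let kind = Kind::from_char('~');
    // ...and every later comparison asks the kind for its character,
    // instead of repeating an `if kind == Kind::GraveAccent { .. }` chain.
    assert_eq!(kind.as_char(), '~');
    assert!(matches!(kind, Kind::Tilde));
}
```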
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index 1f34e41..94e0106 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -113,9 +113,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnR
if let Code::Char('`') = code {
tokenizer.consume(code);
(
- State::Fn(Box::new(move |tokenizer, code| {
- sequence_open(tokenizer, code, size + 1)
- })),
+ State::Fn(Box::new(move |t, c| sequence_open(t, c, size + 1))),
None,
)
} else {
@@ -138,9 +136,7 @@ fn between(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnRe
tokenizer.consume(code);
tokenizer.exit(TokenType::CodeTextLineEnding);
(
- State::Fn(Box::new(move |tokenizer, code| {
- between(tokenizer, code, size_open)
- })),
+ State::Fn(Box::new(move |t, c| between(t, c, size_open))),
None,
)
}
@@ -168,12 +164,7 @@ fn data(tokenizer: &mut Tokenizer, code: Code, size_open: usize) -> StateFnResul
}
_ => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |tokenizer, code| {
- data(tokenizer, code, size_open)
- })),
- None,
- )
+ (State::Fn(Box::new(move |t, c| data(t, c, size_open))), None)
}
}
}
@@ -193,8 +184,8 @@ fn sequence_close(
Code::Char('`') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(move |tokenizer, code| {
- sequence_close(tokenizer, code, size_open, size + 1)
+ State::Fn(Box::new(move |t, c| {
+ sequence_close(t, c, size_open, size + 1)
})),
None,
)
@@ -205,12 +196,11 @@ fn sequence_close(
(State::Ok, Some(vec![code]))
}
_ => {
- let tail_index = tokenizer.events.len();
- let head_index = tokenizer.events.len() - 1;
+ let index = tokenizer.events.len();
tokenizer.exit(TokenType::CodeTextSequence);
// Change the token type.
- tokenizer.events[head_index].token_type = TokenType::CodeTextData;
- tokenizer.events[tail_index].token_type = TokenType::CodeTextData;
+ tokenizer.events[index - 1].token_type = TokenType::CodeTextData;
+ tokenizer.events[index].token_type = TokenType::CodeTextData;
between(tokenizer, code, size_open)
}
}
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index 03baee6..61c4d34 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -144,16 +144,14 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let event = tokenizer.events.last().unwrap();
- // Blank line not ok.
- let char_nok = matches!(
- code,
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
- );
// Whitespace.
if (event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace)
- && !char_nok
- {
+ // Blank line not ok.
+ && !matches!(
+ code,
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+ ) {
tokenizer.go(destination, destination_after)(tokenizer, code)
} else {
(State::Nok, None)
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index a418041..7c41855 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -57,12 +57,49 @@ use crate::util::{link::link, span::from_exit_event};
/// Kind of underline.
#[derive(Debug, Clone, PartialEq)]
pub enum Kind {
- /// Grave accent (tick) code.
+ /// Dash (rank 2) heading.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// alpha
+ /// -----
+ /// ```
Dash,
- /// Tilde code.
+
+ /// Equals to (rank 1) heading.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// alpha
+ /// =====
+ /// ```
EqualsTo,
}
+impl Kind {
+ /// Turn the kind into a [char].
+ fn as_char(&self) -> char {
+ match self {
+ Kind::Dash => '-',
+ Kind::EqualsTo => '=',
+ }
+ }
+ /// Turn a [char] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `char` is not `-` or `=`.
+ fn from_char(char: char) -> Kind {
+ match char {
+ '-' => Kind::Dash,
+ '=' => Kind::EqualsTo,
+ _ => unreachable!("invalid char"),
+ }
+ }
+}
+
/// Start of a heading (setext).
///
/// ```markdown
@@ -232,13 +269,8 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes
match code {
Code::Char(char) if char == '-' || char == '=' => {
- let marker = if char == '-' {
- Kind::Dash
- } else {
- Kind::EqualsTo
- };
tokenizer.enter(TokenType::HeadingSetextUnderline);
- underline_sequence_inside(tokenizer, code, marker)
+ underline_sequence_inside(tokenizer, code, Kind::from_char(char))
}
_ => (State::Nok, None),
}
@@ -251,15 +283,11 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes
/// =|=
/// ```
fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
- let marker = if kind == Kind::Dash { '-' } else { '=' };
-
match code {
- Code::Char(char) if char == marker => {
+ Code::Char(char) if char == kind.as_char() => {
tokenizer.consume(code);
(
- State::Fn(Box::new(move |tokenizer, code| {
- underline_sequence_inside(tokenizer, code, kind)
- })),
+ State::Fn(Box::new(move |t, c| underline_sequence_inside(t, c, kind))),
None,
)
}
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 4819e63..d5937c5 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -97,10 +97,8 @@ use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::sp
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Kind of HTML (flow).
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, PartialEq)]
enum Kind {
- /// Not yet known.
- Unknown,
/// Symbol for `<script>` (condition 1).
Raw,
/// Symbol for `<!---->` (condition 2).
@@ -117,19 +115,51 @@ enum Kind {
Complete,
}
-/// Type of quote, if we’re in an attribure, in complete (condition 7).
-#[derive(Debug, Clone, PartialEq)]
+/// Type of quote, if we’re in a quoted attribute, in complete (condition 7).
+#[derive(Debug, PartialEq)]
enum QuoteKind {
- /// Not in a quoted attribute.
- None,
- /// In a double quoted (`"`) attribute.
+ /// In a double quoted (`"`) attribute value.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// <a b="c" />
+ /// ```
Double,
- /// In a single quoted (`"`) attribute.
+ /// In a single quoted (`'`) attribute value.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// <a b='c' />
+ /// ```
Single,
}
+impl QuoteKind {
+ /// Turn the kind into a [char].
+ fn as_char(&self) -> char {
+ match self {
+ QuoteKind::Double => '"',
+ QuoteKind::Single => '\'',
+ }
+ }
+ /// Turn a [char] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `char` is not `"` or `'`.
+ fn from_char(char: char) -> QuoteKind {
+ match char {
+ '"' => QuoteKind::Double,
+ '\'' => QuoteKind::Single,
+ _ => unreachable!("invalid char"),
+ }
+ }
+}
+
/// State needed to parse HTML (flow).
-#[derive(Debug, Clone)]
+#[derive(Debug)]
struct Info {
/// Kind of HTML (flow).
kind: Kind,
@@ -141,7 +171,7 @@ struct Info {
/// `index` into `buffer` when expecting certain characters.
index: usize,
/// Current quote, when in a double or single quoted attribute value.
- quote: QuoteKind,
+ quote: Option<QuoteKind>,
}
// To do: mark as concrete (block quotes or lists can’t “pierce” into HTML).
@@ -166,22 +196,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
if Code::Char('<') == code {
tokenizer.consume(code);
- (
- State::Fn(Box::new(|tokenizer, code| {
- open(
- tokenizer,
- Info {
- kind: Kind::Unknown,
- start_tag: false,
- buffer: vec![],
- index: 0,
- quote: QuoteKind::None,
- },
- code,
- )
- })),
- None,
- )
+ (State::Fn(Box::new(open)), None)
} else {
(State::Nok, None)
}
@@ -194,45 +209,44 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// <|!doctype>
/// <|!--xxx-->
/// ```
-fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ let mut info = Info {
+ // Assume basic.
+ kind: Kind::Basic,
+ start_tag: false,
+ buffer: vec![],
+ index: 0,
+ quote: None,
+ };
+
match code {
Code::Char('!') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- declaration_open(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| declaration_open(t, c, info))),
None,
)
}
Code::Char('/') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- tag_close_start(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| tag_close_start(t, c, info))),
None,
)
}
Code::Char('?') => {
- // To do: life times.
- let mut clone = info;
- clone.kind = Kind::Instruction;
+ info.kind = Kind::Instruction;
tokenizer.consume(code);
// While we’re in an instruction instead of a declaration, we’re on a `?`
// right now, so we do need to search for `>`, similar to declarations.
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_declaration_inside(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
)
}
Code::Char(char) if char.is_ascii_alphabetic() => {
- // To do: life times.
- let mut clone = info;
- clone.start_tag = true;
- tag_name(tokenizer, clone, code)
+ info.start_tag = true;
+ tag_name(tokenizer, code, info)
}
_ => (State::Nok, None),
}
@@ -245,41 +259,31 @@ fn open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
/// <!|--xxx-->
/// <!|[CDATA[>&<]]>
/// ```
-fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn declaration_open(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::Char('-') => {
tokenizer.consume(code);
- let mut clone = info;
- clone.kind = Kind::Comment;
+ info.kind = Kind::Comment;
(
- State::Fn(Box::new(|tokenizer, code| {
- comment_open_inside(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| comment_open_inside(t, c, info))),
None,
)
}
Code::Char('[') => {
tokenizer.consume(code);
- let mut clone = info;
- clone.kind = Kind::Cdata;
- clone.buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
- clone.index = 0;
+ info.kind = Kind::Cdata;
+ info.buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
+ info.index = 0;
(
- State::Fn(Box::new(|tokenizer, code| {
- cdata_open_inside(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))),
None,
)
}
Code::Char(char) if char.is_ascii_alphabetic() => {
tokenizer.consume(code);
- // To do: life times.
- let mut clone = info;
- clone.kind = Kind::Declaration;
+ info.kind = Kind::Declaration;
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_declaration_inside(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
)
}
@@ -292,14 +296,12 @@ fn declaration_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateF
/// ```markdown
/// <!-|-xxx-->
/// ```
-fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn comment_open_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('-') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_declaration_inside(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
)
}
@@ -316,26 +318,18 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta
/// <![CDAT|A[>&<]]>
/// <![CDATA|[>&<]]>
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn cdata_open_inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::Char(char) if char == info.buffer[info.index] => {
- let mut clone = info;
- clone.index += 1;
+ info.index += 1;
tokenizer.consume(code);
- if clone.index == clone.buffer.len() {
- clone.buffer.clear();
- (
- State::Fn(Box::new(|tokenizer, code| {
- continuation(tokenizer, clone, code)
- })),
- None,
- )
+ if info.index == info.buffer.len() {
+ info.buffer.clear();
+ (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
} else {
(
- State::Fn(Box::new(|tokenizer, code| {
- cdata_open_inside(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| cdata_open_inside(t, c, info))),
None,
)
}
@@ -349,17 +343,12 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> State
/// ```markdown
/// </|x>
/// ```
-fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn tag_close_start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::Char(char) if char.is_ascii_alphabetic() => {
tokenizer.consume(code);
- // To do: life times.
- let mut clone = info;
- clone.buffer.push(char);
- (
- State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))),
- None,
- )
+ info.buffer.push(char);
+ (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None)
}
_ => (State::Nok, None),
}
@@ -371,7 +360,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFn
/// <a|b>
/// </a|b>
/// ```
-fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::None
| Code::CarriageReturnLineFeed
@@ -385,50 +374,37 @@ fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult
false
};
+ info.buffer.clear();
+
if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) {
- // To do: life times.
- let mut clone = info;
- clone.kind = Kind::Raw;
- clone.buffer.clear();
- continuation(tokenizer, clone, code)
+ info.kind = Kind::Raw;
+ continuation(tokenizer, code, info)
} else if HTML_BLOCK_NAMES.contains(&name) {
- // To do: life times.
- let mut clone = info;
- clone.kind = Kind::Basic;
- clone.buffer.clear();
-
+ // Basic is assumed, no need to set `kind`.
if slash {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- basic_self_closing(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| basic_self_closing(t, c, info))),
None,
)
} else {
- continuation(tokenizer, clone, code)
+ continuation(tokenizer, code, info)
}
} else {
- // To do: life times.
- let mut clone = info;
- clone.kind = Kind::Complete;
+ info.kind = Kind::Complete;
// To do: do not support complete HTML when interrupting.
- if clone.start_tag {
- complete_attribute_name_before(tokenizer, clone, code)
+ if info.start_tag {
+ complete_attribute_name_before(tokenizer, code, info)
} else {
- complete_closing_tag_after(tokenizer, clone, code)
+ complete_closing_tag_after(tokenizer, code, info)
}
}
}
Code::Char(char) if char == '-' || char.is_ascii_alphanumeric() => {
tokenizer.consume(code);
- let mut clone = info;
- clone.buffer.push(char);
- (
- State::Fn(Box::new(|tokenizer, code| tag_name(tokenizer, clone, code))),
- None,
- )
+ info.buffer.push(char);
+ (State::Fn(Box::new(|t, c| tag_name(t, c, info))), None)
}
Code::Char(_) => (State::Nok, None),
}
@@ -439,16 +415,11 @@ fn tag_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult
/// ```markdown
/// <div/|>
/// ```
-fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn basic_self_closing(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('>') => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(|tokenizer, code| {
- continuation(tokenizer, info, code)
- })),
- None,
- )
+ (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
}
_ => (State::Nok, None),
}
@@ -460,18 +431,16 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Stat
/// <x/|>
/// </x/|>
/// ```
-fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn complete_closing_tag_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_closing_tag_after(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_closing_tag_after(t, c, info))),
None,
)
}
- _ => complete_end(tokenizer, info, code),
+ _ => complete_end(tokenizer, code, info),
}
}
@@ -492,38 +461,29 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info, code: Code)
/// ```
fn complete_attribute_name_before(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ info: Info,
) -> StateFnResult {
match code {
Code::Char('/') => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(|tokenizer, code| {
- complete_end(tokenizer, info, code)
- })),
- None,
- )
+ (State::Fn(Box::new(|t, c| complete_end(t, c, info))), None)
}
Code::Char(char) if char == ':' || char == '_' || char.is_ascii_alphabetic() => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_name(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))),
None,
)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_name_before(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_name_before(t, c, info))),
None,
)
}
- _ => complete_end(tokenizer, info, code),
+ _ => complete_end(tokenizer, code, info),
}
}
@@ -534,7 +494,7 @@ fn complete_attribute_name_before(
/// <x _|>
/// <x a|>
/// ```
-fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn complete_attribute_name(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char(char)
if char == '-'
@@ -545,13 +505,11 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) ->
{
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_name(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_name(t, c, info))),
None,
)
}
- _ => complete_attribute_name_after(tokenizer, info, code),
+ _ => complete_attribute_name_after(tokenizer, code, info),
}
}
@@ -565,29 +523,25 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info, code: Code) ->
/// ```
fn complete_attribute_name_after(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ info: Info,
) -> StateFnResult {
match code {
Code::Char('=') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_value_before(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))),
None,
)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_name_after(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_name_after(t, c, info))),
None,
)
}
- _ => complete_attribute_name_before(tokenizer, info, code),
+ _ => complete_attribute_name_before(tokenizer, code, info),
}
}
@@ -600,38 +554,27 @@ fn complete_attribute_name_after(
/// ```
fn complete_attribute_value_before(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ mut info: Info,
) -> StateFnResult {
match code {
Code::None | Code::Char('<' | '=' | '>' | '`') => (State::Nok, None),
Code::Char(char) if char == '"' || char == '\'' => {
tokenizer.consume(code);
- // To do: life times.
- let mut clone = info;
- clone.quote = if char == '"' {
- QuoteKind::Double
- } else {
- QuoteKind::Single
- };
-
+ info.quote = Some(QuoteKind::from_char(char));
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_value_quoted(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))),
None,
)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_value_before(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_value_before(t, c, info))),
None,
)
}
- _ => complete_attribute_value_unquoted(tokenizer, info, code),
+ _ => complete_attribute_value_unquoted(tokenizer, code, info),
}
}
@@ -643,22 +586,16 @@ fn complete_attribute_value_before(
/// ```
fn complete_attribute_value_quoted(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ info: Info,
) -> StateFnResult {
- let marker = if info.quote == QuoteKind::Double {
- '"'
- } else {
- '\''
- };
-
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
- Code::Char(char) if char == marker => {
+ Code::Char(char) if char == info.quote.as_ref().unwrap().as_char() => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_value_quoted_after(tokenizer, info, code)
+ State::Fn(Box::new(|t, c| {
+ complete_attribute_value_quoted_after(t, c, info)
})),
None,
)
@@ -666,9 +603,7 @@ fn complete_attribute_value_quoted(
_ => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_value_quoted(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| complete_attribute_value_quoted(t, c, info))),
None,
)
}
@@ -682,21 +617,21 @@ fn complete_attribute_value_quoted(
/// ```
fn complete_attribute_value_unquoted(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ info: Info,
) -> StateFnResult {
match code {
Code::None
| Code::CarriageReturnLineFeed
| Code::VirtualSpace
| Code::Char('\t' | '\n' | '\r' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => {
- complete_attribute_name_after(tokenizer, info, code)
+ complete_attribute_name_after(tokenizer, code, info)
}
Code::Char(_) => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- complete_attribute_value_unquoted(tokenizer, info, code)
+ State::Fn(Box::new(|t, c| {
+ complete_attribute_value_unquoted(t, c, info)
})),
None,
)
@@ -712,12 +647,12 @@ fn complete_attribute_value_unquoted(
/// ```
fn complete_attribute_value_quoted_after(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ info: Info,
) -> StateFnResult {
match code {
Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => {
- complete_attribute_name_before(tokenizer, info, code)
+ complete_attribute_name_before(tokenizer, code, info)
}
_ => (State::Nok, None),
}
@@ -728,16 +663,11 @@ fn complete_attribute_value_quoted_after(
/// ```markdown
/// <x a="b"|>
/// ```
-fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn complete_end(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('>') => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(|tokenizer, code| {
- complete_after(tokenizer, info, code)
- })),
- None,
- )
+ (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None)
}
_ => (State::Nok, None),
}
@@ -748,19 +678,14 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes
/// ```markdown
/// <x>|
/// ```
-fn complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn complete_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- continuation(tokenizer, info, code)
+ continuation(tokenizer, code, info)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(|tokenizer, code| {
- complete_after(tokenizer, info, code)
- })),
- None,
- )
+ (State::Fn(Box::new(|t, c| complete_after(t, c, info))), None)
}
Code::Char(_) => (State::Nok, None),
}
@@ -771,49 +696,41 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnR
/// ```markdown
/// <!--x|xx-->
/// ```
-fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('-') if info.kind == Kind::Comment => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_comment_inside(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_comment_inside(t, c, info))),
None,
)
}
Code::Char('<') if info.kind == Kind::Raw => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_raw_tag_open(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_raw_tag_open(t, c, info))),
None,
)
}
Code::Char('>') if info.kind == Kind::Declaration => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_close(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_close(t, c, info))),
None,
)
}
Code::Char('?') if info.kind == Kind::Instruction => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_declaration_inside(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
)
}
Code::Char(']') if info.kind == Kind::Cdata => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_character_data_inside(tokenizer, info, code)
+ State::Fn(Box::new(|t, c| {
+ continuation_character_data_inside(t, c, info)
})),
None,
)
@@ -821,27 +738,21 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
if info.kind == Kind::Basic || info.kind == Kind::Complete =>
{
- let clone = info;
-
tokenizer.check(blank_line_before, |ok| {
- if ok {
- Box::new(|tokenizer, code| continuation_close(tokenizer, clone, code))
+ let func = if ok {
+ continuation_close
} else {
- Box::new(|tokenizer, code| continuation_at_line_ending(tokenizer, clone, code))
- }
+ continuation_at_line_ending
+ };
+ Box::new(move |t, c| func(t, c, info))
})(tokenizer, code)
}
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- continuation_at_line_ending(tokenizer, info, code)
+ continuation_at_line_ending(tokenizer, code, info)
}
_ => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(|tokenizer, code| {
- continuation(tokenizer, info, code)
- })),
- None,
- )
+ (State::Fn(Box::new(|t, c| continuation(t, c, info))), None)
}
}
}
@@ -851,9 +762,9 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes
/// ```markdown
/// <x>|
/// ```
-fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
tokenizer.exit(TokenType::HtmlFlowData);
- html_continue_start(tokenizer, info, code)
+ html_continue_start(tokenizer, code, info)
}
/// In continuation, after an eol.
@@ -862,7 +773,7 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, info: Info, code: Code
/// <x>|
/// asd
/// ```
-fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None => {
tokenizer.exit(TokenType::HtmlFlow);
@@ -874,15 +785,13 @@ fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
(
- State::Fn(Box::new(|tokenizer, code| {
- html_continue_start(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| html_continue_start(t, c, info))),
None,
)
}
_ => {
tokenizer.enter(TokenType::HtmlFlowData);
- continuation(tokenizer, info, code)
+ continuation(tokenizer, code, info)
}
}
}
@@ -892,18 +801,16 @@ fn html_continue_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Sta
/// ```markdown
/// <!--xxx-|->
/// ```
-fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn continuation_comment_inside(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
- Code::Char('-') if info.kind == Kind::Comment => {
+ Code::Char('-') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_declaration_inside(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
)
}
- _ => continuation(tokenizer, info, code),
+ _ => continuation(tokenizer, code, info),
}
}
@@ -912,18 +819,16 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info, code: Code
/// ```markdown
/// <script>console.log(1)<|/script>
/// ```
-fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::Char('/') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_raw_end_tag(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))),
None,
)
}
- _ => continuation(tokenizer, info, code),
+ _ => continuation(tokenizer, code, info),
}
}
@@ -934,39 +839,38 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info, code: Code)
/// <script>console.log(1)</s|cript>
/// <script>console.log(1)</script|>
/// ```
-fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn continuation_raw_end_tag(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ mut info: Info,
+) -> StateFnResult {
match code {
Code::Char('>') => {
let tag_name_buffer = info.buffer.iter().collect::<String>().to_lowercase();
- // To do: life times.
- let mut clone = info;
- clone.buffer.clear();
+ info.buffer.clear();
if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_close(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_close(t, c, info))),
None,
)
} else {
- continuation(tokenizer, clone, code)
+ continuation(tokenizer, code, info)
}
}
Code::Char(char) if char.is_ascii_alphabetic() && info.buffer.len() < HTML_RAW_SIZE_MAX => {
tokenizer.consume(code);
- // To do: life times.
- let mut clone = info;
- clone.buffer.push(char);
+ info.buffer.push(char);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_raw_end_tag(tokenizer, clone, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_raw_end_tag(t, c, info))),
None,
)
}
- _ => continuation(tokenizer, info, code),
+ _ => {
+ info.buffer.clear();
+ continuation(tokenizer, code, info)
+ }
}
}
@@ -977,20 +881,18 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, info: Info, code: Code) -
/// ```
fn continuation_character_data_inside(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ info: Info,
) -> StateFnResult {
match code {
Code::Char(']') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_declaration_inside(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
)
}
- _ => continuation(tokenizer, info, code),
+ _ => continuation(tokenizer, code, info),
}
}
@@ -1008,29 +910,25 @@ fn continuation_character_data_inside(
/// ```
fn continuation_declaration_inside(
tokenizer: &mut Tokenizer,
- info: Info,
code: Code,
+ info: Info,
) -> StateFnResult {
match code {
Code::Char('>') => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_close(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_close(t, c, info))),
None,
)
}
Code::Char('-') if info.kind == Kind::Comment => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_declaration_inside(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_declaration_inside(t, c, info))),
None,
)
}
- _ => continuation(tokenizer, info, code),
+ _ => continuation(tokenizer, code, info),
}
}
@@ -1039,7 +937,7 @@ fn continuation_declaration_inside(
/// ```markdown
/// <!doctype>|
/// ```
-fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
tokenizer.exit(TokenType::HtmlFlowData);
@@ -1049,9 +947,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, info: Info, code: Code) -> Stat
_ => {
tokenizer.consume(code);
(
- State::Fn(Box::new(|tokenizer, code| {
- continuation_close(tokenizer, info, code)
- })),
+ State::Fn(Box::new(|t, c| continuation_close(t, c, info))),
None,
)
}
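Most of the churn in this file has the same shape: `Info` is now owned and moved into each boxed `FnOnce` state function, so the `let mut clone = info;` copies disappear, and the absent-quote case becomes `Option<QuoteKind>` instead of a sentinel variant. A cut-down model of that style, with deliberately simplified `Tokenizer`, `State`, and `Info` types that are not the crate's own:

```rust
/// Minimal model of the tokenizer's state machine: states are `FnOnce`
/// boxes that own whatever state they need.
struct Tokenizer {
    consumed: Vec<char>,
}

type StateFn = dyn FnOnce(&mut Tokenizer, char) -> State;

enum State {
    Fn(Box<StateFn>),
    Ok,
}

#[derive(Debug)]
struct Info {
    /// `Some(..)` only while inside a quoted attribute value, replacing a
    /// dedicated `QuoteKind::None` variant.
    quote: Option<char>,
}

fn attribute_value_before(tokenizer: &mut Tokenizer, code: char, mut info: Info) -> State {
    if code == '"' || code == '\'' {
        tokenizer.consumed.push(code);
        info.quote = Some(code);
        // `info` is moved into the closure: no clone, and `FnOnce` is all
        // the machine ever calls a state as.
        State::Fn(Box::new(move |t, c| attribute_value_quoted(t, c, info)))
    } else {
        State::Ok
    }
}

fn attribute_value_quoted(tokenizer: &mut Tokenizer, code: char, info: Info) -> State {
    tokenizer.consumed.push(code);
    if Some(code) == info.quote {
        State::Ok
    } else {
        State::Fn(Box::new(move |t, c| attribute_value_quoted(t, c, info)))
    }
}

fn main() {
    let mut tokenizer = Tokenizer { consumed: vec![] };
    let info = Info { quote: None };

    // Drive the machine by hand over `"x"`.
    let mut state = attribute_value_before(&mut tokenizer, '"', info);
    for code in ['x', '"'] {
        state = match state {
            State::Fn(f) => f(&mut tokenizer, code),
            State::Ok => break,
        };
    }
    assert!(matches!(state, State::Ok));
    assert_eq!(tokenizer.consumed, vec!['"', 'x', '"']);
}
```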
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index a91113f..c4d2353 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -114,9 +114,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.consume(code);
let buffer = vec!['C', 'D', 'A', 'T', 'A', '['];
(
- State::Fn(Box::new(|tokenizer, code| {
- cdata_open_inside(tokenizer, code, buffer, 0)
- })),
+ State::Fn(Box::new(|t, c| cdata_open_inside(t, c, buffer, 0))),
None,
)
}
@@ -247,8 +245,8 @@ fn cdata_open_inside(
(State::Fn(Box::new(cdata)), None)
} else {
(
- State::Fn(Box::new(move |tokenizer, code| {
- cdata_open_inside(tokenizer, code, buffer, index + 1)
+ State::Fn(Box::new(move |t, c| {
+ cdata_open_inside(t, c, buffer, index + 1)
})),
None,
)
@@ -526,8 +524,8 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer, code: Code) -> Sta
Code::Char(char) if char == '"' || char == '\'' => {
tokenizer.consume(code);
(
- State::Fn(Box::new(move |tokenizer, code| {
- tag_open_attribute_value_quoted(tokenizer, code, char)
+ State::Fn(Box::new(move |t, c| {
+ tag_open_attribute_value_quoted(t, c, char)
})),
None,
)
@@ -555,9 +553,7 @@ fn tag_open_attribute_value_quoted(
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => at_line_ending(
tokenizer,
code,
- Box::new(move |tokenizer, code| {
- tag_open_attribute_value_quoted(tokenizer, code, marker)
- }),
+ Box::new(move |t, c| tag_open_attribute_value_quoted(t, c, marker)),
),
Code::Char(char) if char == marker => {
tokenizer.consume(code);
@@ -569,8 +565,8 @@ fn tag_open_attribute_value_quoted(
_ => {
tokenizer.consume(code);
(
- State::Fn(Box::new(move |tokenizer, code| {
- tag_open_attribute_value_quoted(tokenizer, code, marker)
+ State::Fn(Box::new(move |t, c| {
+ tag_open_attribute_value_quoted(t, c, marker)
})),
None,
)
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index b00188d..624ee8e 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -129,23 +129,33 @@ fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
// To do: If code is disabled, indented lines are allowed to interrupt.
tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
- if ok {
- Box::new(|_t, code| (State::Ok, Some(vec![code])))
- } else {
- Box::new(|tokenizer, code| {
- tokenizer.attempt_5(
- blank_line,
- code_fenced,
- html_flow,
- heading_atx,
- thematic_break,
- |ok| {
- Box::new(move |_t, code| {
- (if ok { State::Nok } else { State::Ok }, Some(vec![code]))
- })
- },
- )(tokenizer, code)
- })
- }
+ Box::new(if ok { interrupt_indent } else { interrupt_cont })
})(tokenizer, code)
}
+
+/// At an indent.
+///
+/// ```markdown
+/// alpha
+/// |
+/// ```
+pub fn interrupt_indent(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ (State::Ok, Some(vec![code]))
+}
+
+/// Not at an indented line.
+///
+/// ```markdown
+/// alpha
+/// |<div>
+/// ```
+pub fn interrupt_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ tokenizer.attempt_5(
+ blank_line,
+ code_fenced,
+ html_flow,
+ heading_atx,
+ thematic_break,
+ |ok| Box::new(move |_t, code| (if ok { State::Nok } else { State::Ok }, Some(vec![code]))),
+ )(tokenizer, code)
+}
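`Box::new(if ok { interrupt_indent } else { interrupt_cont })` works because the two named state functions share a signature, so the `if` coerces both fn items to one function pointer, which is then boxed as a `StateFn`. A tiny sketch of the same trick with simplified types:

```rust
/// Simplified stand-in for the crate's `StateFn` alias.
type StateFn = dyn FnOnce(i32) -> &'static str;

fn interrupt_indent(_code: i32) -> &'static str {
    "ok"
}

fn interrupt_cont(code: i32) -> &'static str {
    if code == 0 {
        "ok"
    } else {
        "nok"
    }
}

/// Both branches are plain `fn` items; the `if` coerces them to a common
/// function pointer, and `Box::new` turns that pointer into a boxed state.
fn pick(ok: bool) -> Box<StateFn> {
    Box::new(if ok { interrupt_indent } else { interrupt_cont })
}

fn main() {
    assert_eq!(pick(true)(1), "ok");
    assert_eq!(pick(false)(1), "nok");
}
```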
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 55efd13..194165c 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -60,6 +60,17 @@ use crate::construct::partial_space_or_tab::space_or_tab_opt;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
use crate::util::link::link;
+/// State needed to parse labels.
+#[derive(Debug)]
+struct Info {
+ /// Whether we’ve seen our first `ChunkString`.
+ connect: bool,
+ /// Whether there are non-blank characters in the label.
+ data: bool,
+ /// Number of characters in the label.
+ size: usize,
+}
+
/// Before a label.
///
/// ```markdown
@@ -73,10 +84,12 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.consume(code);
tokenizer.exit(TokenType::DefinitionLabelMarker);
tokenizer.enter(TokenType::DefinitionLabelData);
- (
- State::Fn(Box::new(|t, c| at_break(t, c, false, 0, false))),
- None,
- )
+ let info = Info {
+ connect: false,
+ data: false,
+ size: 0,
+ };
+ (State::Fn(Box::new(|t, c| at_break(t, c, info))), None)
}
_ => (State::Nok, None),
}
@@ -88,17 +101,11 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// [|a]
/// [a|]
/// ```
-fn at_break(
- tokenizer: &mut Tokenizer,
- code: Code,
- data: bool,
- size: usize,
- connect: bool,
-) -> StateFnResult {
+fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::Char('[') => (State::Nok, None),
- Code::Char(']') if !data => (State::Nok, None),
- _ if size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None),
+ Code::Char(']') if !info.data => (State::Nok, None),
+ _ if info.size > LINK_REFERENCE_SIZE_MAX => (State::Nok, None),
Code::Char(']') => {
tokenizer.exit(TokenType::DefinitionLabelData);
tokenizer.enter(TokenType::DefinitionLabelMarker);
@@ -110,12 +117,12 @@ fn at_break(
_ => {
tokenizer.enter(TokenType::ChunkString);
- if connect {
+ if info.connect {
let index = tokenizer.events.len() - 1;
link(&mut tokenizer.events, index);
}
- label(tokenizer, code, data, size)
+ label(tokenizer, code, info)
}
}
}
@@ -126,16 +133,8 @@ fn at_break(
/// [a
/// |b]
/// ```
-fn line_start(
- tokenizer: &mut Tokenizer,
- code: Code,
- data: bool,
- size: usize,
- connect: bool,
-) -> StateFnResult {
- tokenizer.go(space_or_tab_opt(), move |t, c| {
- line_begin(t, c, data, size, connect)
- })(tokenizer, code)
+fn line_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+ tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, info))(tokenizer, code)
}
/// After a line ending, after optional whitespace.
@@ -144,17 +143,11 @@ fn line_start(
/// [a
/// |b]
/// ```
-fn line_begin(
- tokenizer: &mut Tokenizer,
- code: Code,
- data: bool,
- size: usize,
- connect: bool,
-) -> StateFnResult {
+fn line_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
// Blank line not allowed.
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
- _ => at_break(tokenizer, code, data, size, connect),
+ _ => at_break(tokenizer, code, info),
}
}
@@ -163,44 +156,46 @@ fn line_begin(
/// ```markdown
/// [a|b]
/// ```
-fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
+fn label(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
+ if !info.connect {
+ info.connect = true;
+ }
+
match code {
Code::None | Code::Char('[' | ']') => {
tokenizer.exit(TokenType::ChunkString);
- at_break(tokenizer, code, data, size, true)
+ at_break(tokenizer, code, info)
}
- _ if size > LINK_REFERENCE_SIZE_MAX => {
+ _ if info.size > LINK_REFERENCE_SIZE_MAX => {
tokenizer.exit(TokenType::ChunkString);
- at_break(tokenizer, code, data, size, true)
+ at_break(tokenizer, code, info)
}
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
tokenizer.consume(code);
+ info.size += 1;
tokenizer.exit(TokenType::ChunkString);
- (
- State::Fn(Box::new(move |t, c| line_start(t, c, data, size + 1, true))),
- None,
- )
+ (State::Fn(Box::new(|t, c| line_start(t, c, info))), None)
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |t, c| label(t, c, data, size + 1))),
- None,
- )
+ info.size += 1;
+ (State::Fn(Box::new(|t, c| label(t, c, info))), None)
}
Code::Char('/') => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |t, c| escape(t, c, true, size + 1))),
- None,
- )
+ info.size += 1;
+ if !info.data {
+ info.data = true;
+ }
+ (State::Fn(Box::new(|t, c| escape(t, c, info))), None)
}
Code::Char(_) => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))),
- None,
- )
+ info.size += 1;
+ if !info.data {
+ info.data = true;
+ }
+ (State::Fn(Box::new(|t, c| label(t, c, info))), None)
}
}
}
@@ -210,15 +205,13 @@ fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> Stat
/// ```markdown
/// [a\|[b]
/// ```
-fn escape(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
+fn escape(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::Char('[' | '\\' | ']') => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |t, c| label(t, c, true, size + 1))),
- None,
- )
+ info.size += 1;
+ (State::Fn(Box::new(|t, c| label(t, c, info))), None)
}
- _ => label(tokenizer, code, data, size),
+ _ => label(tokenizer, code, info),
}
}
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 40ece49..1c4b367 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -6,6 +6,19 @@
use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
+/// Options to parse whitespace.
+#[derive(Debug)]
+struct Info {
+ /// Current size.
+ size: usize,
+ /// Minimum allowed characters (inclusive).
+ min: usize,
+ /// Maximum allowed characters (inclusive).
+ max: usize,
+ /// Token type to use for whitespace events.
+ kind: TokenType,
+}
+
/// Optional `space_or_tab`
///
/// ```bnf
@@ -30,7 +43,13 @@ pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
/// space_or_tab ::= x*y( ' ' '\t' )
/// ```
pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
- Box::new(move |t, c| start(t, c, kind, min, max))
+ let info = Info {
+ size: 0,
+ min,
+ max,
+ kind,
+ };
+ Box::new(|t, c| start(t, c, info))
}
/// Before whitespace.
@@ -38,26 +57,16 @@ pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
/// ```markdown
/// alpha| bravo
/// ```
-fn start(
- tokenizer: &mut Tokenizer,
- code: Code,
- kind: TokenType,
- min: usize,
- max: usize,
-) -> StateFnResult {
+fn start(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::VirtualSpace | Code::Char('\t' | ' ') if max > 0 => {
- tokenizer.enter(kind.clone());
+ Code::VirtualSpace | Code::Char('\t' | ' ') if info.max > 0 => {
+ tokenizer.enter(info.kind.clone());
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |tokenizer, code| {
- inside(tokenizer, code, kind, min, max, 1)
- })),
- None,
- )
+ info.size += 1;
+ (State::Fn(Box::new(|t, c| inside(t, c, info))), None)
}
_ => (
- if min == 0 { State::Ok } else { State::Nok },
+ if info.min == 0 { State::Ok } else { State::Nok },
Some(vec![code]),
),
}
@@ -69,28 +78,21 @@ fn start(
/// alpha |bravo
/// alpha | bravo
/// ```
-fn inside(
- tokenizer: &mut Tokenizer,
- code: Code,
- kind: TokenType,
- min: usize,
- max: usize,
- size: usize,
-) -> StateFnResult {
+fn inside(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::VirtualSpace | Code::Char('\t' | ' ') if size < max => {
+ Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.max => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |tokenizer, code| {
- inside(tokenizer, code, kind, min, max, size + 1)
- })),
- None,
- )
+ info.size += 1;
+ (State::Fn(Box::new(|t, c| inside(t, c, info))), None)
}
_ => {
- tokenizer.exit(kind);
+ tokenizer.exit(info.kind.clone());
(
- if size >= min { State::Ok } else { State::Nok },
+ if info.size >= info.min {
+ State::Ok
+ } else {
+ State::Nok
+ },
Some(vec![code]),
)
}
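Bundling `kind`, `min`, `max`, and the running `size` into one `Info` keeps each state function down to three parameters. The sketch below shows the same bundled-struct style on a plain string scanner; it is not the crate's API, only the shape of the refactor:

```rust
/// Bundled options plus running state, mirroring the patched `Info`.
#[derive(Debug)]
struct Info {
    size: usize,
    min: usize,
    max: usize,
}

/// Consume leading spaces/tabs from `input`, honouring `min`/`max`.
/// Returns `Ok(rest)` if at least `min` were seen, `Err(())` otherwise.
fn space_or_tab(mut info: Info, input: &str) -> Result<&str, ()> {
    let mut rest = input;
    while let Some(ch) = rest.chars().next() {
        if (ch == ' ' || ch == '\t') && info.size < info.max {
            info.size += 1;
            rest = &rest[ch.len_utf8()..];
        } else {
            break;
        }
    }
    if info.size >= info.min {
        Ok(rest)
    } else {
        Err(())
    }
}

fn main() {
    // At most 3 markers, none required: eats "   " and leaves "x".
    assert_eq!(space_or_tab(Info { size: 0, min: 0, max: 3 }, "   x"), Ok("x"));
    // At least one required: a bare "x" fails.
    assert_eq!(space_or_tab(Info { size: 0, min: 1, max: 3 }, "x"), Err(()));
    // The max caps consumption: the fourth space stays in the rest.
    assert_eq!(space_or_tab(Info { size: 0, min: 0, max: 3 }, "    x"), Ok(" x"));
}
```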
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 22c3209..aa1e067 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -38,22 +38,59 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
use crate::util::link::link;
/// Type of title.
-#[derive(Debug, Clone, PartialEq)]
+#[derive(Debug, PartialEq)]
enum Kind {
/// In a parenthesized (`(` and `)`) title.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// [a] b (c)
+ /// ```
Paren,
/// In a double quoted (`"`) title.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// [a] b "c"
+ /// ```
Double,
/// In a single quoted (`'`) title.
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// [a] b 'c'
+ /// ```
Single,
}
-/// Display a marker.
-fn kind_to_marker(kind: &Kind) -> char {
- match kind {
- Kind::Double => '"',
- Kind::Single => '\'',
- Kind::Paren => ')',
+impl Kind {
+ /// Turn the kind into a [char].
+ ///
+ /// > 👉 **Note**: a closing paren is used.
+ fn as_char(&self) -> char {
+ match self {
+ Kind::Paren => ')',
+ Kind::Double => '"',
+ Kind::Single => '\'',
+ }
+ }
+ /// Turn a [char] into a kind.
+ ///
+ /// > 👉 **Note**: an opening paren must be used.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `char` is not `(`, `"`, or `'`.
+ fn from_char(char: char) -> Kind {
+ match char {
+ '(' => Kind::Paren,
+ '"' => Kind::Double,
+ '\'' => Kind::Single,
+ _ => unreachable!("invalid char"),
+ }
}
}
@@ -65,21 +102,16 @@ fn kind_to_marker(kind: &Kind) -> char {
/// |(a)
/// ```
pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let kind = match code {
- Code::Char('"') => Some(Kind::Double),
- Code::Char('\'') => Some(Kind::Single),
- Code::Char('(') => Some(Kind::Paren),
- _ => None,
- };
-
- if let Some(kind) = kind {
- tokenizer.enter(TokenType::DefinitionTitle);
- tokenizer.enter(TokenType::DefinitionTitleMarker);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::DefinitionTitleMarker);
- (State::Fn(Box::new(|t, c| begin(t, c, kind))), None)
- } else {
- (State::Nok, None)
+ match code {
+ Code::Char(char) if char == '(' || char == '"' || char == '\'' => {
+ let kind = Kind::from_char(char);
+ tokenizer.enter(TokenType::DefinitionTitle);
+ tokenizer.enter(TokenType::DefinitionTitleMarker);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::DefinitionTitleMarker);
+ (State::Fn(Box::new(|t, c| begin(t, c, kind))), None)
+ }
+ _ => (State::Nok, None),
}
}
@@ -94,7 +126,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
- Code::Char(char) if char == kind_to_marker(&kind) => {
+ Code::Char(char) if char == kind.as_char() => {
tokenizer.enter(TokenType::DefinitionTitleMarker);
tokenizer.consume(code);
tokenizer.exit(TokenType::DefinitionTitleMarker);
@@ -118,7 +150,7 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
/// ```
fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind, connect: bool) -> StateFnResult {
match code {
- Code::Char(char) if char == kind_to_marker(&kind) => {
+ Code::Char(char) if char == kind.as_char() => {
tokenizer.exit(TokenType::DefinitionTitleString);
begin(tokenizer, code, kind)
}
@@ -165,7 +197,7 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResul
/// ```
fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
- Code::Char(char) if char == kind_to_marker(&kind) => {
+ Code::Char(char) if char == kind.as_char() => {
tokenizer.exit(TokenType::ChunkString);
at_break(tokenizer, code, kind, true)
}
@@ -176,10 +208,7 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
tokenizer.consume(code);
tokenizer.exit(TokenType::ChunkString);
- (
- State::Fn(Box::new(move |t, c| line_start(t, c, kind))),
- None,
- )
+ (State::Fn(Box::new(|t, c| line_start(t, c, kind))), None)
}
Code::Char('\\') => {
tokenizer.consume(code);
@@ -199,9 +228,9 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
/// ```
fn escape(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
- Code::Char(char) if char == kind_to_marker(&kind) => {
+ Code::Char(char) if char == kind.as_char() => {
tokenizer.consume(code);
- (State::Fn(Box::new(move |t, c| title(t, c, kind))), None)
+ (State::Fn(Box::new(|t, c| title(t, c, kind))), None)
}
_ => title(tokenizer, code, kind),
}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index abf733d..a9e5732 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -48,6 +48,68 @@ use super::partial_space_or_tab::space_or_tab_opt;
use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+/// Type of thematic break.
+#[derive(Debug, PartialEq)]
+enum Kind {
+ /// In a thematic break using asterisks (`*`).
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// ***
+ /// ```
+ Asterisk,
+ /// In a thematic break using dashes (`-`).
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// ---
+ /// ```
+ Dash,
+ /// In a thematic break using underscores (`_`).
+ ///
+ /// ## Example
+ ///
+ /// ```markdown
+ /// ___
+ /// ```
+ Underscore,
+}
+
+impl Kind {
+ /// Turn the kind into a [char].
+ fn as_char(&self) -> char {
+ match self {
+ Kind::Asterisk => '*',
+ Kind::Dash => '-',
+ Kind::Underscore => '_',
+ }
+ }
+ /// Turn a [char] into a kind.
+ ///
+ /// ## Panics
+ ///
+ /// Panics if `char` is not `*`, `-`, or `_`.
+ fn from_char(char: char) -> Kind {
+ match char {
+ '*' => Kind::Asterisk,
+ '-' => Kind::Dash,
+ '_' => Kind::Underscore,
+ _ => unreachable!("invalid char"),
+ }
+ }
+}
+
+/// State needed to parse thematic breaks.
+#[derive(Debug)]
+struct Info {
+ /// Kind of marker.
+ kind: Kind,
+ /// Number of markers.
+ size: usize,
+}
+
/// Start of a thematic break.
///
/// ```markdown
@@ -65,9 +127,14 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
- Code::Char(char) if char == '*' || char == '-' || char == '_' => {
- at_break(tokenizer, code, char, 0)
- }
+ Code::Char(char) if char == '*' || char == '-' || char == '_' => at_break(
+ tokenizer,
+ code,
+ Info {
+ kind: Kind::from_char(char),
+ size: 0,
+ },
+ ),
_ => (State::Nok, None),
}
}
@@ -79,17 +146,17 @@ pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// *| * *
/// * |* *
/// ```
-fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
match code {
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
- if size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>
+ if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>
{
tokenizer.exit(TokenType::ThematicBreak);
(State::Ok, Some(vec![code]))
}
- Code::Char(char) if char == marker => {
+ Code::Char(char) if char == info.kind.as_char() => {
tokenizer.enter(TokenType::ThematicBreakSequence);
- sequence(tokenizer, code, marker, size)
+ sequence(tokenizer, code, info)
}
_ => (State::Nok, None),
}
@@ -102,22 +169,16 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) ->
/// *|**
/// **|*
/// ```
-fn sequence(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
+fn sequence(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
- Code::Char(char) if char == marker => {
+ Code::Char(char) if char == info.kind.as_char() => {
tokenizer.consume(code);
- (
- State::Fn(Box::new(move |tokenizer, code| {
- sequence(tokenizer, code, marker, size + 1)
- })),
- None,
- )
+ info.size += 1;
+ (State::Fn(Box::new(|t, c| sequence(t, c, info))), None)
}
_ => {
tokenizer.exit(TokenType::ThematicBreakSequence);
- tokenizer.go(space_or_tab_opt(), move |t, c| at_break(t, c, marker, size))(
- tokenizer, code,
- )
+ tokenizer.go(space_or_tab_opt(), |t, c| at_break(t, c, info))(tokenizer, code)
}
}
}
diff --git a/src/content/string.rs b/src/content/string.rs
index c3e825e..bae2646 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -55,14 +55,6 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
(State::Fn(Box::new(in_data)), None)
}
}
-
- // if let Code::None = code {
- // (State::Ok, None)
- // } else {
- // tokenizer.enter(TokenType::Data);
- // tokenizer.consume(code);
- // (State::Fn(Box::new(in_data)), None)
- // }
}
/// In data.
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 909a1d1..8e9c7c6 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -142,6 +142,7 @@ pub struct Event {
/// It’s responsible for dealing with that single passed [`Code`][].
/// It yields a [`StateFnResult`][].
pub type StateFn = dyn FnOnce(&mut Tokenizer, Code) -> StateFnResult;
+
/// Each [`StateFn`][] yields something back: primarily the state.
/// In certain cases, it can also yield back up parsed codes that were passed down.
pub type StateFnResult = (State, Option<Vec<Code>>);