aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
Diffstat (limited to 'src/construct')
-rw-r--r--src/construct/attention.rs75
-rw-r--r--src/construct/autolink.rs40
-rw-r--r--src/construct/blank_line.rs4
-rw-r--r--src/construct/block_quote.rs27
-rw-r--r--src/construct/character_escape.rs6
-rw-r--r--src/construct/character_reference.rs82
-rw-r--r--src/construct/code_fenced.rs65
-rw-r--r--src/construct/code_indented.rs17
-rw-r--r--src/construct/code_text.rs18
-rw-r--r--src/construct/definition.rs10
-rw-r--r--src/construct/hard_break_escape.rs6
-rw-r--r--src/construct/heading_atx.rs22
-rw-r--r--src/construct/heading_setext.rs8
-rw-r--r--src/construct/html_flow.rs224
-rw-r--r--src/construct/html_text.rs161
-rw-r--r--src/construct/label_end.rs92
-rw-r--r--src/construct/label_start_image.rs6
-rw-r--r--src/construct/label_start_link.rs4
-rw-r--r--src/construct/list.rs57
-rw-r--r--src/construct/paragraph.rs6
-rw-r--r--src/construct/partial_data.rs31
-rw-r--r--src/construct/partial_destination.rs39
-rw-r--r--src/construct/partial_label.rs22
-rw-r--r--src/construct/partial_non_lazy_continuation.rs4
-rw-r--r--src/construct/partial_space_or_tab.rs13
-rw-r--r--src/construct/partial_title.rs35
-rw-r--r--src/construct/partial_whitespace.rs53
-rw-r--r--src/construct/thematic_break.rs27
28 files changed, 540 insertions, 614 deletions
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
index 27d7544..65c2f6f 100644
--- a/src/construct/attention.rs
+++ b/src/construct/attention.rs
@@ -52,8 +52,9 @@
//! [html-strong]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-strong-element
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, Point, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Point, State, Tokenizer};
use crate::unicode::PUNCTUATION;
+use crate::util::slice::Slice;
/// Character code kinds.
#[derive(Debug, PartialEq)]
@@ -128,17 +129,6 @@ impl MarkerKind {
_ => unreachable!("invalid char"),
}
}
- /// Turn [Code] into a kind.
- ///
- /// ## Panics
- ///
- /// Panics if `code` is not `Code::Char('*' | '_')`.
- fn from_code(code: Code) -> MarkerKind {
- match code {
- Code::Char(char) => MarkerKind::from_char(char),
- _ => unreachable!("invalid code"),
- }
- }
}
/// Attentention sequence that we can take markers from.
@@ -170,9 +160,9 @@ struct Sequence {
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('*' | '_') if tokenizer.parse_state.constructs.attention => {
+ Some(char) if tokenizer.parse_state.constructs.attention && matches!(char, '*' | '_') => {
tokenizer.enter(Token::AttentionSequence);
- inside(tokenizer, MarkerKind::from_code(tokenizer.current))
+ inside(tokenizer, MarkerKind::from_char(char))
}
_ => State::Nok,
}
@@ -185,23 +175,20 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^^
/// ```
fn inside(tokenizer: &mut Tokenizer, marker: MarkerKind) -> State {
- match tokenizer.current {
- Code::Char(char) if char == marker.as_char() => {
- tokenizer.consume();
- State::Fn(Box::new(move |t| inside(t, marker)))
- }
- _ => {
- tokenizer.exit(Token::AttentionSequence);
- tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention));
- State::Ok
- }
+ if tokenizer.current == Some(marker.as_char()) {
+ tokenizer.consume();
+ State::Fn(Box::new(move |t| inside(t, marker)))
+ } else {
+ tokenizer.exit(Token::AttentionSequence);
+ tokenizer.register_resolver("attention".to_string(), Box::new(resolve_attention));
+ State::Ok
}
}
/// Resolve attention sequences.
#[allow(clippy::too_many_lines)]
fn resolve_attention(tokenizer: &mut Tokenizer) {
- let codes = &tokenizer.parse_state.codes;
+ let chars = &tokenizer.parse_state.chars;
let mut start = 0;
let mut balance = 0;
let mut sequences = vec![];
@@ -216,17 +203,21 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
if enter.token_type == Token::AttentionSequence {
let end = start + 1;
let exit = &tokenizer.events[end];
- let marker = MarkerKind::from_code(codes[enter.point.index]);
+ let marker =
+ MarkerKind::from_char(Slice::from_point(chars, &enter.point).head().unwrap());
let before = classify_character(if enter.point.index > 0 {
- codes[enter.point.index - 1]
- } else {
- Code::None
- });
- let after = classify_character(if exit.point.index < codes.len() {
- codes[exit.point.index]
+ Slice::from_point(
+ chars,
+ &Point {
+ index: enter.point.index - 1,
+ ..enter.point
+ },
+ )
+ .tail()
} else {
- Code::None
+ None
});
+ let after = classify_character(Slice::from_point(chars, &exit.point).tail());
let open = after == GroupKind::Other
|| (after == GroupKind::Punctuation && before != GroupKind::Other);
// To do: GFM strikethrough?
@@ -326,9 +317,9 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
let sequence_close = &mut sequences[close];
let close_event_index = sequence_close.event_index;
let seq_close_enter = sequence_close.start_point.clone();
+ // No need to worry about `VS`, because sequences are only actual characters.
sequence_close.size -= take;
sequence_close.start_point.column += take;
- sequence_close.start_point.offset += take;
sequence_close.start_point.index += take;
let seq_close_exit = sequence_close.start_point.clone();
@@ -352,9 +343,9 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
let sequence_open = &mut sequences[open];
let open_event_index = sequence_open.event_index;
let seq_open_exit = sequence_open.end_point.clone();
+ // No need to worry about `VS`, because sequences are only actual characters.
sequence_open.size -= take;
sequence_open.end_point.column -= take;
- sequence_open.end_point.offset -= take;
sequence_open.end_point.index -= take;
let seq_open_enter = sequence_open.end_point.clone();
@@ -492,20 +483,20 @@ fn resolve_attention(tokenizer: &mut Tokenizer) {
/// Used for attention (emphasis, strong), whose sequences can open or close
/// based on the class of surrounding characters.
///
-/// > 👉 **Note** that eof (`Code::None`) is seen as whitespace.
+/// > 👉 **Note** that eof (`None`) is seen as whitespace.
///
/// ## References
///
/// * [`micromark-util-classify-character` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-util-classify-character/dev/index.js)
-fn classify_character(code: Code) -> GroupKind {
- match code {
+fn classify_character(char: Option<char>) -> GroupKind {
+ match char {
// Custom characters.
- Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace => GroupKind::Whitespace,
+ None => GroupKind::Whitespace,
// Unicode whitespace.
- Code::Char(char) if char.is_whitespace() => GroupKind::Whitespace,
+ Some(char) if char.is_whitespace() => GroupKind::Whitespace,
// Unicode punctuation.
- Code::Char(char) if PUNCTUATION.contains(&char) => GroupKind::Punctuation,
+ Some(char) if PUNCTUATION.contains(&char) => GroupKind::Punctuation,
// Everything else.
- Code::Char(_) => GroupKind::Other,
+ Some(_) => GroupKind::Other,
}
}
diff --git a/src/construct/autolink.rs b/src/construct/autolink.rs
index 3933596..399570b 100644
--- a/src/construct/autolink.rs
+++ b/src/construct/autolink.rs
@@ -103,7 +103,7 @@
use crate::constant::{AUTOLINK_DOMAIN_SIZE_MAX, AUTOLINK_SCHEME_SIZE_MAX};
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of an autolink.
///
@@ -115,7 +115,7 @@ use crate::tokenizer::{Code, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('<') if tokenizer.parse_state.constructs.autolink => {
+ Some('<') if tokenizer.parse_state.constructs.autolink => {
tokenizer.enter(Token::Autolink);
tokenizer.enter(Token::AutolinkMarker);
tokenizer.consume();
@@ -137,11 +137,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(char) if char.is_ascii_alphabetic() => {
+ Some(char) if char.is_ascii_alphabetic() => {
tokenizer.consume();
State::Fn(Box::new(scheme_or_email_atext))
}
- Code::Char(char) if is_ascii_atext(char) => email_atext(tokenizer),
+ Some(char) if is_ascii_atext(char) => email_atext(tokenizer),
_ => State::Nok,
}
}
@@ -156,7 +156,7 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
scheme_inside_or_email_atext(tokenizer, 1)
}
_ => email_atext(tokenizer),
@@ -173,11 +173,11 @@ fn scheme_or_email_atext(tokenizer: &mut Tokenizer) -> State {
/// ```
fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Code::Char(':') => {
+ Some(':') => {
tokenizer.consume();
State::Fn(Box::new(url_inside))
}
- Code::Char('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z')
+ Some('+' | '-' | '.' | '0'..='9' | 'A'..='Z' | 'a'..='z')
if size < AUTOLINK_SCHEME_SIZE_MAX =>
{
tokenizer.consume();
@@ -195,15 +195,13 @@ fn scheme_inside_or_email_atext(tokenizer: &mut Tokenizer, size: usize) -> State
/// ```
fn url_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.exit(Token::AutolinkProtocol);
end(tokenizer)
}
- Code::Char(char) if char.is_ascii_control() => State::Nok,
- Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(' ') => {
- State::Nok
- }
- Code::Char(_) => {
+ Some(char) if char.is_ascii_control() => State::Nok,
+ None | Some(' ') => State::Nok,
+ Some(_) => {
tokenizer.consume();
State::Fn(Box::new(url_inside))
}
@@ -218,11 +216,11 @@ fn url_inside(tokenizer: &mut Tokenizer) -> State {
/// ```
fn email_atext(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('@') => {
+ Some('@') => {
tokenizer.consume();
State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
}
- Code::Char(char) if is_ascii_atext(char) => {
+ Some(char) if is_ascii_atext(char) => {
tokenizer.consume();
State::Fn(Box::new(email_atext))
}
@@ -238,7 +236,7 @@ fn email_atext(tokenizer: &mut Tokenizer) -> State {
/// ```
fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Code::Char(char) if char.is_ascii_alphanumeric() => email_value(tokenizer, size),
+ Some(char) if char.is_ascii_alphanumeric() => email_value(tokenizer, size),
_ => State::Nok,
}
}
@@ -251,11 +249,11 @@ fn email_at_sign_or_dot(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Code::Char('.') => {
+ Some('.') => {
tokenizer.consume();
State::Fn(Box::new(|t| email_at_sign_or_dot(t, 0)))
}
- Code::Char('>') => {
+ Some('>') => {
let index = tokenizer.events.len();
tokenizer.exit(Token::AutolinkProtocol);
// Change the token type.
@@ -277,11 +275,11 @@ fn email_label(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Code::Char('-') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ Some('-') if size < AUTOLINK_DOMAIN_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |t| email_value(t, size + 1)))
}
- Code::Char(char) if char.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => {
+ Some(char) if char.is_ascii_alphanumeric() && size < AUTOLINK_DOMAIN_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |t| email_label(t, size + 1)))
}
@@ -299,7 +297,7 @@ fn email_value(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.enter(Token::AutolinkMarker);
tokenizer.consume();
tokenizer.exit(Token::AutolinkMarker);
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index 537ffc1..6780f40 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -33,7 +33,7 @@
//! [flow]: crate::content::flow
use crate::construct::partial_space_or_tab::space_or_tab;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of a blank line.
///
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => State::Ok,
+ None | Some('\n') => State::Ok,
_ => State::Nok,
}
}
diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs
index 3bb4b8b..49a0ea0 100644
--- a/src/construct/block_quote.rs
+++ b/src/construct/block_quote.rs
@@ -36,7 +36,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::space_or_tab_min_max;
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of block quote.
///
@@ -65,7 +65,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.enter(Token::BlockQuote);
cont_before(tokenizer)
}
@@ -98,7 +98,7 @@ pub fn cont(tokenizer: &mut Tokenizer) -> State {
/// ```
fn cont_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.enter(Token::BlockQuotePrefix);
tokenizer.enter(Token::BlockQuoteMarker);
tokenizer.consume();
@@ -118,17 +118,14 @@ fn cont_before(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn cont_after(tokenizer: &mut Tokenizer) -> State {
- match tokenizer.current {
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
- tokenizer.enter(Token::SpaceOrTab);
- tokenizer.consume();
- tokenizer.exit(Token::SpaceOrTab);
- tokenizer.exit(Token::BlockQuotePrefix);
- State::Ok
- }
- _ => {
- tokenizer.exit(Token::BlockQuotePrefix);
- State::Ok
- }
+ if let Some('\t' | ' ') = tokenizer.current {
+ tokenizer.enter(Token::SpaceOrTab);
+ tokenizer.consume();
+ tokenizer.exit(Token::SpaceOrTab);
+ tokenizer.exit(Token::BlockQuotePrefix);
+ State::Ok
+ } else {
+ tokenizer.exit(Token::BlockQuotePrefix);
+ State::Ok
}
}
diff --git a/src/construct/character_escape.rs b/src/construct/character_escape.rs
index 9e9b713..e9263af 100644
--- a/src/construct/character_escape.rs
+++ b/src/construct/character_escape.rs
@@ -34,7 +34,7 @@
//! [hard_break_escape]: crate::construct::hard_break_escape
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of a character escape.
///
@@ -44,7 +44,7 @@ use crate::tokenizer::{Code, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('\\') if tokenizer.parse_state.constructs.character_escape => {
+ Some('\\') if tokenizer.parse_state.constructs.character_escape => {
tokenizer.enter(Token::CharacterEscape);
tokenizer.enter(Token::CharacterEscapeMarker);
tokenizer.consume();
@@ -63,7 +63,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(char) if char.is_ascii_punctuation() => {
+ Some(char) if char.is_ascii_punctuation() => {
tokenizer.enter(Token::CharacterEscapeValue);
tokenizer.consume();
tokenizer.exit(Token::CharacterEscapeValue);
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index 8521f15..59043d1 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -66,7 +66,8 @@ use crate::constant::{
CHARACTER_REFERENCE_HEXADECIMAL_SIZE_MAX, CHARACTER_REFERENCE_NAMED_SIZE_MAX,
};
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{Point, State, Tokenizer};
+use crate::util::slice::{Position, Slice};
/// Kind of a character reference.
#[derive(Debug, Clone, PartialEq)]
@@ -120,8 +121,10 @@ impl Kind {
/// State needed to parse character references.
#[derive(Debug, Clone)]
struct Info {
- /// All parsed characters.
- buffer: String,
+ /// Place of value start.
+ start: Point,
+ /// Size of value.
+ size: usize,
/// Kind of character reference.
kind: Kind,
}
@@ -138,7 +141,7 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('&') if tokenizer.parse_state.constructs.character_reference => {
+ Some('&') if tokenizer.parse_state.constructs.character_reference => {
tokenizer.enter(Token::CharacterReference);
tokenizer.enter(Token::CharacterReferenceMarker);
tokenizer.consume();
@@ -161,18 +164,21 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
- let info = Info {
- buffer: String::new(),
- kind: Kind::Named,
- };
- if let Code::Char('#') = tokenizer.current {
+ if let Some('#') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerNumeric);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerNumeric);
- State::Fn(Box::new(|t| numeric(t, info)))
+ State::Fn(Box::new(numeric))
} else {
tokenizer.enter(Token::CharacterReferenceValue);
- value(tokenizer, info)
+ value(
+ tokenizer,
+ Info {
+ start: tokenizer.point.clone(),
+ size: 0,
+ kind: Kind::Named,
+ },
+ )
}
}
@@ -185,17 +191,25 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// > | a&#x9;b
/// ^
/// ```
-fn numeric(tokenizer: &mut Tokenizer, mut info: Info) -> State {
- if let Code::Char('x' | 'X') = tokenizer.current {
+fn numeric(tokenizer: &mut Tokenizer) -> State {
+ if let Some('x' | 'X') = tokenizer.current {
tokenizer.enter(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.consume();
tokenizer.exit(Token::CharacterReferenceMarkerHexadecimal);
tokenizer.enter(Token::CharacterReferenceValue);
- info.kind = Kind::Hexadecimal;
+ let info = Info {
+ start: tokenizer.point.clone(),
+ size: 0,
+ kind: Kind::Hexadecimal,
+ };
State::Fn(Box::new(|t| value(t, info)))
} else {
tokenizer.enter(Token::CharacterReferenceValue);
- info.kind = Kind::Decimal;
+ let info = Info {
+ start: tokenizer.point.clone(),
+ size: 0,
+ kind: Kind::Decimal,
+ };
value(tokenizer, info)
}
}
@@ -215,24 +229,32 @@ fn numeric(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn value(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char(';') if !info.buffer.is_empty() => {
- let unknown_named = Kind::Named == info.kind
- && !CHARACTER_REFERENCES.iter().any(|d| d.0 == info.buffer);
+ Some(';') if info.size > 0 => {
+ if Kind::Named == info.kind {
+ let value = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position {
+ start: &info.start,
+ end: &tokenizer.point,
+ },
+ )
+ .serialize();
- if unknown_named {
- State::Nok
- } else {
- tokenizer.exit(Token::CharacterReferenceValue);
- tokenizer.enter(Token::CharacterReferenceMarkerSemi);
- tokenizer.consume();
- tokenizer.exit(Token::CharacterReferenceMarkerSemi);
- tokenizer.exit(Token::CharacterReference);
- State::Ok
+ if !CHARACTER_REFERENCES.iter().any(|d| d.0 == value) {
+ return State::Nok;
+ }
}
+
+ tokenizer.exit(Token::CharacterReferenceValue);
+ tokenizer.enter(Token::CharacterReferenceMarkerSemi);
+ tokenizer.consume();
+ tokenizer.exit(Token::CharacterReferenceMarkerSemi);
+ tokenizer.exit(Token::CharacterReference);
+ State::Ok
}
- Code::Char(char) => {
- if info.buffer.len() < info.kind.max() && info.kind.allowed(char) {
- info.buffer.push(char);
+ Some(char) => {
+ if info.size < info.kind.max() && info.kind.allowed(char) {
+ info.size += 1;
tokenizer.consume();
State::Fn(Box::new(|t| value(t, info)))
} else {
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index 2fea95e..98fa54f 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -107,8 +107,8 @@ use crate::construct::{
partial_space_or_tab::{space_or_tab, space_or_tab_min_max},
};
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, State, Tokenizer};
-use crate::util::span::from_exit_event;
+use crate::tokenizer::{ContentType, State, Tokenizer};
+use crate::util::slice::{Position, Slice};
/// Kind of fences.
#[derive(Debug, Clone, PartialEq)]
@@ -155,17 +155,6 @@ impl Kind {
_ => unreachable!("invalid char"),
}
}
- /// Turn [Code] into a kind.
- ///
- /// ## Panics
- ///
- /// Panics if `code` is not ``Code::Char('~' | '`')``.
- fn from_code(code: Code) -> Kind {
- match code {
- Code::Char(char) => Kind::from_char(char),
- _ => unreachable!("invalid code"),
- }
- }
}
/// State needed to parse code (fenced).
@@ -217,20 +206,23 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
if let Some(event) = tail {
if event.token_type == Token::SpaceOrTab {
- let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
- prefix = span.end_index - span.start_index;
+ prefix = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position::from_exit_event(&tokenizer.events, tokenizer.events.len() - 1),
+ )
+ .size();
}
}
match tokenizer.current {
- Code::Char('`' | '~') => {
+ Some(char) if matches!(char, '`' | '~') => {
tokenizer.enter(Token::CodeFencedFenceSequence);
sequence_open(
tokenizer,
Info {
prefix,
size: 0,
- kind: Kind::from_code(tokenizer.current),
+ kind: Kind::from_char(char),
},
)
}
@@ -248,7 +240,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.consume();
State::Fn(Box::new(|t| {
info.size += 1;
@@ -273,7 +265,7 @@ fn sequence_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
@@ -282,7 +274,7 @@ fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
_ => {
tokenizer.enter(Token::CodeFencedFenceInfo);
tokenizer.enter_with_content(Token::Data, Some(ContentType::String));
- info_inside(tokenizer, info, vec![])
+ info_inside(tokenizer, info)
}
}
}
@@ -295,9 +287,9 @@ fn info_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// | console.log(1)
/// | ~~~
/// ```
-fn info_inside(tokenizer: &mut Tokenizer, info: Info, mut codes: Vec<Code>) -> State {
+fn info_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
tokenizer.exit(Token::CodeFencedFence);
@@ -305,16 +297,15 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info, mut codes: Vec<Code>) -> S
tokenizer.concrete = true;
at_break(tokenizer, info)
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceInfo);
tokenizer.attempt_opt(space_or_tab(), |t| meta_before(t, info))(tokenizer)
}
- Code::Char('`') if info.kind == Kind::GraveAccent => State::Nok,
- Code::Char(_) => {
- codes.push(tokenizer.current);
+ Some('`') if info.kind == Kind::GraveAccent => State::Nok,
+ Some(_) => {
tokenizer.consume();
- State::Fn(Box::new(|t| info_inside(t, info, codes)))
+ State::Fn(Box::new(|t| info_inside(t, info)))
}
}
}
@@ -329,7 +320,7 @@ fn info_inside(tokenizer: &mut Tokenizer, info: Info, mut codes: Vec<Code>) -> S
/// ```
fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::CodeFencedFence);
// Do not form containers.
tokenizer.concrete = true;
@@ -353,7 +344,7 @@ fn meta_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::CodeFencedFenceMeta);
tokenizer.exit(Token::CodeFencedFence);
@@ -361,7 +352,7 @@ fn meta(tokenizer: &mut Tokenizer, info: Info) -> State {
tokenizer.concrete = true;
at_break(tokenizer, info)
}
- Code::Char('`') if info.kind == Kind::GraveAccent => State::Nok,
+ Some('`') if info.kind == Kind::GraveAccent => State::Nok,
_ => {
tokenizer.consume();
State::Fn(Box::new(|t| meta(t, info)))
@@ -422,7 +413,7 @@ fn at_non_lazy_break(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn close_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ Some('\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
@@ -461,7 +452,7 @@ fn close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.enter(Token::CodeFencedFenceSequence);
close_sequence(tokenizer, info, 0)
}
@@ -479,7 +470,7 @@ fn close_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn close_sequence(tokenizer: &mut Tokenizer, info: Info, size: usize) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.consume();
State::Fn(Box::new(move |t| close_sequence(t, info, size + 1)))
}
@@ -501,7 +492,7 @@ fn close_sequence(tokenizer: &mut Tokenizer, info: Info, size: usize) -> State {
/// ```
fn close_sequence_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::CodeFencedFence);
State::Ok
}
@@ -547,9 +538,7 @@ fn content_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_break(tokenizer, info)
- }
+ None | Some('\n') => at_break(tokenizer, info),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
content_continue(tokenizer, info)
@@ -567,7 +556,7 @@ fn content_begin(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn content_continue(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::CodeFlowChunk);
at_break(tokenizer, info)
}
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 015c4a0..bb1615c 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -48,7 +48,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::TAB_SIZE;
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of code (indented).
///
@@ -78,11 +78,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None => after(tokenizer),
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer
- .attempt(further_start, |ok| {
- Box::new(if ok { at_break } else { after })
- })(tokenizer),
+ None => after(tokenizer),
+ Some('\n') => tokenizer.attempt(further_start, |ok| {
+ Box::new(if ok { at_break } else { after })
+ })(tokenizer),
_ => {
tokenizer.enter(Token::CodeFlowChunk);
content(tokenizer)
@@ -98,7 +97,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ```
fn content(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::CodeFlowChunk);
at_break(tokenizer)
}
@@ -134,7 +133,7 @@ fn further_start(tokenizer: &mut Tokenizer) -> State {
State::Nok
} else {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ Some('\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
@@ -178,7 +177,7 @@ fn further_begin(tokenizer: &mut Tokenizer) -> State {
/// ```
fn further_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => further_start(tokenizer),
+ Some('\n') => further_start(tokenizer),
_ => State::Nok,
}
}
diff --git a/src/construct/code_text.rs b/src/construct/code_text.rs
index f5f92fc..150f63b 100644
--- a/src/construct/code_text.rs
+++ b/src/construct/code_text.rs
@@ -84,7 +84,7 @@
//! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of code (text).
///
@@ -98,9 +98,9 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
let len = tokenizer.events.len();
match tokenizer.current {
- Code::Char('`')
+ Some('`')
if tokenizer.parse_state.constructs.code_text
- && (tokenizer.previous != Code::Char('`')
+ && (tokenizer.previous != Some('`')
|| (len > 0
&& tokenizer.events[len - 1].token_type == Token::CharacterEscape)) =>
{
@@ -119,7 +119,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
- if let Code::Char('`') = tokenizer.current {
+ if let Some('`') = tokenizer.current {
tokenizer.consume();
State::Fn(Box::new(move |t| sequence_open(t, size + 1)))
} else {
@@ -136,14 +136,14 @@ fn sequence_open(tokenizer: &mut Tokenizer, size: usize) -> State {
/// ```
fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
match tokenizer.current {
- Code::None => State::Nok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None => State::Nok,
+ Some('\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
State::Fn(Box::new(move |t| between(t, size_open)))
}
- Code::Char('`') => {
+ Some('`') => {
tokenizer.enter(Token::CodeTextSequence);
sequence_close(tokenizer, size_open, 0)
}
@@ -162,7 +162,7 @@ fn between(tokenizer: &mut Tokenizer, size_open: usize) -> State {
/// ```
fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r' | '`') => {
+ None | Some('\n' | '`') => {
tokenizer.exit(Token::CodeTextData);
between(tokenizer, size_open)
}
@@ -181,7 +181,7 @@ fn data(tokenizer: &mut Tokenizer, size_open: usize) -> State {
/// ```
fn sequence_close(tokenizer: &mut Tokenizer, size_open: usize, size: usize) -> State {
match tokenizer.current {
- Code::Char('`') => {
+ Some('`') => {
tokenizer.consume();
State::Fn(Box::new(move |t| sequence_close(t, size_open, size + 1)))
}
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index ffaaa98..f2b5ae0 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -100,7 +100,7 @@ use crate::construct::{
partial_title::{start as title, Options as TitleOptions},
};
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
use crate::util::skip::opt_back as skip_opt_back;
/// At the start of a definition.
@@ -137,7 +137,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('[') => tokenizer.go(
+ Some('[') => tokenizer.go(
|t| {
label(
t,
@@ -162,7 +162,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn label_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(':') => {
+ Some(':') => {
tokenizer.enter(Token::DefinitionMarker);
tokenizer.consume();
tokenizer.exit(Token::DefinitionMarker);
@@ -231,7 +231,7 @@ fn after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn after_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::Definition);
// You’d be interrupting.
tokenizer.interrupt = true;
@@ -294,7 +294,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn title_after_after_optional_whitespace(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => State::Ok,
+ None | Some('\n') => State::Ok,
_ => State::Nok,
}
}
diff --git a/src/construct/hard_break_escape.rs b/src/construct/hard_break_escape.rs
index 40a83ef..0585c4c 100644
--- a/src/construct/hard_break_escape.rs
+++ b/src/construct/hard_break_escape.rs
@@ -40,7 +40,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-br-element
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of a hard break (escape).
///
@@ -51,7 +51,7 @@ use crate::tokenizer::{Code, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('\\') if tokenizer.parse_state.constructs.hard_break_escape => {
+ Some('\\') if tokenizer.parse_state.constructs.hard_break_escape => {
tokenizer.enter(Token::HardBreakEscape);
tokenizer.consume();
State::Fn(Box::new(inside))
@@ -69,7 +69,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ Some('\n') => {
tokenizer.exit(Token::HardBreakEscape);
State::Ok
}
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index 5de9a80..7a7cf2e 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -57,7 +57,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, Event, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, Event, EventType, State, Tokenizer};
/// Start of a heading (atx).
///
@@ -87,7 +87,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
- if Code::Char('#') == tokenizer.current {
+ if Some('#') == tokenizer.current {
tokenizer.enter(Token::HeadingAtxSequence);
sequence_open(tokenizer, 0)
} else {
@@ -103,11 +103,11 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn sequence_open(tokenizer: &mut Tokenizer, rank: usize) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') if rank > 0 => {
+ None | Some('\n') if rank > 0 => {
tokenizer.exit(Token::HeadingAtxSequence);
at_break(tokenizer)
}
- Code::Char('#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
+ Some('#') if rank < HEADING_ATX_OPENING_FENCE_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |tokenizer| {
sequence_open(tokenizer, rank + 1)
@@ -129,21 +129,19 @@ fn sequence_open(tokenizer: &mut Tokenizer, rank: usize) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::HeadingAtx);
tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve));
// Feel free to interrupt.
tokenizer.interrupt = false;
State::Ok
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
- tokenizer.go(space_or_tab(), at_break)(tokenizer)
- }
- Code::Char('#') => {
+ Some('\t' | ' ') => tokenizer.go(space_or_tab(), at_break)(tokenizer),
+ Some('#') => {
tokenizer.enter(Token::HeadingAtxSequence);
further_sequence(tokenizer)
}
- Code::Char(_) => {
+ Some(_) => {
tokenizer.enter_with_content(Token::Data, Some(ContentType::Text));
data(tokenizer)
}
@@ -159,7 +157,7 @@ fn at_break(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn further_sequence(tokenizer: &mut Tokenizer) -> State {
- if let Code::Char('#') = tokenizer.current {
+ if let Some('#') = tokenizer.current {
tokenizer.consume();
State::Fn(Box::new(further_sequence))
} else {
@@ -177,7 +175,7 @@ fn further_sequence(tokenizer: &mut Tokenizer) -> State {
fn data(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Note: `#` for closing sequence must be preceded by whitespace, otherwise it’s just text.
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\t' | '\n' | '\r' | ' ') => {
+ None | Some('\t' | '\n' | ' ') => {
tokenizer.exit(Token::Data);
at_break(tokenizer)
}
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index a0f7545..f9dd3f7 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -60,7 +60,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::token::Token;
-use crate::tokenizer::{Code, EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, Tokenizer};
use crate::util::skip::opt_back as skip_opt_back;
/// Kind of underline.
@@ -148,7 +148,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(char) if char == '-' || char == '=' => {
+ Some(char) if matches!(char, '-' | '=') => {
tokenizer.enter(Token::HeadingSetextUnderline);
inside(tokenizer, Kind::from_char(char))
}
@@ -165,7 +165,7 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer, kind: Kind) -> State {
match tokenizer.current {
- Code::Char(char) if char == kind.as_char() => {
+ Some(char) if char == kind.as_char() => {
tokenizer.consume();
State::Fn(Box::new(move |t| inside(t, kind)))
}
@@ -185,7 +185,7 @@ fn inside(tokenizer: &mut Tokenizer, kind: Kind) -> State {
/// ```
fn after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
// Feel free to interrupt.
tokenizer.interrupt = false;
tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 24d6f98..238963d 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -105,8 +105,10 @@ use crate::construct::{
partial_space_or_tab::{space_or_tab_with_options, Options as SpaceOrTabOptions},
};
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
-use crate::util::codes::{parse, serialize};
+use crate::tokenizer::{Point, State, Tokenizer};
+use crate::util::slice::{Position, Slice};
+
+const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
/// Kind of HTML (flow).
#[derive(Debug, PartialEq)]
@@ -168,17 +170,6 @@ impl QuoteKind {
_ => unreachable!("invalid char"),
}
}
- /// Turn [Code] into a kind.
- ///
- /// ## Panics
- ///
- /// Panics if `code` is not `Code::Char('"' | '\'')`.
- fn from_code(code: Code) -> QuoteKind {
- match code {
- Code::Char(char) => QuoteKind::from_char(char),
- _ => unreachable!("invalid code"),
- }
- }
}
/// State needed to parse HTML (flow).
@@ -190,9 +181,9 @@ struct Info {
start_tag: bool,
/// Used depending on `kind` to either collect all parsed characters, or to
/// store expected characters.
- buffer: Vec<Code>,
- /// `index` into `buffer` when expecting certain characters.
- index: usize,
+ start: Option<Point>,
+ /// Collected index, for various reasons.
+ size: usize,
/// Current quote, when in a double or single quoted attribute value.
quote: Option<QuoteKind>,
}
@@ -234,7 +225,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
- if Code::Char('<') == tokenizer.current {
+ if Some('<') == tokenizer.current {
tokenizer.enter(Token::HtmlFlowData);
tokenizer.consume();
State::Fn(Box::new(open))
@@ -259,21 +250,22 @@ fn open(tokenizer: &mut Tokenizer) -> State {
kind: Kind::Basic,
// Assume closing tag (or no tag).
start_tag: false,
- buffer: vec![],
- index: 0,
+ start: None,
+ size: 0,
quote: None,
};
match tokenizer.current {
- Code::Char('!') => {
+ Some('!') => {
tokenizer.consume();
State::Fn(Box::new(|t| declaration_open(t, info)))
}
- Code::Char('/') => {
+ Some('/') => {
tokenizer.consume();
+ info.start = Some(tokenizer.point.clone());
State::Fn(Box::new(|t| tag_close_start(t, info)))
}
- Code::Char('?') => {
+ Some('?') => {
info.kind = Kind::Instruction;
tokenizer.consume();
// Do not form containers.
@@ -282,8 +274,9 @@ fn open(tokenizer: &mut Tokenizer) -> State {
// right now, so we do need to search for `>`, similar to declarations.
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
- Code::Char('A'..='Z' | 'a'..='z') => {
+ Some('A'..='Z' | 'a'..='z') => {
info.start_tag = true;
+ info.start = Some(tokenizer.point.clone());
tag_name(tokenizer, info)
}
_ => State::Nok,
@@ -302,19 +295,18 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char('-') => {
+ Some('-') => {
tokenizer.consume();
info.kind = Kind::Comment;
State::Fn(Box::new(|t| comment_open_inside(t, info)))
}
- Code::Char('[') => {
+ Some('[') => {
tokenizer.consume();
info.kind = Kind::Cdata;
- info.buffer = parse("CDATA[");
- info.index = 0;
+ info.size = 0;
State::Fn(Box::new(|t| cdata_open_inside(t, info)))
}
- Code::Char('A'..='Z' | 'a'..='z') => {
+ Some('A'..='Z' | 'a'..='z') => {
tokenizer.consume();
info.kind = Kind::Declaration;
// Do not form containers.
@@ -333,7 +325,7 @@ fn declaration_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('-') => {
+ Some('-') => {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
@@ -350,20 +342,21 @@ fn comment_open_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ^^^^^^
/// ```
fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
- if tokenizer.current == info.buffer[info.index] {
- info.index += 1;
- tokenizer.consume();
+ match tokenizer.current {
+ Some(char) if char == CDATA_SEARCH[info.size] => {
+ info.size += 1;
+ tokenizer.consume();
- if info.index == info.buffer.len() {
- info.buffer.clear();
- // Do not form containers.
- tokenizer.concrete = true;
- State::Fn(Box::new(|t| continuation(t, info)))
- } else {
- State::Fn(Box::new(|t| cdata_open_inside(t, info)))
+ if info.size == CDATA_SEARCH.len() {
+ info.size = 0;
+ // Do not form containers.
+ tokenizer.concrete = true;
+ State::Fn(Box::new(|t| continuation(t, info)))
+ } else {
+ State::Fn(Box::new(|t| cdata_open_inside(t, info)))
+ }
}
- } else {
- State::Nok
+ _ => State::Nok,
}
}
@@ -373,11 +366,10 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// > | </x>
/// ^
/// ```
-fn tag_close_start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
+fn tag_close_start(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('A'..='Z' | 'a'..='z') => {
+ Some('A'..='Z' | 'a'..='z') => {
tokenizer.consume();
- info.buffer.push(tokenizer.current);
State::Fn(Box::new(|t| tag_name(t, info)))
}
_ => State::Nok,
@@ -394,22 +386,27 @@ fn tag_close_start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::None
- | Code::CarriageReturnLineFeed
- | Code::VirtualSpace
- | Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => {
- let tag_name_buffer = serialize(&info.buffer, false).to_lowercase();
- let name = tag_name_buffer.as_str();
- let slash = matches!(tokenizer.current, Code::Char('/'));
-
- info.buffer.clear();
-
- if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name) {
+ None | Some('\t' | '\n' | ' ' | '/' | '>') => {
+ let slash = matches!(tokenizer.current, Some('/'));
+ let start = info.start.take().unwrap();
+ let name = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position {
+ start: &start,
+ end: &tokenizer.point,
+ },
+ )
+ .serialize()
+ .trim()
+ .to_lowercase();
+ println!("name: {:?}", name);
+
+ if !slash && info.start_tag && HTML_RAW_NAMES.contains(&name.as_str()) {
info.kind = Kind::Raw;
// Do not form containers.
tokenizer.concrete = true;
continuation(tokenizer, info)
- } else if HTML_BLOCK_NAMES.contains(&name) {
+ } else if HTML_BLOCK_NAMES.contains(&name.as_str()) {
// Basic is assumed, no need to set `kind`.
if slash {
tokenizer.consume();
@@ -432,12 +429,11 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
}
}
}
- Code::Char('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
tokenizer.consume();
- info.buffer.push(tokenizer.current);
State::Fn(Box::new(|t| tag_name(t, info)))
}
- Code::Char(_) => State::Nok,
+ Some(_) => State::Nok,
}
}
@@ -449,7 +445,7 @@ fn tag_name(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.consume();
// Do not form containers.
tokenizer.concrete = true;
@@ -467,7 +463,7 @@ fn basic_self_closing(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_closing_tag_after(t, info)))
}
@@ -496,15 +492,15 @@ fn complete_closing_tag_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('/') => {
+ Some('/') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_end(t, info)))
}
- Code::Char('0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some('0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name(t, info)))
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name_before(t, info)))
}
@@ -524,7 +520,7 @@ fn complete_attribute_name_before(tokenizer: &mut Tokenizer, info: Info) -> Stat
/// ```
fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name(t, info)))
}
@@ -543,11 +539,11 @@ fn complete_attribute_name(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('=') => {
+ Some('=') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_name_after(t, info)))
}
@@ -566,13 +562,13 @@ fn complete_attribute_name_after(tokenizer: &mut Tokenizer, info: Info) -> State
/// ```
fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::None | Code::Char('<' | '=' | '>' | '`') => State::Nok,
- Code::Char('"' | '\'') => {
+ None | Some('<' | '=' | '>' | '`') => State::Nok,
+ Some(char) if matches!(char, '"' | '\'') => {
+ info.quote = Some(QuoteKind::from_char(char));
tokenizer.consume();
- info.quote = Some(QuoteKind::from_code(tokenizer.current));
State::Fn(Box::new(|t| complete_attribute_value_quoted(t, info)))
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_before(t, info)))
}
@@ -590,8 +586,8 @@ fn complete_attribute_value_before(tokenizer: &mut Tokenizer, mut info: Info) ->
/// ```
fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => State::Nok,
- Code::Char(char) if char == info.quote.as_ref().unwrap().as_char() => {
+ None | Some('\n') => State::Nok,
+ Some(char) if char == info.quote.as_ref().unwrap().as_char() => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_quoted_after(t, info)))
}
@@ -610,13 +606,10 @@ fn complete_attribute_value_quoted(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// ```
fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None
- | Code::CarriageReturnLineFeed
- | Code::VirtualSpace
- | Code::Char('\t' | '\n' | '\r' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => {
+ None | Some('\t' | '\n' | ' ' | '"' | '\'' | '/' | '<' | '=' | '>' | '`') => {
complete_attribute_name_after(tokenizer, info)
}
- Code::Char(_) => {
+ Some(_) => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_attribute_value_unquoted(t, info)))
}
@@ -632,9 +625,7 @@ fn complete_attribute_value_unquoted(tokenizer: &mut Tokenizer, info: Info) -> S
/// ```
fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::VirtualSpace | Code::Char('\t' | ' ' | '/' | '>') => {
- complete_attribute_name_before(tokenizer, info)
- }
+ Some('\t' | ' ' | '/' | '>') => complete_attribute_name_before(tokenizer, info),
_ => State::Nok,
}
}
@@ -647,7 +638,7 @@ fn complete_attribute_value_quoted_after(tokenizer: &mut Tokenizer, info: Info)
/// ```
fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_after(t, info)))
}
@@ -663,16 +654,16 @@ fn complete_end(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
// Do not form containers.
tokenizer.concrete = true;
continuation(tokenizer, info)
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(|t| complete_after(t, info)))
}
- Code::Char(_) => State::Nok,
+ Some(_) => State::Nok,
}
}
@@ -684,29 +675,27 @@ fn complete_after(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('-') if info.kind == Kind::Comment => {
+ Some('-') if info.kind == Kind::Comment => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_comment_inside(t, info)))
}
- Code::Char('<') if info.kind == Kind::Raw => {
+ Some('<') if info.kind == Kind::Raw => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_raw_tag_open(t, info)))
}
- Code::Char('>') if info.kind == Kind::Declaration => {
+ Some('>') if info.kind == Kind::Declaration => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_close(t, info)))
}
- Code::Char('?') if info.kind == Kind::Instruction => {
+ Some('?') if info.kind == Kind::Instruction => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
- Code::Char(']') if info.kind == Kind::Cdata => {
+ Some(']') if info.kind == Kind::Cdata => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_character_data_inside(t, info)))
}
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
- if info.kind == Kind::Basic || info.kind == Kind::Complete =>
- {
+ Some('\n') if info.kind == Kind::Basic || info.kind == Kind::Complete => {
tokenizer.exit(Token::HtmlFlowData);
tokenizer.check(blank_line_before, |ok| {
if ok {
@@ -716,7 +705,7 @@ fn continuation(tokenizer: &mut Tokenizer, info: Info) -> State {
}
})(tokenizer)
}
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::HtmlFlowData);
continuation_start(tokenizer, info)
}
@@ -753,7 +742,7 @@ fn continuation_start(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ Some('\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
@@ -772,9 +761,7 @@ fn continuation_start_non_lazy(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- continuation_start(tokenizer, info)
- }
+ None | Some('\n') => continuation_start(tokenizer, info),
_ => {
tokenizer.enter(Token::HtmlFlowData);
continuation(tokenizer, info)
@@ -790,7 +777,7 @@ fn continuation_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('-') => {
+ Some('-') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -804,10 +791,11 @@ fn continuation_comment_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
/// > | <script>console.log(1)</script>
/// ^
/// ```
-fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info) -> State {
+fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char('/') => {
+ Some('/') => {
tokenizer.consume();
+ info.start = Some(tokenizer.point.clone());
State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
}
_ => continuation(tokenizer, info),
@@ -822,24 +810,34 @@ fn continuation_raw_tag_open(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char('>') => {
- let tag_name_buffer = serialize(&info.buffer, false).to_lowercase();
- info.buffer.clear();
-
- if HTML_RAW_NAMES.contains(&tag_name_buffer.as_str()) {
+ Some('>') => {
+ info.size = 0;
+
+ let start = info.start.take().unwrap();
+ let name = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position {
+ start: &start,
+ end: &tokenizer.point,
+ },
+ )
+ .serialize()
+ .to_lowercase();
+
+ if HTML_RAW_NAMES.contains(&name.as_str()) {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_close(t, info)))
} else {
continuation(tokenizer, info)
}
}
- Code::Char('A'..='Z' | 'a'..='z') if info.buffer.len() < HTML_RAW_SIZE_MAX => {
+ Some('A'..='Z' | 'a'..='z') if info.size < HTML_RAW_SIZE_MAX => {
tokenizer.consume();
- info.buffer.push(tokenizer.current);
+ info.size += 1;
State::Fn(Box::new(|t| continuation_raw_end_tag(t, info)))
}
_ => {
- info.buffer.clear();
+ info.size = 0;
continuation(tokenizer, info)
}
}
@@ -853,7 +851,7 @@ fn continuation_raw_end_tag(tokenizer: &mut Tokenizer, mut info: Info) -> State
/// ```
fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char(']') => {
+ Some(']') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -877,11 +875,11 @@ fn continuation_character_data_inside(tokenizer: &mut Tokenizer, info: Info) ->
/// ```
fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_close(t, info)))
}
- Code::Char('-') if info.kind == Kind::Comment => {
+ Some('-') if info.kind == Kind::Comment => {
tokenizer.consume();
State::Fn(Box::new(|t| continuation_declaration_inside(t, info)))
}
@@ -897,7 +895,7 @@ fn continuation_declaration_inside(tokenizer: &mut Tokenizer, info: Info) -> Sta
/// ```
fn continuation_close(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::HtmlFlowData);
continuation_after(tokenizer)
}
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 3ac8d71..b1ad113 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -56,8 +56,9 @@
use crate::construct::partial_space_or_tab::space_or_tab;
use crate::token::Token;
-use crate::tokenizer::{Code, State, StateFn, Tokenizer};
-use crate::util::codes::parse;
+use crate::tokenizer::{State, StateFn, Tokenizer};
+
+const CDATA_SEARCH: [char; 6] = ['C', 'D', 'A', 'T', 'A', '['];
/// Start of HTML (text)
///
@@ -66,7 +67,7 @@ use crate::util::codes::parse;
/// ^
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Code::Char('<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
+ if Some('<') == tokenizer.current && tokenizer.parse_state.constructs.html_text {
tokenizer.enter(Token::HtmlText);
tokenizer.enter(Token::HtmlTextData);
tokenizer.consume();
@@ -88,19 +89,19 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('!') => {
+ Some('!') => {
tokenizer.consume();
State::Fn(Box::new(declaration_open))
}
- Code::Char('/') => {
+ Some('/') => {
tokenizer.consume();
State::Fn(Box::new(tag_close_start))
}
- Code::Char('?') => {
+ Some('?') => {
tokenizer.consume();
State::Fn(Box::new(instruction))
}
- Code::Char('A'..='Z' | 'a'..='z') => {
+ Some('A'..='Z' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open))
}
@@ -120,16 +121,15 @@ fn open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn declaration_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('-') => {
+ Some('-') => {
tokenizer.consume();
State::Fn(Box::new(comment_open_inside))
}
- Code::Char('[') => {
+ Some('[') => {
tokenizer.consume();
- let buffer = parse("CDATA[");
- State::Fn(Box::new(|t| cdata_open_inside(t, buffer, 0)))
+ State::Fn(Box::new(|t| cdata_open_inside(t, 0)))
}
- Code::Char('A'..='Z' | 'a'..='z') => {
+ Some('A'..='Z' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(declaration))
}
@@ -145,7 +145,7 @@ fn declaration_open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('-') => {
+ Some('-') => {
tokenizer.consume();
State::Fn(Box::new(comment_start))
}
@@ -168,8 +168,8 @@ fn comment_open_inside(tokenizer: &mut Tokenizer) -> State {
/// [html_flow]: crate::construct::html_flow
fn comment_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::Char('>') => State::Nok,
- Code::Char('-') => {
+ None | Some('>') => State::Nok,
+ Some('-') => {
tokenizer.consume();
State::Fn(Box::new(comment_start_dash))
}
@@ -192,7 +192,7 @@ fn comment_start(tokenizer: &mut Tokenizer) -> State {
/// [html_flow]: crate::construct::html_flow
fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::Char('>') => State::Nok,
+ None | Some('>') => State::Nok,
_ => comment(tokenizer),
}
}
@@ -205,11 +205,9 @@ fn comment_start_dash(tokenizer: &mut Tokenizer) -> State {
/// ```
fn comment(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None => State::Nok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(comment))
- }
- Code::Char('-') => {
+ None => State::Nok,
+ Some('\n') => at_line_ending(tokenizer, Box::new(comment)),
+ Some('-') => {
tokenizer.consume();
State::Fn(Box::new(comment_close))
}
@@ -228,7 +226,7 @@ fn comment(tokenizer: &mut Tokenizer) -> State {
/// ```
fn comment_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('-') => {
+ Some('-') => {
tokenizer.consume();
State::Fn(Box::new(end))
}
@@ -242,17 +240,18 @@ fn comment_close(tokenizer: &mut Tokenizer) -> State {
/// > | a <![CDATA[>&<]]> b
/// ^^^^^^
/// ```
-fn cdata_open_inside(tokenizer: &mut Tokenizer, buffer: Vec<Code>, index: usize) -> State {
- if tokenizer.current == buffer[index] {
- tokenizer.consume();
+fn cdata_open_inside(tokenizer: &mut Tokenizer, index: usize) -> State {
+ match tokenizer.current {
+ Some(char) if char == CDATA_SEARCH[index] => {
+ tokenizer.consume();
- if index + 1 == buffer.len() {
- State::Fn(Box::new(cdata))
- } else {
- State::Fn(Box::new(move |t| cdata_open_inside(t, buffer, index + 1)))
+ if index + 1 == CDATA_SEARCH.len() {
+ State::Fn(Box::new(cdata))
+ } else {
+ State::Fn(Box::new(move |t| cdata_open_inside(t, index + 1)))
+ }
}
- } else {
- State::Nok
+ _ => State::Nok,
}
}
@@ -264,11 +263,9 @@ fn cdata_open_inside(tokenizer: &mut Tokenizer, buffer: Vec<Code>, index: usize)
/// ```
fn cdata(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None => State::Nok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(cdata))
- }
- Code::Char(']') => {
+ None => State::Nok,
+ Some('\n') => at_line_ending(tokenizer, Box::new(cdata)),
+ Some(']') => {
tokenizer.consume();
State::Fn(Box::new(cdata_close))
}
@@ -287,7 +284,7 @@ fn cdata(tokenizer: &mut Tokenizer) -> State {
/// ```
fn cdata_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(']') => {
+ Some(']') => {
tokenizer.consume();
State::Fn(Box::new(cdata_end))
}
@@ -303,8 +300,8 @@ fn cdata_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn cdata_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('>') => end(tokenizer),
- Code::Char(']') => cdata_close(tokenizer),
+ Some('>') => end(tokenizer),
+ Some(']') => cdata_close(tokenizer),
_ => cdata(tokenizer),
}
}
@@ -317,10 +314,8 @@ fn cdata_end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn declaration(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::Char('>') => end(tokenizer),
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(declaration))
- }
+ None | Some('>') => end(tokenizer),
+ Some('\n') => at_line_ending(tokenizer, Box::new(declaration)),
_ => {
tokenizer.consume();
State::Fn(Box::new(declaration))
@@ -336,11 +331,9 @@ fn declaration(tokenizer: &mut Tokenizer) -> State {
/// ```
fn instruction(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None => State::Nok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(instruction))
- }
- Code::Char('?') => {
+ None => State::Nok,
+ Some('\n') => at_line_ending(tokenizer, Box::new(instruction)),
+ Some('?') => {
tokenizer.consume();
State::Fn(Box::new(instruction_close))
}
@@ -359,7 +352,7 @@ fn instruction(tokenizer: &mut Tokenizer) -> State {
/// ```
fn instruction_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('>') => end(tokenizer),
+ Some('>') => end(tokenizer),
_ => instruction(tokenizer),
}
}
@@ -372,7 +365,7 @@ fn instruction_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('A'..='Z' | 'a'..='z') => {
+ Some('A'..='Z' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(tag_close))
}
@@ -388,7 +381,7 @@ fn tag_close_start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_close(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(tag_close))
}
@@ -404,10 +397,8 @@ fn tag_close(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(tag_close_between))
- }
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\n') => at_line_ending(tokenizer, Box::new(tag_close_between)),
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_close_between))
}
@@ -423,13 +414,11 @@ fn tag_close_between(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
+ Some('-' | '0'..='9' | 'A'..='Z' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open))
}
- Code::CarriageReturnLineFeed
- | Code::VirtualSpace
- | Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => tag_open_between(tokenizer),
+ Some('\t' | '\n' | ' ' | '/' | '>') => tag_open_between(tokenizer),
_ => State::Nok,
}
}
@@ -442,18 +431,16 @@ fn tag_open(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(tag_open_between))
- }
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_between)),
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_between))
}
- Code::Char('/') => {
+ Some('/') => {
tokenizer.consume();
State::Fn(Box::new(end))
}
- Code::Char(':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some(':' | 'A'..='Z' | '_' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_name))
}
@@ -469,7 +456,7 @@ fn tag_open_between(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
+ Some('-' | '.' | '0'..='9' | ':' | 'A'..='Z' | '_' | 'a'..='z') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_name))
}
@@ -486,14 +473,12 @@ fn tag_open_attribute_name(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(tag_open_attribute_name_after))
- }
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_name_after)),
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_name_after))
}
- Code::Char('=') => {
+ Some('=') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_before))
}
@@ -510,19 +495,17 @@ fn tag_open_attribute_name_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::Char('<' | '=' | '>' | '`') => State::Nok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- at_line_ending(tokenizer, Box::new(tag_open_attribute_value_before))
- }
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ None | Some('<' | '=' | '>' | '`') => State::Nok,
+ Some('\n') => at_line_ending(tokenizer, Box::new(tag_open_attribute_value_before)),
+ Some('\t' | ' ') => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_before))
}
- Code::Char(char) if char == '"' || char == '\'' => {
+ Some(char) if char == '"' || char == '\'' => {
tokenizer.consume();
State::Fn(Box::new(move |t| tag_open_attribute_value_quoted(t, char)))
}
- Code::Char(_) => {
+ Some(_) => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_unquoted))
}
@@ -537,12 +520,12 @@ fn tag_open_attribute_value_before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: char) -> State {
match tokenizer.current {
- Code::None => State::Nok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => at_line_ending(
+ None => State::Nok,
+ Some('\n') => at_line_ending(
tokenizer,
Box::new(move |t| tag_open_attribute_value_quoted(t, marker)),
),
- Code::Char(char) if char == marker => {
+ Some(char) if char == marker => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_quoted_after))
}
@@ -563,11 +546,9 @@ fn tag_open_attribute_value_quoted(tokenizer: &mut Tokenizer, marker: char) -> S
/// ```
fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::Char('"' | '\'' | '<' | '=' | '`') => State::Nok,
- Code::CarriageReturnLineFeed
- | Code::VirtualSpace
- | Code::Char('\t' | '\n' | '\r' | ' ' | '/' | '>') => tag_open_between(tokenizer),
- Code::Char(_) => {
+ None | Some('"' | '\'' | '<' | '=' | '`') => State::Nok,
+ Some('\t' | '\n' | ' ' | '/' | '>') => tag_open_between(tokenizer),
+ Some(_) => {
tokenizer.consume();
State::Fn(Box::new(tag_open_attribute_value_unquoted))
}
@@ -583,9 +564,7 @@ fn tag_open_attribute_value_unquoted(tokenizer: &mut Tokenizer) -> State {
/// ```
fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed
- | Code::VirtualSpace
- | Code::Char('\t' | '\n' | '\r' | ' ' | '>' | '/') => tag_open_between(tokenizer),
+ Some('\t' | '\n' | ' ' | '>' | '/') => tag_open_between(tokenizer),
_ => State::Nok,
}
}
@@ -598,7 +577,7 @@ fn tag_open_attribute_value_quoted_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.consume();
tokenizer.exit(Token::HtmlTextData);
tokenizer.exit(Token::HtmlText);
@@ -620,7 +599,7 @@ fn end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn at_line_ending(tokenizer: &mut Tokenizer, return_state: Box<StateFn>) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ Some('\n') => {
tokenizer.exit(Token::HtmlTextData);
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
diff --git a/src/construct/label_end.rs b/src/construct/label_end.rs
index 6f0a707..5ea788f 100644
--- a/src/construct/label_end.rs
+++ b/src/construct/label_end.rs
@@ -1,4 +1,4 @@
-//! Label end is a construct that occurs in the [text][] content type.
+//! Label end is a construct that occurs in the [text][] content type.
//!
//! It forms with the following BNF:
//!
@@ -154,10 +154,11 @@ use crate::construct::{
partial_title::{start as title, Options as TitleOptions},
};
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, Media, State, Tokenizer};
+use crate::tokenizer::{Event, EventType, Media, State, Tokenizer};
use crate::util::{
normalize_identifier::normalize_identifier,
- span::{serialize, Span},
+ skip,
+ slice::{Position, Slice},
};
/// State needed to parse label end.
@@ -181,7 +182,7 @@ struct Info {
/// > | [a] b
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
- if Code::Char(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
+ if Some(']') == tokenizer.current && tokenizer.parse_state.constructs.label_end {
let mut label_start_index = None;
let mut index = tokenizer.label_start_stack.len();
@@ -207,19 +208,23 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
}
let label_end_start = tokenizer.events.len();
+
let info = Info {
label_start_index,
media: Media {
start: label_start.start,
end: (label_end_start, label_end_start + 3),
- id: normalize_identifier(&serialize(
- &tokenizer.parse_state.codes,
- &Span {
- start_index: tokenizer.events[label_start.start.1].point.index,
- end_index: tokenizer.events[label_end_start - 1].point.index,
- },
- false,
- )),
+ // To do: virtual spaces not needed, create a `to_str`?
+ id: normalize_identifier(
+ &Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position {
+ start: &tokenizer.events[label_start.start.1].point,
+ end: &tokenizer.events[label_end_start - 1].point,
+ },
+ )
+ .serialize(),
+ ),
},
};
@@ -253,7 +258,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
// Resource (`[asd](fgh)`)?
- Code::Char('(') => tokenizer.attempt(resource, move |is_ok| {
+ Some('(') => tokenizer.attempt(resource, move |is_ok| {
Box::new(move |t| {
// Also fine if `defined`, as then it’s a valid shortcut.
if is_ok || defined {
@@ -264,7 +269,7 @@ fn after(tokenizer: &mut Tokenizer, info: Info) -> State {
})
})(tokenizer),
// Full (`[asd][fgh]`) or collapsed (`[asd][]`) reference?
- Code::Char('[') => tokenizer.attempt(full_reference, move |is_ok| {
+ Some('[') => tokenizer.attempt(full_reference, move |is_ok| {
Box::new(move |t| {
if is_ok {
ok(t, info)
@@ -377,7 +382,7 @@ fn nok(tokenizer: &mut Tokenizer, label_start_index: usize) -> State {
/// ```
fn resource(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('(') => {
+ Some('(') => {
tokenizer.enter(Token::Resource);
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
@@ -406,7 +411,7 @@ fn resource_start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(')') => resource_end(tokenizer),
+ Some(')') => resource_end(tokenizer),
_ => tokenizer.go(
|t| {
destination(
@@ -446,7 +451,7 @@ fn destination_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_between(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('"' | '\'' | '(') => tokenizer.go(
+ Some('"' | '\'' | '(') => tokenizer.go(
|t| {
title(
t,
@@ -481,7 +486,7 @@ fn title_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn resource_end(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(')') => {
+ Some(')') => {
tokenizer.enter(Token::ResourceMarker);
tokenizer.consume();
tokenizer.exit(Token::ResourceMarker);
@@ -500,7 +505,7 @@ fn resource_end(tokenizer: &mut Tokenizer) -> State {
/// ```
fn full_reference(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('[') => tokenizer.go(
+ Some('[') => tokenizer.go(
|t| {
label(
t,
@@ -524,36 +529,23 @@ fn full_reference(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
- let events = &tokenizer.events;
- let mut index = events.len() - 1;
- let mut start: Option<usize> = None;
- let mut end: Option<usize> = None;
-
- while index > 0 {
- index -= 1;
- let event = &events[index];
- if event.token_type == Token::ReferenceString {
- if event.event_type == EventType::Exit {
- end = Some(event.point.index);
- } else {
- start = Some(event.point.index);
- break;
- }
- }
- }
+ let end = skip::to_back(
+ &tokenizer.events,
+ tokenizer.events.len() - 1,
+ &[Token::ReferenceString],
+ );
+
+ // To do: virtual spaces not needed, create a `to_str`?
+ let id = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position::from_exit_event(&tokenizer.events, end),
+ )
+ .serialize();
if tokenizer
.parse_state
.definitions
- .contains(&normalize_identifier(&serialize(
- &tokenizer.parse_state.codes,
- &Span {
- // Always found, otherwise we don’t get here.
- start_index: start.unwrap(),
- end_index: end.unwrap(),
- },
- false,
- )))
+ .contains(&normalize_identifier(&id))
{
State::Ok
} else {
@@ -571,7 +563,7 @@ fn full_reference_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('[') => {
+ Some('[') => {
tokenizer.enter(Token::Reference);
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
@@ -592,7 +584,7 @@ fn collapsed_reference(tokenizer: &mut Tokenizer) -> State {
/// ```
fn collapsed_reference_open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char(']') => {
+ Some(']') => {
tokenizer.enter(Token::ReferenceMarker);
tokenizer.consume();
tokenizer.exit(Token::ReferenceMarker);
@@ -735,7 +727,11 @@ pub fn resolve_media(tokenizer: &mut Tokenizer) {
0,
vec![Event {
event_type: EventType::Exit,
- token_type: Token::Link,
+ token_type: if group_enter_event.token_type == Token::LabelLink {
+ Token::Link
+ } else {
+ Token::Image
+ },
point: events[group_end_index].point.clone(),
link: None,
}],
diff --git a/src/construct/label_start_image.rs b/src/construct/label_start_image.rs
index 8c12ffe..078026d 100644
--- a/src/construct/label_start_image.rs
+++ b/src/construct/label_start_image.rs
@@ -30,7 +30,7 @@
use super::label_end::resolve_media;
use crate::token::Token;
-use crate::tokenizer::{Code, LabelStart, State, Tokenizer};
+use crate::tokenizer::{LabelStart, State, Tokenizer};
/// Start of label (image) start.
///
@@ -40,7 +40,7 @@ use crate::tokenizer::{Code, LabelStart, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('!') if tokenizer.parse_state.constructs.label_start_image => {
+ Some('!') if tokenizer.parse_state.constructs.label_start_image => {
tokenizer.enter(Token::LabelImage);
tokenizer.enter(Token::LabelImageMarker);
tokenizer.consume();
@@ -59,7 +59,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
pub fn open(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('[') => {
+ Some('[') => {
tokenizer.enter(Token::LabelMarker);
tokenizer.consume();
tokenizer.exit(Token::LabelMarker);
diff --git a/src/construct/label_start_link.rs b/src/construct/label_start_link.rs
index e13cd77..d7ae1d6 100644
--- a/src/construct/label_start_link.rs
+++ b/src/construct/label_start_link.rs
@@ -29,7 +29,7 @@
use super::label_end::resolve_media;
use crate::token::Token;
-use crate::tokenizer::{Code, LabelStart, State, Tokenizer};
+use crate::tokenizer::{LabelStart, State, Tokenizer};
/// Start of label (link) start.
///
@@ -39,7 +39,7 @@ use crate::tokenizer::{Code, LabelStart, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('[') if tokenizer.parse_state.constructs.label_start_link => {
+ Some('[') if tokenizer.parse_state.constructs.label_start_link => {
let start = tokenizer.events.len();
tokenizer.enter(Token::LabelLink);
tokenizer.enter(Token::LabelMarker);
diff --git a/src/construct/list.rs b/src/construct/list.rs
index f5bb0ce..355eeee 100644
--- a/src/construct/list.rs
+++ b/src/construct/list.rs
@@ -50,10 +50,10 @@ use crate::construct::{
thematic_break::start as thematic_break,
};
use crate::token::Token;
-use crate::tokenizer::{Code, EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, Tokenizer};
use crate::util::{
skip,
- span::{codes as codes_from_span, from_exit_event},
+ slice::{Position, Slice},
};
/// Type of list.
@@ -117,17 +117,6 @@ impl Kind {
_ => unreachable!("invalid char"),
}
}
- /// Turn [Code] into a kind.
- ///
- /// ## Panics
- ///
- /// Panics if `code` is not `Code::Char('.' | ')' | '*' | '+' | '-')`.
- fn from_code(code: Code) -> Kind {
- match code {
- Code::Char(char) => Kind::from_char(char),
- _ => unreachable!("invalid code"),
- }
- }
}
/// Start of list item.
@@ -160,11 +149,11 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
// Unordered.
- Code::Char('*' | '+' | '-') => tokenizer.check(thematic_break, |ok| {
+ Some('*' | '+' | '-') => tokenizer.check(thematic_break, |ok| {
Box::new(if ok { nok } else { before_unordered })
})(tokenizer),
// Ordered.
- Code::Char(char) if char.is_ascii_digit() && (!tokenizer.interrupt || char == '1') => {
+ Some(char) if char.is_ascii_digit() && (!tokenizer.interrupt || char == '1') => {
tokenizer.enter(Token::ListItemPrefix);
tokenizer.enter(Token::ListItemValue);
inside(tokenizer, 0)
@@ -194,11 +183,11 @@ fn before_unordered(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer, size: usize) -> State {
match tokenizer.current {
- Code::Char(char) if char.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
+ Some(char) if char.is_ascii_digit() && size + 1 < LIST_ITEM_VALUE_SIZE_MAX => {
tokenizer.consume();
State::Fn(Box::new(move |t| inside(t, size + 1)))
}
- Code::Char('.' | ')') if !tokenizer.interrupt || size < 2 => {
+ Some('.' | ')') if !tokenizer.interrupt || size < 2 => {
tokenizer.exit(Token::ListItemValue);
marker(tokenizer)
}
@@ -273,10 +262,7 @@ fn whitespace(tokenizer: &mut Tokenizer) -> State {
/// ^
/// ```
fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
- if matches!(
- tokenizer.current,
- Code::VirtualSpace | Code::Char('\t' | ' ')
- ) {
+ if matches!(tokenizer.current, Some('\t' | ' ')) {
State::Nok
} else {
State::Ok
@@ -291,7 +277,7 @@ fn whitespace_after(tokenizer: &mut Tokenizer) -> State {
/// ```
fn prefix_other(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.enter(Token::SpaceOrTab);
tokenizer.consume();
tokenizer.exit(Token::SpaceOrTab);
@@ -316,8 +302,18 @@ fn after(tokenizer: &mut Tokenizer, blank: bool) -> State {
tokenizer.events.len() - 1,
&[Token::ListItem],
);
- let prefix = tokenizer.point.index - tokenizer.events[start].point.index
- + (if blank { 1 } else { 0 });
+ let mut prefix = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position {
+ start: &tokenizer.events[start].point,
+ end: &tokenizer.point,
+ },
+ )
+ .size();
+
+ if blank {
+ prefix += 1;
+ }
let container = tokenizer.container.as_mut().unwrap();
container.blank_initial = blank;
@@ -403,12 +399,15 @@ pub fn resolve_list_item(tokenizer: &mut Tokenizer) {
if event.token_type == Token::ListItem {
if event.event_type == EventType::Enter {
let end = skip::opt(&tokenizer.events, index, &[Token::ListItem]) - 1;
- let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]) + 1;
- let codes = codes_from_span(
- &tokenizer.parse_state.codes,
- &from_exit_event(&tokenizer.events, marker),
+ let marker = skip::to(&tokenizer.events, index, &[Token::ListItemMarker]);
+ let kind = Kind::from_char(
+ Slice::from_point(
+ &tokenizer.parse_state.chars,
+ &tokenizer.events[marker].point,
+ )
+ .head()
+ .unwrap(),
);
- let kind = Kind::from_code(codes[0]);
let current = (kind, balance, index, end);
let mut list_index = lists_wip.len();
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 4bce6a4..5d230d3 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -33,7 +33,7 @@
//! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, EventType, State, Tokenizer};
+use crate::tokenizer::{ContentType, EventType, State, Tokenizer};
use crate::util::skip::opt as skip_opt;
/// Before a paragraph.
@@ -44,7 +44,7 @@ use crate::util::skip::opt as skip_opt;
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
unreachable!("unexpected eol/eof")
}
_ => {
@@ -63,7 +63,7 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::Data);
tokenizer.exit(Token::Paragraph);
tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve));
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
index 4216276..0b66b09 100644
--- a/src/construct/partial_data.rs
+++ b/src/construct/partial_data.rs
@@ -7,7 +7,7 @@
//! [text]: crate::content::text
use crate::token::Token;
-use crate::tokenizer::{Code, EventType, State, Tokenizer};
+use crate::tokenizer::{EventType, State, Tokenizer};
/// At the beginning of data.
///
@@ -15,13 +15,14 @@ use crate::tokenizer::{Code, EventType, State, Tokenizer};
/// > | abc
/// ^
/// ```
-pub fn start(tokenizer: &mut Tokenizer, stop: &'static [Code]) -> State {
- if stop.contains(&tokenizer.current) {
- tokenizer.enter(Token::Data);
- tokenizer.consume();
- State::Fn(Box::new(move |t| data(t, stop)))
- } else {
- at_break(tokenizer, stop)
+pub fn start(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
+ match tokenizer.current {
+ Some(char) if stop.contains(&char) => {
+ tokenizer.enter(Token::Data);
+ tokenizer.consume();
+ State::Fn(Box::new(move |t| data(t, stop)))
+ }
+ _ => at_break(tokenizer, stop),
}
}
@@ -31,16 +32,16 @@ pub fn start(tokenizer: &mut Tokenizer, stop: &'static [Code]) -> State {
/// > | abc
/// ^
/// ```
-fn at_break(tokenizer: &mut Tokenizer, stop: &'static [Code]) -> State {
+fn at_break(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
match tokenizer.current {
- Code::None => State::Ok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None => State::Ok,
+ Some('\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
State::Fn(Box::new(move |t| at_break(t, stop)))
}
- _ if stop.contains(&tokenizer.current) => {
+ Some(char) if stop.contains(&char) => {
tokenizer.register_resolver_before("data".to_string(), Box::new(resolve_data));
State::Ok
}
@@ -57,10 +58,10 @@ fn at_break(tokenizer: &mut Tokenizer, stop: &'static [Code]) -> State {
/// > | abc
/// ^^^
/// ```
-fn data(tokenizer: &mut Tokenizer, stop: &'static [Code]) -> State {
+fn data(tokenizer: &mut Tokenizer, stop: &'static [char]) -> State {
let done = match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => true,
- _ if stop.contains(&tokenizer.current) => true,
+ None | Some('\n') => true,
+ Some(char) if stop.contains(&char) => true,
_ => false,
};
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 6a984e2..6447228 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -72,7 +72,7 @@
//! [sanitize_uri]: crate::util::sanitize_uri
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, Tokenizer};
/// Configuration.
///
@@ -117,7 +117,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
};
match tokenizer.current {
- Code::Char('<') => {
+ Some('<') => {
tokenizer.enter(info.options.destination.clone());
tokenizer.enter(info.options.literal.clone());
tokenizer.enter(info.options.marker.clone());
@@ -125,11 +125,9 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
tokenizer.exit(info.options.marker.clone());
State::Fn(Box::new(|t| enclosed_before(t, info)))
}
- Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(' ' | ')') => {
- State::Nok
- }
- Code::Char(char) if char.is_ascii_control() => State::Nok,
- Code::Char(_) => {
+ None | Some(' ' | ')') => State::Nok,
+ Some(char) if char.is_ascii_control() => State::Nok,
+ Some(_) => {
tokenizer.enter(info.options.destination.clone());
tokenizer.enter(info.options.raw.clone());
tokenizer.enter(info.options.string.clone());
@@ -146,7 +144,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// ^
/// ```
fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State {
- if let Code::Char('>') = tokenizer.current {
+ if let Some('>') = tokenizer.current {
tokenizer.enter(info.options.marker.clone());
tokenizer.consume();
tokenizer.exit(info.options.marker.clone());
@@ -168,13 +166,13 @@ fn enclosed_before(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('>') => {
+ Some('>') => {
tokenizer.exit(Token::Data);
tokenizer.exit(info.options.string.clone());
enclosed_before(tokenizer, info)
}
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r' | '<') => State::Nok,
- Code::Char('\\') => {
+ None | Some('\n' | '<') => State::Nok,
+ Some('\\') => {
tokenizer.consume();
State::Fn(Box::new(|t| enclosed_escape(t, info)))
}
@@ -193,7 +191,7 @@ fn enclosed(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('<' | '>' | '\\') => {
+ Some('<' | '>' | '\\') => {
tokenizer.consume();
State::Fn(Box::new(|t| enclosed(t, info)))
}
@@ -209,7 +207,7 @@ fn enclosed_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char('(') => {
+ Some('(') => {
if info.balance >= info.options.limit {
State::Nok
} else {
@@ -218,7 +216,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
State::Fn(Box::new(move |t| raw(t, info)))
}
}
- Code::Char(')') => {
+ Some(')') => {
if info.balance == 0 {
tokenizer.exit(Token::Data);
tokenizer.exit(info.options.string.clone());
@@ -231,10 +229,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
State::Fn(Box::new(move |t| raw(t, info)))
}
}
- Code::None
- | Code::CarriageReturnLineFeed
- | Code::VirtualSpace
- | Code::Char('\t' | '\n' | '\r' | ' ') => {
+ None | Some('\t' | '\n' | ' ') => {
if info.balance > 0 {
State::Nok
} else {
@@ -245,12 +240,12 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
State::Ok
}
}
- Code::Char(char) if char.is_ascii_control() => State::Nok,
- Code::Char('\\') => {
+ Some(char) if char.is_ascii_control() => State::Nok,
+ Some('\\') => {
tokenizer.consume();
State::Fn(Box::new(move |t| raw_escape(t, info)))
}
- Code::Char(_) => {
+ Some(_) => {
tokenizer.consume();
State::Fn(Box::new(move |t| raw(t, info)))
}
@@ -265,7 +260,7 @@ fn raw(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn raw_escape(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char('(' | ')' | '\\') => {
+ Some('(' | ')' | '\\') => {
tokenizer.consume();
State::Fn(Box::new(move |t| raw(t, info)))
}
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 91a0e26..ee31533 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -62,7 +62,7 @@ use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::constant::LINK_REFERENCE_SIZE_MAX;
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, Tokenizer};
/// Configuration.
///
@@ -98,7 +98,7 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
match tokenizer.current {
- Code::Char('[') => {
+ Some('[') => {
let info = Info {
connect: false,
data: false,
@@ -124,10 +124,10 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::None | Code::Char('[') => State::Nok,
- Code::Char(']') if !info.data => State::Nok,
+ None | Some('[') => State::Nok,
+ Some(']') if !info.data => State::Nok,
_ if info.size > LINK_REFERENCE_SIZE_MAX => State::Nok,
- Code::Char(']') => {
+ Some(']') => {
tokenizer.exit(info.options.string.clone());
tokenizer.enter(info.options.marker.clone());
tokenizer.consume();
@@ -135,7 +135,7 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
tokenizer.exit(info.options.label);
State::Ok
}
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.go(
+ Some('\n') => tokenizer.go(
space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
connect: info.connect,
@@ -168,7 +168,7 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r' | '[' | ']') => {
+ None | Some('\n' | '[' | ']') => {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
@@ -176,12 +176,12 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
- Code::VirtualSpace | Code::Char('\t' | ' ') => {
+ Some('\t' | ' ') => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| label(t, info)))
}
- Code::Char('\\') => {
+ Some('\\') => {
tokenizer.consume();
info.size += 1;
if !info.data {
@@ -189,7 +189,7 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
}
State::Fn(Box::new(|t| escape(t, info)))
}
- Code::Char(_) => {
+ Some(_) => {
tokenizer.consume();
info.size += 1;
if !info.data {
@@ -208,7 +208,7 @@ fn label(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn escape(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char('[' | '\\' | ']') => {
+ Some('[' | '\\' | ']') => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| label(t, info)))
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
index bdc22e4..068e30f 100644
--- a/src/construct/partial_non_lazy_continuation.rs
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -11,7 +11,7 @@
//! [html_flow]: crate::construct::html_flow
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Start of continuation.
///
@@ -22,7 +22,7 @@ use crate::tokenizer::{Code, State, Tokenizer};
/// ```
pub fn start(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ Some('\n') => {
tokenizer.enter(Token::LineEnding);
tokenizer.consume();
tokenizer.exit(Token::LineEnding);
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
index 5f1a917..6070ffe 100644
--- a/src/construct/partial_space_or_tab.rs
+++ b/src/construct/partial_space_or_tab.rs
@@ -6,7 +6,7 @@
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, State, StateFn, Tokenizer};
+use crate::tokenizer::{ContentType, State, StateFn, Tokenizer};
/// Options to parse `space_or_tab`.
#[derive(Debug)]
@@ -134,7 +134,7 @@ pub fn space_or_tab_eol_with_options(options: EolOptions) -> Box<StateFn> {
/// ```
fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::VirtualSpace | Code::Char('\t' | ' ') if info.options.max > 0 => {
+ Some('\t' | ' ') if info.options.max > 0 => {
tokenizer
.enter_with_content(info.options.kind.clone(), info.options.content_type.clone());
@@ -165,7 +165,7 @@ fn start(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::VirtualSpace | Code::Char('\t' | ' ') if info.size < info.options.max => {
+ Some('\t' | ' ') if info.size < info.options.max => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| inside(t, info)))
@@ -190,7 +190,7 @@ fn inside(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn after_space_or_tab(tokenizer: &mut Tokenizer, mut info: EolInfo) -> State {
match tokenizer.current {
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ Some('\n') => {
tokenizer.enter_with_content(Token::LineEnding, info.options.content_type.clone());
if info.connect {
@@ -239,10 +239,7 @@ fn after_eol(tokenizer: &mut Tokenizer, info: EolInfo) -> State {
/// ```
fn after_more_space_or_tab(tokenizer: &mut Tokenizer) -> State {
// Blank line not allowed.
- if matches!(
- tokenizer.current,
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
- ) {
+ if matches!(tokenizer.current, None | Some('\n')) {
State::Nok
} else {
State::Ok
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index e9528fd..15fc25e 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -33,7 +33,7 @@
use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::subtokenize::link;
use crate::token::Token;
-use crate::tokenizer::{Code, ContentType, State, Tokenizer};
+use crate::tokenizer::{ContentType, State, Tokenizer};
/// Configuration.
///
@@ -103,19 +103,6 @@ impl Kind {
_ => unreachable!("invalid char"),
}
}
- /// Turn [Code] into a kind.
- ///
- /// > 👉 **Note**: an opening paren must be used for `Kind::Paren`.
- ///
- /// ## Panics
- ///
- /// Panics if `code` is not `Code::Char('(' | '"' | '\'')`.
- fn from_code(code: Code) -> Kind {
- match code {
- Code::Char(char) => Kind::from_char(char),
- _ => unreachable!("invalid code"),
- }
- }
}
/// State needed to parse titles.
@@ -137,10 +124,10 @@ struct Info {
/// ```
pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
match tokenizer.current {
- Code::Char('"' | '\'' | '(') => {
+ Some(char) if matches!(char, '"' | '\'' | '(') => {
let info = Info {
connect: false,
- kind: Kind::from_code(tokenizer.current),
+ kind: Kind::from_char(char),
options,
};
tokenizer.enter(info.options.title.clone());
@@ -163,7 +150,7 @@ pub fn start(tokenizer: &mut Tokenizer, options: Options) -> State {
/// ```
fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.enter(info.options.marker.clone());
tokenizer.consume();
tokenizer.exit(info.options.marker.clone());
@@ -185,12 +172,12 @@ fn begin(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.exit(info.options.string.clone());
begin(tokenizer, info)
}
- Code::None => State::Nok,
- Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.go(
+ None => State::Nok,
+ Some('\n') => tokenizer.go(
space_or_tab_eol_with_options(EolOptions {
content_type: Some(ContentType::String),
connect: info.connect,
@@ -223,15 +210,15 @@ fn at_break(tokenizer: &mut Tokenizer, mut info: Info) -> State {
/// ```
fn title(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+ None | Some('\n') => {
tokenizer.exit(Token::Data);
at_break(tokenizer, info)
}
- Code::Char('\\') => {
+ Some('\\') => {
tokenizer.consume();
State::Fn(Box::new(|t| escape(t, info)))
}
@@ -250,7 +237,7 @@ fn title(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn escape(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.consume();
State::Fn(Box::new(|t| title(t, info)))
}
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
index 4c94c7d..152824b 100644
--- a/src/construct/partial_whitespace.rs
+++ b/src/construct/partial_whitespace.rs
@@ -47,8 +47,8 @@
use crate::constant::HARD_BREAK_PREFIX_SIZE_MIN;
use crate::token::Token;
-use crate::tokenizer::{Code, Event, EventType, Tokenizer};
-use crate::util::span;
+use crate::tokenizer::{Event, EventType, Tokenizer};
+use crate::util::slice::{Position, Slice};
/// To do.
pub fn create_resolve_whitespace(hard_break: bool, trim_whole: bool) -> impl Fn(&mut Tokenizer) {
@@ -85,30 +85,26 @@ fn trim_data(
trim_end: bool,
hard_break: bool,
) {
- let mut codes = span::codes(
- &tokenizer.parse_state.codes,
- &span::from_exit_event(&tokenizer.events, exit_index),
+ let mut slice = Slice::from_position(
+ &tokenizer.parse_state.chars,
+ &Position::from_exit_event(&tokenizer.events, exit_index),
);
if trim_end {
- let mut index = codes.len();
- let mut vs = 0;
- let mut spaces_only = true;
+ let mut index = slice.chars.len();
+ let vs = slice.after;
+ let mut spaces_only = vs == 0;
while index > 0 {
- match codes[index - 1] {
- Code::Char(' ') => {}
- Code::Char('\t') => spaces_only = false,
- Code::VirtualSpace => {
- vs += 1;
- spaces_only = false;
- }
+ match slice.chars[index - 1] {
+ ' ' => {}
+ '\t' => spaces_only = false,
_ => break,
}
index -= 1;
}
- let diff = codes.len() - index;
+ let diff = slice.chars.len() - index;
let token_type = if spaces_only
&& hard_break
&& exit_index + 1 < tokenizer.events.len()
@@ -127,12 +123,12 @@ fn trim_data(
return;
}
- if diff > 0 {
+ if diff > 0 || vs > 0 {
let exit_point = tokenizer.events[exit_index].point.clone();
let mut enter_point = exit_point.clone();
enter_point.index -= diff;
- enter_point.column -= diff - vs;
- enter_point.offset -= diff - vs;
+ enter_point.column -= diff;
+ enter_point.vs = 0;
tokenizer.map.add(
exit_index + 1,
@@ -154,17 +150,16 @@ fn trim_data(
);
tokenizer.events[exit_index].point = enter_point;
- codes = &codes[..index];
+ slice.chars = &slice.chars[..index];
}
}
if trim_start {
let mut index = 0;
- let mut vs = 0;
- while index < codes.len() {
- match codes[index] {
- Code::Char(' ' | '\t') => {}
- Code::VirtualSpace => vs += 1,
+ let vs = slice.before;
+ while index < slice.chars.len() {
+ match slice.chars[index] {
+ ' ' | '\t' => {}
_ => break,
}
@@ -173,18 +168,18 @@ fn trim_data(
// The whole data is whitespace.
// We can be very fast: we only change the token types.
- if index == codes.len() {
+ if index == slice.chars.len() {
tokenizer.events[exit_index - 1].token_type = Token::SpaceOrTab;
tokenizer.events[exit_index].token_type = Token::SpaceOrTab;
return;
}
- if index > 0 {
+ if index > 0 || vs > 0 {
let enter_point = tokenizer.events[exit_index - 1].point.clone();
let mut exit_point = enter_point.clone();
exit_point.index += index;
- exit_point.column += index - vs;
- exit_point.offset += index - vs;
+ exit_point.column += index;
+ exit_point.vs = 0;
tokenizer.map.add(
exit_index - 1,
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index 41dc6ae..bed454b 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -51,7 +51,7 @@
use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};
use crate::token::Token;
-use crate::tokenizer::{Code, State, Tokenizer};
+use crate::tokenizer::{State, Tokenizer};
/// Type of thematic break.
#[derive(Debug, PartialEq)]
@@ -104,19 +104,6 @@ impl Kind {
_ => unreachable!("invalid char"),
}
}
- /// Turn [Code] into a kind.
- ///
- /// > 👉 **Note**: an opening paren must be used for `Kind::Paren`.
- ///
- /// ## Panics
- ///
- /// Panics if `code` is not `Code::Char('*' | '-' | '_')`.
- fn from_code(code: Code) -> Kind {
- match code {
- Code::Char(char) => Kind::from_char(char),
- _ => unreachable!("invalid code"),
- }
- }
}
/// State needed to parse thematic breaks.
@@ -157,10 +144,10 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {
/// ```
fn before(tokenizer: &mut Tokenizer) -> State {
match tokenizer.current {
- Code::Char('*' | '-' | '_') => at_break(
+ Some(char) if matches!(char, '*' | '-' | '_') => at_break(
tokenizer,
Info {
- kind: Kind::from_code(tokenizer.current),
+ kind: Kind::from_char(char),
size: 0,
},
),
@@ -176,15 +163,13 @@ fn before(tokenizer: &mut Tokenizer) -> State {
/// ```
fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
match tokenizer.current {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
- if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>
- {
+ None | Some('\n' | '\r') if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN => {
tokenizer.exit(Token::ThematicBreak);
// Feel free to interrupt.
tokenizer.interrupt = false;
State::Ok
}
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.enter(Token::ThematicBreakSequence);
sequence(tokenizer, info)
}
@@ -200,7 +185,7 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info) -> State {
/// ```
fn sequence(tokenizer: &mut Tokenizer, mut info: Info) -> State {
match tokenizer.current {
- Code::Char(char) if char == info.kind.as_char() => {
+ Some(char) if char == info.kind.as_char() => {
tokenizer.consume();
info.size += 1;
State::Fn(Box::new(|t| sequence(t, info)))