aboutsummaryrefslogtreecommitdiffstats
path: root/src/tokenizer.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/tokenizer.rs')
-rw-r--r--src/tokenizer.rs148
1 files changed, 145 insertions, 3 deletions
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 9ab4309..3068ddf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -121,8 +121,6 @@ pub struct Media {
pub start: (usize, usize),
/// Indices of where the media’s label end starts and ends in `events`.
pub end: (usize, usize),
- /// Identifier
- pub id: String,
}
/// Supported containers.
@@ -163,6 +161,62 @@ struct InternalState {
point: Point,
}
+/// To do
+#[allow(clippy::struct_excessive_bools)]
+pub struct TokenizeState {
+ /// To do.
+ pub connect: bool,
+ /// To do.
+ pub document_container_stack: Vec<ContainerState>,
+ /// To do.
+ pub document_continued: usize,
+ /// To do.
+ pub document_index: usize,
+ /// To do.
+ pub document_inject: Vec<(Vec<Event>, Vec<Event>)>,
+ /// To do.
+ pub document_interrupt_before: bool,
+ /// To do.
+ pub document_paragraph_before: bool,
+ /// To do.
+ pub document_next: Option<Box<StateFn>>,
+ /// To do.
+ pub marker: u8,
+ /// To do.
+ pub marker_other: u8,
+ /// To do.
+ pub prefix: usize,
+ /// To do.
+ pub return_state: Option<Box<StateFn>>,
+ /// To do.
+ pub seen: bool,
+ /// To do.
+ pub size: usize,
+ /// To do.
+ pub size_other: usize,
+ /// To do.
+ pub start: usize,
+ /// To do.
+ pub end: usize,
+ /// To do.
+ pub stop: &'static [u8],
+ pub space_or_tab_eol_content_type: Option<ContentType>,
+ pub space_or_tab_eol_connect: bool,
+ pub space_or_tab_eol_ok: bool,
+ pub space_or_tab_connect: bool,
+ pub space_or_tab_content_type: Option<ContentType>,
+ pub space_or_tab_min: usize,
+ pub space_or_tab_max: usize,
+ pub space_or_tab_size: usize,
+ pub space_or_tab_token: Token,
+ /// To do.
+ pub token_1: Token,
+ pub token_2: Token,
+ pub token_3: Token,
+ pub token_4: Token,
+ pub token_5: Token,
+}
+
/// A tokenizer itself.
#[allow(clippy::struct_excessive_bools)]
pub struct Tokenizer<'a> {
@@ -179,6 +233,8 @@ pub struct Tokenizer<'a> {
consumed: bool,
/// Track whether this tokenizer is done.
resolved: bool,
+ /// To do.
+ attempt_balance: usize,
/// Current byte.
pub current: Option<u8>,
/// Previous byte.
@@ -200,6 +256,8 @@ pub struct Tokenizer<'a> {
resolver_ids: Vec<String>,
/// Shared parsing state across tokenizers.
pub parse_state: &'a ParseState<'a>,
+ /// To do.
+ pub tokenize_state: TokenizeState,
/// Stack of label (start) that could form images and links.
///
/// Used when tokenizing [text content][crate::content::text].
@@ -241,10 +299,45 @@ impl<'a> Tokenizer<'a> {
line_start: point.clone(),
consumed: true,
resolved: false,
+ attempt_balance: 0,
point,
stack: vec![],
events: vec![],
parse_state,
+ tokenize_state: TokenizeState {
+ connect: false,
+ document_container_stack: vec![],
+ document_continued: 0,
+ document_index: 0,
+ document_inject: vec![],
+ document_interrupt_before: false,
+ document_paragraph_before: false,
+ document_next: None,
+ marker: 0,
+ marker_other: 0,
+ prefix: 0,
+ seen: false,
+ size: 0,
+ size_other: 0,
+ start: 0,
+ end: 0,
+ stop: &[],
+ return_state: None,
+ space_or_tab_eol_content_type: None,
+ space_or_tab_eol_connect: false,
+ space_or_tab_eol_ok: false,
+ space_or_tab_connect: false,
+ space_or_tab_content_type: None,
+ space_or_tab_min: 0,
+ space_or_tab_max: 0,
+ space_or_tab_size: 0,
+ space_or_tab_token: Token::SpaceOrTab,
+ token_1: Token::Data,
+ token_2: Token::Data,
+ token_3: Token::Data,
+ token_4: Token::Data,
+ token_5: Token::Data,
+ },
map: EditMap::new(),
label_start_stack: vec![],
label_start_list_loose: vec![],
@@ -494,11 +587,14 @@ impl<'a> Tokenizer<'a> {
state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
after: impl FnOnce(&mut Tokenizer) -> State + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
attempt_impl(
state_fn,
None,
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
+
if matches!(state, State::Ok) {
tokenizer.consumed = true;
State::Fn(Box::new(after))
@@ -522,11 +618,13 @@ impl<'a> Tokenizer<'a> {
until: impl Fn(Option<u8>) -> bool + 'static,
done: impl FnOnce(State) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
attempt_impl(
state_fn,
Some(Box::new(until)),
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
tokenizer.consumed = true;
// We don’t capture/free state because it is assumed that
// `go_until` itself is wrapped in another attempt that does
@@ -550,6 +648,7 @@ impl<'a> Tokenizer<'a> {
state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
let previous = self.capture();
attempt_impl(
@@ -557,6 +656,7 @@ impl<'a> Tokenizer<'a> {
None,
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
tokenizer.free(previous);
tokenizer.consumed = true;
State::Fn(done(matches!(state, State::Ok)))
@@ -580,6 +680,7 @@ impl<'a> Tokenizer<'a> {
state_fn: impl FnOnce(&mut Tokenizer) -> State + 'static,
done: impl FnOnce(bool) -> Box<StateFn> + 'static,
) -> Box<StateFn> {
+ self.attempt_balance += 1;
let previous = self.capture();
attempt_impl(
@@ -587,6 +688,7 @@ impl<'a> Tokenizer<'a> {
None,
self.point.index,
|tokenizer: &mut Tokenizer, state| {
+ tokenizer.attempt_balance -= 1;
let ok = matches!(state, State::Ok);
if !ok {
@@ -782,7 +884,47 @@ fn attempt_impl(
let state = state(tokenizer);
match state {
- State::Ok | State::Nok => done(tokenizer, state),
+ State::Ok | State::Nok => {
+ if tokenizer.attempt_balance == 0 {
+ debug_assert!(!tokenizer.tokenize_state.connect);
+ debug_assert_eq!(tokenizer.tokenize_state.document_continued, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.document_index, 0);
+ debug_assert!(!tokenizer.tokenize_state.document_interrupt_before);
+ debug_assert!(!tokenizer.tokenize_state.document_paragraph_before);
+ debug_assert_eq!(tokenizer.tokenize_state.marker, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.marker_other, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.prefix, 0);
+ debug_assert!(!tokenizer.tokenize_state.seen);
+ debug_assert_eq!(tokenizer.tokenize_state.size, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.size_other, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.stop.len(), 0);
+ debug_assert_eq!(tokenizer.tokenize_state.start, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.end, 0);
+ debug_assert!(tokenizer.tokenize_state.return_state.is_none());
+ debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_connect);
+ debug_assert!(!tokenizer.tokenize_state.space_or_tab_eol_ok);
+ debug_assert!(tokenizer
+ .tokenize_state
+ .space_or_tab_eol_content_type
+ .is_none());
+ debug_assert!(!tokenizer.tokenize_state.space_or_tab_connect);
+ debug_assert!(tokenizer.tokenize_state.space_or_tab_content_type.is_none());
+ debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_min, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_max, 0);
+ debug_assert_eq!(tokenizer.tokenize_state.space_or_tab_size, 0);
+ debug_assert_eq!(
+ tokenizer.tokenize_state.space_or_tab_token,
+ Token::SpaceOrTab
+ );
+ debug_assert_eq!(tokenizer.tokenize_state.token_1, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_2, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_3, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_4, Token::Data);
+ debug_assert_eq!(tokenizer.tokenize_state.token_5, Token::Data);
+ }
+
+ done(tokenizer, state)
+ }
State::Fn(func) => State::Fn(attempt_impl(func, pause, start, done)),
}
})