From 65dd765cceee8bdccc74c08066eec59a579a16b1 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Mon, 20 Jun 2022 17:06:00 +0200
Subject: Add improved whitespace handling

* add several helpers for parsing between x and y `space_or_tab`s
* use those helpers in a bunch of places
* move initial indent parsing to flow constructs themselves
---
 src/compiler.rs                       |  10 +-
 src/construct/blank_line.rs           |  13 +--
 src/construct/code_fenced.rs          | 169 +++++++++------------------------
 src/construct/code_indented.rs        |  96 +++++++------------
 src/construct/definition.rs           |  96 +++++++------------
 src/construct/heading_atx.rs          |  52 +++++------
 src/construct/heading_setext.rs       |  45 ++++-----
 src/construct/html_flow.rs            |   7 +-
 src/construct/html_text.rs            |   9 +-
 src/construct/mod.rs                  |   2 +-
 src/construct/paragraph.rs            |  68 +++++---------
 src/construct/partial_destination.rs  |   2 +-
 src/construct/partial_space_or_tab.rs |  98 ++++++++++++++++++++
 src/construct/partial_title.rs        |   7 +-
 src/construct/partial_whitespace.rs   |  64 -------------
 src/construct/thematic_break.rs       |  50 ++++------
 src/content/flow.rs                   |  45 +++------
 src/tokenizer.rs                      |  32 +------
 18 files changed, 324 insertions(+), 541 deletions(-)
 create mode 100644 src/construct/partial_space_or_tab.rs
 delete mode 100644 src/construct/partial_whitespace.rs

diff --git a/src/compiler.rs b/src/compiler.rs
index 59fcd22..366dcd9 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -108,7 +108,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::AutolinkMarker
                 | TokenType::AutolinkProtocol
                 | TokenType::BlankLineEnding
-                | TokenType::BlankLineWhitespace
                 | TokenType::CharacterEscape
                 | TokenType::CharacterEscapeMarker
                 | TokenType::CharacterEscapeValue
@@ -118,10 +117,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::CharacterReferenceMarkerNumeric
                 | TokenType::CharacterReferenceMarkerSemi
                 | TokenType::CharacterReferenceValue
-                | TokenType::CodeIndentedPrefixWhitespace
                 | TokenType::CodeFencedFence
                 | TokenType::CodeFencedFenceSequence
-                | TokenType::CodeFencedFenceWhitespace
                 | TokenType::CodeFlowChunk
                 | TokenType::CodeTextData
                 | TokenType::CodeTextLineEnding
@@ -153,7 +150,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::LineEnding
                 | TokenType::ThematicBreak
                 | TokenType::ThematicBreakSequence
-                | TokenType::ThematicBreakWhitespace
                 | TokenType::Whitespace => {
                     // Ignore.
                 }
@@ -172,7 +168,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 TokenType::CodeFenced => {
                     code_flow_seen_data = Some(false);
                     line_ending_if_needed(buffers);
-                    // Note: no `>`, which is added later.
+                    // Note that no `>` is used, which is added later.
                     buf_tail_mut(buffers).push("
 St
                 TokenType::Autolink
                 | TokenType::AutolinkMarker
                 | TokenType::BlankLineEnding
-                | TokenType::BlankLineWhitespace
                 | TokenType::CharacterEscape
                 | TokenType::CharacterEscapeMarker
                 | TokenType::CharacterReference
                 | TokenType::CharacterReferenceMarkerSemi
                 | TokenType::CodeFencedFenceSequence
-                | TokenType::CodeFencedFenceWhitespace
-                | TokenType::CodeIndentedPrefixWhitespace
                 | TokenType::CodeTextSequence
                 | TokenType::DefinitionLabel
                 | TokenType::DefinitionLabelMarker
@@ -228,7 +221,6 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 | TokenType::HardBreakTrailingSpace
                 | TokenType::HeadingSetext
                 | TokenType::ThematicBreakSequence
-                | TokenType::ThematicBreakWhitespace
                 | TokenType::Whitespace => {
                     // Ignore.
                 }
diff --git a/src/construct/blank_line.rs b/src/construct/blank_line.rs
index fdb1ee0..86091d9 100644
--- a/src/construct/blank_line.rs
+++ b/src/construct/blank_line.rs
@@ -29,27 +29,24 @@
 //!
 //! 
 
-use crate::construct::partial_whitespace::start as whitespace;
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::construct::partial_space_or_tab::space_or_tab_opt;
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
 
 /// Start of a blank line.
 ///
-/// Note: `␠` represents a space character.
+/// > πŸ‘‰ **Note**: `␠` represents a space character.
 ///
 /// ```markdown
 /// |␠␠
 /// |
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |tokenizer, code| whitespace(tokenizer, code, TokenType::BlankLineWhitespace),
-        |_ok| Box::new(after),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), after)(tokenizer, code)
 }
 
 /// After zero or more spaces or tabs, before a line ending or EOF.
 ///
-/// Note: `␠` represents a space character.
+/// > πŸ‘‰ **Note**: `␠` represents a space character.
 ///
 /// ```markdown
 /// |␠␠
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index ba76aa8..30ec911 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -91,7 +91,7 @@
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 
 use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::partial_whitespace::start as whitespace;
+use crate::construct::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt};
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 use crate::util::span::from_exit_event;
 
@@ -130,10 +130,7 @@ struct Info {
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::CodeFenced);
     tokenizer.enter(TokenType::CodeFencedFence);
-    tokenizer.attempt(
-        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-        |_ok| Box::new(before_sequence_open),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), before_sequence_open)(tokenizer, code)
 }
 
 /// Inside the opening fence, after an optional prefix, before a sequence.
@@ -159,6 +156,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
             tokenizer.enter(TokenType::CodeFencedFenceSequence);
             sequence_open(
                 tokenizer,
+                code,
                 Info {
                     prefix,
                     size: 0,
@@ -168,7 +166,6 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
                         Kind::Tilde
                     },
                 },
-                code,
             )
         }
         _ => (State::Nok, None),
@@ -182,7 +179,7 @@ fn before_sequence_open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult
 /// console.log(1);
 /// ~~~
 /// ```
-fn sequence_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn sequence_open(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     let marker = if info.kind == Kind::GraveAccent {
         '`'
     } else {
@@ -193,26 +190,18 @@ fn sequence_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRe
         Code::Char(char) if char == marker => {
             tokenizer.consume(code);
             (
-                State::Fn(Box::new(|tokenizer, code| {
+                State::Fn(Box::new(|t, c| {
                     let mut info = info;
                     info.size += 1;
-                    sequence_open(tokenizer, info, code)
+                    sequence_open(t, c, info)
                 })),
                 None,
             )
         }
+        _ if info.size < CODE_FENCED_SEQUENCE_SIZE_MIN => (State::Nok, None),
         _ => {
-            if info.size < CODE_FENCED_SEQUENCE_SIZE_MIN {
-                (State::Nok, None)
-            } else {
-                tokenizer.exit(TokenType::CodeFencedFenceSequence);
-                tokenizer.attempt(
-                    |tokenizer, code| {
-                        whitespace(tokenizer, code, TokenType::CodeFencedFenceWhitespace)
-                    },
-                    |_ok| Box::new(|tokenizer, code| info_before(tokenizer, info, code)),
-                )(tokenizer, code)
-            }
+            tokenizer.exit(TokenType::CodeFencedFenceSequence);
+            tokenizer.go(space_or_tab_opt(), |t, c| info_before(t, c, info))(tokenizer, code)
         }
     }
 }
@@ -224,16 +213,16 @@ fn sequence_open(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRe
 /// console.log(1);
 /// ~~~
 /// ```
-fn info_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn info_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.exit(TokenType::CodeFencedFence);
-            at_break(tokenizer, info, code)
+            at_break(tokenizer, code, info)
         }
         _ => {
             tokenizer.enter(TokenType::CodeFencedFenceInfo);
             tokenizer.enter(TokenType::ChunkString);
-            info_inside(tokenizer, info, code, vec![])
+            info_inside(tokenizer, code, info, vec![])
         }
     }
 }
@@ -247,8 +236,8 @@ fn info_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResu
 /// ```
 fn info_inside(
     tokenizer: &mut Tokenizer,
-    info: Info,
     code: Code,
+    info: Info,
     codes: Vec<Code>,
 ) -> StateFnResult {
     match code {
@@ -256,15 +245,12 @@ fn info_inside(
             tokenizer.exit(TokenType::ChunkString);
             tokenizer.exit(TokenType::CodeFencedFenceInfo);
             tokenizer.exit(TokenType::CodeFencedFence);
-            at_break(tokenizer, info, code)
+            at_break(tokenizer, code, info)
         }
         Code::VirtualSpace | Code::Char('\t' | ' ') => {
             tokenizer.exit(TokenType::ChunkString);
             tokenizer.exit(TokenType::CodeFencedFenceInfo);
-            tokenizer.attempt(
-                |tokenizer, code| whitespace(tokenizer, code, TokenType::CodeFencedFenceWhitespace),
-                |_ok| Box::new(|tokenizer, code| meta_before(tokenizer, info, code)),
-            )(tokenizer, code)
+            tokenizer.go(space_or_tab_opt(), |t, c| meta_before(t, c, info))(tokenizer, code)
         }
         Code::Char(char) if char == '`' && info.kind == Kind::GraveAccent => (State::Nok, None),
         Code::Char(_) => {
@@ -272,9 +258,7 @@ fn info_inside(
             codes.push(code);
             tokenizer.consume(code);
             (
-                State::Fn(Box::new(|tokenizer, code| {
-                    info_inside(tokenizer, info, code, codes)
-                })),
+                State::Fn(Box::new(|t, c| info_inside(t, c, info, codes))),
                 None,
             )
         }
@@ -288,16 +272,16 @@ fn info_inside(
 /// console.log(1);
 /// ~~~
 /// ```
-fn meta_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn meta_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.exit(TokenType::CodeFencedFence);
-            at_break(tokenizer, info, code)
+            at_break(tokenizer, code, info)
         }
         _ => {
             tokenizer.enter(TokenType::CodeFencedFenceMeta);
             tokenizer.enter(TokenType::ChunkString);
-            meta(tokenizer, info, code)
+            meta(tokenizer, code, info)
         }
     }
 }
@@ -309,21 +293,18 @@ fn meta_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResu
 /// console.log(1);
 /// ~~~
 /// ```
-fn meta(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.exit(TokenType::ChunkString);
             tokenizer.exit(TokenType::CodeFencedFenceMeta);
             tokenizer.exit(TokenType::CodeFencedFence);
-            at_break(tokenizer, info, code)
+            at_break(tokenizer, code, info)
         }
         Code::Char(char) if char == '`' && info.kind == Kind::GraveAccent => (State::Nok, None),
         _ => {
             tokenizer.consume(code);
-            (
-                State::Fn(Box::new(|tokenizer, code| meta(tokenizer, info, code))),
-                None,
-            )
+            (State::Fn(Box::new(|t, c| meta(t, c, info))), None)
         }
     }
 }
@@ -335,7 +316,7 @@ fn meta(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
 /// aa|
 /// ~~~
 /// ```
-fn at_break(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     let clone = info.clone();
 
     match code {
@@ -345,12 +326,7 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult
                 tokenizer.enter(TokenType::LineEnding);
                 tokenizer.consume(code);
                 tokenizer.exit(TokenType::LineEnding);
-                (
-                    State::Fn(Box::new(|tokenizer, code| {
-                        close_before(tokenizer, info, code)
-                    })),
-                    None,
-                )
+                (State::Fn(Box::new(|t, c| close_start(t, c, info))), None)
             },
             |ok| {
                 if ok {
@@ -360,12 +336,7 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult
                         tokenizer.enter(TokenType::LineEnding);
                         tokenizer.consume(code);
                         tokenizer.exit(TokenType::LineEnding);
-                        (
-                            State::Fn(Box::new(|tokenizer, code| {
-                                content_start(tokenizer, clone, code)
-                            })),
-                            None,
-                        )
+                        (State::Fn(Box::new(|t, c| content_start(t, c, clone))), None)
                     })
                 }
             },
@@ -385,12 +356,11 @@ fn at_break(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult
 /// console.log('1')
 /// |  ~~~
 /// ```
-fn close_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn close_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     tokenizer.enter(TokenType::CodeFencedFence);
-    tokenizer.attempt(
-        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-        |_ok| Box::new(|tokenizer, code| close_sequence_before(tokenizer, info, code)),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), |t, c| {
+        close_before(t, c, info)
+    })(tokenizer, code)
 }
 
 /// In a closing fence, after optional whitespace, before sequence.
@@ -404,31 +374,17 @@ fn close_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnRes
 /// console.log('1')
 ///   |~~~
 /// ```
-fn close_sequence_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
-    let tail = tokenizer.events.last();
-    let mut prefix = 0;
+fn close_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     let marker = if info.kind == Kind::GraveAccent {
         '`'
     } else {
         '~'
     };
 
-    if let Some(event) = tail {
-        if event.token_type == TokenType::Whitespace {
-            let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
-            prefix = span.end_index - span.start_index;
-        }
-    }
-
-    // To do: 4+ should be okay if code (indented) is turned off!
-    if prefix >= TAB_SIZE {
-        return (State::Nok, None);
-    }
-
     match code {
         Code::Char(char) if char == marker => {
             tokenizer.enter(TokenType::CodeFencedFenceSequence);
-            close_sequence(tokenizer, info, code, 0)
+            close_sequence(tokenizer, code, info, 0)
         }
         _ => (State::Nok, None),
     }
@@ -441,7 +397,7 @@ fn close_sequence_before(tokenizer: &mut Tokenizer, info: Info, code: Code) -> S
 /// console.log('1')
 /// ~|~~
 /// ```
-fn close_sequence(tokenizer: &mut Tokenizer, info: Info, code: Code, size: usize) -> StateFnResult {
+fn close_sequence(tokenizer: &mut Tokenizer, code: Code, info: Info, size: usize) -> StateFnResult {
     let marker = if info.kind == Kind::GraveAccent {
         '`'
     } else {
@@ -452,18 +408,13 @@ fn close_sequence(tokenizer: &mut Tokenizer, info: Info, code: Code, size: usize
         Code::Char(char) if char == marker => {
             tokenizer.consume(code);
             (
-                State::Fn(Box::new(move |tokenizer, code| {
-                    close_sequence(tokenizer, info, code, size + 1)
-                })),
+                State::Fn(Box::new(move |t, c| close_sequence(t, c, info, size + 1))),
                 None,
             )
         }
         _ if size >= CODE_FENCED_SEQUENCE_SIZE_MIN && size >= info.size => {
             tokenizer.exit(TokenType::CodeFencedFenceSequence);
-            tokenizer.attempt(
-                |tokenizer, code| whitespace(tokenizer, code, TokenType::CodeFencedFenceWhitespace),
-                |_ok| Box::new(close_whitespace_after),
-            )(tokenizer, code)
+            tokenizer.go(space_or_tab_opt(), close_sequence_after)(tokenizer, code)
         }
         _ => (State::Nok, None),
     }
@@ -476,7 +427,7 @@ fn close_sequence(tokenizer: &mut Tokenizer, info: Info, code: Code, size: usize
 /// console.log('1')
 /// ~~~ |
 /// ```
-fn close_whitespace_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn close_sequence_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.exit(TokenType::CodeFencedFence);
@@ -493,53 +444,27 @@ fn close_whitespace_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResul
 /// |aa
 /// ~~~
 /// ```
-fn content_start(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
-    match code {
-        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            at_break(tokenizer, info, code)
-        }
-        Code::VirtualSpace | Code::Char('\t' | ' ') if info.prefix > 0 => {
-            tokenizer.enter(TokenType::Whitespace);
-            content_prefix(tokenizer, info, 0, code)
-        }
-        _ => {
-            tokenizer.enter(TokenType::CodeFlowChunk);
-            content_continue(tokenizer, info, code)
-        }
-    }
+fn content_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    tokenizer.go(space_or_tab_min_max(0, info.prefix), |t, c| {
+        content_begin(t, c, info)
+    })(tokenizer, code)
 }
 
-/// Before code content, in a prefix.
+/// Before code content, after a prefix.
 ///
 /// ```markdown
 ///   ~~~js
 ///  | aa
 ///   ~~~
 /// ```
-fn content_prefix(
-    tokenizer: &mut Tokenizer,
-    info: Info,
-    prefix: usize,
-    code: Code,
-) -> StateFnResult {
+fn content_begin(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
-        Code::VirtualSpace | Code::Char('\t' | ' ') if info.prefix > prefix => {
-            tokenizer.consume(code);
-            (
-                State::Fn(Box::new(move |tokenizer, code| {
-                    content_prefix(tokenizer, info, prefix + 1, code)
-                })),
-                None,
-            )
-        }
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            tokenizer.exit(TokenType::Whitespace);
-            at_break(tokenizer, info, code)
+            at_break(tokenizer, code, info)
         }
         _ => {
-            tokenizer.exit(TokenType::Whitespace);
             tokenizer.enter(TokenType::CodeFlowChunk);
-            content_continue(tokenizer, info, code)
+            content_continue(tokenizer, code, info)
         }
     }
 }
@@ -553,18 +478,16 @@ fn content_prefix(
 /// ab|
 /// ~~~
 /// ```
-fn content_continue(tokenizer: &mut Tokenizer, info: Info, code: Code) -> StateFnResult {
+fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.exit(TokenType::CodeFlowChunk);
-            at_break(tokenizer, info, code)
+            at_break(tokenizer, code, info)
         }
         _ => {
             tokenizer.consume(code);
             (
-                State::Fn(Box::new(|tokenizer, code| {
-                    content_continue(tokenizer, info, code)
-                })),
+                State::Fn(Box::new(|t, c| content_continue(t, c, info))),
                 None,
             )
         }
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 55b8901..64956be 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -38,6 +38,7 @@
 //! [html-pre]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-pre-element
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 
+use super::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt};
 use crate::constant::TAB_SIZE;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -46,46 +47,13 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 /// ```markdown
 /// |    asd
 /// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::VirtualSpace | Code::Char(' ' | '\t') => {
-            tokenizer.enter(TokenType::CodeIndented);
-            tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace);
-            indent(tokenizer, code, 0)
-        }
-        _ => (State::Nok, None),
-    }
-}
-
-/// Inside the initial whitespace.
-///
-/// ```markdown
-///  |   asd
-///   |  asd
-///    | asd
-///     |asd
-/// ```
 ///
 /// > **Parsing note**: it is not needed to check if this first line is a
 /// > filled line (that it has a non-whitespace character), because blank lines
 /// > are parsed already, so we never run into that.
-fn indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
-    match code {
-        _ if size == TAB_SIZE => {
-            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
-            at_break(tokenizer, code)
-        }
-        Code::VirtualSpace | Code::Char(' ' | '\t') => {
-            tokenizer.consume(code);
-            (
-                State::Fn(Box::new(move |tokenizer, code| {
-                    indent(tokenizer, code, size + 1)
-                })),
-                None,
-            )
-        }
-        _ => (State::Nok, None),
-    }
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::CodeIndented);
+    tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code)
 }
 
 /// At a break.
@@ -153,39 +121,45 @@ fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.exit(TokenType::LineEnding);
             (State::Fn(Box::new(further_start)), None)
         }
-        Code::VirtualSpace | Code::Char(' ' | '\t') => {
-            tokenizer.enter(TokenType::CodeIndentedPrefixWhitespace);
-            further_indent(tokenizer, code, 0)
-        }
-        _ => (State::Nok, None),
+        _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
+            Box::new(if ok { further_end } else { further_begin })
+        })(tokenizer, code),
     }
 }
 
-/// Inside further whitespace.
+/// After a proper indent.
 ///
 /// ```markdown
 ///     asd
-///   |  asd
+///     |asd
 /// ```
-fn further_indent(tokenizer: &mut Tokenizer, code: Code, size: usize) -> StateFnResult {
+fn further_end(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    (State::Ok, Some(vec![code]))
+}
+
+/// At the beginning of a line that is not indented enough.
+///
+/// > πŸ‘‰ **Note**: `␠` represents a space character.
+///
+/// ```markdown
+///     asd
+/// |␠␠
+///     asd
+/// ```
+fn further_begin(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.go(space_or_tab_opt(), further_after)(tokenizer, code)
+}
+
+/// After whitespace.
+///
+/// ```markdown
+///     asd
+/// ␠␠|
+///     asd
+/// ```
+fn further_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
-        _ if size == TAB_SIZE => {
-            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
-            (State::Ok, Some(vec![code]))
-        }
-        Code::VirtualSpace | Code::Char(' ' | '\t') => {
-            tokenizer.consume(code);
-            (
-                State::Fn(Box::new(move |tokenizer, code| {
-                    further_indent(tokenizer, code, size + 1)
-                })),
-                None,
-            )
-        }
-        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            tokenizer.exit(TokenType::CodeIndentedPrefixWhitespace);
-            further_start(tokenizer, code)
-        }
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => further_start(tokenizer, code),
         _ => (State::Nok, None),
     }
 }
diff --git a/src/construct/definition.rs b/src/construct/definition.rs
index f7f8acd..03baee6 100644
--- a/src/construct/definition.rs
+++ b/src/construct/definition.rs
@@ -58,7 +58,7 @@
 
 use crate::construct::{
     partial_destination::start as destination, partial_label::start as label,
-    partial_title::start as title, partial_whitespace::start as whitespace,
+    partial_space_or_tab::space_or_tab_opt, partial_title::start as title,
 };
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -68,11 +68,18 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 /// |[a]: b "c"
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::Definition);
+    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+}
+
+/// At the start of a definition, after whitespace.
+///
+/// ```markdown
+/// |[a]: b "c"
+/// ```
+pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
-        Code::Char('[') => {
-            tokenizer.enter(TokenType::Definition);
-            tokenizer.go(label, label_after)(tokenizer, code)
-        }
+        Code::Char('[') => tokenizer.go(label, label_after)(tokenizer, code),
         _ => (State::Nok, None),
     }
 }
@@ -93,27 +100,15 @@ fn label_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.enter(TokenType::DefinitionMarker);
             tokenizer.consume(code);
             tokenizer.exit(TokenType::DefinitionMarker);
-            (State::Fn(Box::new(marker_after)), None)
+            (
+                State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), marker_after))),
+                None,
+            )
         }
         _ => (State::Nok, None),
     }
 }
 
-/// After the marker of a definition.
-///
-/// ```markdown
-/// [a]:| b "c"
-///
-/// [a]:| ␊
-///  b "c"
-/// ```
-fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(marker_after_optional_whitespace),
-    )(tokenizer, code)
-}
-
 /// After the marker, after whitespace.
 ///
 /// ```markdown
@@ -122,31 +117,23 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// [a]: |␊
 ///  b "c"
 /// ```
-fn marker_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
             tokenizer.enter(TokenType::LineEnding);
             tokenizer.consume(code);
             tokenizer.exit(TokenType::LineEnding);
-            (State::Fn(Box::new(marker_after_optional_line_ending)), None)
+            (
+                State::Fn(Box::new(
+                    tokenizer.go(space_or_tab_opt(), destination_before),
+                )),
+                None,
+            )
         }
         _ => destination_before(tokenizer, code),
     }
 }
 
-/// After the marker, after a line ending.
-///
-/// ```markdown
-/// [a]:
-/// | b "c"
-/// ```
-fn marker_after_optional_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(destination_before),
-    )(tokenizer, code)
-}
-
 /// Before a destination.
 ///
 /// ```markdown
@@ -163,8 +150,9 @@ fn destination_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
     );
 
-    if !char_nok
-        && (event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace)
+    // Whitespace.
+    if (event.token_type == TokenType::LineEnding || event.token_type == TokenType::Whitespace)
+        && !char_nok
     {
         tokenizer.go(destination, destination_after)(tokenizer, code)
     } else {
@@ -191,10 +179,7 @@ fn destination_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// [a]: b "c"|
 /// ```
 fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(after_whitespace),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), after_whitespace)(tokenizer, code)
 }
 
 /// After a definition, after optional whitespace.
@@ -222,10 +207,7 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 ///  "c"
 /// ```
 fn title_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(title_before_after_optional_whitespace),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), title_before_after_optional_whitespace)(tokenizer, code)
 }
 
 /// Before a title, after optional whitespace.
@@ -243,7 +225,9 @@ fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code)
             tokenizer.consume(code);
             tokenizer.exit(TokenType::LineEnding);
             (
-                State::Fn(Box::new(title_before_after_optional_line_ending)),
+                State::Fn(Box::new(
+                    tokenizer.go(space_or_tab_opt(), title_before_marker),
+                )),
                 None,
             )
         }
@@ -251,19 +235,6 @@ fn title_before_after_optional_whitespace(tokenizer: &mut Tokenizer, code: Code)
     }
 }
 
-/// Before a title, after a line ending.
-///
-/// ```markdown
-/// [a]: b␊
-/// | "c"
-/// ```
-fn title_before_after_optional_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(title_before_marker),
-    )(tokenizer, code)
-}
-
 /// Before a title, after a line ending.
 ///
 /// ```markdown
@@ -289,10 +260,7 @@ fn title_before_marker(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// "c"|
 /// ```
 fn title_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(title_after_after_optional_whitespace),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), title_after_after_optional_whitespace)(tokenizer, code)
 }
 
 /// After a title, after optional whitespace.
diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs
index ab8b6a5..12d4193 100644
--- a/src/construct/heading_atx.rs
+++ b/src/construct/heading_atx.rs
@@ -47,6 +47,7 @@
 //! [wiki-setext]: https://en.wikipedia.org/wiki/Setext
 //! [atx]: http://www.aaronsw.com/2002/atx/
 
+use super::partial_space_or_tab::{space_or_tab, space_or_tab_opt};
 use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -56,8 +57,17 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 /// |## alpha
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::HeadingAtx);
+    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+}
+
+/// Start of a heading (atx), after whitespace.
+///
+/// ```markdown
+/// |## alpha
+/// ```
+pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     if Code::Char('#') == code {
-        tokenizer.enter(TokenType::HeadingAtx);
         tokenizer.enter(TokenType::HeadingAtxSequence);
         sequence_open(tokenizer, code, 0)
     } else {
@@ -72,12 +82,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```
 fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnResult {
     match code {
-        Code::None
-        | Code::CarriageReturnLineFeed
-        | Code::VirtualSpace
-        | Code::Char('\t' | '\n' | '\r' | ' ')
-            if rank > 0 =>
-        {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') if rank > 0 => {
             tokenizer.exit(TokenType::HeadingAtxSequence);
             at_break(tokenizer, code)
         }
@@ -90,6 +95,13 @@ fn sequence_open(tokenizer: &mut Tokenizer, code: Code, rank: usize) -> StateFnR
                 None,
             )
         }
+        _ if rank > 0 => {
+            tokenizer.exit(TokenType::HeadingAtxSequence);
+            tokenizer.go(
+                space_or_tab(TokenType::HeadingAtxWhitespace, 1, usize::MAX),
+                at_break,
+            )(tokenizer, code)
+        }
         _ => (State::Nok, None),
     }
 }
@@ -109,10 +121,10 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.exit(TokenType::HeadingAtx);
             (State::Ok, Some(vec![code]))
         }
-        Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            tokenizer.enter(TokenType::HeadingAtxWhitespace);
-            whitespace(tokenizer, code)
-        }
+        Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.go(
+            space_or_tab(TokenType::HeadingAtxWhitespace, 1, usize::MAX),
+            at_break,
+        )(tokenizer, code),
         Code::Char('#') => {
             tokenizer.enter(TokenType::HeadingAtxSequence);
             further_sequence(tokenizer, code)
@@ -141,24 +153,6 @@ fn further_sequence(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// In whitespace.
-///
-/// ```markdown
-/// ## alpha | bravo
-/// ```
-fn whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            tokenizer.consume(code);
-            (State::Fn(Box::new(whitespace)), None)
-        }
-        _ => {
-            tokenizer.exit(TokenType::HeadingAtxWhitespace);
-            at_break(tokenizer, code)
-        }
-    }
-}
-
 /// In text.
 ///
 /// ```markdown
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index f4c6001..64647cb 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -50,7 +50,7 @@
 //! [atx]: http://www.aaronsw.com/2002/atx/
 
 use crate::constant::TAB_SIZE;
-use crate::construct::partial_whitespace::start as whitespace;
+use crate::construct::partial_space_or_tab::space_or_tab_opt;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 use crate::util::span::from_exit_event;
 
@@ -70,12 +70,22 @@ pub enum Kind {
 /// ==
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::HeadingSetext);
+    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+}
+
+/// Start of a heading (setext), after whitespace.
+///
+/// ```markdown
+/// |alpha
+/// ==
+/// ```
+pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             unreachable!("expected non-eol/eof");
         }
         _ => {
-            tokenizer.enter(TokenType::HeadingSetext);
             tokenizer.enter(TokenType::HeadingSetextText);
             tokenizer.enter(TokenType::ChunkText);
             text_inside(tokenizer, code)
@@ -134,10 +144,7 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.events[next].previous = Some(previous);
 
             (
-                State::Fn(Box::new(tokenizer.attempt(
-                    |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-                    |_ok| Box::new(text_line_start),
-                ))),
+                State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), text_line_start))),
                 None,
             )
         }
@@ -202,25 +209,17 @@ fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.enter(TokenType::LineEnding);
             tokenizer.consume(code);
             tokenizer.exit(TokenType::LineEnding);
-            (State::Fn(Box::new(underline_start)), None)
+            (
+                State::Fn(Box::new(
+                    tokenizer.go(space_or_tab_opt(), underline_sequence_start),
+                )),
+                None,
+            )
         }
         _ => unreachable!("expected eol"),
     }
 }
 
-/// After a line ending, presumably an underline.
-///
-/// ```markdown
-/// alpha
-/// |==
-/// ```
-fn underline_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-        |_ok| Box::new(underline_sequence_start),
-    )(tokenizer, code)
-}
-
 /// After optional whitespace, presumably an underline.
 ///
 /// ```markdown
@@ -276,11 +275,7 @@ fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind)
                 None,
             )
         }
-        Code::VirtualSpace | Code::Char('\t' | ' ') => tokenizer.attempt(
-            |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-            |_ok| Box::new(underline_after),
-        )(tokenizer, code),
-        _ => underline_after(tokenizer, code),
+        _ => tokenizer.go(space_or_tab_opt(), underline_after)(tokenizer, code),
     }
 }
 
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index 5adac7d..4819e63 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -93,7 +93,7 @@
 //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
 use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX};
-use crate::construct::{blank_line::start as blank_line, partial_whitespace::start as whitespace};
+use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::space_or_tab_opt};
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
 /// Kind of HTML (flow).
@@ -155,10 +155,7 @@ struct Info {
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     tokenizer.enter(TokenType::HtmlFlow);
     tokenizer.enter(TokenType::HtmlFlowData);
-    tokenizer.attempt(
-        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-        |_ok| Box::new(before),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
 }
 
 /// After optional whitespace, before `<`.
diff --git a/src/construct/html_text.rs b/src/construct/html_text.rs
index 93b4b62..a91113f 100644
--- a/src/construct/html_text.rs
+++ b/src/construct/html_text.rs
@@ -49,7 +49,7 @@
 //! [html_flow]: crate::construct::html_flow
 //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing
 
-use crate::construct::partial_whitespace::start as whitespace;
+use crate::construct::partial_space_or_tab::space_or_tab_opt;
 use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
 
 /// Start of HTML (text)
@@ -673,10 +673,9 @@ fn after_line_ending(
     code: Code,
     return_state: Box<StateFn>,
 ) -> StateFnResult {
-    tokenizer.attempt(
-        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-        |_ok| Box::new(|t, c| after_line_ending_prefix(t, c, return_state)),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), |t, c| {
+        after_line_ending_prefix(t, c, return_state)
+    })(tokenizer, code)
 }
 
 /// After a line ending, after indent.
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 1debb74..407dc6b 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -70,6 +70,6 @@ pub mod html_text;
 pub mod paragraph;
 pub mod partial_destination;
 pub mod partial_label;
+pub mod partial_space_or_tab;
 pub mod partial_title;
-pub mod partial_whitespace;
 pub mod thematic_break;
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 50ef627..fa18f28 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -30,12 +30,11 @@
 
 use crate::constant::TAB_SIZE;
 use crate::construct::{
-    code_fenced::start as code_fenced, heading_atx::start as heading_atx,
-    html_flow::start as html_flow, partial_whitespace::start as whitespace,
-    thematic_break::start as thematic_break,
+    blank_line::start as blank_line, code_fenced::start as code_fenced,
+    heading_atx::start as heading_atx, html_flow::start as html_flow,
+    partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,
 };
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::span::from_exit_event;
 
 /// Before a paragraph.
 ///
@@ -114,7 +113,7 @@ fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.enter(TokenType::LineEnding);
             tokenizer.consume(code);
             tokenizer.exit(TokenType::LineEnding);
-            (State::Fn(Box::new(interrupt_initial)), None)
+            (State::Fn(Box::new(interrupt_start)), None)
         }
         _ => unreachable!("expected eol"),
     }
@@ -123,55 +122,30 @@ fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// After a line ending.
 ///
 /// ```markdown
-/// alpha|
-/// ~~~js
+/// alpha
+/// |~~~js
 /// ~~~
 /// ```
-fn interrupt_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt_2(code_fenced, html_flow, |ok| {
+fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // To do: If code is disabled, indented lines are allowed to interrupt.
+    tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
         if ok {
-            Box::new(|_tokenizer, _code| (State::Nok, None))
+            Box::new(|_t, code| (State::Ok, Some(vec![code])))
         } else {
             Box::new(|tokenizer, code| {
-                tokenizer.attempt(
-                    |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-                    |_ok| Box::new(interrupt_start),
+                tokenizer.attempt_5(
+                    blank_line,
+                    code_fenced,
+                    html_flow,
+                    heading_atx,
+                    thematic_break,
+                    |ok| {
+                        Box::new(move |_t, code| {
+                            (if ok { State::Nok } else { State::Ok }, Some(vec![code]))
+                        })
+                    },
                 )(tokenizer, code)
             })
         }
     })(tokenizer, code)
 }
-
-/// After a line ending, after optional whitespace.
-///
-/// ```markdown
-/// alpha|
-/// # bravo
-/// ```
-fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    let tail = tokenizer.events.last();
-    let mut prefix = 0;
-
-    if let Some(event) = tail {
-        if event.token_type == TokenType::Whitespace {
-            let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
-            prefix = span.end_index - span.start_index;
-        }
-    }
-
-    match code {
-        // Blank lines are not allowed in paragraph.
-        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
-        // To do: If code is disabled, indented lines are allowed.
-        _ if prefix >= TAB_SIZE => (State::Ok, None),
-        // To do: definitions, setext headings, etc?
-        _ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
-            let result = if ok {
-                (State::Nok, None)
-            } else {
-                (State::Ok, None)
-            };
-            Box::new(|_t, _c| result)
-        })(tokenizer, code),
-    }
-}
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index 58d07c1..bc95055 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -60,7 +60,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
             tokenizer.exit(TokenType::DefinitionDestinationLiteralMarker);
             (State::Fn(Box::new(enclosed_before)), None)
         }
-        Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(')') => {
+        Code::None | Code::CarriageReturnLineFeed | Code::VirtualSpace | Code::Char(' ' | ')') => {
             (State::Nok, None)
         }
         Code::Char(char) if char.is_ascii_control() => (State::Nok, None),
diff --git a/src/construct/partial_space_or_tab.rs b/src/construct/partial_space_or_tab.rs
new file mode 100644
index 0000000..40ece49
--- /dev/null
+++ b/src/construct/partial_space_or_tab.rs
@@ -0,0 +1,98 @@
+//! Several helpers to parse whitespace (`space_or_tab`).
+//!
+//! ## References
+//!
+//! *   [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
+
+use crate::tokenizer::{Code, State, StateFn, StateFnResult, TokenType, Tokenizer};
+
+/// Optional `space_or_tab`
+///
+/// ```bnf
+/// space_or_tab_opt ::= *( ' ' '\t' )
+/// ```
+pub fn space_or_tab_opt() -> Box<StateFn> {
+    space_or_tab_min_max(0, usize::MAX)
+}
+
+/// Between `x` and `y` `space_or_tab`
+///
+/// ```bnf
+/// space_or_tab_min_max ::= x*y( ' ' '\t' )
+/// ```
+pub fn space_or_tab_min_max(min: usize, max: usize) -> Box<StateFn> {
+    space_or_tab(TokenType::Whitespace, min, max)
+}
+
+/// Between `x` and `y` `space_or_tab`, with the given token type.
+///
+/// ```bnf
+/// space_or_tab ::= x*y( ' ' '\t' )
+/// ```
+pub fn space_or_tab(kind: TokenType, min: usize, max: usize) -> Box<StateFn> {
+    Box::new(move |t, c| start(t, c, kind, min, max))
+}
+
+/// Before whitespace.
+///
+/// ```markdown
+/// alpha| bravo
+/// ```
+fn start(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    kind: TokenType,
+    min: usize,
+    max: usize,
+) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') if max > 0 => {
+            tokenizer.enter(kind.clone());
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    inside(tokenizer, code, kind, min, max, 1)
+                })),
+                None,
+            )
+        }
+        _ => (
+            if min == 0 { State::Ok } else { State::Nok },
+            Some(vec![code]),
+        ),
+    }
+}
+
+/// In whitespace.
+///
+/// ```markdown
+/// alpha |bravo
+/// alpha | bravo
+/// ```
+fn inside(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    kind: TokenType,
+    min: usize,
+    max: usize,
+    size: usize,
+) -> StateFnResult {
+    match code {
+        Code::VirtualSpace | Code::Char('\t' | ' ') if size < max => {
+            tokenizer.consume(code);
+            (
+                State::Fn(Box::new(move |tokenizer, code| {
+                    inside(tokenizer, code, kind, min, max, size + 1)
+                })),
+                None,
+            )
+        }
+        _ => {
+            tokenizer.exit(kind);
+            (
+                if size >= min { State::Ok } else { State::Nok },
+                Some(vec![code]),
+            )
+        }
+    }
+}
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 19ba8d4..0669c8e 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -33,7 +33,7 @@
 
 // To do: pass token types in.
 
-use crate::construct::partial_whitespace::start as whitespace;
+use crate::construct::partial_space_or_tab::space_or_tab_opt;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
 /// Type of title.
@@ -143,10 +143,7 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult
 /// |b"
 /// ```
 fn line_start(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
-    tokenizer.attempt(
-        |t, c| whitespace(t, c, TokenType::Whitespace),
-        |_ok| Box::new(|t, c| line_begin(t, c, kind)),
-    )(tokenizer, code)
+    tokenizer.go(space_or_tab_opt(), |t, c| line_begin(t, c, kind))(tokenizer, code)
 }
 
 /// After a line ending, after optional whitespace.
diff --git a/src/construct/partial_whitespace.rs b/src/construct/partial_whitespace.rs
deleted file mode 100644
index b8cf9a7..0000000
--- a/src/construct/partial_whitespace.rs
+++ /dev/null
@@ -1,64 +0,0 @@
-//! A little helper to parse `space_or_tab`
-//!
-//! They’re formed with the following BNF:
-//!
-//! ```bnf
-//! space_or_tab ::= 1*(' ' '\t')
-//! ```
-//!
-//! Depending on where whitespace can occur, it can be optional (or not),
-//! and present in the rendered result (or not).
-//!
-//! ## References
-//!
-//! *   [`micromark-factory-space/index.js` in `micromark`](https://github.com/micromark/micromark/blob/main/packages/micromark-factory-space/dev/index.js)
-
-use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-
-// To do: should `token_type` be a `Some`, with `None` defaulting to something?
-// To do: should `max: Some(usize)` be added?
-
-/// Before whitespace.
-///
-/// ```markdown
-/// alpha| bravo
-/// ```
-pub fn start(tokenizer: &mut Tokenizer, code: Code, token_type: TokenType) -> StateFnResult {
-    match code {
-        Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            // To do: lifetimes.
-            let clone = token_type.clone();
-            tokenizer.enter(token_type);
-            tokenizer.consume(code);
-            (
-                State::Fn(Box::new(|tokenizer, code| inside(tokenizer, code, clone))),
-                None,
-            )
-        }
-        _ => (State::Nok, None),
-    }
-}
-
-/// In whitespace.
-///
-/// ```markdown
-/// alpha |bravo
-/// alpha | bravo
-/// ```
-fn inside(tokenizer: &mut Tokenizer, code: Code, token_type: TokenType) -> StateFnResult {
-    match code {
-        Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            tokenizer.consume(code);
-            (
-                State::Fn(Box::new(|tokenizer, code| {
-                    inside(tokenizer, code, token_type)
-                })),
-                None,
-            )
-        }
-        _ => {
-            tokenizer.exit(token_type);
-            (State::Ok, Some(vec![code]))
-        }
-    }
-}
diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs
index bc41991..abf733d 100644
--- a/src/construct/thematic_break.rs
+++ b/src/construct/thematic_break.rs
@@ -44,6 +44,7 @@
 //!
 //! 
 
+use super::partial_space_or_tab::space_or_tab_opt;
 use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN;
 use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 
@@ -53,9 +54,18 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
 /// |***
 /// ```
 pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.enter(TokenType::ThematicBreak);
+    tokenizer.go(space_or_tab_opt(), before)(tokenizer, code)
+}
+
+/// Start of a thematic break, after whitespace.
+///
+/// ```markdown
+/// |***
+/// ```
+pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::Char(char) if char == '*' || char == '-' || char == '_' => {
-            tokenizer.enter(TokenType::ThematicBreak);
             at_break(tokenizer, code, char, 0)
         }
         _ => (State::Nok, None),
@@ -71,20 +81,16 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```
 fn at_break(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
     match code {
-        Code::Char(char) if char == marker => {
-            tokenizer.enter(TokenType::ThematicBreakSequence);
-            sequence(tokenizer, code, marker, size)
-        }
-        Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            tokenizer.enter(TokenType::ThematicBreakWhitespace);
-            whitespace(tokenizer, code, marker, size)
-        }
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
             if size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>
         {
             tokenizer.exit(TokenType::ThematicBreak);
             (State::Ok, Some(vec![code]))
         }
+        Code::Char(char) if char == marker => {
+            tokenizer.enter(TokenType::ThematicBreakSequence);
+            sequence(tokenizer, code, marker, size)
+        }
         _ => (State::Nok, None),
     }
 }
@@ -109,31 +115,9 @@ fn sequence(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) ->
         }
         _ => {
             tokenizer.exit(TokenType::ThematicBreakSequence);
-            at_break(tokenizer, code, marker, size)
-        }
-    }
-}
-
-/// In whitespace.
-///
-/// ```markdown
-/// * |* *
-/// * | * *
-/// ```
-fn whitespace(tokenizer: &mut Tokenizer, code: Code, marker: char, size: usize) -> StateFnResult {
-    match code {
-        Code::VirtualSpace | Code::Char('\t' | ' ') => {
-            tokenizer.consume(code);
-            (
-                State::Fn(Box::new(move |tokenizer, code| {
-                    whitespace(tokenizer, code, marker, size)
-                })),
-                None,
+            tokenizer.go(space_or_tab_opt(), move |t, c| at_break(t, c, marker, size))(
+                tokenizer, code,
             )
         }
-        _ => {
-            tokenizer.exit(TokenType::ThematicBreakWhitespace);
-            at_break(tokenizer, code, marker, size)
-        }
     }
 }
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 22aa77f..f4af4ea 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -24,7 +24,7 @@ use crate::construct::{
     code_indented::start as code_indented, definition::start as definition,
     heading_atx::start as heading_atx, heading_setext::start as heading_setext,
     html_flow::start as html_flow, paragraph::start as paragraph,
-    partial_whitespace::start as whitespace, thematic_break::start as thematic_break,
+    thematic_break::start as thematic_break,
 };
 use crate::subtokenize::subtokenize;
 use crate::tokenizer::{Code, Event, Point, State, StateFnResult, TokenType, Tokenizer};
@@ -95,9 +95,16 @@ fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Ok, None),
         // To do: should all flow just start before the prefix?
-        _ => tokenizer.attempt_3(code_indented, code_fenced, html_flow, |ok| {
-            Box::new(if ok { after } else { before })
-        })(tokenizer, code),
+        _ => tokenizer.attempt_7(
+            code_indented,
+            code_fenced,
+            html_flow,
+            heading_atx,
+            thematic_break,
+            definition,
+            heading_setext,
+            |ok| Box::new(if ok { after } else { before_paragraph }),
+        )(tokenizer, code),
     }
 }
 
@@ -123,36 +130,6 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// Before flow, but not at code (indented) or code (fenced).
-///
-/// Compared to flow (initial), normal flow can be arbitrarily prefixed.
-///
-/// ```markdown
-/// |qwe
-/// ```
-fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(
-        |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
-        |_ok| Box::new(before_after_prefix),
-    )(tokenizer, code)
-}
-
-/// Before flow, after potential whitespace.
-///
-/// ```markdown
-/// |# asd
-/// |***
-/// ```
-fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt_4(
-        heading_atx,
-        thematic_break,
-        definition,
-        heading_setext,
-        |ok| Box::new(if ok { after } else { before_paragraph }),
-    )(tokenizer, code)
-}
-
 /// Before a paragraph.
 ///
 /// ```markdown
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index c1bb61b..de27d12 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -25,7 +25,6 @@ pub enum TokenType {
     AutolinkProtocol,
     AutolinkEmail,
     BlankLineEnding,
-    BlankLineWhitespace,
     CharacterEscape,
     CharacterEscapeMarker,
     CharacterEscapeValue,
@@ -38,12 +37,10 @@ pub enum TokenType {
     CodeFenced,
     CodeFencedFence,
     CodeFencedFenceSequence,
-    CodeFencedFenceWhitespace,
     CodeFencedFenceInfo,
     CodeFencedFenceMeta,
     CodeFlowChunk,
     CodeIndented,
-    CodeIndentedPrefixWhitespace,
     CodeText,
     CodeTextSequence,
     CodeTextLineEnding,
@@ -81,7 +78,6 @@ pub enum TokenType {
     Paragraph,
     ThematicBreak,
     ThematicBreakSequence,
-    ThematicBreakWhitespace,
     Whitespace,
 
     // Chunks are tokenizer, but unraveled by `subtokenize`.
@@ -114,7 +110,7 @@ pub struct Point {
     /// 1-indexed line number.
     pub line: usize,
     /// 1-indexed column number.
-    /// Note that this is increases up to a tab stop for tabs.
+    /// This increases up to a tab stop for tabs.
     /// Some editors count tabs as 1 character, so this position is not always
     /// the same as editors.
     pub column: usize,
@@ -485,32 +481,14 @@ impl Tokenizer {
         )
     }
 
-    pub fn attempt_3(
-        &mut self,
-        a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
-        b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
-        c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
-        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
-    ) -> Box<StateFn> {
-        self.call_multiple(
-            false,
-            Some(Box::new(a)),
-            Some(Box::new(b)),
-            Some(Box::new(c)),
-            None,
-            None,
-            None,
-            None,
-            done,
-        )
-    }
-
-    pub fn attempt_4(
+    #[allow(clippy::too_many_arguments, clippy::many_single_char_names)]
+    pub fn attempt_5(
         &mut self,
         a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         d: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        e: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
         done: impl FnOnce(bool) -> Box<StateFn> + 'static,
     ) -> Box<StateFn> {
         self.call_multiple(
@@ -519,7 +497,7 @@ impl Tokenizer {
             Some(Box::new(b)),
             Some(Box::new(c)),
             Some(Box::new(d)),
-            None,
+            Some(Box::new(e)),
             None,
             None,
             done,
-- 
cgit
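
Illustrative usage sketch (not part of the exported patch above): the new
`space_or_tab_opt`/`space_or_tab_min_max` helpers are meant to be composed
with `tokenizer.go`, the way the changed `heading_atx`, `thematic_break`, and
`code_indented` call sites do. The construct and state names below
(`my_construct`, `my_before`, `my_after`) are hypothetical.

    use crate::constant::TAB_SIZE;
    use crate::construct::partial_space_or_tab::{space_or_tab_min_max, space_or_tab_opt};
    use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};

    /// Hypothetical flow construct start: skip any optional leading
    /// spaces/tabs (tokenized as `TokenType::Whitespace`), then continue.
    pub fn my_construct(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
        tokenizer.go(space_or_tab_opt(), my_before)(tokenizer, code)
    }

    /// Hypothetical continuation: require exactly `TAB_SIZE` spaces/tabs,
    /// as `code_indented::start` does with `space_or_tab_min_max`.
    fn my_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
        tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), my_after)(tokenizer, code)
    }

    /// Done: succeed and hand the current code back to the caller.
    fn my_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
        (State::Ok, Some(vec![code]))
    }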