diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/construct/code_fenced.rs | 5 | ||||
| -rw-r--r-- | src/construct/code_indented.rs | 11 | ||||
| -rw-r--r-- | src/construct/definition.rs | 17 | ||||
| -rw-r--r-- | src/construct/heading_atx.rs | 9 | ||||
| -rw-r--r-- | src/construct/heading_setext.rs | 233 | ||||
| -rw-r--r-- | src/construct/html_flow.rs | 19 | ||||
| -rw-r--r-- | src/construct/paragraph.rs | 150 | ||||
| -rw-r--r-- | src/construct/thematic_break.rs | 9 | ||||
| -rw-r--r-- | src/content/flow.rs | 46 | ||||
| -rw-r--r-- | src/tokenizer.rs | 3 | 
10 files changed, 205 insertions, 297 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs index d19cad0..f2d243a 100644 --- a/src/construct/code_fenced.rs +++ b/src/construct/code_fenced.rs @@ -179,7 +179,8 @@ struct Info {  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.enter(TokenType::CodeFenced);      tokenizer.enter(TokenType::CodeFencedFence); -    tokenizer.attempt_opt(space_or_tab(), before_sequence_open)(tokenizer, code) +    // To do: allow arbitrary when code (indented) is turned off. +    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before_sequence_open)(tokenizer, code)  }  /// Inside the opening fence, after an optional prefix, before a sequence. @@ -550,5 +551,7 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateF  /// ```  fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.exit(TokenType::CodeFenced); +    // Feel free to interrupt. +    tokenizer.interrupt = false;      (State::Ok, Some(vec![code]))  } diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs index 99445b9..9bdfd71 100644 --- a/src/construct/code_indented.rs +++ b/src/construct/code_indented.rs @@ -59,8 +59,13 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// > filled line (that it has a non-whitespace character), because blank lines  /// > are parsed already, so we never run into that.  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.enter(TokenType::CodeIndented); -    tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code) +    // Do not interrupt paragraphs. +    if tokenizer.interrupt { +        (State::Nok, None) +    } else { +        tokenizer.enter(TokenType::CodeIndented); +        tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), at_break)(tokenizer, code) +    }  }  /// At a break. @@ -110,6 +115,8 @@ fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.exit(TokenType::CodeIndented); +    // Feel free to interrupt. +    tokenizer.interrupt = false;      (State::Ok, Some(vec![code]))  } diff --git a/src/construct/definition.rs b/src/construct/definition.rs index f05064a..e1afd03 100644 --- a/src/construct/definition.rs +++ b/src/construct/definition.rs @@ -107,8 +107,19 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// |[a]: b "c"  /// ```  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.enter(TokenType::Definition); -    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) +    let index = tokenizer.events.len(); +    let definition_before = index > 3 +        && tokenizer.events[index - 1].token_type == TokenType::LineEnding +        && tokenizer.events[index - 3].token_type == TokenType::Definition; + +    // Do not interrupt paragraphs (but do follow definitions). +    if tokenizer.interrupt && !definition_before { +        (State::Nok, None) +    } else { +        tokenizer.enter(TokenType::Definition); +        // Note: arbitrary whitespace allowed even if code (indented) is on. +        tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) +    }  }  /// At the start of a definition, after whitespace. @@ -218,6 +229,8 @@ fn after_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {              tokenizer.exit(TokenType::Definition); +            // You’d be interrupting. +            tokenizer.interrupt = true;              (State::Ok, Some(vec![code]))          }          _ => (State::Nok, None), diff --git a/src/construct/heading_atx.rs b/src/construct/heading_atx.rs index 2811894..3ce7052 100644 --- a/src/construct/heading_atx.rs +++ b/src/construct/heading_atx.rs @@ -54,8 +54,8 @@  //! [wiki-setext]: https://en.wikipedia.org/wiki/Setext  //! [atx]: http://www.aaronsw.com/2002/atx/ -use super::partial_space_or_tab::space_or_tab; -use crate::constant::HEADING_ATX_OPENING_FENCE_SIZE_MAX; +use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::constant::{HEADING_ATX_OPENING_FENCE_SIZE_MAX, TAB_SIZE};  use crate::tokenizer::{      Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,  }; @@ -68,7 +68,8 @@ use crate::util::edit_map::EditMap;  /// ```  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.enter(TokenType::HeadingAtx); -    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) +    // To do: allow arbitrary when code (indented) is turned off. +    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)  }  /// Start of a heading (atx), after whitespace. @@ -127,6 +128,8 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {              tokenizer.exit(TokenType::HeadingAtx);              tokenizer.register_resolver("heading_atx".to_string(), Box::new(resolve)); +            // Feel free to interrupt. +            tokenizer.interrupt = false;              (State::Ok, Some(vec![code]))          }          Code::VirtualSpace | Code::Char('\t' | ' ') => { diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 03a2e55..df20aa7 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -58,10 +58,9 @@  //! [atx]: http://www.aaronsw.com/2002/atx/  use crate::constant::TAB_SIZE; -use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_with_options, Options}; -use crate::subtokenize::link; -use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::span::from_exit_event; +use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer}; +use crate::util::edit_map::EditMap;  /// Kind of underline.  #[derive(Debug, Clone, PartialEq)] @@ -109,150 +108,23 @@ impl Kind {      }  } -/// Start of a heading (setext). -/// -/// ```markdown -/// |alpha -/// == -/// ``` -pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.enter(TokenType::HeadingSetext); -    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) -} - -/// Start of a heading (setext), after whitespace. -/// -/// ```markdown -/// |alpha -/// == -/// ``` -fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            unreachable!("expected non-eol/eof"); -        } -        _ => { -            tokenizer.enter(TokenType::HeadingSetextText); -            tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text)); -            text_inside(tokenizer, code) -        } -    } -} - -/// Inside text. -/// -/// ```markdown -/// al|pha -/// bra|vo -/// == -/// ``` -fn text_inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None => (State::Nok, None), -        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            tokenizer.exit(TokenType::Data); -            tokenizer.exit(TokenType::HeadingSetextText); -            tokenizer.attempt(underline_before, |ok| { -                Box::new(if ok { after } else { text_continue }) -            })(tokenizer, code) -        } -        _ => { -            tokenizer.consume(code); -            (State::Fn(Box::new(text_inside)), None) -        } -    } -} - -/// At a line ending, not at an underline. -/// -/// ```markdown -/// alpha -/// |bravo -/// == -/// ``` -fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    // Needed to connect the text. -    tokenizer.enter(TokenType::HeadingSetextText); -    tokenizer.events.pop(); -    tokenizer.events.pop(); - -    match code { -        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            tokenizer.enter_with_content(TokenType::LineEnding, Some(ContentType::Text)); -            let index = tokenizer.events.len() - 1; -            link(&mut tokenizer.events, index); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::LineEnding); - -            ( -                State::Fn(Box::new(tokenizer.attempt_opt( -                    space_or_tab_with_options(Options { -                        kind: TokenType::SpaceOrTab, -                        min: 1, -                        max: usize::MAX, -                        content_type: Some(ContentType::Text), -                        connect: true, -                    }), -                    text_line_start, -                ))), -                None, -            ) -        } -        _ => unreachable!("expected eol"), -    } -} - -/// At a line ending after whitespace, not at an underline. -/// -/// ```markdown -/// alpha -/// |bravo -/// == -/// ``` -fn text_line_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        // Blank lines not allowed. -        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None), -        _ => { -            tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text)); -            let index = tokenizer.events.len() - 1; -            link(&mut tokenizer.events, index); -            text_inside(tokenizer, code) -        } -    } -} - -/// After a heading (setext). -/// -/// ```markdown -/// alpha -/// ==| -/// ``` -fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.exit(TokenType::HeadingSetext); -    (State::Ok, Some(vec![code])) -} -  /// At a line ending, presumably an underline.  ///  /// ```markdown  /// alpha|  /// ==  /// ``` -fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            tokenizer.enter(TokenType::LineEnding); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::LineEnding); -            ( -                State::Fn(Box::new( -                    tokenizer.attempt_opt(space_or_tab(), underline_sequence_start), -                )), -                None, -            ) -        } -        _ => unreachable!("expected eol"), +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    let index = tokenizer.events.len(); +    let paragraph_before = index > 3 +        && tokenizer.events[index - 1].token_type == TokenType::LineEnding +        && tokenizer.events[index - 3].token_type == TokenType::Paragraph; + +    if paragraph_before { +        // To do: allow arbitrary when code (indented) is turned off. +        tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) +    } else { +        (State::Nok, None)      }  } @@ -262,26 +134,11 @@ fn underline_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// alpha  /// |==  /// ``` -fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    let tail = tokenizer.events.last(); -    let mut prefix = 0; - -    if let Some(event) = tail { -        if event.token_type == TokenType::SpaceOrTab { -            let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1); -            prefix = span.end_index - span.start_index; -        } -    } - -    // To do: 4+ should be okay if code (indented) is turned off! -    if prefix >= TAB_SIZE { -        return (State::Nok, None); -    } - +fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::Char(char) if char == '-' || char == '=' => {              tokenizer.enter(TokenType::HeadingSetextUnderline); -            underline_sequence_inside(tokenizer, code, Kind::from_char(char)) +            inside(tokenizer, code, Kind::from_char(char))          }          _ => (State::Nok, None),      } @@ -293,16 +150,13 @@ fn underline_sequence_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnRes  /// alpha  /// =|=  /// ``` -fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult { +fn inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {      match code {          Code::Char(char) if char == kind.as_char() => {              tokenizer.consume(code); -            ( -                State::Fn(Box::new(move |t, c| underline_sequence_inside(t, c, kind))), -                None, -            ) +            (State::Fn(Box::new(move |t, c| inside(t, c, kind))), None)          } -        _ => tokenizer.attempt_opt(space_or_tab(), underline_after)(tokenizer, code), +        _ => tokenizer.attempt_opt(space_or_tab(), after)(tokenizer, code),      }  } @@ -312,12 +166,59 @@ fn underline_sequence_inside(tokenizer: &mut Tokenizer, code: Code, kind: Kind)  /// alpha  /// ==|  /// ``` -fn underline_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {              tokenizer.exit(TokenType::HeadingSetextUnderline); +            // Feel free to interrupt. +            tokenizer.interrupt = false; +            tokenizer.register_resolver("heading_setext".to_string(), Box::new(resolve));              (State::Ok, Some(vec![code]))          }          _ => (State::Nok, None),      }  } + +/// To do. +pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { +    let mut edit_map = EditMap::new(); +    let mut index = 0; +    let mut paragraph_enter: Option<usize> = None; +    let mut paragraph_exit: Option<usize> = None; + +    while index < tokenizer.events.len() { +        let event = &tokenizer.events[index]; + +        // Find paragraphs. +        if event.event_type == EventType::Enter { +            if event.token_type == TokenType::Paragraph { +                paragraph_enter = Some(index); +            } +        } else if event.token_type == TokenType::Paragraph { +            paragraph_exit = Some(index); +        } +        // We know this is preceded by a paragraph. +        // Otherwise we don’t parse. +        else if event.token_type == TokenType::HeadingSetextUnderline { +            let enter = paragraph_enter.take().unwrap(); +            let exit = paragraph_exit.take().unwrap(); + +            // Change types of Enter:Paragraph, Exit:Paragraph. +            tokenizer.events[enter].token_type = TokenType::HeadingSetextText; +            tokenizer.events[exit].token_type = TokenType::HeadingSetextText; + +            // Add of Enter:HeadingSetext, Exit:HeadingSetext. +            let mut heading_enter = tokenizer.events[enter].clone(); +            heading_enter.token_type = TokenType::HeadingSetext; +            let mut heading_exit = tokenizer.events[index].clone(); +            heading_exit.token_type = TokenType::HeadingSetext; + +            edit_map.add(enter, 0, vec![heading_enter]); +            edit_map.add(index + 1, 0, vec![heading_exit]); +        } + +        index += 1; +    } + +    edit_map.consume(&mut tokenizer.events) +} diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index d0e0558..a1bddad 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -98,8 +98,10 @@  //! [html_block_names]: crate::constant::HTML_BLOCK_NAMES  //! [html-parsing]: https://html.spec.whatwg.org/multipage/parsing.html#parsing -use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX}; -use crate::construct::{blank_line::start as blank_line, partial_space_or_tab::space_or_tab}; +use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE}; +use crate::construct::{ +    blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max, +};  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// Kind of HTML (flow). @@ -191,7 +193,8 @@ struct Info {  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.enter(TokenType::HtmlFlow);      tokenizer.enter(TokenType::HtmlFlowData); -    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) +    // To do: allow arbitrary when code (indented) is turned off. +    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)  }  /// After optional whitespace, before `<`. @@ -400,8 +403,10 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes              } else {                  info.kind = Kind::Complete; -                // To do: do not support complete HTML when interrupting. -                if info.start_tag { +                // Do not support complete HTML when interrupting. +                if tokenizer.interrupt { +                    (State::Nok, None) +                } else if info.start_tag {                      complete_attribute_name_before(tokenizer, code, info)                  } else {                      complete_closing_tag_after(tokenizer, code, info) @@ -784,6 +789,8 @@ fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Sta      match code {          Code::None => {              tokenizer.exit(TokenType::HtmlFlow); +            // Feel free to interrupt. +            tokenizer.interrupt = false;              (State::Ok, Some(vec![code]))          }          // To do: do not allow lazy lines. @@ -949,6 +956,8 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat          Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {              tokenizer.exit(TokenType::HtmlFlowData);              tokenizer.exit(TokenType::HtmlFlow); +            // Feel free to interrupt. +            tokenizer.interrupt = false;              (State::Ok, Some(vec![code]))          }          _ => { diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index fea7052..ae2f4de 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -32,14 +32,10 @@  //! [code_text]: crate::construct::code_text  //! [html]: https://html.spec.whatwg.org/multipage/grouping-content.html#the-p-element -use crate::constant::TAB_SIZE; -use crate::construct::{ -    blank_line::start as blank_line, code_fenced::start as code_fenced, -    heading_atx::start as heading_atx, html_flow::start as html_flow, -    partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break, +use crate::tokenizer::{ +    Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,  }; -use crate::subtokenize::link; -use crate::tokenizer::{Code, ContentType, State, StateFnResult, TokenType, Tokenizer}; +use crate::util::edit_map::EditMap;  /// Before a paragraph.  /// @@ -66,11 +62,14 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code { -        Code::None => end(tokenizer, code), -        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer -            .check(interrupt, |ok| { -                Box::new(if ok { at_line_ending } else { end }) -            })(tokenizer, code), +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { +            tokenizer.exit(TokenType::Data); +            tokenizer.exit(TokenType::Paragraph); +            tokenizer.register_resolver_before("paragraph".to_string(), Box::new(resolve)); +            // You’d be interrupting. +            tokenizer.interrupt = true; +            (State::Ok, Some(vec![code])) +        }          _ => {              tokenizer.consume(code);              (State::Fn(Box::new(inside)), None) @@ -78,90 +77,55 @@ fn inside(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      }  } -/// At a line ending, not interrupting. -/// -/// ```markdown -/// alpha| -/// bravo. -/// ``` -fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.consume(code); -    tokenizer.exit(TokenType::Data); -    tokenizer.enter_with_content(TokenType::Data, Some(ContentType::Text)); -    let index = tokenizer.events.len() - 1; -    link(&mut tokenizer.events, index); -    (State::Fn(Box::new(inside)), None) -} +/// Merge “`Paragraph`”s, which currently span a single line, into actual +/// `Paragraph`s that span multiple lines. +pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { +    let mut edit_map = EditMap::new(); +    let len = tokenizer.events.len(); +    let mut index = 0; -/// At a line ending, done. -/// -/// ```markdown -/// alpha| -/// *** -/// ``` -fn end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.exit(TokenType::Data); -    tokenizer.exit(TokenType::Paragraph); -    (State::Ok, Some(vec![code])) -} +    while index < len { +        let event = &tokenizer.events[index]; -/// Before a potential interruption. -/// -/// ```markdown -/// alpha| -/// *** -/// ``` -fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            tokenizer.enter(TokenType::LineEnding); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::LineEnding); -            (State::Fn(Box::new(interrupt_start)), None) -        } -        _ => unreachable!("expected eol"), -    } -} +        if event.event_type == EventType::Enter && event.token_type == TokenType::Paragraph { +            // Exit:Paragraph +            let mut exit_index = index + 3; +            // Enter:Paragraph +            let mut enter_next_index = exit_index + 3; -/// After a line ending. -/// -/// ```markdown -/// alpha -/// |~~~js -/// ~~~ -/// ``` -fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    // To do: If code is disabled, indented lines are allowed to interrupt. -    tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| { -        Box::new(if ok { interrupt_indent } else { interrupt_cont }) -    })(tokenizer, code) -} +            // To do: assert that `LineEnding` between? +            while enter_next_index < len +                && tokenizer.events[enter_next_index].token_type == TokenType::Paragraph +            { +                // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph. +                edit_map.add(exit_index, 4, vec![]); +                println!("rm {:?} {:?}", exit_index, exit_index + 4); -/// At an indent. -/// -/// ```markdown -/// alpha -///     | -/// ``` -fn interrupt_indent(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    (State::Ok, Some(vec![code])) -} +                // Add Exit:LineEnding position info to Exit:Data. +                let line_ending_exit = &tokenizer.events[enter_next_index - 1]; +                let line_ending_point = line_ending_exit.point.clone(); +                let line_ending_index = line_ending_exit.index; +                let data_exit = &mut tokenizer.events[exit_index - 1]; +                data_exit.point = line_ending_point; +                data_exit.index = line_ending_index; -/// Not at an indented line. -/// -/// ```markdown -/// alpha -/// |<div> -/// ``` -fn interrupt_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    tokenizer.attempt_n( -        vec![ -            Box::new(blank_line), -            Box::new(code_fenced), -            Box::new(html_flow), -            Box::new(heading_atx), -            Box::new(thematic_break), -        ], -        |ok| Box::new(move |_t, code| (if ok { State::Nok } else { State::Ok }, Some(vec![code]))), -    )(tokenizer, code) +                // Link Enter:Data on the previous line to Enter:Data on this line. +                let data_enter_prev = &mut tokenizer.events[exit_index - 2]; +                data_enter_prev.next = Some(enter_next_index + 1); +                let data_enter_next = &mut tokenizer.events[enter_next_index + 1]; +                data_enter_next.previous = Some(exit_index - 2); + +                // Potential next start. +                exit_index = enter_next_index + 3; +                enter_next_index = exit_index + 3; +            } + +            // Move to `Exit:Paragraph`. +            index = exit_index; +        } + +        index += 1; +    } + +    edit_map.consume(&mut tokenizer.events)  } diff --git a/src/construct/thematic_break.rs b/src/construct/thematic_break.rs index 9978ee0..8d29157 100644 --- a/src/construct/thematic_break.rs +++ b/src/construct/thematic_break.rs @@ -49,8 +49,8 @@  //!  //! <!-- To do: link `lists` --> -use super::partial_space_or_tab::space_or_tab; -use crate::constant::THEMATIC_BREAK_MARKER_COUNT_MIN; +use super::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; +use crate::constant::{TAB_SIZE, THEMATIC_BREAK_MARKER_COUNT_MIN};  use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// Type of thematic break. @@ -122,7 +122,8 @@ struct Info {  /// ```  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.enter(TokenType::ThematicBreak); -    tokenizer.attempt_opt(space_or_tab(), before)(tokenizer, code) +    // To do: allow arbitrary when code (indented) is turned off. +    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)  }  /// Start of a thematic break, after whitespace. @@ -157,6 +158,8 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult              if info.size >= THEMATIC_BREAK_MARKER_COUNT_MIN =>          {              tokenizer.exit(TokenType::ThematicBreak); +            // Feel free to interrupt. +            tokenizer.interrupt = false;              (State::Ok, Some(vec![code]))          }          Code::Char(char) if char == info.kind.as_char() => { diff --git a/src/content/flow.rs b/src/content/flow.rs index 0d3ede0..3ff948d 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -92,26 +92,6 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      }  } -/// After a blank line. -/// -/// Move to `start` afterwards. -/// -/// ```markdown -/// ␠␠| -/// ``` -fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    match code { -        Code::None => (State::Ok, None), -        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { -            tokenizer.enter(TokenType::BlankLineEnding); -            tokenizer.consume(code); -            tokenizer.exit(TokenType::BlankLineEnding); -            (State::Fn(Box::new(start)), None) -        } -        _ => unreachable!("expected eol/eof after blank line `{:?}`", code), -    } -} -  /// Before flow (initial).  ///  /// “Initial” flow means unprefixed flow, so right at the start of a line. @@ -133,16 +113,38 @@ fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {                  Box::new(code_fenced),                  Box::new(html_flow),                  Box::new(heading_atx), +                Box::new(heading_setext),                  Box::new(thematic_break),                  Box::new(definition), -                Box::new(heading_setext),              ],              |ok| Box::new(if ok { after } else { before_paragraph }),          )(tokenizer, code),      }  } -/// After a flow construct. +/// After a blank line. +/// +/// Move to `start` afterwards. +/// +/// ```markdown +/// ␠␠| +/// ``` +fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::None => (State::Ok, None), +        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => { +            tokenizer.enter(TokenType::BlankLineEnding); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::BlankLineEnding); +            // Feel free to interrupt. +            tokenizer.interrupt = false; +            (State::Fn(Box::new(start)), None) +        } +        _ => unreachable!("expected eol/eof after blank line `{:?}`", code), +    } +} + +/// After something.  ///  /// ```markdown  /// ## alpha| diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 817c1de..b70e706 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1760,6 +1760,8 @@ pub struct Tokenizer<'a> {      /// To do.      pub label_start_list_loose: Vec<LabelStart>,      /// To do. +    pub interrupt: bool, +    /// To do.      pub media_list: Vec<Media>,      /// To do.      resolvers: Vec<Box<Resolver>>, @@ -1783,6 +1785,7 @@ impl<'a> Tokenizer<'a> {              label_start_stack: vec![],              label_start_list_loose: vec![],              media_list: vec![], +            interrupt: false,              resolvers: vec![],              resolver_ids: vec![],          }  | 
