diff options
| -rw-r--r-- | src/content/document.rs | 2 | ||||
| -rw-r--r-- | src/content/flow.rs | 58 | ||||
| -rw-r--r-- | src/tokenizer.rs | 6 | 
3 files changed, 50 insertions, 16 deletions
| diff --git a/src/content/document.rs b/src/content/document.rs index d02021a..cc83415 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -66,7 +66,7 @@ pub fn document(parse_state: &mut ParseState, point: Point) -> Vec<Event> {          let event = &tokenizer.events[index];          if event.event_type == EventType::Exit && event.token_type == Token::DefinitionLabelString { -            // Note: we don‘t care about virtual spaces, so `as_str` is fine. +            // Note: we don’t care about virtual spaces, so `as_str` is fine.              let id = normalize_identifier(                  Slice::from_position(                      tokenizer.parse_state.bytes, diff --git a/src/content/flow.rs b/src/content/flow.rs index bfaf5e9..b3fb866 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -34,15 +34,56 @@ use crate::tokenizer::{State, StateName, Tokenizer};  /// ```  pub fn start(tokenizer: &mut Tokenizer) -> State {      match tokenizer.current { -        None => State::Ok, -        _ => tokenizer.attempt( -            StateName::BlankLineStart, -            State::Fn(StateName::FlowBlankLineAfter), -            State::Fn(StateName::FlowBefore), +        Some(b'`' | b'~') => tokenizer.attempt( +            StateName::CodeFencedStart, +            State::Fn(StateName::FlowAfter), +            State::Fn(StateName::FlowBeforeParagraph), +        ), +        Some(b'<') => tokenizer.attempt( +            StateName::HtmlFlowStart, +            State::Fn(StateName::FlowAfter), +            State::Fn(StateName::FlowBeforeParagraph), +        ), +        Some(b'#') => tokenizer.attempt( +            StateName::HeadingAtxStart, +            State::Fn(StateName::FlowAfter), +            State::Fn(StateName::FlowBeforeParagraph), +        ), +        // Note: `-` is also used in thematic breaks, so it’s not included here. +        Some(b'=') => tokenizer.attempt( +            StateName::HeadingSetextStart, +            State::Fn(StateName::FlowAfter), +            State::Fn(StateName::FlowBeforeParagraph), +        ), +        Some(b'*' | b'_') => tokenizer.attempt( +            StateName::ThematicBreakStart, +            State::Fn(StateName::FlowAfter), +            State::Fn(StateName::FlowBeforeParagraph), +        ), +        Some(b'[') => tokenizer.attempt( +            StateName::DefinitionStart, +            State::Fn(StateName::FlowAfter), +            State::Fn(StateName::FlowBeforeParagraph), +        ), +        // Actual parsing: blank line? Indented code? Indented anything? +        // Also includes `-` which can be a setext heading underline or a thematic break. +        None | Some(b'\t' | b'\n' | b' ' | b'-') => before_blank_line(tokenizer), +        Some(_) => tokenizer.attempt( +            StateName::ParagraphStart, +            State::Fn(StateName::FlowAfter), +            State::Nok,          ),      }  } +pub fn before_blank_line(tokenizer: &mut Tokenizer) -> State { +    tokenizer.attempt( +        StateName::BlankLineStart, +        State::Fn(StateName::FlowBlankLineAfter), +        State::Fn(StateName::FlowBeforeCodeIndented), +    ) +} +  /// Before flow (initial).  ///  /// “Initial” flow means unprefixed flow, so right at the start of a line. @@ -55,17 +96,12 @@ pub fn start(tokenizer: &mut Tokenizer) -> State {  /// |~~~js  /// |<div>  /// ``` -pub fn before(tokenizer: &mut Tokenizer) -> State { -    // match tokenizer.current { -    //     None => State::Ok, -    //     _ => { +pub fn before_code_indented(tokenizer: &mut Tokenizer) -> State {      tokenizer.attempt(          StateName::CodeIndentedStart,          State::Fn(StateName::FlowAfter),          State::Fn(StateName::FlowBeforeCodeFenced),      ) -    //     } -    // }  }  pub fn before_code_fenced(tokenizer: &mut Tokenizer) -> State { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 1d02d5a..034c6f9 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -214,7 +214,7 @@ pub enum StateName {      DocumentFlowEnd,      FlowStart, -    FlowBefore, +    FlowBeforeCodeIndented,      FlowBeforeCodeFenced,      FlowBeforeHtml,      FlowBeforeHeadingAtx, @@ -508,15 +508,13 @@ impl StateName {              StateName::DocumentFlowInside => content::document::flow_inside,              StateName::FlowStart => content::flow::start, -            StateName::FlowBefore => content::flow::before, - +            StateName::FlowBeforeCodeIndented => content::flow::before_code_indented,              StateName::FlowBeforeCodeFenced => content::flow::before_code_fenced,              StateName::FlowBeforeHtml => content::flow::before_html,              StateName::FlowBeforeHeadingAtx => content::flow::before_heading_atx,              StateName::FlowBeforeHeadingSetext => content::flow::before_heading_setext,              StateName::FlowBeforeThematicBreak => content::flow::before_thematic_break,              StateName::FlowBeforeDefinition => content::flow::before_definition, -              StateName::FlowAfter => content::flow::after,              StateName::FlowBlankLineAfter => content::flow::blank_line_after,              StateName::FlowBeforeParagraph => content::flow::before_paragraph, | 
