diff options
Diffstat (limited to '')
| -rw-r--r-- | src/compiler.rs | 29 | ||||
| -rw-r--r-- | src/construct/block_quote.rs | 58 | ||||
| -rw-r--r-- | src/construct/heading_setext.rs | 27 | ||||
| -rw-r--r-- | src/construct/mod.rs | 1 | ||||
| -rw-r--r-- | src/construct/paragraph.rs | 28 | ||||
| -rw-r--r-- | src/content/document.rs | 439 | ||||
| -rw-r--r-- | src/content/flow.rs | 49 | ||||
| -rw-r--r-- | src/content/mod.rs | 6 | ||||
| -rw-r--r-- | src/parser.rs | 4 | ||||
| -rw-r--r-- | src/tokenizer.rs | 38 | ||||
| -rw-r--r-- | src/util/edit_map.rs | 1 | ||||
| -rw-r--r-- | src/util/mod.rs | 1 | ||||
| -rw-r--r-- | src/util/skip.rs | 44 | 
13 files changed, 645 insertions, 80 deletions
| diff --git a/src/compiler.rs b/src/compiler.rs index 7e47f95..f27c0de 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -193,9 +193,7 @@ pub struct Options {      /// // micromark is safe by default:      /// assert_eq!(      ///     micromark("> a"), -    ///     // To do: block quote -    ///     // "<blockquote>\n<p>a</p>\n</blockquote>" -    ///     "<p>> a</p>" +    ///     "<blockquote>\n<p>a</p>\n</blockquote>"      /// );      ///      /// // Define `default_line_ending` to configure the default: @@ -209,9 +207,7 @@ pub struct Options {      ///      ///         }      ///     ), -    ///     // To do: block quote -    ///     // "<blockquote>\r\n<p>a</p>\r\n</blockquote>" -    ///     "<p>> a</p>" +    ///     "<blockquote>\r\n<p>a</p>\r\n</blockquote>"      /// );      /// ```      pub default_line_ending: Option<LineEnding>, @@ -418,6 +414,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {      enter_map.insert(TokenType::HeadingSetextText, on_enter_buffer);      enter_map.insert(TokenType::Label, on_enter_buffer);      enter_map.insert(TokenType::ResourceTitleString, on_enter_buffer); +    enter_map.insert(TokenType::BlockQuote, on_enter_block_quote);      enter_map.insert(TokenType::CodeIndented, on_enter_code_indented);      enter_map.insert(TokenType::CodeFenced, on_enter_code_fenced);      enter_map.insert(TokenType::CodeText, on_enter_code_text); @@ -491,6 +488,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {      exit_map.insert(TokenType::CodeFlowChunk, on_exit_code_flow_chunk);      exit_map.insert(TokenType::CodeText, on_exit_code_text);      exit_map.insert(TokenType::CodeTextLineEnding, on_exit_code_text_line_ending); +    exit_map.insert(TokenType::BlockQuote, on_exit_block_quote);      exit_map.insert(TokenType::HardBreakEscape, on_exit_break);      exit_map.insert(TokenType::HardBreakTrailing, on_exit_break);      exit_map.insert(TokenType::HeadingAtx, on_exit_heading_atx); @@ -607,6 +605,13 @@ fn on_enter_buffer(context: &mut CompileContext) {      context.buffer();  } +/// Handle [`Enter`][EventType::Enter]:[`BlockQuote`][TokenType::BlockQuote]. +fn on_enter_block_quote(context: &mut CompileContext) { +    // tightStack.push(false) +    context.line_ending_if_needed(); +    context.tag("<blockquote>".to_string()); +} +  /// Handle [`Enter`][EventType::Enter]:[`CodeIndented`][TokenType::CodeIndented].  fn on_enter_code_indented(context: &mut CompileContext) {      context.code_flow_seen_data = Some(false); @@ -695,6 +700,7 @@ fn on_enter_link(context: &mut CompileContext) {  /// Handle [`Enter`][EventType::Enter]:[`Paragraph`][TokenType::Paragraph].  fn on_enter_paragraph(context: &mut CompileContext) { +    context.line_ending_if_needed();      context.tag("<p>".to_string());  } @@ -756,6 +762,14 @@ fn on_exit_break(context: &mut CompileContext) {      context.tag("<br />".to_string());  } +/// Handle [`Exit`][EventType::Exit]:[`BlockQuote`][TokenType::BlockQuote]. +fn on_exit_block_quote(context: &mut CompileContext) { +    // tightStack.pop() +    context.line_ending_if_needed(); +    context.tag("</blockquote>".to_string()); +    // let mut slurp_all_line_endings = false; +} +  /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][TokenType::CharacterReferenceMarker].  fn on_exit_character_reference_marker(context: &mut CompileContext) {      context.character_reference_kind = Some(CharacterReferenceKind::Named); @@ -971,6 +985,7 @@ fn on_exit_heading_atx_sequence(context: &mut CompileContext) {              false,          )          .len(); +        context.line_ending_if_needed();          context.atx_opening_sequence_size = Some(rank);          context.tag(format!("<h{}>", rank));      } @@ -1001,6 +1016,7 @@ fn on_exit_heading_setext_underline(context: &mut CompileContext) {      )[0];      let level: usize = if head == Code::Char('-') { 2 } else { 1 }; +    context.line_ending_if_needed();      context.tag(format!("<h{}>", level));      context.push(text);      context.tag(format!("</h{}>", level)); @@ -1157,5 +1173,6 @@ fn on_exit_strong(context: &mut CompileContext) {  /// Handle [`Exit`][EventType::Exit]:[`ThematicBreak`][TokenType::ThematicBreak].  fn on_exit_thematic_break(context: &mut CompileContext) { +    context.line_ending_if_needed();      context.tag("<hr />".to_string());  } diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs new file mode 100644 index 0000000..cd5b872 --- /dev/null +++ b/src/construct/block_quote.rs @@ -0,0 +1,58 @@ +//! To do. + +use crate::constant::TAB_SIZE; +use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    // To do: allow arbitrary when code (indented) is turned off. +    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) +} + +fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char('>') => { +            tokenizer.enter(TokenType::BlockQuote); +            cont_before(tokenizer, code) +        } +        _ => cont_before(tokenizer, code), +    } +} + +pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    // To do: allow arbitrary when code (indented) is turned off. +    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), cont_before)(tokenizer, code) +} + +fn cont_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::Char('>') => { +            tokenizer.enter(TokenType::BlockQuotePrefix); +            tokenizer.enter(TokenType::BlockQuoteMarker); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::BlockQuoteMarker); +            (State::Fn(Box::new(cont_after)), None) +        } +        _ => (State::Nok, None), +    } +} + +fn cont_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    match code { +        Code::VirtualSpace | Code::Char('\t' | ' ') => { +            tokenizer.enter(TokenType::BlockQuotePrefixWhitespace); +            tokenizer.consume(code); +            tokenizer.exit(TokenType::BlockQuotePrefixWhitespace); +            tokenizer.exit(TokenType::BlockQuotePrefix); +            (State::Ok, None) +        } +        _ => { +            tokenizer.exit(TokenType::BlockQuotePrefix); +            (State::Ok, Some(vec![code])) +        } +    } +} + +pub fn end() -> Vec<TokenType> { +    vec![TokenType::BlockQuote] +} diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 211434f..440baa8 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -60,7 +60,7 @@  use crate::constant::TAB_SIZE;  use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};  use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::edit_map::EditMap; +use crate::util::{edit_map::EditMap, skip::opt_back as skip_opt_back};  /// Kind of underline.  #[derive(Debug, Clone, PartialEq)] @@ -116,11 +116,26 @@ impl Kind {  /// ```  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      let index = tokenizer.events.len(); -    let paragraph_before = index > 3 -        && tokenizer.events[index - 1].token_type == TokenType::LineEnding -        && tokenizer.events[index - 3].token_type == TokenType::Paragraph; - -    if paragraph_before { +    let previous = if index > 1 { +        skip_opt_back( +            &tokenizer.events, +            index - 1, +            &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix], +        ) +    } else { +        0 +    }; +    let previous = skip_opt_back(&tokenizer.events, previous, &[TokenType::LineEnding]); +    let paragraph_before = +        previous > 1 && tokenizer.events[previous].token_type == TokenType::Paragraph; + +    println!( +        "setext-start: {:?} {:?} {:?}", +        tokenizer.interrupt, tokenizer.lazy, paragraph_before +    ); + +    // Require a paragraph before and do not allow on a lazy line. +    if paragraph_before && !tokenizer.lazy {          // To do: allow arbitrary when code (indented) is turned off.          tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)      } else { diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 66b2a3c..936ecf6 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -64,6 +64,7 @@  pub mod attention;  pub mod autolink;  pub mod blank_line; +pub mod block_quote;  pub mod character_escape;  pub mod character_reference;  pub mod code_fenced; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 4f5e662..ace174f 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -35,7 +35,7 @@  use crate::tokenizer::{      Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer,  }; -use crate::util::edit_map::EditMap; +use crate::util::{edit_map::EditMap, skip::opt as skip_opt};  /// Before a paragraph.  /// @@ -90,19 +90,27 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {          if event.event_type == EventType::Enter && event.token_type == TokenType::Paragraph {              // Exit:Paragraph              let mut exit_index = index + 3; +            let mut enter_next_index = +                skip_opt(&tokenizer.events, exit_index + 1, &[TokenType::LineEnding]);              // Enter:Paragraph -            let mut enter_next_index = exit_index + 3; +            enter_next_index = skip_opt( +                &tokenizer.events, +                enter_next_index, +                &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix], +            );              // Find future `Paragraphs`. -            // There will be `LineEnding` between. -            while enter_next_index < len +            while enter_next_index < tokenizer.events.len()                  && tokenizer.events[enter_next_index].token_type == TokenType::Paragraph              {                  // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph. -                edit_map.add(exit_index, 4, vec![]); +                edit_map.add(exit_index, 3, vec![]); + +                // Remove Enter:Paragraph. +                edit_map.add(enter_next_index, 1, vec![]);                  // Add Exit:LineEnding position info to Exit:Data. -                let line_ending_exit = &tokenizer.events[enter_next_index - 1]; +                let line_ending_exit = &tokenizer.events[exit_index + 2];                  let line_ending_point = line_ending_exit.point.clone();                  let line_ending_index = line_ending_exit.index;                  let data_exit = &mut tokenizer.events[exit_index - 1]; @@ -117,7 +125,13 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {                  // Potential next start.                  exit_index = enter_next_index + 3; -                enter_next_index = exit_index + 3; +                enter_next_index = +                    skip_opt(&tokenizer.events, exit_index + 1, &[TokenType::LineEnding]); +                enter_next_index = skip_opt( +                    &tokenizer.events, +                    enter_next_index, +                    &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix], +                );              }              // Move to `Exit:Paragraph`. diff --git a/src/content/document.rs b/src/content/document.rs new file mode 100644 index 0000000..dd5038f --- /dev/null +++ b/src/content/document.rs @@ -0,0 +1,439 @@ +//! The document content type. +//! +//! **Document** represents the containers, such as block quotes and lists, +//! which structure the document and contain other sections. +//! +//! The constructs found in flow are: +//! +//! *   [Block quote][crate::construct::block_quote] +//! *   List + +use crate::construct::block_quote::{ +    cont as block_quote_cont, end as block_quote_end, start as block_quote, +}; +use crate::content::flow::start as flow; +use crate::parser::ParseState; +use crate::subtokenize::subtokenize; +use crate::tokenizer::{ +    Code, Event, EventType, Point, State, StateFn, StateFnResult, TokenType, Tokenizer, +}; +use crate::util::edit_map::EditMap; +use crate::util::{ +    normalize_identifier::normalize_identifier, +    span::{from_exit_event, serialize}, +}; +use std::collections::HashSet; + +struct DocumentInfo { +    continued: usize, +    stack: Vec<String>, +    next: Box<StateFn>, +    last_line_ending_index: Option<usize>, +    map: EditMap, +} + +/// Turn `codes` as the document content type into events. +pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> { +    let mut tokenizer = Tokenizer::new(point, index, parse_state); + +    tokenizer.push(&parse_state.codes, Box::new(start), true); + +    let mut index = 0; +    let mut next_definitions: HashSet<String> = HashSet::new(); + +    while index < tokenizer.events.len() { +        let event = &tokenizer.events[index]; + +        if event.event_type == EventType::Exit +            && event.token_type == TokenType::DefinitionLabelString +        { +            next_definitions.insert(normalize_identifier( +                serialize( +                    &parse_state.codes, +                    &from_exit_event(&tokenizer.events, index), +                    false, +                ) +                .as_str(), +            )); +        } + +        index += 1; +    } + +    let mut result = (tokenizer.events, false); + +    parse_state.definitions = next_definitions; + +    while !result.1 { +        result = subtokenize(result.0, parse_state); +    } + +    result.0 +} + +fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    let info = DocumentInfo { +        continued: 0, +        stack: vec![], +        next: Box::new(flow), +        last_line_ending_index: None, +        map: EditMap::new(), +    }; +    before(tokenizer, code, info) +} + +fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { +    println!("before: check existing open containers"); +    // First we iterate through the open blocks, starting with the root +    // document, and descending through last children down to the last open +    // block. +    // Each block imposes a condition that the line must satisfy if the block +    // is to remain open. +    // For example, a block quote requires a `>` character. +    // A paragraph requires a non-blank line. +    // In this phase we may match all or just some of the open blocks. +    // But we cannot close unmatched blocks yet, because we may have a lazy +    // continuation line. +    if info.continued < info.stack.len() { +        let name = &info.stack[info.continued]; +        // To do: list. +        let cont = if name == "blockquote" { +            block_quote_cont +        } else { +            unreachable!("todo: cont construct {:?}", name) +        }; + +        // To do: state? + +        tokenizer.attempt(cont, move |ok| { +            if ok { +                Box::new(|t, c| document_continue(t, c, info)) +            } else { +                Box::new(|t, c| check_new_containers(t, c, info)) +            } +        })(tokenizer, code) +    } else { +        // Done. +        check_new_containers(tokenizer, code, info) +    } +} + +fn document_continue( +    tokenizer: &mut Tokenizer, +    code: Code, +    mut info: DocumentInfo, +) -> StateFnResult { +    println!("document_continue"); +    info.continued += 1; + +    println!("  to do: close flow sometimes?"); +    // // Note: this field is called `_closeFlow` but it also closes containers. +    // // Perhaps a good idea to rename it but it’s already used in the wild by +    // // extensions. +    // if (self.containerState._closeFlow) { +    //   self.containerState._closeFlow = undefined + +    //   if (childFlow) { +    //     closeFlow() +    //   } + +    //   // Note: this algorithm for moving events around is similar to the +    //   // algorithm when dealing with lazy lines in `writeToChild`. +    //   const indexBeforeExits = self.events.length +    //   let indexBeforeFlow = indexBeforeExits +    //   /** @type {Point|undefined} */ +    //   let point + +    //   // Find the flow chunk. +    //   while (indexBeforeFlow--) { +    //     if ( +    //       self.events[indexBeforeFlow][0] === 'exit' && +    //       self.events[indexBeforeFlow][1].type === types.chunkFlow +    //     ) { +    //       point = self.events[indexBeforeFlow][1].end +    //       break +    //     } +    //   } + +    //   assert(point, 'could not find previous flow chunk') + +    let size = info.continued; +    exit_containers(tokenizer, &mut info, size); + +    //   // Fix positions. +    //   let index = indexBeforeExits + +    //   while (index < self.events.length) { +    //     self.events[index][1].end = Object.assign({}, point) +    //     index++ +    //   } + +    //   // Inject the exits earlier (they’re still also at the end). +    //   splice( +    //     self.events, +    //     indexBeforeFlow + 1, +    //     0, +    //     self.events.slice(indexBeforeExits) +    //   ) + +    //   // Discard the duplicate exits. +    //   self.events.length = index + +    //   return checkNewContainers(code) +    // } + +    before(tokenizer, code, info) +} +// documentContinue + +fn check_new_containers( +    tokenizer: &mut Tokenizer, +    code: Code, +    info: DocumentInfo, +) -> StateFnResult { +    println!("check_new_containers"); +    // Next, after consuming the continuation markers for existing blocks, we +    // look for new block starts (e.g. `>` for a block quote). +    // If we encounter a new block start, we close any blocks unmatched in +    // step 1 before creating the new block as a child of the last matched +    // block. +    if info.continued == info.stack.len() { +        println!("  to do: concrete? interrupt?"); +        //   // No need to `check` whether there’s a container, of `exitContainers` +        //   // would be moot. +        //   // We can instead immediately `attempt` to parse one. +        //   if (!childFlow) { +        //     return documentContinued(code) +        //   } + +        //   // If we have concrete content, such as block HTML or fenced code, +        //   // we can’t have containers “pierce” into them, so we can immediately +        //   // start. +        //   if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) { +        //     return flowStart(code) +        //   } + +        //   // If we do have flow, it could still be a blank line, +        //   // but we’d be interrupting it w/ a new container if there’s a current +        //   // construct. +        //   self.interrupt = Boolean( +        //     childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack +        //   ) +    } + +    // Check if there is a new container. +    // To do: list. +    tokenizer.attempt(block_quote, move |ok| { +        if ok { +            Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) +        } else { +            Box::new(|t, c| there_is_no_new_container(t, c, info)) +        } +    })(tokenizer, code) +} + +fn there_is_a_new_container( +    tokenizer: &mut Tokenizer, +    code: Code, +    mut info: DocumentInfo, +    name: String, +) -> StateFnResult { +    println!("there_is_a_new_container"); +    println!("  todo: close_flow"); +    // if (childFlow) closeFlow() +    let size = info.continued; +    exit_containers(tokenizer, &mut info, size); +    info.stack.push(name); +    info.continued += 1; +    document_continued(tokenizer, code, info) +} + +/// Exit open containers. +fn exit_containers(tokenizer: &mut Tokenizer, info: &mut DocumentInfo, size: usize) { +    while info.stack.len() > size { +        let name = info.stack.pop().unwrap(); + +        // To do: list. +        let end = if name == "blockquote" { +            block_quote_end +        } else { +            unreachable!("todo: cont {:?}", name) +        }; + +        // To do: improve below code. +        let insert_index = if let Some(index) = info.last_line_ending_index { +            index +        } else { +            tokenizer.events.len() +        }; +        let eol_point = if let Some(index) = info.last_line_ending_index { +            tokenizer.events[index].point.clone() +        } else { +            tokenizer.point.clone() +        }; +        let eol_index = if let Some(index) = info.last_line_ending_index { +            tokenizer.events[index].index +        } else { +            tokenizer.index +        }; + +        let token_types = end(); + +        let mut index = 0; +        while index < token_types.len() { +            let token_type = &token_types[index]; + +            info.map.add( +                insert_index, +                0, +                vec![Event { +                    event_type: EventType::Exit, +                    token_type: token_type.clone(), +                    point: eol_point.clone(), +                    index: eol_index, +                    previous: None, +                    next: None, +                    content_type: None, +                }], +            ); + +            let mut stack_index = tokenizer.stack.len(); + +            while stack_index > 0 { +                stack_index -= 1; + +                if tokenizer.stack[stack_index] == *token_type { +                    break; +                } +            } + +            assert_eq!( +                tokenizer.stack[stack_index], *token_type, +                "expected token type" +            ); +            tokenizer.stack.remove(stack_index); + +            index += 1; +        } +    } +} + +fn there_is_no_new_container( +    tokenizer: &mut Tokenizer, +    code: Code, +    info: DocumentInfo, +) -> StateFnResult { +    let lazy = info.continued != info.stack.len(); +    tokenizer.lazy = lazy; +    println!("there is no new container"); +    if lazy { +        println!( +            "  This line will be lazy. Depending on what is parsed now, we need to close containers before?" +        ); +    } +    // lineStartOffset = self.now().offset +    flow_start(tokenizer, code, info) +} + +fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { +    println!("document_continued"); + +    // Try new containers. +    // To do: list. +    tokenizer.attempt(block_quote, |ok| { +        if ok { +            Box::new(|t, c| container_continue(t, c, info)) +        } else { +            Box::new(|t, c| { +                // To do: this looks like a bug? +                t.lazy = false; +                flow_start(t, c, info) +            }) +        } +    })(tokenizer, code) +} + +fn container_continue( +    tokenizer: &mut Tokenizer, +    code: Code, +    mut info: DocumentInfo, +) -> StateFnResult { +    println!("container_continue"); +    // assert( +    //   self.currentConstruct, +    //   'expected `currentConstruct` to be defined on tokenizer' +    // ) +    // assert( +    //   self.containerState, +    //   'expected `containerState` to be defined on tokenizer' +    // ) +    info.continued += 1; +    // To do: add to stack? +    // stack.push([self.currentConstruct, self.containerState]) +    // Try another. +    document_continued(tokenizer, code, info) +} + +fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { +    println!("flow_start"); +    let next = info.next; +    info.next = Box::new(flow); // This is weird but Rust needs a function there. + +    let size = info.continued; +    exit_containers(tokenizer, &mut info, size); + +    tokenizer.go_until(next, eof_eol, move |(state, remainder)| { +        ( +            State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))), +            remainder, +        ) +    })(tokenizer, code) +} + +fn flow_end( +    tokenizer: &mut Tokenizer, +    code: Code, +    mut info: DocumentInfo, +    result: State, +) -> StateFnResult { +    println!("flow_end"); +    let was_lazy = tokenizer.lazy; + +    if was_lazy { +        println!( +            "this line was lazy. Depeding on what was parsed, we need to exit containers after it?" +        ); +    } + +    info.continued = 0; + +    // To do: blank lines? Other things? +    if tokenizer.events.len() > 2 +        && tokenizer.events[tokenizer.events.len() - 1].token_type == TokenType::LineEnding +    { +        info.last_line_ending_index = Some(tokenizer.events.len() - 2); +    } else { +        info.last_line_ending_index = None; +    } + +    match result { +        State::Ok => { +            println!("State::Ok"); +            exit_containers(tokenizer, &mut info, 0); +            tokenizer.events = info.map.consume(&mut tokenizer.events); +            (State::Ok, Some(vec![code])) +        } +        State::Nok => unreachable!("handle nok in `flow`?"), +        State::Fn(func) => { +            info.next = func; +            before(tokenizer, code, info) +        } +    } +} + +fn eof_eol(code: Code) -> bool { +    matches!( +        code, +        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') +    ) +} diff --git a/src/content/flow.rs b/src/content/flow.rs index 74c6a62..f406685 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -26,52 +26,7 @@ use crate::construct::{      html_flow::start as html_flow, paragraph::start as paragraph,      thematic_break::start as thematic_break,  }; -use crate::parser::ParseState; -use crate::subtokenize::subtokenize; -use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::{ -    normalize_identifier::normalize_identifier, -    span::{from_exit_event, serialize}, -}; -use std::collections::HashSet; - -/// Turn `codes` as the flow content type into events. -pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> { -    let mut tokenizer = Tokenizer::new(point, index, parse_state); -    tokenizer.push(&parse_state.codes, Box::new(start), true); -    let mut next_definitions: HashSet<String> = HashSet::new(); - -    let mut index = 0; - -    while index < tokenizer.events.len() { -        let event = &tokenizer.events[index]; - -        if event.event_type == EventType::Exit -            && event.token_type == TokenType::DefinitionLabelString -        { -            next_definitions.insert(normalize_identifier( -                serialize( -                    &parse_state.codes, -                    &from_exit_event(&tokenizer.events, index), -                    false, -                ) -                .as_str(), -            )); -        } - -        index += 1; -    } - -    let mut result = (tokenizer.events, false); - -    parse_state.definitions = next_definitions; - -    while !result.1 { -        result = subtokenize(result.0, parse_state); -    } - -    result.0 -} +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};  /// Before flow.  /// @@ -83,7 +38,7 @@ pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Eve  /// |    bravo  /// |***  /// ``` -fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          Code::None => (State::Ok, None),          _ => tokenizer.attempt(blank_line, |ok| { diff --git a/src/content/mod.rs b/src/content/mod.rs index ae8ad83..af40cc0 100644 --- a/src/content/mod.rs +++ b/src/content/mod.rs @@ -1,5 +1,11 @@  //! Content types found in markdown. +//! +//! *   [document][document] +//! *   [flow][flow] +//! *   [string][string] +//! *   [text][text] +pub mod document;  pub mod flow;  pub mod string;  pub mod text; diff --git a/src/parser.rs b/src/parser.rs index 69dd355..b1fd4fd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@  use std::collections::HashSet;  // To do: this should start with `containers`, when they’re done. -use crate::content::flow::flow; +use crate::content::document::document;  use crate::tokenizer::{Code, Event, Point};  use crate::util::codes::parse as parse_codes; @@ -27,7 +27,7 @@ pub fn parse(value: &str) -> (Vec<Event>, ParseState) {          definitions: HashSet::new(),      }; -    let events = flow( +    let events = document(          &mut parse_state,          Point {              line: 1, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8c11a68..cbcc464 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1702,6 +1702,10 @@ pub enum TokenType {      ///      /// > 👉 **Note**: this is used while parsing but compiled away.      AttentionSequence, +    BlockQuote, +    BlockQuoteMarker, +    BlockQuotePrefix, +    BlockQuotePrefixWhitespace,  }  /// Embedded content type. @@ -1841,6 +1845,7 @@ struct InternalState {  // #[derive(Debug)]  /// A tokenizer itself. +#[allow(clippy::struct_excessive_bools)]  pub struct Tokenizer<'a> {      column_start: HashMap<usize, usize>,      /// Track whether a character is expected to be consumed, and whether it’s @@ -1855,15 +1860,15 @@ pub struct Tokenizer<'a> {      /// Hierarchy of semantic labels.      ///      /// Tracked to make sure everything’s valid. -    stack: Vec<TokenType>, +    pub stack: Vec<TokenType>,      /// Previous character code.      pub previous: Code,      /// Current character code.      current: Code,      /// `index` in codes of the current code. -    index: usize, +    pub index: usize,      /// Current relative and absolute place in the file. -    point: Point, +    pub point: Point,      /// List of attached resolvers, which will be called when done feeding,      /// to clean events.      resolvers: Vec<Box<Resolver>>, @@ -1887,6 +1892,7 @@ pub struct Tokenizer<'a> {      ///      /// Used when tokenizing [flow content][crate::content::flow].      pub interrupt: bool, +    pub lazy: bool,  }  impl<'a> Tokenizer<'a> { @@ -1907,6 +1913,7 @@ impl<'a> Tokenizer<'a> {              label_start_list_loose: vec![],              media_list: vec![],              interrupt: false, +            lazy: false,              resolvers: vec![],              resolver_ids: vec![],          } @@ -2120,7 +2127,8 @@ impl<'a> Tokenizer<'a> {              state_fn,              until,              vec![], -            |result: (Vec<Code>, Vec<Code>), _ok, _tokenizer: &mut Tokenizer, state| { +            |result: (Vec<Code>, Vec<Code>), _ok, tokenizer: &mut Tokenizer, state| { +                tokenizer.consumed = true;                  done(check_statefn_result((state, Some(result.1))))              },          ) @@ -2262,6 +2270,20 @@ fn attempt_impl(      done: impl FnOnce((Vec<Code>, Vec<Code>), bool, &mut Tokenizer, State) -> StateFnResult + 'static,  ) -> Box<StateFn> {      Box::new(|tokenizer, code| { +        // To do: `pause` is currently used after the code. +        // Should it be before? +        // How to match `eof`? +        if !codes.is_empty() && pause(tokenizer.previous) { +            tokenizer.consumed = true; +            println!("pause!: {:?}", (codes.clone(), vec![code])); +            return done( +                (codes, vec![code]), +                false, +                tokenizer, +                State::Fn(Box::new(state)), +            ); +        } +          let (next, remainder) = check_statefn_result(state(tokenizer, code));          match code { @@ -2278,14 +2300,6 @@ fn attempt_impl(              );          } -        // To do: `pause` is currently used after the code. -        // Should it be before? -        if pause(code) { -            tokenizer.consumed = true; -            let remaining = if let Some(x) = remainder { x } else { vec![] }; -            return done((codes, remaining), false, tokenizer, next); -        } -          match next {              State::Ok => {                  let remaining = if let Some(x) = remainder { x } else { vec![] }; diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index ae627c1..f67a8b9 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -48,6 +48,7 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, isize)]) {  /// Make it easy to insert and remove things while being performant and keeping  /// links in check. +#[derive(Debug)]  pub struct EditMap {      /// Whether this map was consumed already.      consumed: bool, diff --git a/src/util/mod.rs b/src/util/mod.rs index d1a0e01..ae1add6 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -6,4 +6,5 @@ pub mod edit_map;  pub mod encode;  pub mod normalize_identifier;  pub mod sanitize_uri; +pub mod skip;  pub mod span; diff --git a/src/util/skip.rs b/src/util/skip.rs new file mode 100644 index 0000000..2c4198a --- /dev/null +++ b/src/util/skip.rs @@ -0,0 +1,44 @@ +use crate::tokenizer::{Event, TokenType}; + +/// To do. +pub fn opt(events: &[Event], index: usize, token_types: &[TokenType]) -> usize { +    skip_opt_with_direction(events, index, token_types, true) +} + +/// To do. +pub fn opt_back(events: &[Event], index: usize, token_types: &[TokenType]) -> usize { +    skip_opt_with_direction(events, index, token_types, false) +} + +/// To do. +fn skip_opt_with_direction( +    events: &[Event], +    index: usize, +    token_types: &[TokenType], +    forward: bool, +) -> usize { +    let mut index = index; + +    while index < events.len() { +        let current = &events[index].token_type; + +        if !token_types.contains(current) { +            break; +        } + +        // assert_eq!(events[index].event_type, EventType::Enter); +        index = if forward { index + 1 } else { index - 1 }; + +        loop { +            if events[index].token_type == *current { +                // assert_eq!(events[index].event_type, EventType::Exit); +                index = if forward { index + 1 } else { index - 1 }; +                break; +            } + +            index = if forward { index + 1 } else { index - 1 }; +        } +    } + +    index +} | 
