Diffstat
-rw-r--r--  src/construct/code_fenced.rs                    44
-rw-r--r--  src/construct/code_indented.rs                  23
-rw-r--r--  src/construct/html_flow.rs                      60
-rw-r--r--  src/construct/mod.rs                             1
-rw-r--r--  src/construct/partial_non_lazy_continuation.rs  26
-rw-r--r--  src/content/document.rs                        207
-rw-r--r--  src/tokenizer.rs                                 1
7 files changed, 165 insertions, 197 deletions
diff --git a/src/construct/code_fenced.rs b/src/construct/code_fenced.rs
index c7b2334..18beb92 100644
--- a/src/construct/code_fenced.rs
+++ b/src/construct/code_fenced.rs
@@ -102,7 +102,10 @@
 //! [html-code]: https://html.spec.whatwg.org/multipage/text-level-semantics.html#the-code-element
 
 use crate::constant::{CODE_FENCED_SEQUENCE_SIZE_MIN, TAB_SIZE};
-use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max};
+use crate::construct::{
+    partial_non_lazy_continuation::start as partial_non_lazy_continuation,
+    partial_space_or_tab::{space_or_tab, space_or_tab_min_max},
+};
 use crate::token::Token;
 use crate::tokenizer::{Code, ContentType, State, StateFnResult, Tokenizer};
 use crate::util::span::from_exit_event;
@@ -376,22 +379,35 @@ fn meta(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
 fn at_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
     let clone = info.clone();
 
-    match code {
-        Code::None => after(tokenizer, code, info),
-        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => tokenizer.attempt(
-            |t, c| close_begin(t, c, info),
-            |ok| {
-                if ok {
-                    Box::new(|t, c| after(t, c, clone))
-                } else {
-                    Box::new(|t, c| content_before(t, c, clone))
-                }
-            },
-        )(tokenizer, code),
-        _ => unreachable!("expected eof/eol"),
+    if tokenizer.lazy {
+        after(tokenizer, code, info)
+    } else {
+        tokenizer.check(partial_non_lazy_continuation, |ok| {
+            if ok {
+                Box::new(move |t, c| at_non_lazy_break(t, c, clone))
+            } else {
+                Box::new(move |t, c| after(t, c, clone))
+            }
+        })(tokenizer, code)
     }
 }
 
+/// To do.
+fn at_non_lazy_break(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    let clone = info.clone();
+
+    tokenizer.attempt(
+        |t, c| close_begin(t, c, info),
+        |ok| {
+            if ok {
+                Box::new(|t, c| after(t, c, clone))
+            } else {
+                Box::new(|t, c| content_before(t, c, clone))
+            }
+        },
+    )(tokenizer, code)
+}
+
 /// Before a closing fence, at the line ending.
 ///
 /// ```markdown
diff --git a/src/construct/code_indented.rs b/src/construct/code_indented.rs
index 8966249..74a0938 100644
--- a/src/construct/code_indented.rs
+++ b/src/construct/code_indented.rs
@@ -128,17 +128,20 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 ///     cd
 /// ```
 fn further_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    // To do: `nok` if lazy line.
-    match code {
-        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
-            tokenizer.enter(Token::LineEnding);
-            tokenizer.consume(code);
-            tokenizer.exit(Token::LineEnding);
-            (State::Fn(Box::new(further_start)), None)
+    if tokenizer.lazy {
+        (State::Nok, None)
+    } else {
+        match code {
+            Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+                tokenizer.enter(Token::LineEnding);
+                tokenizer.consume(code);
+                tokenizer.exit(Token::LineEnding);
+                (State::Fn(Box::new(further_start)), None)
+            }
+            _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
+                Box::new(if ok { further_end } else { further_begin })
+            })(tokenizer, code),
         }
-        _ => tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
-            Box::new(if ok { further_end } else { further_begin })
-        })(tokenizer, code),
     }
 }
 
diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs
index f30db3f..a8b1efc 100644
--- a/src/construct/html_flow.rs
+++ b/src/construct/html_flow.rs
@@ -100,7 +100,9 @@
 
 use crate::constant::{HTML_BLOCK_NAMES, HTML_RAW_NAMES, HTML_RAW_SIZE_MAX, TAB_SIZE};
 use crate::construct::{
-    blank_line::start as blank_line, partial_space_or_tab::space_or_tab_min_max,
+    blank_line::start as blank_line,
+    partial_non_lazy_continuation::start as partial_non_lazy_continuation,
+    partial_space_or_tab::space_or_tab_min_max,
 };
 use crate::token::Token;
 use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
@@ -425,7 +427,7 @@ fn tag_name(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnRes
                 info.kind = Kind::Complete;
 
                 // Do not support complete HTML when interrupting.
-                if tokenizer.interrupt {
+                if tokenizer.interrupt && !tokenizer.lazy {
                     (State::Nok, None)
                 } else if info.start_tag {
                     complete_attribute_name_before(tokenizer, code, info)
@@ -805,25 +807,52 @@ fn continuation_at_line_ending(tokenizer: &mut Tokenizer, code: Code, info: Info
 /// asd
 /// ```
 fn html_continue_start(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    tokenizer.check(partial_non_lazy_continuation, |ok| {
+        let func = if ok {
+            html_continue_start_non_lazy
+        } else {
+            html_continue_after
+        };
+        Box::new(move |t, c| func(t, c, info))
+    })(tokenizer, code)
+}
+
+/// To do.
+#[allow(clippy::needless_pass_by_value)]
+fn html_continue_after(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    tokenizer.exit(Token::HtmlFlow);
+    // Feel free to interrupt.
+    tokenizer.interrupt = false;
+    // Restore previous `concrete`.
+    tokenizer.concrete = info.concrete;
+    (State::Ok, Some(vec![code]))
+}
+
+/// To do.
+fn html_continue_start_non_lazy(
+    tokenizer: &mut Tokenizer,
+    code: Code,
+    info: Info,
+) -> StateFnResult {
     match code {
-        Code::None => {
-            tokenizer.exit(Token::HtmlFlow);
-            // Feel free to interrupt.
-            tokenizer.interrupt = false;
-            // Restore previous `concrete`.
-            tokenizer.concrete = info.concrete;
-            (State::Ok, Some(vec![code]))
-        }
-        // To do: do not allow lazy lines.
         Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.enter(Token::LineEnding);
             tokenizer.consume(code);
             tokenizer.exit(Token::LineEnding);
             (
-                State::Fn(Box::new(|t, c| html_continue_start(t, c, info))),
+                State::Fn(Box::new(|t, c| html_continue_before(t, c, info))),
                 None,
             )
         }
+        _ => unreachable!("expected eol"),
+    }
+}
+
+fn html_continue_before(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnResult {
+    match code {
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            html_continue_start(tokenizer, code, info)
+        }
         _ => {
             tokenizer.enter(Token::HtmlFlowData);
             continuation(tokenizer, code, info)
@@ -976,12 +1005,7 @@ fn continuation_close(tokenizer: &mut Tokenizer, code: Code, info: Info) -> Stat
     match code {
         Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
             tokenizer.exit(Token::HtmlFlowData);
-            tokenizer.exit(Token::HtmlFlow);
-            // Feel free to interrupt.
-            tokenizer.interrupt = false;
-            // Restore previous `concrete`.
-            tokenizer.concrete = info.concrete;
-            (State::Ok, Some(vec![code]))
+            html_continue_after(tokenizer, code, info)
         }
         _ => {
             tokenizer.consume(code);
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index ac830ef..06ff4e9 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -84,6 +84,7 @@ pub mod paragraph;
 pub mod partial_data;
 pub mod partial_destination;
 pub mod partial_label;
+pub mod partial_non_lazy_continuation;
 pub mod partial_space_or_tab;
 pub mod partial_title;
 pub mod partial_whitespace;
diff --git a/src/construct/partial_non_lazy_continuation.rs b/src/construct/partial_non_lazy_continuation.rs
new file mode 100644
index 0000000..7964de3
--- /dev/null
+++ b/src/construct/partial_non_lazy_continuation.rs
@@ -0,0 +1,26 @@
+//! To do.
+
+use crate::token::Token;
+use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
+
+/// To do.
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(Token::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(Token::LineEnding);
+            (State::Fn(Box::new(non_lazy_after)), None)
+        }
+        _ => (State::Nok, None),
+    }
+}
+
+/// To do.
+fn non_lazy_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    if tokenizer.lazy {
+        (State::Nok, None)
+    } else {
+        (State::Ok, Some(vec![code]))
+    }
+}
diff --git a/src/content/document.rs b/src/content/document.rs
index f093a04..a8ff775 100644
--- a/src/content/document.rs
+++ b/src/content/document.rs
@@ -19,6 +19,7 @@ use crate::tokenizer::{Code, Event, EventType, Point, State, StateFn, StateFnRes
 use crate::util::edit_map::EditMap;
 use crate::util::{
     normalize_identifier::normalize_identifier,
+    skip,
     span::{from_exit_event, serialize},
 };
 use std::collections::HashSet;
@@ -26,6 +27,7 @@ use std::collections::HashSet;
 struct DocumentInfo {
     continued: usize,
     containers_begin_index: usize,
+    paragraph_before: bool,
     inject: Vec<(Vec<Event>, Vec<Event>)>,
     stack: Vec<String>,
     next: Box<StateFn>,
@@ -71,6 +73,7 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec
 fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     let info = DocumentInfo {
         continued: 0,
+        paragraph_before: false,
         inject: vec![],
         containers_begin_index: 0,
         stack: vec![],
@@ -80,7 +83,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 }
 
 fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
-    println!("before: check existing open containers");
+    println!("before");
     // First we iterate through the open blocks, starting with the root
     // document, and descending through last children down to the last open
     // block.
@@ -101,8 +104,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
         };
 
         // To do: state?
-        println!("check existing: {:?}", name);
-
         tokenizer.attempt(cont, move |ok| {
             if ok {
                 Box::new(|t, c| document_continue(t, c, info))
@@ -112,7 +113,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR
         })(tokenizer, code)
     } else {
         // Done.
-        println!("check new:");
        check_new_containers(tokenizer, code, info)
     }
 }
@@ -124,67 +124,8 @@ fn document_continue(
 ) -> StateFnResult {
     println!("document_continue");
     info.continued += 1;
-
-    println!("  to do: close flow sometimes?");
-    // // Note: this field is called `_closeFlow` but it also closes containers.
-    // // Perhaps a good idea to rename it but it’s already used in the wild by
-    // // extensions.
-    // if (self.containerState._closeFlow) {
-    //   self.containerState._closeFlow = undefined
-
-    //   if (childFlow) {
-    //     closeFlow()
-    //   }
-
-    //   // Note: this algorithm for moving events around is similar to the
-    //   // algorithm when dealing with lazy lines in `writeToChild`.
-    //   const indexBeforeExits = self.events.length
-    //   let indexBeforeFlow = indexBeforeExits
-    //   /** @type {Point|undefined} */
-    //   let point
-
-    //   // Find the flow chunk.
-    //   while (indexBeforeFlow--) {
-    //     if (
-    //       self.events[indexBeforeFlow][0] === 'exit' &&
-    //       self.events[indexBeforeFlow][1].type === types.chunkFlow
-    //     ) {
-    //       point = self.events[indexBeforeFlow][1].end
-    //       break
-    //     }
-    //   }
-
-    //   assert(point, 'could not find previous flow chunk')
-
-    let size = info.continued;
-    info = exit_containers(tokenizer, info, size, true);
-    tokenizer.expect(code, true);
-
-    //   // Fix positions.
-    //   let index = indexBeforeExits
-
-    //   while (index < self.events.length) {
-    //     self.events[index][1].end = Object.assign({}, point)
-    //     index++
-    //   }
-
-    //   // Inject the exits earlier (they’re still also at the end).
-    //   splice(
-    //     self.events,
-    //     indexBeforeFlow + 1,
-    //     0,
-    //     self.events.slice(indexBeforeExits)
-    //   )
-
-    //   // Discard the duplicate exits.
-    //   self.events.length = index
-
-    //   return checkNewContainers(code)
-    // }
-
     before(tokenizer, code, info)
 }
-// documentContinue
 
 fn check_new_containers(
     tokenizer: &mut Tokenizer,
@@ -198,18 +139,11 @@ fn check_new_containers(
     // step 1 before creating the new block as a child of the last matched
     // block.
     if info.continued == info.stack.len() {
-        //   // No need to `check` whether there’s a container, if `exitContainers`
-        //   // would be moot.
-        //   // We can instead immediately `attempt` to parse one.
-        //   if (!childFlow) {
-        //     return documentContinued(code)
-        //   }
-
         // If we have concrete content, such as block HTML or fenced code,
         // we can’t have containers “pierce” into them, so we can immediately
        // start.
         if tokenizer.concrete {
-            println!("  concrete!");
+            println!("  concrete");
             return there_is_no_new_container(tokenizer, code, info);
         }
 
@@ -239,7 +173,6 @@ fn there_is_a_new_container(
     mut info: DocumentInfo,
     name: String,
 ) -> StateFnResult {
-    println!("there_is_a_new_container");
     let size = info.continued;
     info = exit_containers(tokenizer, info, size, true);
     tokenizer.expect(code, true);
@@ -253,15 +186,12 @@ fn there_is_a_new_container(
         unreachable!("todo: cont {:?}", name)
     };
 
-    println!("creating exit (a) for `{:?}`", name);
-
     let token_types = end();
 
     let mut index = 0;
     while index < token_types.len() {
         let token_type = &token_types[index];
         let mut stack_index = tokenizer.stack.len();
-        println!("stack: {:?}", tokenizer.stack);
         let mut found = false;
 
         while stack_index > 0 {
@@ -278,11 +208,8 @@ fn there_is_a_new_container(
         index += 1;
     }
 
-    println!("add to stack: {:?}", name);
     info.stack.push(name);
-
-    info.continued += 1;
-    document_continued(tokenizer, code, info)
+    document_continue(tokenizer, code, info)
 }
 
 /// Exit open containers.
@@ -295,8 +222,7 @@ fn exit_containers(
     let mut exits: Vec<Event> = vec![];
 
     if info.stack.len() > size {
-        // To do: inject these somewhere? Fix positions?
-        println!("closing flow. To do: are these resulting exits okay?");
+        println!("closing flow");
         let index = tokenizer.events.len();
         let result = tokenizer.flush(info.next);
         info.next = Box::new(flow); // This is weird but Rust needs a function there.
@@ -314,7 +240,6 @@ fn exit_containers(
 
         let mut add = tokenizer.events.drain(index..end).collect::<Vec<_>>();
 
-        println!("evs: {:#?}", add);
         exits.append(&mut add);
 
         println!("  setting `interrupt: false`");
@@ -331,8 +256,6 @@ fn exit_containers(
             unreachable!("todo: cont {:?}", name)
         };
 
-        println!("creating exit (b) for `{:?}`", name);
-
         let token_types = end();
 
         let mut index = 0;
@@ -359,8 +282,6 @@ fn exit_containers(
         info.inject[index].1.append(&mut exits);
     }
 
-    // println!("exits: {:?} {:?}", info.inject, exits);
-
     info
 }
 
@@ -369,58 +290,15 @@ fn there_is_no_new_container(
     code: Code,
     info: DocumentInfo,
 ) -> StateFnResult {
-    let lazy = info.continued != info.stack.len();
-    tokenizer.lazy = lazy;
-    println!("there is no new container");
-    if lazy {
-        println!(
-            "  This line will be lazy. Depending on what is parsed now, we need to close containers before?"
-        );
-    }
+    println!("there_is_no_new_container");
+    tokenizer.lazy = info.continued != info.stack.len();
     // lineStartOffset = self.now().offset
     flow_start(tokenizer, code, info)
 }
 
-fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult {
-    println!("document_continued");
-
-    // Try new containers.
-    // To do: list.
-    tokenizer.attempt(block_quote, |ok| {
-        if ok {
-            Box::new(|t, c| container_continue(t, c, info))
-        } else {
-            Box::new(|t, c| {
-                // To do: this looks like a bug?
-                t.lazy = false;
-                flow_start(t, c, info)
-            })
-        }
-    })(tokenizer, code)
-}
-
-fn container_continue(
-    tokenizer: &mut Tokenizer,
-    code: Code,
-    mut info: DocumentInfo,
-) -> StateFnResult {
-    println!("container_continue");
-    // assert(
-    //   self.currentConstruct,
-    //   'expected `currentConstruct` to be defined on tokenizer'
-    // )
-    // assert(
-    //   self.containerState,
-    //   'expected `containerState` to be defined on tokenizer'
-    // )
-    info.continued += 1;
-    // To do: add to stack?
-    // stack.push([self.currentConstruct, self.containerState])
-    // Try another.
-    document_continued(tokenizer, code, info)
-}
-
 fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult {
+    println!("flow_start");
+
     let containers = tokenizer
         .events
         .drain(info.containers_begin_index..)
@@ -428,11 +306,6 @@ fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) ->
 
     info.inject.push((containers, vec![]));
 
-    // Exit containers.
-    let size = info.continued;
-    info = exit_containers(tokenizer, info, size, true);
-    tokenizer.expect(code, true);
-
     // Define start.
     let point = tokenizer.point.clone();
     tokenizer.define_skip(&point);
@@ -440,9 +313,7 @@ fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) ->
     let state = info.next;
     info.next = Box::new(flow); // This is weird but Rust needs a function there.
 
-    println!("flow_start:before");
     tokenizer.go_until(state, eof_eol, move |(state, remainder)| {
-        println!("flow_start:after");
         (
            State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))),
             remainder,
@@ -456,31 +327,62 @@ fn flow_end(
     mut info: DocumentInfo,
     result: State,
 ) -> StateFnResult {
-    println!("flow_end");
-    let was_lazy = tokenizer.lazy;
+    println!("flow_end: lazy? {:?}", tokenizer.lazy);
 
-    if was_lazy {
-        println!(
-            "this line was lazy. Depeding on what was parsed, we need to exit containers after it?"
-        );
+    let index = tokenizer.events.len();
+    let index = if index > 0 {
+        skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding])
+    } else {
+        0
+    };
+
+    let paragraph = if index > 0 {
+        let ev = &tokenizer.events[index];
+        ev.point.offset + 1 >= tokenizer.point.offset
+            && ev.token_type == Token::Paragraph
+            && !(matches!(
+                tokenizer.previous,
+                Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')
+            ) && matches!(code, Code::None))
+    } else {
+        false
+    };
+
+    let mut continued = info.continued;
+    let size = info.stack.len();
+
+    if tokenizer.lazy {
+        println!("this line was lazy.");
+
+        if info.paragraph_before && paragraph {
+            println!("it was another paragraph, which is allowed.");
+            continued = size;
+        } else {
+            println!(
+                "it was something else (prev: {:?}, cur: {:?}), which is not allowed.",
+                info.paragraph_before, paragraph
+            );
+        }
     }
+
+    // Exit containers.
+    info = exit_containers(tokenizer, info, continued, true);
+    tokenizer.expect(code, true);
+
     info.continued = 0;
+    info.paragraph_before = paragraph;
     info.containers_begin_index = tokenizer.events.len();
 
     match result {
         State::Ok => {
-            println!("State::Ok");
             info = exit_containers(tokenizer, info, 0, false);
             tokenizer.expect(code, true);
 
-            // println!("document:inject: {:?}", info.inject);
-
             let mut map = EditMap::new();
             let mut line_index = 0;
             let mut index = 0;
 
             let add = info.inject[line_index].0.clone();
-            println!("add enters at start: {:?}", add);
             map.add(0, 0, add);
 
             while index < tokenizer.events.len() {
@@ -489,7 +391,6 @@ fn flow_end(
                 if event.token_type == Token::LineEnding
                     || event.token_type == Token::BlankLineEnding
                 {
-                    println!("eol: {:?}", event.point);
                     if event.event_type == EventType::Enter {
                         let mut add = info.inject[line_index].1.clone();
                         let mut deep_index = 0;
@@ -498,12 +399,10 @@ fn flow_end(
                             add[deep_index].index = event.index;
                             deep_index += 1;
                         }
-                        println!("add exits before: {:?}", add);
                         map.add(index, 0, add);
                     } else {
                         line_index += 1;
                         let add = info.inject[line_index].0.clone();
-                        println!("add enters after: {:?}", add);
                         map.add(index + 1, 0, add);
                     }
                 }
@@ -518,12 +417,12 @@ fn flow_end(
                 add[deep_index].index = tokenizer.index;
                 deep_index += 1;
             }
-            println!("add exits at end: {:?}", add);
             map.add(index, 0, add);
             tokenizer.events = map.consume(&mut tokenizer.events);
 
             let mut index = 0;
-            println!("document:inject:ends: {:?}", tokenizer.events.len());
+
+            println!("after: {:?}", tokenizer.events.len());
             while index < tokenizer.events.len() {
                 let event = &tokenizer.events[index];
                 println!(
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 80786ea..163c2bf 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -611,7 +611,6 @@ fn attempt_impl(
         // Should it be before?
         // How to match `eof`?
         if !codes.is_empty() && pause(tokenizer.previous) {
-            println!("pause!: {:?}", (codes.clone(), vec![code]));
            return done(
                 (codes, vec![code]),
                 false,
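Taken together, the commit replaces the earlier "to do" notes with one rule: a lazy line (a line that did not match the continuation markers of all open containers) may only continue a paragraph; in every other case the unmatched containers are closed, and constructs such as fenced code, indented code, and HTML (flow) now refuse lazy continuation outright via the new `partial_non_lazy_continuation` check. The sketch below is a minimal, self-contained model of the decision `flow_end` makes above — the names and plain-value parameters are illustrative, not the crate's API; the real code derives `paragraph` and `paragraph_before` from tokenizer events.

```rust
// A simplified model of the lazy-line rule in `flow_end` (illustrative,
// not the crate's API).

/// How many containers stay open after a line, given `stack_len` open
/// containers of which `continued` matched their markers on this line.
fn containers_after_line(
    lazy: bool,             // line matched fewer markers than are open
    paragraph_before: bool, // previous flow content was a paragraph
    paragraph_now: bool,    // current flow content is a paragraph
    continued: usize,
    stack_len: usize,
) -> usize {
    if lazy && paragraph_before && paragraph_now {
        // A lazy line may only continue a paragraph; in that case every
        // open container stays open.
        stack_len
    } else {
        // Otherwise, containers whose markers did not match are closed.
        continued
    }
}

fn main() {
    // `> a` followed by lazy `b`: the paragraph continues, the quote survives.
    assert_eq!(containers_after_line(true, true, true, 0, 1), 1);
    // `> ~~~` followed by lazy `b`: fenced code cannot be continued lazily.
    assert_eq!(containers_after_line(true, false, false, 0, 1), 0);
    // A fully matched line keeps exactly the matched containers.
    assert_eq!(containers_after_line(false, true, true, 1, 1), 1);
    println!("lazy-line model ok");
}
```

The `partial_non_lazy_continuation` construct added above is the per-construct counterpart of the same rule: consume one line ending, then succeed only when the tokenizer does not flag the next line as lazy.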
