diff options
Diffstat (limited to '')
| -rw-r--r-- | src/compiler.rs | 21 | ||||
| -rw-r--r-- | src/construct/list.rs | 95 | ||||
| -rw-r--r-- | src/content/document.rs | 39 | ||||
| -rw-r--r-- | src/tokenizer.rs | 10 | ||||
| -rw-r--r-- | src/util/skip.rs | 18 | ||||
| -rw-r--r-- | src/util/span.rs | 4 | 
6 files changed, 125 insertions, 62 deletions
| diff --git a/src/compiler.rs b/src/compiler.rs index 37229a4..51c7e2b 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -1150,7 +1150,9 @@ fn on_exit_media(context: &mut CompileContext) {  fn on_exit_paragraph(context: &mut CompileContext) {      let tight = context.tight_stack.last().unwrap_or(&false); -    if !tight { +    if *tight { +        context.slurp_one_line_ending = true; +    } else {          context.tag("</p>".to_string());      }  } @@ -1216,6 +1218,14 @@ fn on_enter_list(context: &mut CompileContext) {              if balance < 3 && event.token_type == Token::BlankLineEnding              // && !(balance == 1 && events[index - 2].token_type == Token::ListItem)              { +                let at_marker = balance == 2 +                    && events[skip::opt_back( +                        events, +                        index - 2, +                        &[Token::BlankLineEnding, Token::SpaceOrTab], +                    )] +                    .token_type +                        == Token::ListItemPrefix;                  let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem;                  let at_empty_list_item = if at_list_item {                      let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]); @@ -1229,7 +1239,7 @@ fn on_enter_list(context: &mut CompileContext) {                      false                  }; -                if !at_list_item || !at_empty_list_item { +                if !at_marker && (!at_list_item || !at_empty_list_item) {                      loose = true;                      break;                  } @@ -1297,7 +1307,12 @@ fn on_exit_list_item(context: &mut CompileContext) {      let before_item = skip::opt_back(          context.events,          context.index - 1, -        &[Token::BlankLineEnding, Token::LineEnding, Token::SpaceOrTab], +        &[ +            Token::BlankLineEnding, +            Token::LineEnding, +            Token::SpaceOrTab, +            Token::BlockQuotePrefix, +        ],      );      let previous = &context.events[before_item];      let tight_paragraph = *tight && previous.token_type == Token::Paragraph; diff --git a/src/construct/list.rs b/src/construct/list.rs index d06eaf0..bab821c 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -99,6 +99,7 @@ impl Kind {  /// To do.  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    tokenizer.enter(Token::ListItem);      // To do: allow arbitrary when code (indented) is turned off.      tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)  } @@ -108,12 +109,10 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      match code {          // Unordered.          Code::Char('*' | '+' | '-') => tokenizer.check(thematic_break, |ok| { -            let func = if ok { nok } else { before_unordered }; -            Box::new(func) +            Box::new(if ok { nok } else { before_unordered })          })(tokenizer, code),          // Ordered.          Code::Char(char) if char.is_ascii_digit() => { -            tokenizer.enter(Token::ListItem);              tokenizer.enter(Token::ListItemPrefix);              tokenizer.enter(Token::ListItemValue);              // To do: `interrupt || !1`? @@ -125,8 +124,6 @@ fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// To do.  fn before_unordered(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    // To do: check if this is a thematic break? -    tokenizer.enter(Token::ListItem);      tokenizer.enter(Token::ListItemPrefix);      marker(tokenizer, code)  } @@ -163,7 +160,6 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      let interrupt = tokenizer.interrupt;      tokenizer.check(blank_line, move |ok| { -        println!("check:blank_line:after {:?} {:?}", ok, interrupt);          let func = if ok {              if interrupt {                  nok @@ -179,9 +175,12 @@ fn marker_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// To do.  fn on_blank(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    if let Some(container) = tokenizer.container.as_mut() { +        container.blank_initial = true; +    } +      // self.containerState.initialBlankLine = true -    // initialSize++ -    prefix_end(tokenizer, code) +    prefix_end(tokenizer, code, true)  }  /// To do. @@ -189,8 +188,11 @@ fn marker_after_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      let interrupt = tokenizer.interrupt;      tokenizer.attempt(list_item_prefix_whitespace, move |ok| {          println!("marker:after:after: {:?} {:?}", ok, interrupt); -        let func = if ok { prefix_end } else { prefix_other }; -        Box::new(func) +        if ok { +            Box::new(|t, c| prefix_end(t, c, false)) +        } else { +            Box::new(prefix_other) +        }      })(tokenizer, code)  } @@ -203,15 +205,25 @@ fn prefix_other(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {              tokenizer.enter(Token::SpaceOrTab);              tokenizer.consume(code);              tokenizer.exit(Token::SpaceOrTab); -            (State::Fn(Box::new(prefix_end)), None) +            (State::Fn(Box::new(|t, c| prefix_end(t, c, false))), None)          }          _ => (State::Nok, None),      }  }  /// To do. -fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    // To do: calculate size. +fn prefix_end(tokenizer: &mut Tokenizer, code: Code, blank: bool) -> StateFnResult { +    let start = skip::to_back( +        &tokenizer.events, +        tokenizer.events.len() - 1, +        &[Token::ListItem], +    ); +    let prefix = tokenizer.index - tokenizer.events[start].index + (if blank { 1 } else { 0 }); + +    if let Some(container) = tokenizer.container.as_mut() { +        container.size = prefix; +    } +      tokenizer.exit(Token::ListItemPrefix);      tokenizer.register_resolver_before("list_item".to_string(), Box::new(resolve));      (State::Ok, Some(vec![code])) @@ -221,14 +233,17 @@ fn prefix_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  fn list_item_prefix_whitespace(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      // To do: check how big this should be?      tokenizer.go( -        space_or_tab_min_max(1, TAB_SIZE - 1), +        space_or_tab_min_max(1, TAB_SIZE),          list_item_prefix_whitespace_after,      )(tokenizer, code)  }  fn list_item_prefix_whitespace_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    // To do: check some stuff? -    (State::Ok, Some(vec![code])) +    if matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) { +        (State::Nok, None) +    } else { +        (State::Ok, Some(vec![code])) +    }  }  /// To do. @@ -240,46 +255,40 @@ fn nok(_tokenizer: &mut Tokenizer, _code: Code) -> StateFnResult {  pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.check(blank_line, |ok| {          println!("cont:check:blank:after: {:?}", ok); -        let func = if ok { blank_cont } else { not_blank_cont }; -        Box::new(func) +        Box::new(if ok { blank_cont } else { not_blank_cont })      })(tokenizer, code)  }  pub fn blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    // self.containerState.furtherBlankLines = -    //   self.containerState.furtherBlankLines || -    //   self.containerState.initialBlankLine +    let mut size = 0; +    if let Some(container) = tokenizer.container.as_ref() { +        size = container.size; + +        if container.blank_initial { +            return (State::Nok, None); +        } +    }      // We have a blank line.      // Still, try to consume at most the items size.      // To do: eat at most `size` whitespace. -    tokenizer.go(space_or_tab_min_max(0, TAB_SIZE), blank_cont_after)(tokenizer, code) -} - -pub fn blank_cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    println!("cont: blank: after"); -    (State::Ok, Some(vec![code])) +    tokenizer.go(space_or_tab_min_max(0, size), cont_after)(tokenizer, code)  }  pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { -    let index = tokenizer.events.len(); -    let currently_blank = -        index > 0 && tokenizer.events[index - 1].token_type == Token::BlankLineEnding; -    let mut further_blank = false; - -    if currently_blank && index > 5 { -        let before = skip::opt_back(&tokenizer.events, index - 3, &[Token::SpaceOrTab]); -        further_blank = tokenizer.events[before].token_type == Token::BlankLineEnding; -    } +    let mut size = 0; -    if further_blank || !matches!(code, Code::VirtualSpace | Code::Char('\t' | ' ')) { -        println!("cont: not blank after further blank, or not blank w/o whitespace"); -        (State::Nok, None) -    } else { -        println!("cont: not blank"); -        // To do: eat exactly `size` whitespace. -        tokenizer.go(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), blank_cont_after)(tokenizer, code) +    if let Some(container) = tokenizer.container.as_mut() { +        container.blank_initial = false; +        size = container.size;      } + +    tokenizer.go(space_or_tab_min_max(size, size), cont_after)(tokenizer, code) +} + +pub fn cont_after(_tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +    println!("cont: blank: after"); +    (State::Ok, Some(vec![code]))  }  /// To do. diff --git a/src/content/document.rs b/src/content/document.rs index f6b8f55..bec0039 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -16,7 +16,9 @@ use crate::content::flow::start as flow;  use crate::parser::ParseState;  use crate::subtokenize::subtokenize;  use crate::token::Token; -use crate::tokenizer::{Code, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer}; +use crate::tokenizer::{ +    Code, ContainerState, Event, EventType, Point, State, StateFn, StateFnResult, Tokenizer, +};  use crate::util::edit_map::EditMap;  use crate::util::{      normalize_identifier::normalize_identifier, @@ -37,6 +39,7 @@ struct DocumentInfo {      paragraph_before: bool,      inject: Vec<(Vec<Event>, Vec<Event>)>,      stack: Vec<Container>, +    states: Vec<ContainerState>,      stack_close: Vec<Container>,      next: Box<StateFn>,  } @@ -86,6 +89,7 @@ fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {          next: Box::new(flow),          paragraph_before: false,          stack: vec![], +        states: vec![],          stack_close: vec![],      };      line_start(tokenizer, code, info) @@ -104,7 +108,7 @@ fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) ->  fn container_existing_before(      tokenizer: &mut Tokenizer,      code: Code, -    info: DocumentInfo, +    mut info: DocumentInfo,  ) -> StateFnResult {      println!("container_existing_before"); @@ -120,17 +124,20 @@ fn container_existing_before(      // continuation line.      if info.continued < info.stack.len() {          let kind = &info.stack[info.continued]; +        let container = info.states.remove(info.continued); +        tokenizer.container = Some(container);          let cont = match kind {              Container::BlockQuote => block_quote_cont,              Container::ListItem => list_item_const,          }; +        // tokenizer.container = Some(&mut info.states[info.continued]);          // To do: state?          tokenizer.attempt(cont, move |ok| {              if ok {                  Box::new(|t, c| container_existing_after(t, c, info))              } else { -                Box::new(|t, c| container_new_before(t, c, info)) +                Box::new(|t, c| container_existing_missing(t, c, info))              }          })(tokenizer, code)      } else { @@ -139,12 +146,24 @@ fn container_existing_before(      }  } +fn container_existing_missing( +    tokenizer: &mut Tokenizer, +    code: Code, +    mut info: DocumentInfo, +) -> StateFnResult { +    let container = tokenizer.container.take().unwrap(); +    info.states.insert(info.continued, container); +    container_new_before(tokenizer, code, info) +} +  fn container_existing_after(      tokenizer: &mut Tokenizer,      code: Code,      mut info: DocumentInfo,  ) -> StateFnResult {      println!("container_existing_after"); +    let container = tokenizer.container.take().unwrap(); +    info.states.insert(info.continued, container);      info.continued += 1;      container_existing_before(tokenizer, code, info)  } @@ -179,17 +198,16 @@ fn container_new_before(          //   self.interrupt = Boolean(          //     childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack          //   ) -    } else { -        // println!("  set interrupt to `false`! (before: {:?})", tokenizer.interrupt); -        // tokenizer.interrupt = false;      } +    tokenizer.container = Some(ContainerState::default());      // Check if there is a new container.      tokenizer.attempt(block_quote, move |ok| {          if ok {              Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote))          } else {              Box::new(|tokenizer, code| { +                tokenizer.container = Some(ContainerState::default());                  tokenizer.attempt(list_item, move |ok| {                      if ok {                          Box::new(|t, c| container_new_after(t, c, info, Container::ListItem)) @@ -240,12 +258,15 @@ fn container_new_after(      if info.continued < info.stack.len() {          info.stack_close              .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); +        info.states.truncate(info.continued);          info = line_end(tokenizer, info, false, true);          tokenizer.expect(code, true);      } +    let container = tokenizer.container.take().unwrap(); +    info.states.push(container);      info.stack.push(kind); -    info.continued = info.stack.len(); +    info.continued = info.stack.len(); // To do: `+= 1`?      container_new_before(tokenizer, code, info)  } @@ -261,7 +282,6 @@ fn containers_after(      info.inject.last_mut().unwrap().0.append(&mut containers);      tokenizer.lazy = info.continued != info.stack.len(); -    println!("lazy: {:?} {:?}", info.continued, info.stack.len());      // Define start.      let point = tokenizer.point.clone(); @@ -331,6 +351,7 @@ fn flow_end(      if !lazy && info.continued < info.stack.len() {          info.stack_close              .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); +        info.states.truncate(info.continued);      }      info = line_end(tokenizer, info, false, false); @@ -500,7 +521,7 @@ fn line_end(      info.inject[index].1.append(&mut exits);      println!( -        "  setting `interrupt: false` (before: {:?}", +        "  setting `interrupt: false` (before: {:?})",          tokenizer.interrupt      );      tokenizer.interrupt = false; diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 163c2bf..34cfde3 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -130,6 +130,13 @@ pub struct Media {      pub id: String,  } +/// To do. +#[derive(Default, Debug)] +pub struct ContainerState { +    pub blank_initial: bool, +    pub size: usize, +} +  /// The internal state of a tokenizer, not to be confused with states from the  /// state machine, this instead is all the information about where we currently  /// are and what’s going on. @@ -203,6 +210,8 @@ pub struct Tokenizer<'a> {      pub concrete: bool,      /// To do.      pub lazy: bool, +    /// To do. +    pub container: Option<ContainerState>,  }  impl<'a> Tokenizer<'a> { @@ -225,6 +234,7 @@ impl<'a> Tokenizer<'a> {              interrupt: false,              concrete: false,              lazy: false, +            container: None,              resolvers: vec![],              resolver_ids: vec![],          } diff --git a/src/util/skip.rs b/src/util/skip.rs index 10ba364..d2ad914 100644 --- a/src/util/skip.rs +++ b/src/util/skip.rs @@ -5,15 +5,23 @@ use crate::tokenizer::{Event, EventType};  /// Skip from `index`, optionally past `token_types`.  pub fn opt(events: &[Event], index: usize, token_types: &[Token]) -> usize { -    skip_opt_with_direction(events, index, token_types, true) +    skip_opt_impl(events, index, token_types, true)  }  /// Skip from `index`, optionally past `token_types`, backwards.  pub fn opt_back(events: &[Event], index: usize, token_types: &[Token]) -> usize { -    skip_opt_with_direction(events, index, token_types, false) +    skip_opt_impl(events, index, token_types, false)  } -pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize { +pub fn to_back(events: &[Event], index: usize, token_types: &[Token]) -> usize { +    to_impl(events, index, token_types, false) +} + +pub fn to(events: &[Event], index: usize, token_types: &[Token]) -> usize { +    to_impl(events, index, token_types, true) +} + +pub fn to_impl(events: &[Event], mut index: usize, token_types: &[Token], forward: bool) -> usize {      while index < events.len() {          let current = &events[index].token_type; @@ -21,14 +29,14 @@ pub fn to(events: &[Event], mut index: usize, token_types: &[Token]) -> usize {              break;          } -        index += 1; +        index = if forward { index + 1 } else { index - 1 };      }      index  }  /// Skip internals. -fn skip_opt_with_direction( +fn skip_opt_impl(      events: &[Event],      mut index: usize,      token_types: &[Token], diff --git a/src/util/span.rs b/src/util/span.rs index 32dd00f..72b451d 100644 --- a/src/util/span.rs +++ b/src/util/span.rs @@ -6,9 +6,9 @@ use crate::util::codes::serialize as serialize_codes;  /// A struct representing the span of an opening and closing event of a token.  #[derive(Debug)]  pub struct Span { -    /// Absolute offset (and `index` in `codes`) of where this span starts. +    /// Absolute offset (an `index` in `codes`) of where this span starts.      pub start_index: usize, -    /// Absolute offset (and `index` in `codes`) of where this span ends. +    /// Absolute offset (an `index` in `codes`) of where this span ends.      pub end_index: usize,  } | 
