diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/compiler.rs | 55 | ||||
| -rw-r--r-- | src/construct/html_flow.rs | 7 | ||||
| -rw-r--r-- | src/construct/list.rs | 4 | ||||
| -rw-r--r-- | src/content/document.rs | 332 | 
4 files changed, 221 insertions, 177 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 4b38c8d..37229a4 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -8,6 +8,7 @@ use crate::util::{      decode_character_reference::{decode_named, decode_numeric},      encode::encode,      sanitize_uri::sanitize_uri, +    skip,      span::{codes as codes_from_span, from_exit_event, serialize},  };  use std::collections::HashMap; @@ -241,7 +242,6 @@ struct CompileContext<'a> {      pub tight_stack: Vec<bool>,      /// Fields used to influance the current compilation.      pub slurp_one_line_ending: bool, -    pub slurp_all_line_endings: bool,      pub tags: bool,      pub ignore_encode: bool,      pub last_was_tag: bool, @@ -276,7 +276,6 @@ impl<'a> CompileContext<'a> {              definitions: HashMap::new(),              tight_stack: vec![],              slurp_one_line_ending: false, -            slurp_all_line_endings: false,              tags: true,              ignore_encode: false,              last_was_tag: false, @@ -718,8 +717,6 @@ fn on_enter_paragraph(context: &mut CompileContext) {          context.line_ending_if_needed();          context.tag("<p>".to_string());      } - -    context.slurp_all_line_endings = false;  }  /// Handle [`Enter`][EventType::Enter]:[`Resource`][Token::Resource]. @@ -785,7 +782,6 @@ fn on_exit_block_quote(context: &mut CompileContext) {      context.tight_stack.pop();      context.line_ending_if_needed();      context.tag("</blockquote>".to_string()); -    context.slurp_all_line_endings = false;  }  /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker]. @@ -1075,9 +1071,7 @@ fn on_exit_label_text(context: &mut CompileContext) {  /// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding].  fn on_exit_line_ending(context: &mut CompileContext) { -    if context.slurp_all_line_endings { -        // Empty. -    } else if context.slurp_one_line_ending { +    if context.slurp_one_line_ending {          context.slurp_one_line_ending = false;      } else {          context.push(context.encode_opt(&serialize( @@ -1156,9 +1150,7 @@ fn on_exit_media(context: &mut CompileContext) {  fn on_exit_paragraph(context: &mut CompileContext) {      let tight = context.tight_stack.last().unwrap_or(&false); -    if *tight { -        context.slurp_all_line_endings = true; -    } else { +    if !tight {          context.tag("</p>".to_string());      }  } @@ -1218,10 +1210,29 @@ fn on_enter_list(context: &mut CompileContext) {          } else {              balance -= 1; -            // Blank line directly in list or directly in list item. -            if balance < 3 && event.token_type == Token::BlankLineEnding { -                loose = true; -                break; +            // Blank line directly in list or directly in list item, +            // but not a blank line after an empty list item. +            // To do: does this check if the item is empty? +            if balance < 3 && event.token_type == Token::BlankLineEnding +            // && !(balance == 1 && events[index - 2].token_type == Token::ListItem) +            { +                let at_list_item = balance == 1 && events[index - 2].token_type == Token::ListItem; +                let at_empty_list_item = if at_list_item { +                    let before_item = skip::opt_back(events, index - 2, &[Token::ListItem]); +                    let before_prefix = skip::opt_back( +                        events, +                        index - 3, +                        &[Token::ListItemPrefix, Token::SpaceOrTab], +                    ); +                    before_item + 1 == before_prefix +                } else { +                    false +                }; + +                if !at_list_item || !at_empty_list_item { +                    loose = true; +                    break; +                }              }              // Done. @@ -1233,7 +1244,6 @@ fn on_enter_list(context: &mut CompileContext) {          index += 1;      } -    println!("list: {:?} {:?}", token_type, loose);      context.tight_stack.push(!loose);      context.line_ending_if_needed();      // Note: no `>`. @@ -1283,12 +1293,21 @@ fn on_exit_list_item_value(context: &mut CompileContext) {  /// To do.  fn on_exit_list_item(context: &mut CompileContext) { -    if context.last_was_tag && !context.slurp_all_line_endings { +    let tight = context.tight_stack.last().unwrap_or(&false); +    let before_item = skip::opt_back( +        context.events, +        context.index - 1, +        &[Token::BlankLineEnding, Token::LineEnding, Token::SpaceOrTab], +    ); +    let previous = &context.events[before_item]; +    let tight_paragraph = *tight && previous.token_type == Token::Paragraph; +    let empty_item = previous.token_type == Token::ListItemPrefix; + +    if !tight_paragraph && !empty_item {          context.line_ending_if_needed();      }      context.tag("</li>".to_string()); -    context.slurp_all_line_endings = false;  }  /// To do. diff --git a/src/construct/html_flow.rs b/src/construct/html_flow.rs index a8b1efc..3300d2f 100644 --- a/src/construct/html_flow.rs +++ b/src/construct/html_flow.rs @@ -207,7 +207,6 @@ struct Info {  ///  pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      tokenizer.enter(Token::HtmlFlow); -    tokenizer.enter(Token::HtmlFlowData);      // To do: allow arbitrary when code (indented) is turned off.      tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code)  } @@ -219,6 +218,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {  /// ```  fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      if Code::Char('<') == code { +        tokenizer.enter(Token::HtmlFlowData);          tokenizer.consume(code);          (State::Fn(Box::new(open)), None)      } else { @@ -771,11 +771,12 @@ fn continuation(tokenizer: &mut Tokenizer, code: Code, info: Info) -> StateFnRes          Code::CarriageReturnLineFeed | Code::Char('\n' | '\r')              if info.kind == Kind::Basic || info.kind == Kind::Complete =>          { +            tokenizer.exit(Token::HtmlFlowData);              tokenizer.check(blank_line_before, |ok| {                  let func = if ok { -                    continuation_close +                    html_continue_after                  } else { -                    continuation_at_line_ending +                    html_continue_start // continuation_at_line_ending                  };                  Box::new(move |t, c| func(t, c, info))              })(tokenizer, code) diff --git a/src/construct/list.rs b/src/construct/list.rs index 960c0eb..d06eaf0 100644 --- a/src/construct/list.rs +++ b/src/construct/list.rs @@ -267,7 +267,7 @@ pub fn not_blank_cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {          index > 0 && tokenizer.events[index - 1].token_type == Token::BlankLineEnding;      let mut further_blank = false; -    if currently_blank && index > 3 { +    if currently_blank && index > 5 {          let before = skip::opt_back(&tokenizer.events, index - 3, &[Token::SpaceOrTab]);          further_blank = tokenizer.events[before].token_type == Token::BlankLineEnding;      } @@ -338,7 +338,7 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {                          && skip::opt(                              &tokenizer.events,                              previous.3 + 1, -                            &[Token::LineEnding, Token::BlankLineEnding], +                            &[Token::SpaceOrTab, Token::LineEnding, Token::BlankLineEnding],                          ) == current.2                      {                          println!("prev:match {:?} {:?}", previous, current); diff --git a/src/content/document.rs b/src/content/document.rs index b29e4b9..f6b8f55 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -10,7 +10,7 @@  use crate::construct::{      block_quote::{cont as block_quote_cont, end as block_quote_end, start as block_quote}, -    list::{cont as list_const, end as list_end, start as list}, +    list::{cont as list_item_const, end as list_item_end, start as list_item},  };  use crate::content::flow::start as flow;  use crate::parser::ParseState; @@ -25,12 +25,19 @@ use crate::util::{  };  use std::collections::HashSet; +#[derive(Debug, PartialEq)] +enum Container { +    BlockQuote, +    ListItem, +} +  struct DocumentInfo {      continued: usize, -    containers_begin_index: usize, +    index: usize,      paragraph_before: bool,      inject: Vec<(Vec<Event>, Vec<Event>)>, -    stack: Vec<String>, +    stack: Vec<Container>, +    stack_close: Vec<Container>,      next: Box<StateFn>,  } @@ -73,18 +80,34 @@ pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec  fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {      let info = DocumentInfo { +        index: 0,          continued: 0, -        paragraph_before: false,          inject: vec![], -        containers_begin_index: 0, -        stack: vec![],          next: Box::new(flow), +        paragraph_before: false, +        stack: vec![], +        stack_close: vec![],      }; -    before(tokenizer, code, info) +    line_start(tokenizer, code, info) +} + +/// Start of a new line. +fn line_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { +    println!("line_start"); +    info.index = tokenizer.events.len(); +    info.inject.push((vec![], vec![])); +    info.continued = 0; +    container_existing_before(tokenizer, code, info)  } -fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { -    println!("before"); +/// Before existing containers. +fn container_existing_before( +    tokenizer: &mut Tokenizer, +    code: Code, +    info: DocumentInfo, +) -> StateFnResult { +    println!("container_existing_before"); +      // First we iterate through the open blocks, starting with the root      // document, and descending through last children down to the last open      // block. @@ -96,45 +119,42 @@ fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnR      // But we cannot close unmatched blocks yet, because we may have a lazy      // continuation line.      if info.continued < info.stack.len() { -        let name = &info.stack[info.continued]; -        let cont = if name == "blockquote" { -            block_quote_cont -        } else if name == "list" { -            list_const -        } else { -            unreachable!("todo: cont construct {:?}", name) +        let kind = &info.stack[info.continued]; +        let cont = match kind { +            Container::BlockQuote => block_quote_cont, +            Container::ListItem => list_item_const,          };          // To do: state?          tokenizer.attempt(cont, move |ok| {              if ok { -                Box::new(|t, c| document_continue(t, c, info)) +                Box::new(|t, c| container_existing_after(t, c, info))              } else { -                Box::new(|t, c| check_new_containers(t, c, info)) +                Box::new(|t, c| container_new_before(t, c, info))              }          })(tokenizer, code)      } else {          // Done. -        check_new_containers(tokenizer, code, info) +        container_new_before(tokenizer, code, info)      }  } -fn document_continue( +fn container_existing_after(      tokenizer: &mut Tokenizer,      code: Code,      mut info: DocumentInfo,  ) -> StateFnResult { -    println!("document_continue"); +    println!("container_existing_after");      info.continued += 1; -    before(tokenizer, code, info) +    container_existing_before(tokenizer, code, info)  } -fn check_new_containers( +fn container_new_before(      tokenizer: &mut Tokenizer,      code: Code,      info: DocumentInfo,  ) -> StateFnResult { -    println!("check_new_containers"); +    println!("container_new_before");      // Next, after consuming the continuation markers for existing blocks, we      // look for new block starts (e.g. `>` for a block quote).      // If we encounter a new block start, we close any blocks unmatched in @@ -146,10 +166,13 @@ fn check_new_containers(          // start.          if tokenizer.concrete {              println!("  concrete"); -            return there_is_no_new_container(tokenizer, code, info); +            return containers_after(tokenizer, code, info);          } -        println!("  to do: interrupt ({:?})?", tokenizer.interrupt); +        println!( +            "  to do: set interrupt? (before: {:?})", +            tokenizer.interrupt +        );          //   // If we do have flow, it could still be a blank line,          //   // but we’d be interrupting it w/ a new container if there’s a current          //   // construct. @@ -157,20 +180,21 @@ fn check_new_containers(          //     childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack          //   )      } else { -        tokenizer.interrupt = false; +        // println!("  set interrupt to `false`! (before: {:?})", tokenizer.interrupt); +        // tokenizer.interrupt = false;      }      // Check if there is a new container.      tokenizer.attempt(block_quote, move |ok| {          if ok { -            Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) +            Box::new(|t, c| container_new_after(t, c, info, Container::BlockQuote))          } else {              Box::new(|tokenizer, code| { -                tokenizer.attempt(list, move |ok| { +                tokenizer.attempt(list_item, move |ok| {                      if ok { -                        Box::new(|t, c| there_is_a_new_container(t, c, info, "list".to_string())) +                        Box::new(|t, c| container_new_after(t, c, info, Container::ListItem))                      } else { -                        Box::new(|t, c| there_is_no_new_container(t, c, info)) +                        Box::new(|t, c| containers_after(t, c, info))                      }                  })(tokenizer, code)              }) @@ -178,25 +202,17 @@ fn check_new_containers(      })(tokenizer, code)  } -fn there_is_a_new_container( +fn container_new_after(      tokenizer: &mut Tokenizer,      code: Code,      mut info: DocumentInfo, -    name: String, +    kind: Container,  ) -> StateFnResult { -    let size = info.continued; -    println!("exit:0: {:?}", false); -    info = exit_containers(tokenizer, info, size, false); -    tokenizer.expect(code, true); -      // Remove from the event stack.      // We’ll properly add exits at different points manually. -    let end = if name == "blockquote" { -        block_quote_end -    } else if name == "list" { -        list_end -    } else { -        unreachable!("todo: end {:?}", name) +    let end = match kind { +        Container::BlockQuote => block_quote_end, +        Container::ListItem => list_item_end,      };      let token_types = end(); @@ -221,118 +237,42 @@ fn there_is_a_new_container(          index += 1;      } -    info.stack.push(name); -    document_continue(tokenizer, code, info) -} - -/// Exit open containers. -fn exit_containers( -    tokenizer: &mut Tokenizer, -    mut info: DocumentInfo, -    size: usize, -    before: bool, -) -> DocumentInfo { -    let mut exits: Vec<Event> = vec![]; - -    if info.stack.len() > size { -        println!("closing flow"); -        let index = tokenizer.events.len(); -        let result = tokenizer.flush(info.next); -        info.next = Box::new(flow); // This is weird but Rust needs a function there. -        assert!(matches!(result.0, State::Ok)); -        assert!(result.1.is_none()); - -        let mut end = tokenizer.events.len(); -        while end > 0 && end > index { -            if tokenizer.events[end - 1].token_type != Token::LineEnding { -                break; -            } - -            end -= 1; -        } - -        let mut add = tokenizer.events.drain(index..end).collect::<Vec<_>>(); - -        exits.append(&mut add); - -        println!("  setting `interrupt: false`"); -        tokenizer.interrupt = false; -    } - -    while info.stack.len() > size { -        let name = info.stack.pop().unwrap(); - -        let end = if name == "blockquote" { -            block_quote_end -        } else if name == "list" { -            list_end -        } else { -            unreachable!("todo: end {:?}", name) -        }; - -        let token_types = end(); - -        let mut index = 0; -        while index < token_types.len() { -            let token_type = &token_types[index]; - -            exits.push(Event { -                event_type: EventType::Exit, -                token_type: token_type.clone(), -                // Note: positions are fixed later. -                point: tokenizer.point.clone(), -                index: tokenizer.index, -                previous: None, -                next: None, -                content_type: None, -            }); - -            index += 1; -        } -    } - -    if !exits.is_empty() { -        let before = if before { 1 } else { 0 }; -        let mut index = info.inject.len() - 1; -        if before > index { -            // To do: maybe, if this branch happens, it’s a bug? -            println!("inject:0: {:?}", index); -            index = 0; -        } else { -            index -= before; -            println!("inject:set: {:?}", index); -        } -        info.inject[index].1.append(&mut exits); +    if info.continued < info.stack.len() { +        info.stack_close +            .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); +        info = line_end(tokenizer, info, false, true); +        tokenizer.expect(code, true);      } -    info +    info.stack.push(kind); +    info.continued = info.stack.len(); +    container_new_before(tokenizer, code, info)  } -fn there_is_no_new_container( +fn containers_after(      tokenizer: &mut Tokenizer,      code: Code, -    info: DocumentInfo, +    mut info: DocumentInfo,  ) -> StateFnResult { -    println!("there_is_no_new_container"); -    tokenizer.lazy = info.continued != info.stack.len(); -    // lineStartOffset = self.now().offset -    flow_start(tokenizer, code, info) -} +    println!("containers_after"); -fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { -    println!("flow_start"); +    // Add all container events we parsed. +    let mut containers = tokenizer.events.drain(info.index..).collect::<Vec<_>>(); +    info.inject.last_mut().unwrap().0.append(&mut containers); -    let containers = tokenizer -        .events -        .drain(info.containers_begin_index..) -        .collect::<Vec<_>>(); - -    info.inject.push((containers, vec![])); +    tokenizer.lazy = info.continued != info.stack.len(); +    println!("lazy: {:?} {:?}", info.continued, info.stack.len());      // Define start.      let point = tokenizer.point.clone();      tokenizer.define_skip(&point); +    flow_start(tokenizer, code, info) +} + +fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { +    println!("flow_start"); +      let state = info.next;      info.next = Box::new(flow); // This is weird but Rust needs a function there. @@ -352,6 +292,7 @@ fn flow_end(  ) -> StateFnResult {      println!("flow_end: lazy? {:?}", tokenizer.lazy); +    // To do: clean this!      let index = tokenizer.events.len();      let index = if index > 0 {          skip::opt_back(&tokenizer.events, index - 1, &[Token::LineEnding]) @@ -371,15 +312,14 @@ fn flow_end(          false      }; -    let mut continued = info.continued; -    let size = info.stack.len(); +    let mut lazy = false;      if tokenizer.lazy {          println!("this line was lazy.");          if info.paragraph_before && paragraph {              println!("it was another paragraph, which is allowed."); -            continued = size; +            lazy = true;          } else {              println!(                  "it was something else (prev: {:?}, cur: {:?}), which is not allowed.", @@ -388,27 +328,31 @@ fn flow_end(          }      } -    // Exit containers. -    println!("exit:1: {:?}", true); -    info = exit_containers(tokenizer, info, continued, true); +    if !lazy && info.continued < info.stack.len() { +        info.stack_close +            .append(&mut info.stack.drain(info.continued..).collect::<Vec<_>>()); +    } + +    info = line_end(tokenizer, info, false, false);      tokenizer.expect(code, true); -    info.continued = 0;      info.paragraph_before = paragraph; -    info.containers_begin_index = tokenizer.events.len();      match result {          State::Ok => { -            println!("exit:3: {:?}", false); -            info = exit_containers(tokenizer, info, 0, false); -            tokenizer.expect(code, true); +            info.stack_close +                .append(&mut info.stack.drain(..).collect::<Vec<_>>()); +            info = line_end(tokenizer, info, true, false);              let mut map = EditMap::new();              let mut line_index = 0;              let mut index = 0; +            println!("injections: {:#?}", info.inject); +              let add = info.inject[line_index].0.clone();              let mut first_line_ending_in_run: Option<usize> = None; +            println!("inject:enters:0: {:?}", add.len());              map.add(0, 0, add);              while index < tokenizer.events.len() { @@ -427,6 +371,11 @@ fn flow_end(                              index += 1;                          }                          if !add.is_empty() { +                            println!( +                                "inject:exits:at-{:?}: {:?}", +                                first_line_ending_in_run, +                                add.len() +                            );                              map.add(first_line_ending_in_run.unwrap(), 0, add);                          }                      } else { @@ -435,6 +384,7 @@ fn flow_end(                          if !add.is_empty() {                              // No longer empty.                              first_line_ending_in_run = None; +                            println!("inject:enters:at-{:?}: {:?}", index + 1, add.len());                              map.add(index + 1, 0, add);                          }                      } @@ -448,6 +398,7 @@ fn flow_end(              }              let mut add = info.inject[line_index].1.clone(); +            println!("inject:exits:tail-{:?}: {:?}", index, add.len());              let mut deep_index = 0;              while deep_index < add.len() {                  add[deep_index].point = tokenizer.point.clone(); @@ -479,11 +430,84 @@ fn flow_end(          State::Nok => unreachable!("handle nok in `flow`?"),          State::Fn(func) => {              info.next = func; -            before(tokenizer, code, info) +            line_start(tokenizer, code, info)          }      }  } +fn line_end( +    tokenizer: &mut Tokenizer, +    mut info: DocumentInfo, +    eof: bool, +    containers_before: bool, +) -> DocumentInfo { +    let mut stack_close = info.stack_close.drain(..).collect::<Vec<_>>(); +    println!("line_end: {:?}", stack_close); + +    if stack_close.is_empty() { +        return info; +    } + +    // So, we’re at the end of a line, but we need to close the *previous* line. +    if !eof { +        println!("closing previous flow"); +        tokenizer.define_skip(&tokenizer.point.clone()); +        let mut current_events = tokenizer.events.drain(info.index..).collect::<Vec<_>>(); +        let next = info.next; +        info.next = Box::new(flow); // This is weird but Rust needs a function there. +        let result = tokenizer.flush(next); +        assert!(matches!(result.0, State::Ok)); +        assert!(result.1.is_none()); + +        if containers_before { +            info.index = tokenizer.events.len(); +        } + +        tokenizer.events.append(&mut current_events); +    } + +    let mut exits: Vec<Event> = vec![]; + +    while !stack_close.is_empty() { +        let kind = stack_close.pop().unwrap(); +        let end = match kind { +            Container::BlockQuote => block_quote_end, +            Container::ListItem => list_item_end, +        }; + +        let token_types = end(); + +        let mut index = 0; +        while index < token_types.len() { +            let token_type = &token_types[index]; + +            exits.push(Event { +                event_type: EventType::Exit, +                token_type: token_type.clone(), +                // Note: positions are fixed later. +                point: tokenizer.point.clone(), +                index: tokenizer.index, +                previous: None, +                next: None, +                content_type: None, +            }); + +            index += 1; +        } +    } + +    let index = info.inject.len() - (if eof { 1 } else { 2 }); +    info.inject[index].1.append(&mut exits); + +    println!( +        "  setting `interrupt: false` (before: {:?}", +        tokenizer.interrupt +    ); +    tokenizer.interrupt = false; + +    info +} +  fn eof_eol(code: Code) -> bool {      matches!(          code,  | 
