From b7bd2b734fae09c40d738fcd57d5ee6876f0f504 Mon Sep 17 00:00:00 2001
From: Titus Wormer
Date: Mon, 11 Jul 2022 14:53:45 +0200
Subject: Fix block quote bugs

---
 readme.md                       |  2 +-
 src/compiler.rs                 | 15 +++++++++------
 src/construct/heading_setext.rs |  5 -----
 src/content/document.rs         |  8 ++++++--
 src/tokenizer.rs                | 11 ++++++-----
 tests/block_quote.rs            | 11 +++++------
 tests/code_fenced.rs            | 11 +++++------
 tests/code_indented.rs          | 39 ++++++++++++++++++---------------------
 8 files changed, 50 insertions(+), 52 deletions(-)

diff --git a/readme.md b/readme.md
index 120158c..32116cb 100644
--- a/readme.md
+++ b/readme.md
@@ -125,7 +125,6 @@ cargo doc --document-private-items
 
 #### Parse
 
-- [ ] (3) Fix some block quote bugs
 - [ ] (3) Lazy lines (`code indented`, `html flow`)
 - [ ] (8) list\
   test (`character_reference`, `code_indented`, `heading_setext`,
@@ -273,3 +272,4 @@ important.
 - [x] (3) Add support for concrete constructs (html (flow) or code (fenced)
   cannot be “pierced” into by containers)
 - [x] (1) Make sure that rust character groups match CM character groups
+- [x] (3) Fix block quote bug
diff --git a/src/compiler.rs b/src/compiler.rs
index d675c48..8a28654 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -237,6 +237,7 @@ struct CompileContext<'a> {
     pub character_reference_kind: Option<CharacterReferenceKind>,
     pub media_stack: Vec<Media>,
     pub definitions: HashMap<String, Definition>,
+    pub tight_stack: Vec<bool>,
     /// Fields used to influance the current compilation.
     pub slurp_one_line_ending: bool,
     pub tags: bool,
@@ -270,6 +271,7 @@ impl<'a> CompileContext<'a> {
             character_reference_kind: None,
             media_stack: vec![],
             definitions: HashMap::new(),
+            tight_stack: vec![],
             slurp_one_line_ending: false,
             tags: true,
             ignore_encode: false,
@@ -604,7 +606,7 @@ fn on_enter_buffer(context: &mut CompileContext) {
 
 /// Handle [`Enter`][EventType::Enter]:[`BlockQuote`][Token::BlockQuote].
 fn on_enter_block_quote(context: &mut CompileContext) {
-    // tightStack.push(false)
+    context.tight_stack.push(false);
     context.line_ending_if_needed();
     context.tag("<blockquote>".to_string());
".to_string()); } @@ -761,7 +763,7 @@ fn on_exit_break(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`BlockQuote`][Token::BlockQuote]. fn on_exit_block_quote(context: &mut CompileContext) { - // tightStack.pop() + context.tight_stack.pop(); context.line_ending_if_needed(); context.tag("
".to_string()); // let mut slurp_all_line_endings = false; @@ -842,15 +844,16 @@ fn on_exit_code_flow(context: &mut CompileContext) { .take() .expect("`code_flow_seen_data` must be defined"); - // To do: containers. // One special case is if we are inside a container, and the fenced code was // not closed (meaning it runs to the end). // In that case, the following line ending, is considered *outside* the // fenced code and block quote by micromark, but CM wants to treat that // ending as part of the code. - // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag { - // line_ending(); - // } + if let Some(count) = context.code_fenced_fences_count { + if count == 1 && !context.tight_stack.is_empty() && !context.last_was_tag { + context.line_ending(); + } + } // But in most cases, it’s simpler: when we’ve seen some data, emit an extra // line ending when needed. diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 633f7de..2078338 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -130,11 +130,6 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let paragraph_before = previous > 1 && tokenizer.events[previous].token_type == Token::Paragraph; - println!( - "setext-start: {:?} {:?} {:?}", - tokenizer.interrupt, tokenizer.lazy, paragraph_before - ); - // Require a paragraph before and do not allow on a lazy line. if paragraph_before && !tokenizer.lazy { // To do: allow arbitrary when code (indented) is turned off. diff --git a/src/content/document.rs b/src/content/document.rs index 0112d52..f093a04 100644 --- a/src/content/document.rs +++ b/src/content/document.rs @@ -158,6 +158,7 @@ fn document_continue( let size = info.continued; info = exit_containers(tokenizer, info, size, true); + tokenizer.expect(code, true); // // Fix positions. // let index = indexBeforeExits @@ -241,6 +242,7 @@ fn there_is_a_new_container( println!("there_is_a_new_container"); let size = info.continued; info = exit_containers(tokenizer, info, size, true); + tokenizer.expect(code, true); // Remove from the event stack. // We’ll properly add exits at different points manually. @@ -251,7 +253,7 @@ fn there_is_a_new_container( unreachable!("todo: cont {:?}", name) }; - println!("creating exit for `{:?}`", name); + println!("creating exit (a) for `{:?}`", name); let token_types = end(); @@ -329,7 +331,7 @@ fn exit_containers( unreachable!("todo: cont {:?}", name) }; - println!("creating exit for `{:?}`", name); + println!("creating exit (b) for `{:?}`", name); let token_types = end(); @@ -429,6 +431,7 @@ fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> // Exit containers. let size = info.continued; info = exit_containers(tokenizer, info, size, true); + tokenizer.expect(code, true); // Define start. let point = tokenizer.point.clone(); @@ -469,6 +472,7 @@ fn flow_end( State::Ok => { println!("State::Ok"); info = exit_containers(tokenizer, info, 0, false); + tokenizer.expect(code, true); // println!("document:inject: {:?}", info.inject); let mut map = EditMap::new(); diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c984a75..80786ea 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -247,8 +247,10 @@ impl<'a> Tokenizer<'a> { } /// Prepare for a next code to get consumed. 
-    fn expect(&mut self, code: Code) {
-        assert!(self.consumed, "expected previous character to be consumed");
+    pub fn expect(&mut self, code: Code, force: bool) {
+        if !force {
+            assert!(self.consumed, "expected previous character to be consumed");
+        }
         self.consumed = false;
         self.current = code;
     }
@@ -609,7 +611,6 @@ fn attempt_impl(
         // Should it be before?
         // How to match `eof`?
         if !codes.is_empty() && pause(tokenizer.previous) {
-            tokenizer.consumed = true;
             println!("pause!: {:?}", (codes.clone(), vec![code]));
             return done(
                 (codes, vec![code]),
@@ -674,7 +675,7 @@ fn feed_impl(
             }
             State::Fn(func) => {
                 log::debug!("main: passing `{:?}`", code);
-                tokenizer.expect(code);
+                tokenizer.expect(code, false);
                 let (next, remainder) = check_statefn_result(func(tokenizer, code));
                 state = next;
                 index = index + 1
@@ -706,7 +707,7 @@ fn flush_impl(
             State::Fn(func) => {
                 let code = Code::None;
                 log::debug!("main: passing eof");
-                tokenizer.expect(code);
+                tokenizer.expect(code, false);
                 let (next, remainder) = check_statefn_result(func(tokenizer, code));
                 assert!(remainder.is_none(), "expected no remainder");
                 state = next;
diff --git a/tests/block_quote.rs b/tests/block_quote.rs
index 5e5adce..c0b10b7 100644
--- a/tests/block_quote.rs
+++ b/tests/block_quote.rs
@@ -67,12 +67,11 @@ fn block_quote() {
     //     "should not support lazy lists in block quotes"
     // );
 
-    // To do: block quote (lazy, code (indented), some bug).
-    // assert_eq!(
-    //     micromark(">     a\n    b"),
-    //     "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>b\n</code></pre>",
-    //     "should not support lazy indented code in block quotes"
-    // );
+    assert_eq!(
+        micromark(">     a\n    b"),
+        "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>b\n</code></pre>",
+        "should not support lazy indented code in block quotes"
+    );
 
     assert_eq!(
         micromark("> ```\na\n```"),
diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs
index a777f9f..fa9ed5f 100644
--- a/tests/code_fenced.rs
+++ b/tests/code_fenced.rs
@@ -226,12 +226,11 @@ fn code_fenced() {
         "should not support a closing sequence w/ too much indent, regardless of opening sequence (1)"
     );
 
-    // To do: blockquote (fix compiler).
-    // assert_eq!(
-    //     micromark("> ```\n>\n>\n>\n\na"),
-    //     "<blockquote>\n<pre><code>\n\n\n</code></pre>\n</blockquote>\n<p>a</p>",
-    //     "should not support a closing sequence w/ too much indent, regardless of opening sequence (2)"
-    // );
+    assert_eq!(
+        micromark("> ```\n>\n>\n>\n\na"),
+        "<blockquote>\n<pre><code>\n\n\n</code></pre>\n</blockquote>\n<p>a</p>",
+        "should not support a closing sequence w/ too much indent, regardless of opening sequence (2)"
+    );
 
     assert_eq!(
         micromark("> ```a\nb"),
diff --git a/tests/code_indented.rs b/tests/code_indented.rs
index f06cf4c..f462792 100644
--- a/tests/code_indented.rs
+++ b/tests/code_indented.rs
@@ -82,47 +82,44 @@ fn code_indented() {
         "should not support lazyness (1)"
     );
 
-    // To do: blockquote (lazy, some bug).
+    // To do: blockquote (lazy).
     // assert_eq!(
     //     micromark("> a\n    b"),
     //     "<blockquote>\n<p>a\nb</p>\n</blockquote>",
     //     "should not support lazyness (2)"
     // );
 
-    // To do: blockquote (lazy, some bug).
+    // To do: blockquote (lazy).
     // assert_eq!(
     //     micromark("> a\n     b"),
     //     "<blockquote>\n<p>a\nb</p>\n</blockquote>",
     //     "should not support lazyness (3)"
     // );
 
-    // To do: blockquote (lazy, some bug).
+    // To do: blockquote (lazy).
     // assert_eq!(
     //     micromark("> a\n      b"),
     //     "<blockquote>\n<p>a\nb</p>\n</blockquote>",
     //     "should not support lazyness (4)"
     // );
 
-    // To do: blockquote (lazy, some bug).
-    // assert_eq!(
-    //     micromark(">     a\n    b"),
-    //     "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>b\n</code></pre>",
-    //     "should not support lazyness (5)"
-    // );
+    assert_eq!(
+        micromark(">     a\n    b"),
+        "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>b\n</code></pre>",
+        "should not support lazyness (5)"
+    );
 
-    // To do: blockquote (lazy, some bug).
-    // assert_eq!(
-    //     micromark(">     a\n     b"),
-    //     "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code> b\n</code></pre>",
-    //     "should not support lazyness (6)"
-    // );
+    assert_eq!(
+        micromark(">     a\n     b"),
+        "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code> b\n</code></pre>",
+        "should not support lazyness (6)"
+    );
 
-    // To do: blockquote (lazy, some bug).
-    // assert_eq!(
-    //     micromark(">     a\n      b"),
-    //     "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>  b\n</code></pre>",
-    //     "should not support lazyness (7)"
-    // );
+    assert_eq!(
+        micromark(">     a\n      b"),
+        "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>  b\n</code></pre>",
+        "should not support lazyness (7)"
+    );
 
     // To do: turning things off.
     // assert_eq!(
-- 
cgit