diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-07-07 17:21:38 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-07-07 17:36:35 +0200 |
commit | 4806864e5377a5fef937b3fa02542e620c547969 (patch) | |
tree | c91ae2bbd1dc2037f425efd24d62d05e706e3e60 | |
parent | c2b4402223e53498078fc33dd55aabc0a48cdb56 (diff) | |
download | markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.gz markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.tar.bz2 markdown-rs-4806864e5377a5fef937b3fa02542e620c547969.zip |
Add basic support for block quotes
-rw-r--r-- | readme.md | 13 | ||||
-rw-r--r-- | src/compiler.rs | 29 | ||||
-rw-r--r-- | src/construct/block_quote.rs | 58 | ||||
-rw-r--r-- | src/construct/heading_setext.rs | 27 | ||||
-rw-r--r-- | src/construct/mod.rs | 1 | ||||
-rw-r--r-- | src/construct/paragraph.rs | 28 | ||||
-rw-r--r-- | src/content/document.rs | 439 | ||||
-rw-r--r-- | src/content/flow.rs | 49 | ||||
-rw-r--r-- | src/content/mod.rs | 6 | ||||
-rw-r--r-- | src/parser.rs | 4 | ||||
-rw-r--r-- | src/tokenizer.rs | 38 | ||||
-rw-r--r-- | src/util/edit_map.rs | 1 | ||||
-rw-r--r-- | src/util/mod.rs | 1 | ||||
-rw-r--r-- | src/util/skip.rs | 44 | ||||
-rw-r--r-- | tests/autolink.rs | 6 | ||||
-rw-r--r-- | tests/block_quote.rs | 188 | ||||
-rw-r--r-- | tests/code_fenced.rs | 68 | ||||
-rw-r--r-- | tests/code_indented.rs | 78 | ||||
-rw-r--r-- | tests/definition.rs | 4 | ||||
-rw-r--r-- | tests/heading_atx.rs | 11 | ||||
-rw-r--r-- | tests/heading_setext.rs | 28 | ||||
-rw-r--r-- | tests/html_flow.rs | 108 | ||||
-rw-r--r-- | tests/misc_default_line_ending.rs | 93 | ||||
-rw-r--r-- | tests/thematic_break.rs | 13 |
24 files changed, 1045 insertions, 290 deletions
@@ -62,7 +62,7 @@ cargo doc --document-private-items - [x] attention (emphasis, strong) - [x] autolink - [x] blank line -- [ ] (5) block quote +- [x] block quote - [x] character escape - [x] character reference - [x] code (fenced) @@ -85,7 +85,7 @@ cargo doc --document-private-items ### Content types - [ ] (8) container - - [ ] block quote + - [x] block quote - [ ] list - [x] flow - [x] blank line @@ -127,15 +127,13 @@ cargo doc --document-private-items #### Parse -- [ ] (8) block quote\ - test (`code_fenced`, `definition`, `code_indented`, `heading_atx`, `heading_setext`, - `html_flow`, `misc_default_line_ending`, `thematic_break`) +- [ ] (3) Fix some block quote bugs +- [ ] (3) Lazy lines (`code indented`, `html flow`) +- [ ] (3) Concrete (`html flow`) - [ ] (8) list\ test (`character_reference`, `code_indented`, `heading_setext`, `html_flow`, `thematic_break`)\ link (`blank line`, `thematic break`) -- [ ] (3) Lazy lines (`code indented`, `html flow`) -- [ ] (3) Concrete (`html flow`) - [ ] (3) Turn off things (enable every test for these) - [ ] (3) Make tokenizer tokens extendable? @@ -276,3 +274,4 @@ important. - [x] (1) Remove todos in `span.rs` if not needed - [x] (2) Fix resizing attention bug - [x] (2) Fix interleaving of attention/label +- [x] (8) Add basic support for block quotes diff --git a/src/compiler.rs b/src/compiler.rs index 7e47f95..f27c0de 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -193,9 +193,7 @@ pub struct Options { /// // micromark is safe by default: /// assert_eq!( /// micromark("> a"), - /// // To do: block quote - /// // "<blockquote>\n<p>a</p>\n</blockquote>" - /// "<p>> a</p>" + /// "<blockquote>\n<p>a</p>\n</blockquote>" /// ); /// /// // Define `default_line_ending` to configure the default: @@ -209,9 +207,7 @@ pub struct Options { /// /// } /// ), - /// // To do: block quote - /// // "<blockquote>\r\n<p>a</p>\r\n</blockquote>" - /// "<p>> a</p>" + /// "<blockquote>\r\n<p>a</p>\r\n</blockquote>" /// ); /// ``` pub default_line_ending: Option<LineEnding>, @@ -418,6 +414,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { enter_map.insert(TokenType::HeadingSetextText, on_enter_buffer); enter_map.insert(TokenType::Label, on_enter_buffer); enter_map.insert(TokenType::ResourceTitleString, on_enter_buffer); + enter_map.insert(TokenType::BlockQuote, on_enter_block_quote); enter_map.insert(TokenType::CodeIndented, on_enter_code_indented); enter_map.insert(TokenType::CodeFenced, on_enter_code_fenced); enter_map.insert(TokenType::CodeText, on_enter_code_text); @@ -491,6 +488,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { exit_map.insert(TokenType::CodeFlowChunk, on_exit_code_flow_chunk); exit_map.insert(TokenType::CodeText, on_exit_code_text); exit_map.insert(TokenType::CodeTextLineEnding, on_exit_code_text_line_ending); + exit_map.insert(TokenType::BlockQuote, on_exit_block_quote); exit_map.insert(TokenType::HardBreakEscape, on_exit_break); exit_map.insert(TokenType::HardBreakTrailing, on_exit_break); exit_map.insert(TokenType::HeadingAtx, on_exit_heading_atx); @@ -607,6 +605,13 @@ fn on_enter_buffer(context: &mut CompileContext) { context.buffer(); } +/// Handle [`Enter`][EventType::Enter]:[`BlockQuote`][TokenType::BlockQuote]. +fn on_enter_block_quote(context: &mut CompileContext) { + // tightStack.push(false) + context.line_ending_if_needed(); + context.tag("<blockquote>".to_string()); +} + /// Handle [`Enter`][EventType::Enter]:[`CodeIndented`][TokenType::CodeIndented]. fn on_enter_code_indented(context: &mut CompileContext) { context.code_flow_seen_data = Some(false); @@ -695,6 +700,7 @@ fn on_enter_link(context: &mut CompileContext) { /// Handle [`Enter`][EventType::Enter]:[`Paragraph`][TokenType::Paragraph]. fn on_enter_paragraph(context: &mut CompileContext) { + context.line_ending_if_needed(); context.tag("<p>".to_string()); } @@ -756,6 +762,14 @@ fn on_exit_break(context: &mut CompileContext) { context.tag("<br />".to_string()); } +/// Handle [`Exit`][EventType::Exit]:[`BlockQuote`][TokenType::BlockQuote]. +fn on_exit_block_quote(context: &mut CompileContext) { + // tightStack.pop() + context.line_ending_if_needed(); + context.tag("</blockquote>".to_string()); + // let mut slurp_all_line_endings = false; +} + /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][TokenType::CharacterReferenceMarker]. fn on_exit_character_reference_marker(context: &mut CompileContext) { context.character_reference_kind = Some(CharacterReferenceKind::Named); @@ -971,6 +985,7 @@ fn on_exit_heading_atx_sequence(context: &mut CompileContext) { false, ) .len(); + context.line_ending_if_needed(); context.atx_opening_sequence_size = Some(rank); context.tag(format!("<h{}>", rank)); } @@ -1001,6 +1016,7 @@ fn on_exit_heading_setext_underline(context: &mut CompileContext) { )[0]; let level: usize = if head == Code::Char('-') { 2 } else { 1 }; + context.line_ending_if_needed(); context.tag(format!("<h{}>", level)); context.push(text); context.tag(format!("</h{}>", level)); @@ -1157,5 +1173,6 @@ fn on_exit_strong(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`ThematicBreak`][TokenType::ThematicBreak]. fn on_exit_thematic_break(context: &mut CompileContext) { + context.line_ending_if_needed(); context.tag("<hr />".to_string()); } diff --git a/src/construct/block_quote.rs b/src/construct/block_quote.rs new file mode 100644 index 0000000..cd5b872 --- /dev/null +++ b/src/construct/block_quote.rs @@ -0,0 +1,58 @@ +//! To do. + +use crate::constant::TAB_SIZE; +use crate::construct::partial_space_or_tab::space_or_tab_min_max; +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; + +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // To do: allow arbitrary when code (indented) is turned off. + tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) +} + +fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('>') => { + tokenizer.enter(TokenType::BlockQuote); + cont_before(tokenizer, code) + } + _ => cont_before(tokenizer, code), + } +} + +pub fn cont(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + // To do: allow arbitrary when code (indented) is turned off. + tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), cont_before)(tokenizer, code) +} + +fn cont_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::Char('>') => { + tokenizer.enter(TokenType::BlockQuotePrefix); + tokenizer.enter(TokenType::BlockQuoteMarker); + tokenizer.consume(code); + tokenizer.exit(TokenType::BlockQuoteMarker); + (State::Fn(Box::new(cont_after)), None) + } + _ => (State::Nok, None), + } +} + +fn cont_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + match code { + Code::VirtualSpace | Code::Char('\t' | ' ') => { + tokenizer.enter(TokenType::BlockQuotePrefixWhitespace); + tokenizer.consume(code); + tokenizer.exit(TokenType::BlockQuotePrefixWhitespace); + tokenizer.exit(TokenType::BlockQuotePrefix); + (State::Ok, None) + } + _ => { + tokenizer.exit(TokenType::BlockQuotePrefix); + (State::Ok, Some(vec![code])) + } + } +} + +pub fn end() -> Vec<TokenType> { + vec![TokenType::BlockQuote] +} diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs index 211434f..440baa8 100644 --- a/src/construct/heading_setext.rs +++ b/src/construct/heading_setext.rs @@ -60,7 +60,7 @@ use crate::constant::TAB_SIZE; use crate::construct::partial_space_or_tab::{space_or_tab, space_or_tab_min_max}; use crate::tokenizer::{Code, Event, EventType, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::edit_map::EditMap; +use crate::util::{edit_map::EditMap, skip::opt_back as skip_opt_back}; /// Kind of underline. #[derive(Debug, Clone, PartialEq)] @@ -116,11 +116,26 @@ impl Kind { /// ``` pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let index = tokenizer.events.len(); - let paragraph_before = index > 3 - && tokenizer.events[index - 1].token_type == TokenType::LineEnding - && tokenizer.events[index - 3].token_type == TokenType::Paragraph; - - if paragraph_before { + let previous = if index > 1 { + skip_opt_back( + &tokenizer.events, + index - 1, + &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix], + ) + } else { + 0 + }; + let previous = skip_opt_back(&tokenizer.events, previous, &[TokenType::LineEnding]); + let paragraph_before = + previous > 1 && tokenizer.events[previous].token_type == TokenType::Paragraph; + + println!( + "setext-start: {:?} {:?} {:?}", + tokenizer.interrupt, tokenizer.lazy, paragraph_before + ); + + // Require a paragraph before and do not allow on a lazy line. + if paragraph_before && !tokenizer.lazy { // To do: allow arbitrary when code (indented) is turned off. tokenizer.go(space_or_tab_min_max(0, TAB_SIZE - 1), before)(tokenizer, code) } else { diff --git a/src/construct/mod.rs b/src/construct/mod.rs index 66b2a3c..936ecf6 100644 --- a/src/construct/mod.rs +++ b/src/construct/mod.rs @@ -64,6 +64,7 @@ pub mod attention; pub mod autolink; pub mod blank_line; +pub mod block_quote; pub mod character_escape; pub mod character_reference; pub mod code_fenced; diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs index 4f5e662..ace174f 100644 --- a/src/construct/paragraph.rs +++ b/src/construct/paragraph.rs @@ -35,7 +35,7 @@ use crate::tokenizer::{ Code, ContentType, Event, EventType, State, StateFnResult, TokenType, Tokenizer, }; -use crate::util::edit_map::EditMap; +use crate::util::{edit_map::EditMap, skip::opt as skip_opt}; /// Before a paragraph. /// @@ -90,19 +90,27 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { if event.event_type == EventType::Enter && event.token_type == TokenType::Paragraph { // Exit:Paragraph let mut exit_index = index + 3; + let mut enter_next_index = + skip_opt(&tokenizer.events, exit_index + 1, &[TokenType::LineEnding]); // Enter:Paragraph - let mut enter_next_index = exit_index + 3; + enter_next_index = skip_opt( + &tokenizer.events, + enter_next_index, + &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix], + ); // Find future `Paragraphs`. - // There will be `LineEnding` between. - while enter_next_index < len + while enter_next_index < tokenizer.events.len() && tokenizer.events[enter_next_index].token_type == TokenType::Paragraph { // Remove Exit:Paragraph, Enter:LineEnding, Exit:LineEnding, Enter:Paragraph. - edit_map.add(exit_index, 4, vec![]); + edit_map.add(exit_index, 3, vec![]); + + // Remove Enter:Paragraph. + edit_map.add(enter_next_index, 1, vec![]); // Add Exit:LineEnding position info to Exit:Data. - let line_ending_exit = &tokenizer.events[enter_next_index - 1]; + let line_ending_exit = &tokenizer.events[exit_index + 2]; let line_ending_point = line_ending_exit.point.clone(); let line_ending_index = line_ending_exit.index; let data_exit = &mut tokenizer.events[exit_index - 1]; @@ -117,7 +125,13 @@ pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> { // Potential next start. exit_index = enter_next_index + 3; - enter_next_index = exit_index + 3; + enter_next_index = + skip_opt(&tokenizer.events, exit_index + 1, &[TokenType::LineEnding]); + enter_next_index = skip_opt( + &tokenizer.events, + enter_next_index, + &[TokenType::SpaceOrTab, TokenType::BlockQuotePrefix], + ); } // Move to `Exit:Paragraph`. diff --git a/src/content/document.rs b/src/content/document.rs new file mode 100644 index 0000000..dd5038f --- /dev/null +++ b/src/content/document.rs @@ -0,0 +1,439 @@ +//! The document content type. +//! +//! **Document** represents the containers, such as block quotes and lists, +//! which structure the document and contain other sections. +//! +//! The constructs found in flow are: +//! +//! * [Block quote][crate::construct::block_quote] +//! * List + +use crate::construct::block_quote::{ + cont as block_quote_cont, end as block_quote_end, start as block_quote, +}; +use crate::content::flow::start as flow; +use crate::parser::ParseState; +use crate::subtokenize::subtokenize; +use crate::tokenizer::{ + Code, Event, EventType, Point, State, StateFn, StateFnResult, TokenType, Tokenizer, +}; +use crate::util::edit_map::EditMap; +use crate::util::{ + normalize_identifier::normalize_identifier, + span::{from_exit_event, serialize}, +}; +use std::collections::HashSet; + +struct DocumentInfo { + continued: usize, + stack: Vec<String>, + next: Box<StateFn>, + last_line_ending_index: Option<usize>, + map: EditMap, +} + +/// Turn `codes` as the document content type into events. +pub fn document(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> { + let mut tokenizer = Tokenizer::new(point, index, parse_state); + + tokenizer.push(&parse_state.codes, Box::new(start), true); + + let mut index = 0; + let mut next_definitions: HashSet<String> = HashSet::new(); + + while index < tokenizer.events.len() { + let event = &tokenizer.events[index]; + + if event.event_type == EventType::Exit + && event.token_type == TokenType::DefinitionLabelString + { + next_definitions.insert(normalize_identifier( + serialize( + &parse_state.codes, + &from_exit_event(&tokenizer.events, index), + false, + ) + .as_str(), + )); + } + + index += 1; + } + + let mut result = (tokenizer.events, false); + + parse_state.definitions = next_definitions; + + while !result.1 { + result = subtokenize(result.0, parse_state); + } + + result.0 +} + +fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { + let info = DocumentInfo { + continued: 0, + stack: vec![], + next: Box::new(flow), + last_line_ending_index: None, + map: EditMap::new(), + }; + before(tokenizer, code, info) +} + +fn before(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { + println!("before: check existing open containers"); + // First we iterate through the open blocks, starting with the root + // document, and descending through last children down to the last open + // block. + // Each block imposes a condition that the line must satisfy if the block + // is to remain open. + // For example, a block quote requires a `>` character. + // A paragraph requires a non-blank line. + // In this phase we may match all or just some of the open blocks. + // But we cannot close unmatched blocks yet, because we may have a lazy + // continuation line. + if info.continued < info.stack.len() { + let name = &info.stack[info.continued]; + // To do: list. + let cont = if name == "blockquote" { + block_quote_cont + } else { + unreachable!("todo: cont construct {:?}", name) + }; + + // To do: state? + + tokenizer.attempt(cont, move |ok| { + if ok { + Box::new(|t, c| document_continue(t, c, info)) + } else { + Box::new(|t, c| check_new_containers(t, c, info)) + } + })(tokenizer, code) + } else { + // Done. + check_new_containers(tokenizer, code, info) + } +} + +fn document_continue( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, +) -> StateFnResult { + println!("document_continue"); + info.continued += 1; + + println!(" to do: close flow sometimes?"); + // // Note: this field is called `_closeFlow` but it also closes containers. + // // Perhaps a good idea to rename it but it’s already used in the wild by + // // extensions. + // if (self.containerState._closeFlow) { + // self.containerState._closeFlow = undefined + + // if (childFlow) { + // closeFlow() + // } + + // // Note: this algorithm for moving events around is similar to the + // // algorithm when dealing with lazy lines in `writeToChild`. + // const indexBeforeExits = self.events.length + // let indexBeforeFlow = indexBeforeExits + // /** @type {Point|undefined} */ + // let point + + // // Find the flow chunk. + // while (indexBeforeFlow--) { + // if ( + // self.events[indexBeforeFlow][0] === 'exit' && + // self.events[indexBeforeFlow][1].type === types.chunkFlow + // ) { + // point = self.events[indexBeforeFlow][1].end + // break + // } + // } + + // assert(point, 'could not find previous flow chunk') + + let size = info.continued; + exit_containers(tokenizer, &mut info, size); + + // // Fix positions. + // let index = indexBeforeExits + + // while (index < self.events.length) { + // self.events[index][1].end = Object.assign({}, point) + // index++ + // } + + // // Inject the exits earlier (they’re still also at the end). + // splice( + // self.events, + // indexBeforeFlow + 1, + // 0, + // self.events.slice(indexBeforeExits) + // ) + + // // Discard the duplicate exits. + // self.events.length = index + + // return checkNewContainers(code) + // } + + before(tokenizer, code, info) +} +// documentContinue + +fn check_new_containers( + tokenizer: &mut Tokenizer, + code: Code, + info: DocumentInfo, +) -> StateFnResult { + println!("check_new_containers"); + // Next, after consuming the continuation markers for existing blocks, we + // look for new block starts (e.g. `>` for a block quote). + // If we encounter a new block start, we close any blocks unmatched in + // step 1 before creating the new block as a child of the last matched + // block. + if info.continued == info.stack.len() { + println!(" to do: concrete? interrupt?"); + // // No need to `check` whether there’s a container, of `exitContainers` + // // would be moot. + // // We can instead immediately `attempt` to parse one. + // if (!childFlow) { + // return documentContinued(code) + // } + + // // If we have concrete content, such as block HTML or fenced code, + // // we can’t have containers “pierce” into them, so we can immediately + // // start. + // if (childFlow.currentConstruct && childFlow.currentConstruct.concrete) { + // return flowStart(code) + // } + + // // If we do have flow, it could still be a blank line, + // // but we’d be interrupting it w/ a new container if there’s a current + // // construct. + // self.interrupt = Boolean( + // childFlow.currentConstruct && !childFlow._gfmTableDynamicInterruptHack + // ) + } + + // Check if there is a new container. + // To do: list. + tokenizer.attempt(block_quote, move |ok| { + if ok { + Box::new(|t, c| there_is_a_new_container(t, c, info, "blockquote".to_string())) + } else { + Box::new(|t, c| there_is_no_new_container(t, c, info)) + } + })(tokenizer, code) +} + +fn there_is_a_new_container( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, + name: String, +) -> StateFnResult { + println!("there_is_a_new_container"); + println!(" todo: close_flow"); + // if (childFlow) closeFlow() + let size = info.continued; + exit_containers(tokenizer, &mut info, size); + info.stack.push(name); + info.continued += 1; + document_continued(tokenizer, code, info) +} + +/// Exit open containers. +fn exit_containers(tokenizer: &mut Tokenizer, info: &mut DocumentInfo, size: usize) { + while info.stack.len() > size { + let name = info.stack.pop().unwrap(); + + // To do: list. + let end = if name == "blockquote" { + block_quote_end + } else { + unreachable!("todo: cont {:?}", name) + }; + + // To do: improve below code. + let insert_index = if let Some(index) = info.last_line_ending_index { + index + } else { + tokenizer.events.len() + }; + let eol_point = if let Some(index) = info.last_line_ending_index { + tokenizer.events[index].point.clone() + } else { + tokenizer.point.clone() + }; + let eol_index = if let Some(index) = info.last_line_ending_index { + tokenizer.events[index].index + } else { + tokenizer.index + }; + + let token_types = end(); + + let mut index = 0; + while index < token_types.len() { + let token_type = &token_types[index]; + + info.map.add( + insert_index, + 0, + vec![Event { + event_type: EventType::Exit, + token_type: token_type.clone(), + point: eol_point.clone(), + index: eol_index, + previous: None, + next: None, + content_type: None, + }], + ); + + let mut stack_index = tokenizer.stack.len(); + + while stack_index > 0 { + stack_index -= 1; + + if tokenizer.stack[stack_index] == *token_type { + break; + } + } + + assert_eq!( + tokenizer.stack[stack_index], *token_type, + "expected token type" + ); + tokenizer.stack.remove(stack_index); + + index += 1; + } + } +} + +fn there_is_no_new_container( + tokenizer: &mut Tokenizer, + code: Code, + info: DocumentInfo, +) -> StateFnResult { + let lazy = info.continued != info.stack.len(); + tokenizer.lazy = lazy; + println!("there is no new container"); + if lazy { + println!( + " This line will be lazy. Depending on what is parsed now, we need to close containers before?" + ); + } + // lineStartOffset = self.now().offset + flow_start(tokenizer, code, info) +} + +fn document_continued(tokenizer: &mut Tokenizer, code: Code, info: DocumentInfo) -> StateFnResult { + println!("document_continued"); + + // Try new containers. + // To do: list. + tokenizer.attempt(block_quote, |ok| { + if ok { + Box::new(|t, c| container_continue(t, c, info)) + } else { + Box::new(|t, c| { + // To do: this looks like a bug? + t.lazy = false; + flow_start(t, c, info) + }) + } + })(tokenizer, code) +} + +fn container_continue( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, +) -> StateFnResult { + println!("container_continue"); + // assert( + // self.currentConstruct, + // 'expected `currentConstruct` to be defined on tokenizer' + // ) + // assert( + // self.containerState, + // 'expected `containerState` to be defined on tokenizer' + // ) + info.continued += 1; + // To do: add to stack? + // stack.push([self.currentConstruct, self.containerState]) + // Try another. + document_continued(tokenizer, code, info) +} + +fn flow_start(tokenizer: &mut Tokenizer, code: Code, mut info: DocumentInfo) -> StateFnResult { + println!("flow_start"); + let next = info.next; + info.next = Box::new(flow); // This is weird but Rust needs a function there. + + let size = info.continued; + exit_containers(tokenizer, &mut info, size); + + tokenizer.go_until(next, eof_eol, move |(state, remainder)| { + ( + State::Fn(Box::new(move |t, c| flow_end(t, c, info, state))), + remainder, + ) + })(tokenizer, code) +} + +fn flow_end( + tokenizer: &mut Tokenizer, + code: Code, + mut info: DocumentInfo, + result: State, +) -> StateFnResult { + println!("flow_end"); + let was_lazy = tokenizer.lazy; + + if was_lazy { + println!( + "this line was lazy. Depeding on what was parsed, we need to exit containers after it?" + ); + } + + info.continued = 0; + + // To do: blank lines? Other things? + if tokenizer.events.len() > 2 + && tokenizer.events[tokenizer.events.len() - 1].token_type == TokenType::LineEnding + { + info.last_line_ending_index = Some(tokenizer.events.len() - 2); + } else { + info.last_line_ending_index = None; + } + + match result { + State::Ok => { + println!("State::Ok"); + exit_containers(tokenizer, &mut info, 0); + tokenizer.events = info.map.consume(&mut tokenizer.events); + (State::Ok, Some(vec![code])) + } + State::Nok => unreachable!("handle nok in `flow`?"), + State::Fn(func) => { + info.next = func; + before(tokenizer, code, info) + } + } +} + +fn eof_eol(code: Code) -> bool { + matches!( + code, + Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') + ) +} diff --git a/src/content/flow.rs b/src/content/flow.rs index 74c6a62..f406685 100644 --- a/src/content/flow.rs +++ b/src/content/flow.rs @@ -26,52 +26,7 @@ use crate::construct::{ html_flow::start as html_flow, paragraph::start as paragraph, thematic_break::start as thematic_break, }; -use crate::parser::ParseState; -use crate::subtokenize::subtokenize; -use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer}; -use crate::util::{ - normalize_identifier::normalize_identifier, - span::{from_exit_event, serialize}, -}; -use std::collections::HashSet; - -/// Turn `codes` as the flow content type into events. -pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Event> { - let mut tokenizer = Tokenizer::new(point, index, parse_state); - tokenizer.push(&parse_state.codes, Box::new(start), true); - let mut next_definitions: HashSet<String> = HashSet::new(); - - let mut index = 0; - - while index < tokenizer.events.len() { - let event = &tokenizer.events[index]; - - if event.event_type == EventType::Exit - && event.token_type == TokenType::DefinitionLabelString - { - next_definitions.insert(normalize_identifier( - serialize( - &parse_state.codes, - &from_exit_event(&tokenizer.events, index), - false, - ) - .as_str(), - )); - } - - index += 1; - } - - let mut result = (tokenizer.events, false); - - parse_state.definitions = next_definitions; - - while !result.1 { - result = subtokenize(result.0, parse_state); - } - - result.0 -} +use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer}; /// Before flow. /// @@ -83,7 +38,7 @@ pub fn flow(parse_state: &mut ParseState, point: Point, index: usize) -> Vec<Eve /// | bravo /// |*** /// ``` -fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { +pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { match code { Code::None => (State::Ok, None), _ => tokenizer.attempt(blank_line, |ok| { diff --git a/src/content/mod.rs b/src/content/mod.rs index ae8ad83..af40cc0 100644 --- a/src/content/mod.rs +++ b/src/content/mod.rs @@ -1,5 +1,11 @@ //! Content types found in markdown. +//! +//! * [document][document] +//! * [flow][flow] +//! * [string][string] +//! * [text][text] +pub mod document; pub mod flow; pub mod string; pub mod text; diff --git a/src/parser.rs b/src/parser.rs index 69dd355..b1fd4fd 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; // To do: this should start with `containers`, when they’re done. -use crate::content::flow::flow; +use crate::content::document::document; use crate::tokenizer::{Code, Event, Point}; use crate::util::codes::parse as parse_codes; @@ -27,7 +27,7 @@ pub fn parse(value: &str) -> (Vec<Event>, ParseState) { definitions: HashSet::new(), }; - let events = flow( + let events = document( &mut parse_state, Point { line: 1, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 8c11a68..cbcc464 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -1702,6 +1702,10 @@ pub enum TokenType { /// /// > 👉 **Note**: this is used while parsing but compiled away. AttentionSequence, + BlockQuote, + BlockQuoteMarker, + BlockQuotePrefix, + BlockQuotePrefixWhitespace, } /// Embedded content type. @@ -1841,6 +1845,7 @@ struct InternalState { // #[derive(Debug)] /// A tokenizer itself. +#[allow(clippy::struct_excessive_bools)] pub struct Tokenizer<'a> { column_start: HashMap<usize, usize>, /// Track whether a character is expected to be consumed, and whether it’s @@ -1855,15 +1860,15 @@ pub struct Tokenizer<'a> { /// Hierarchy of semantic labels. /// /// Tracked to make sure everything’s valid. - stack: Vec<TokenType>, + pub stack: Vec<TokenType>, /// Previous character code. pub previous: Code, /// Current character code. current: Code, /// `index` in codes of the current code. - index: usize, + pub index: usize, /// Current relative and absolute place in the file. - point: Point, + pub point: Point, /// List of attached resolvers, which will be called when done feeding, /// to clean events. resolvers: Vec<Box<Resolver>>, @@ -1887,6 +1892,7 @@ pub struct Tokenizer<'a> { /// /// Used when tokenizing [flow content][crate::content::flow]. pub interrupt: bool, + pub lazy: bool, } impl<'a> Tokenizer<'a> { @@ -1907,6 +1913,7 @@ impl<'a> Tokenizer<'a> { label_start_list_loose: vec![], media_list: vec![], interrupt: false, + lazy: false, resolvers: vec![], resolver_ids: vec![], } @@ -2120,7 +2127,8 @@ impl<'a> Tokenizer<'a> { state_fn, until, vec![], - |result: (Vec<Code>, Vec<Code>), _ok, _tokenizer: &mut Tokenizer, state| { + |result: (Vec<Code>, Vec<Code>), _ok, tokenizer: &mut Tokenizer, state| { + tokenizer.consumed = true; done(check_statefn_result((state, Some(result.1)))) }, ) @@ -2262,6 +2270,20 @@ fn attempt_impl( done: impl FnOnce((Vec<Code>, Vec<Code>), bool, &mut Tokenizer, State) -> StateFnResult + 'static, ) -> Box<StateFn> { Box::new(|tokenizer, code| { + // To do: `pause` is currently used after the code. + // Should it be before? + // How to match `eof`? + if !codes.is_empty() && pause(tokenizer.previous) { + tokenizer.consumed = true; + println!("pause!: {:?}", (codes.clone(), vec![code])); + return done( + (codes, vec![code]), + false, + tokenizer, + State::Fn(Box::new(state)), + ); + } + let (next, remainder) = check_statefn_result(state(tokenizer, code)); match code { @@ -2278,14 +2300,6 @@ fn attempt_impl( ); } - // To do: `pause` is currently used after the code. - // Should it be before? - if pause(code) { - tokenizer.consumed = true; - let remaining = if let Some(x) = remainder { x } else { vec![] }; - return done((codes, remaining), false, tokenizer, next); - } - match next { State::Ok => { let remaining = if let Some(x) = remainder { x } else { vec![] }; diff --git a/src/util/edit_map.rs b/src/util/edit_map.rs index ae627c1..f67a8b9 100644 --- a/src/util/edit_map.rs +++ b/src/util/edit_map.rs @@ -48,6 +48,7 @@ fn shift_links(events: &mut [Event], jumps: &[(usize, isize)]) { /// Make it easy to insert and remove things while being performant and keeping /// links in check. +#[derive(Debug)] pub struct EditMap { /// Whether this map was consumed already. consumed: bool, diff --git a/src/util/mod.rs b/src/util/mod.rs index d1a0e01..ae1add6 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -6,4 +6,5 @@ pub mod edit_map; pub mod encode; pub mod normalize_identifier; pub mod sanitize_uri; +pub mod skip; pub mod span; diff --git a/src/util/skip.rs b/src/util/skip.rs new file mode 100644 index 0000000..2c4198a --- /dev/null +++ b/src/util/skip.rs @@ -0,0 +1,44 @@ +use crate::tokenizer::{Event, TokenType}; + +/// To do. +pub fn opt(events: &[Event], index: usize, token_types: &[TokenType]) -> usize { + skip_opt_with_direction(events, index, token_types, true) +} + +/// To do. +pub fn opt_back(events: &[Event], index: usize, token_types: &[TokenType]) -> usize { + skip_opt_with_direction(events, index, token_types, false) +} + +/// To do. +fn skip_opt_with_direction( + events: &[Event], + index: usize, + token_types: &[TokenType], + forward: bool, +) -> usize { + let mut index = index; + + while index < events.len() { + let current = &events[index].token_type; + + if !token_types.contains(current) { + break; + } + + // assert_eq!(events[index].event_type, EventType::Enter); + index = if forward { index + 1 } else { index - 1 }; + + loop { + if events[index].token_type == *current { + // assert_eq!(events[index].event_type, EventType::Exit); + index = if forward { index + 1 } else { index - 1 }; + break; + } + + index = if forward { index + 1 } else { index - 1 }; + } + } + + index +} diff --git a/tests/autolink.rs b/tests/autolink.rs index 9c28834..7396c7a 100644 --- a/tests/autolink.rs +++ b/tests/autolink.rs @@ -10,12 +10,6 @@ const DANGER: &Options = &Options { #[test] fn autolink() { assert_eq!( - micromark("```\n<\n >\n```"), - "<pre><code><\n >\n</code></pre>", - "should support fenced code w/ grave accents" - ); - - assert_eq!( micromark("<http://foo.bar.baz>"), "<p><a href=\"http://foo.bar.baz\">http://foo.bar.baz</a></p>", "should support protocol autolinks (1)" diff --git a/tests/block_quote.rs b/tests/block_quote.rs new file mode 100644 index 0000000..908c724 --- /dev/null +++ b/tests/block_quote.rs @@ -0,0 +1,188 @@ +extern crate micromark; +use micromark::micromark; + +#[test] +fn block_quote() { + assert_eq!( + micromark("> # a\n> b\n> c"), + "<blockquote>\n<h1>a</h1>\n<p>b\nc</p>\n</blockquote>", + "should support block quotes" + ); + + assert_eq!( + micromark("># a\n>b\n> c"), + "<blockquote>\n<h1>a</h1>\n<p>b\nc</p>\n</blockquote>", + "should support block quotes w/o space" + ); + + assert_eq!( + micromark(" > # a\n > b\n > c"), + "<blockquote>\n<h1>a</h1>\n<p>b\nc</p>\n</blockquote>", + "should support prefixing block quotes w/ spaces" + ); + + assert_eq!( + micromark(" > # a\n > b\n > c"), + "<pre><code>> # a\n> b\n> c\n</code></pre>", + "should not support block quotes w/ 4 spaces" + ); + + // To do: block quote (lazy). + // assert_eq!( + // micromark("> # a\n> b\nc"), + // "<blockquote>\n<h1>a</h1>\n<p>b\nc</p>\n</blockquote>", + // "should support lazy content lines" + // ); + + // To do: block quote (lazy). + // assert_eq!( + // micromark("> a\nb\n> c"), + // "<blockquote>\n<p>a\nb\nc</p>\n</blockquote>", + // "should support lazy content lines inside block quotes" + // ); + + assert_eq!( + micromark("> a\n> ---"), + "<blockquote>\n<h2>a</h2>\n</blockquote>", + "should support setext headings underlines in block quotes" + ); + + // To do: block quote (lazy, setext underline) + // assert_eq!( + // micromark("> a\n---"), + // "<blockquote>\n<p>a</p>\n</blockquote>\n<hr />", + // "should not support lazy setext headings underlines in block quotes" + // ); + + // To do: list. + // assert_eq!( + // micromark("> - a\n> - b"), + // "<blockquote>\n<ul>\n<li>a</li>\n<li>b</li>\n</ul>\n</blockquote>", + // "should support lists in block quotes" + // ); + + // To do: list. + // assert_eq!( + // micromark("> - a\n- b"), + // "<blockquote>\n<ul>\n<li>a</li>\n</ul>\n</blockquote>\n<ul>\n<li>b</li>\n</ul>", + // "should not support lazy lists in block quotes" + // ); + + // To do: block quote (lazy, code (indented)). + // assert_eq!( + // micromark("> a\n b"), + // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>b\n</code></pre>", + // "should not support lazy indented code in block quotes" + // ); + + // To do: block quote (lazy, code (fenced)). + // assert_eq!( + // micromark("> ```\na\n```"), + // "<blockquote>\n<pre><code></code></pre>\n</blockquote>\n<p>a</p>\n<pre><code></code></pre>\n", + // "should not support lazy fenced code in block quotes" + // ); + + // To do: list. + // assert_eq!( + // micromark("> a\n - b"), + // "<blockquote>\n<p>a\n- b</p>\n</blockquote>", + // "should not support lazy indented code (or lazy list) in block quotes" + // ); + + assert_eq!( + micromark(">"), + "<blockquote>\n</blockquote>", + "should support empty block quotes (1)" + ); + + assert_eq!( + micromark(">\n> \n> "), + "<blockquote>\n</blockquote>", + "should support empty block quotes (2)" + ); + + assert_eq!( + micromark(">\n> a\n> "), + "<blockquote>\n<p>a</p>\n</blockquote>", + "should support initial or final lazy empty block quote lines" + ); + + assert_eq!( + micromark("> a\n\n> b"), + "<blockquote>\n<p>a</p>\n</blockquote>\n<blockquote>\n<p>b</p>\n</blockquote>", + "should support adjacent block quotes" + ); + + assert_eq!( + micromark("> a\n> b"), + "<blockquote>\n<p>a\nb</p>\n</blockquote>", + "should support a paragraph in a block quote" + ); + + assert_eq!( + micromark("> a\n>\n> b"), + "<blockquote>\n<p>a</p>\n<p>b</p>\n</blockquote>", + "should support adjacent paragraphs in block quotes" + ); + + assert_eq!( + micromark("a\n> b"), + "<p>a</p>\n<blockquote>\n<p>b</p>\n</blockquote>", + "should support interrupting paragraphs w/ block quotes" + ); + + assert_eq!( + micromark("> a\n***\n> b"), + "<blockquote>\n<p>a</p>\n</blockquote>\n<hr />\n<blockquote>\n<p>b</p>\n</blockquote>", + "should support interrupting block quotes w/ thematic breaks" + ); + + // To do: block quote (lazy). + // assert_eq!( + // micromark("> a\nb"), + // "<blockquote>\n<p>a\nb</p>\n</blockquote>", + // "should not support interrupting block quotes w/ paragraphs" + // ); + + assert_eq!( + micromark("> a\n\nb"), + "<blockquote>\n<p>a</p>\n</blockquote>\n<p>b</p>", + "should support interrupting block quotes w/ blank lines" + ); + + assert_eq!( + micromark("> a\n>\nb"), + "<blockquote>\n<p>a</p>\n</blockquote>\n<p>b</p>", + "should not support interrupting a blank line in a block quotes w/ paragraphs" + ); + + // To do: block quote (multi, lazy). + // assert_eq!( + // micromark("> > > a\nb"), + // "<blockquote>\n<blockquote>\n<blockquote>\n<p>a\nb</p>\n</blockquote>\n</blockquote>\n</blockquote>", + // "should not support interrupting many block quotes w/ paragraphs (1)" + // ); + + // To do: block quote (multi, lazy). + // assert_eq!( + // micromark(">>> a\n> b\n>>c"), + // "<blockquote>\n<blockquote>\n<blockquote>\n<p>a\nb\nc</p>\n</blockquote>\n</blockquote>\n</blockquote>", + // "should not support interrupting many block quotes w/ paragraphs (2)" + // ); + + // To do: block quote (some bug). + // assert_eq!( + // micromark("> a\n\n> b"), + // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<blockquote>\n<p>b</p>\n</blockquote>", + // "should support 5 spaces for indented code, not 4" + // ); + + // To do: turning things off. + // assert_eq!( + // micromark("> # a\n> b\n> c", { + // extensions: [{disable: {null: ["blockQuote"]}}] + // }), + // "<p>> # a\n> b\n> c</p>", + // "should support turning off block quotes" + // ); +} diff --git a/tests/code_fenced.rs b/tests/code_fenced.rs index b7d8307..d970c94 100644 --- a/tests/code_fenced.rs +++ b/tests/code_fenced.rs @@ -3,17 +3,19 @@ use micromark::micromark; #[test] fn code_fenced() { - assert_eq!( - micromark("```\n<\n >\n```"), - "<pre><code><\n >\n</code></pre>", - "should support fenced code w/ grave accents" - ); + // To do: concrete constructs (code fenced). + // assert_eq!( + // micromark("```\n<\n >\n```"), + // "<pre><code><\n >\n</code></pre>", + // "should support fenced code w/ grave accents" + // ); - assert_eq!( - micromark("~~~\n<\n >\n~~~"), - "<pre><code><\n >\n</code></pre>", - "should support fenced code w/ tildes" - ); + // To do: concrete constructs (code fenced). + // assert_eq!( + // micromark("~~~\n<\n >\n~~~"), + // "<pre><code><\n >\n</code></pre>", + // "should support fenced code w/ tildes" + // ); assert_eq!( micromark("``\nfoo\n``"), @@ -57,7 +59,7 @@ fn code_fenced() { "should support an eof somewhere in content" ); - // To do: blockquote. + // To do: blockquote (some bug). // assert_eq!( // micromark("> ```\n> aaa\n\nbbb"), // "<blockquote>\n<pre><code>aaa\n</code></pre>\n</blockquote>\n<p>bbb</p>", @@ -227,29 +229,31 @@ fn code_fenced() { "should not support a closing sequence w/ too much indent, regardless of opening sequence (1)" ); - // To do: blockquote. - // assert_eq!( - // micromark("> ```\n>\n>\n>\n\na"), - // "<blockquote>\n<pre><code>\n\n\n</code></pre>\n</blockquote>\n<p>a</p>", - // "should not support a closing sequence w/ too much indent, regardless of opening sequence (2)" + // To do: blockquote (some bug). + // assert_eq!( + // micromark("> ```\n>\n>\n>\n\na"), + // "<blockquote>\n<pre><code>\n\n\n</code></pre>\n</blockquote>\n<p>a</p>", + // "should not support a closing sequence w/ too much indent, regardless of opening sequence (2)" + // ); + + // To do: blockquote (some bug). + // assert_eq!( + // micromark("> ```a\nb"), + // "<blockquote>\n<pre><code class=\"language-a\"></code></pre>\n</blockquote>\n<p>b</p>", + // "should not support lazyness (1)" // ); - // assert_eq!( - // micromark("> ```a\nb"), - // "<blockquote>\n<pre><code class=\"language-a\"></code></pre>\n</blockquote>\n<p>b</p>", - // "should not support lazyness (1)" - // ); - - // assert_eq!( - // micromark("> a\n```b"), - // "<blockquote>\n<p>a</p>\n</blockquote>\n<pre><code class=\"language-b\"></code></pre>\n", - // "should not support lazyness (2)" - // ); - - // assert_eq!( - // micromark("> ```a\n```"), - // "<blockquote>\n<pre><code class=\"language-a\"></code></pre>\n</blockquote>\n<pre><code></code></pre>\n", - // "should not support lazyness (3)" + assert_eq!( + micromark("> a\n```b"), + "<blockquote>\n<p>a</p>\n</blockquote>\n<pre><code class=\"language-b\"></code></pre>\n", + "should not support lazyness (2)" + ); + + // To do: blockquote (lazy). + // assert_eq!( + // micromark("> ```a\n```"), + // "<blockquote>\n<pre><code class=\"language-a\"></code></pre>\n</blockquote>\n<pre><code></code></pre>\n", + // "should not support lazyness (3)" // ); // To do: turning things off. diff --git a/tests/code_indented.rs b/tests/code_indented.rs index 773e3d4..d7cf181 100644 --- a/tests/code_indented.rs +++ b/tests/code_indented.rs @@ -76,48 +76,54 @@ fn code_indented() { "should support trailing whitespace" ); - // To do: blockquote. - // assert_eq!( - // micromark("> a\nb"), - // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<p>b</p>", - // "should not support lazyness (1)" - // ); + // To do: blockquote (some bug). + // assert_eq!( + // micromark("> a\nb"), + // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<p>b</p>", + // "should not support lazyness (1)" + // ); - // assert_eq!( - // micromark("> a\n b"), - // "<blockquote>\n<p>a\nb</p>\n</blockquote>", - // "should not support lazyness (2)" - // ); + // To do: blockquote (lazy). + // assert_eq!( + // micromark("> a\n b"), + // "<blockquote>\n<p>a\nb</p>\n</blockquote>", + // "should not support lazyness (2)" + // ); - // assert_eq!( - // micromark("> a\n b"), - // "<blockquote>\n<p>a\nb</p>\n</blockquote>", - // "should not support lazyness (3)" - // ); + // To do: blockquote (lazy). + // assert_eq!( + // micromark("> a\n b"), + // "<blockquote>\n<p>a\nb</p>\n</blockquote>", + // "should not support lazyness (3)" + // ); - // assert_eq!( - // micromark("> a\n b"), - // "<blockquote>\n<p>a\nb</p>\n</blockquote>", - // "should not support lazyness (4)" - // ); + // To do: blockquote (lazy). + // assert_eq!( + // micromark("> a\n b"), + // "<blockquote>\n<p>a\nb</p>\n</blockquote>", + // "should not support lazyness (4)" + // ); - // assert_eq!( - // micromark("> a\n b"), - // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>b\n</code></pre>", - // "should not support lazyness (5)" - // ); + // To do: blockquote (lazy). + // assert_eq!( + // micromark("> a\n b"), + // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code>b\n</code></pre>", + // "should not support lazyness (5)" + // ); - // assert_eq!( - // micromark("> a\n b"), - // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code> b\n</code></pre>", - // "should not support lazyness (6)" - // ); + // To do: blockquote (lazy). + // assert_eq!( + // micromark("> a\n b"), + // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code> b\n</code></pre>", + // "should not support lazyness (6)" + // ); - // assert_eq!( - // micromark("> a\n b"), - // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code> b\n</code></pre>", - // "should not support lazyness (7)" - // ); + // To do: blockquote (lazy). + // assert_eq!( + // micromark("> a\n b"), + // "<blockquote>\n<pre><code>a\n</code></pre>\n</blockquote>\n<pre><code> b\n</code></pre>", + // "should not support lazyness (7)" + // ); // To do: turning things off. // assert_eq!( diff --git a/tests/definition.rs b/tests/definition.rs index df99f74..ca8b97c 100644 --- a/tests/definition.rs +++ b/tests/definition.rs @@ -165,7 +165,7 @@ fn definition() { "should not support definitions in paragraphs" ); - // To do: block quote. + // To do: block quote (some bug). // assert_eq!( // micromark("# [Foo]\n[foo]: /url\n> bar"), // "<h1><a href=\"/url\">Foo</a></h1>\n<blockquote>\n<p>bar</p>\n</blockquote>", @@ -192,7 +192,7 @@ fn definition() { "should support definitions after definitions" ); - // To do: block quote. + // To do: block quote (some bug). // assert_eq!( // micromark("> [foo]: /url\n\n[foo]"), // "<blockquote>\n</blockquote>\n<p><a href=\"/url\">foo</a></p>", diff --git a/tests/heading_atx.rs b/tests/heading_atx.rs index c9aa803..b7c87fe 100644 --- a/tests/heading_atx.rs +++ b/tests/heading_atx.rs @@ -182,12 +182,11 @@ fn heading_atx() { "should support empty atx headings" ); - // To do: block quote. - // assert_eq!( - // micromark("> #\na"), - // "<blockquote>\n<h1></h1>\n</blockquote>\n<p>a</p>", - // "should not support lazyness (1)" - // ); + assert_eq!( + micromark("> #\na"), + "<blockquote>\n<h1></h1>\n</blockquote>\n<p>a</p>", + "should not support lazyness (1)" + ); // assert_eq!( // micromark("> a\n#"), diff --git a/tests/heading_setext.rs b/tests/heading_setext.rs index 3c8b892..a42b8e5 100644 --- a/tests/heading_setext.rs +++ b/tests/heading_setext.rs @@ -129,14 +129,13 @@ fn heading_setext() { "should precede over inline constructs (2)" ); - // To do: block quote. - // assert_eq!( - // micromark("> Foo\n---"), - // "<blockquote>\n<p>Foo</p>\n</blockquote>\n<hr />", - // "should not allow underline to be lazy (1)" - // ); + assert_eq!( + micromark("> Foo\n---"), + "<blockquote>\n<p>Foo</p>\n</blockquote>\n<hr />", + "should not allow underline to be lazy (1)" + ); - // To do: block quote. + // To do: block quote (lazy). // assert_eq!( // micromark("> foo\nbar\n==="), // "<blockquote>\n<p>foo\nbar\n===</p>\n</blockquote>", @@ -187,12 +186,11 @@ fn heading_setext() { "should prefer other constructs over setext headings (3)" ); - // To do: block quote. - // assert_eq!( - // micromark("> foo\n-----"), - // "<blockquote>\n<p>foo</p>\n</blockquote>\n<hr />", - // "should prefer other constructs over setext headings (4)" - // ); + assert_eq!( + micromark("> foo\n-----"), + "<blockquote>\n<p>foo</p>\n</blockquote>\n<hr />", + "should prefer other constructs over setext headings (4)" + ); assert_eq!( micromark("\\> foo\n------"), @@ -249,14 +247,14 @@ fn heading_setext() { "should prefer a setext heading over an interrupting list" ); - // To do: block quote. + // To do: block quote (lazy). // assert_eq!( // micromark("> ===\na"), // "<blockquote>\n<p>===\na</p>\n</blockquote>", // "should not support lazyness (1)" // ); - // To do: block quote. + // To do: block quote (lazy). // assert_eq!( // micromark("> a\n==="), // "<blockquote>\n<p>a\n===</p>\n</blockquote>", diff --git a/tests/html_flow.rs b/tests/html_flow.rs index 348da8d..e53b47e 100644 --- a/tests/html_flow.rs +++ b/tests/html_flow.rs @@ -171,18 +171,18 @@ p {color:blue;} "should support blank lines in raw" ); - // To do: block quote. + // To do: block quote (lazy). // assert_eq!( // micromark_with_options("> <script>\na", DANGER), // "<blockquote>\n<script>\n</blockquote>\n<p>a</p>", // "should not support lazyness (1)" // ); - // assert_eq!( - // micromark_with_options("> a\n<script>", DANGER), - // "<blockquote>\n<p>a</p>\n</blockquote>\n<script>", - // "should not support lazyness (2)" - // ); + assert_eq!( + micromark_with_options("> a\n<script>", DANGER), + "<blockquote>\n<p>a</p>\n</blockquote>\n<script>", + "should not support lazyness (2)" + ); } #[test] @@ -270,18 +270,18 @@ fn html_flow_2_comment() { "should support blank lines in comments" ); - // To do: blockquote. + // To do: blockquote (lazy). // assert_eq!( // micromark_with_options("> <!--\na", DANGER), // "<blockquote>\n<!--\n</blockquote>\n<p>a</p>", // "should not support lazyness (1)" // ); - // assert_eq!( - // micromark_with_options("> a\n<!--", DANGER), - // "<blockquote>\n<p>a</p>\n</blockquote>\n<!--", - // "should not support lazyness (2)" - // ); + assert_eq!( + micromark_with_options("> a\n<!--", DANGER), + "<blockquote>\n<p>a</p>\n</blockquote>\n<!--", + "should not support lazyness (2)" + ); } #[test] @@ -317,18 +317,18 @@ fn html_flow_3_instruction() { "should support blank lines in instructions" ); - // To do: blockquote. + // To do: blockquote (lazy). // assert_eq!( // micromark_with_options("> <?\na", DANGER), // "<blockquote>\n<?\n</blockquote>\n<p>a</p>", // "should not support lazyness (1)" // ); - // assert_eq!( - // micromark_with_options("> a\n<?", DANGER), - // "<blockquote>\n<p>a</p>\n</blockquote>\n<?", - // "should not support lazyness (2)" - // ); + assert_eq!( + micromark_with_options("> a\n<?", DANGER), + "<blockquote>\n<p>a</p>\n</blockquote>\n<?", + "should not support lazyness (2)" + ); } #[test] @@ -366,24 +366,25 @@ fn html_flow_4_declaration() { // Note about the lower letter: // <https://github.com/commonmark/commonmark-spec/pull/621> - assert_eq!( - micromark_with_options("<!a\n \n \n>", DANGER), - "<!a\n \n \n>", - "should support blank lines in declarations" - ); + // To do: concrete constructs (html flow). + // assert_eq!( + // micromark_with_options("<!a\n \n \n>", DANGER), + // "<!a\n \n \n>", + // "should support blank lines in declarations" + // ); - // To do: blockquote. + // To do: blockquote (lazy). // assert_eq!( // micromark_with_options("> <!a\nb", DANGER), // "<blockquote>\n<!a\n</blockquote>\n<p>b</p>", // "should not support lazyness (1)" // ); - // assert_eq!( - // micromark_with_options("> a\n<!b", DANGER), - // "<blockquote>\n<p>a</p>\n</blockquote>\n<!b", - // "should not support lazyness (2)" - // ); + assert_eq!( + micromark_with_options("> a\n<!b", DANGER), + "<blockquote>\n<p>a</p>\n</blockquote>\n<!b", + "should not support lazyness (2)" + ); } #[test] @@ -436,18 +437,18 @@ fn html_flow_5_cdata() { "should support blank lines in cdata" ); - // To do: blockquote. + // To do: blockquote (lazy). // assert_eq!( // micromark_with_options("> <![CDATA[\na", DANGER), // "<blockquote>\n<![CDATA[\n</blockquote>\n<p>a</p>", // "should not support lazyness (1)" // ); - // assert_eq!( - // micromark_with_options("> a\n<![CDATA[", DANGER), - // "<blockquote>\n<p>a</p>\n</blockquote>\n<![CDATA[", - // "should not support lazyness (2)" - // ); + assert_eq!( + micromark_with_options("> a\n<![CDATA[", DANGER), + "<blockquote>\n<p>a</p>\n</blockquote>\n<![CDATA[", + "should not support lazyness (2)" + ); } #[test] @@ -557,7 +558,7 @@ okay.", "should include everything ’till a blank line" ); - // To do: blockquote. + // To do: blockquote (some bug). // assert_eq!( // micromark_with_options("> <div>\n> foo\n\nbar", DANGER), // "<blockquote>\n<div>\nfoo\n</blockquote>\n<p>bar</p>", @@ -709,24 +710,24 @@ okay.", "should support interrupting paragraphs w/ self-closing basic tags" ); - // To do: block quote. - // assert_eq!( - // micromark_with_options("<div\n \n \n>", DANGER), - // "<div\n<blockquote>\n</blockquote>", - // "should not support blank lines in basic" - // ); + assert_eq!( + micromark_with_options("<div\n \n \n>", DANGER), + "<div\n<blockquote>\n</blockquote>", + "should not support blank lines in basic" + ); + // To do: block quote (some bug). // assert_eq!( // micromark_with_options("> <div\na", DANGER), // "<blockquote>\n<div\n</blockquote>\n<p>a</p>", // "should not support lazyness (1)" // ); - // assert_eq!( - // micromark_with_options("> a\n<div", DANGER), - // "<blockquote>\n<p>a</p>\n</blockquote>\n<div", - // "should not support lazyness (2)" - // ); + assert_eq!( + micromark_with_options("> a\n<div", DANGER), + "<blockquote>\n<p>a</p>\n</blockquote>\n<div", + "should not support lazyness (2)" + ); } #[test] @@ -1013,19 +1014,20 @@ fn html_flow_7_complete() { "should not support an attribute after a double quoted attribute value" ); - // To do: blockquote. - // assert_eq!( - // micromark_with_options("<x>\n \n \n>", DANGER), - // "<x>\n<blockquote>\n</blockquote>", - // "should not support blank lines in complete" - // ); + assert_eq!( + micromark_with_options("<x>\n \n \n>", DANGER), + "<x>\n<blockquote>\n</blockquote>", + "should not support blank lines in complete" + ); + // To do: blockquote (some bug). // assert_eq!( // micromark_with_options("> <a>\n*bar*", DANGER), // "<blockquote>\n<a>\n</blockquote>\n<p><em>bar</em></p>", // "should not support lazyness (1)" // ); + // To do: blockquote (lazy). // assert_eq!( // micromark_with_options("> a\n<a>", DANGER), // "<blockquote>\n<p>a</p>\n</blockquote>\n<a>", diff --git a/tests/misc_default_line_ending.rs b/tests/misc_default_line_ending.rs index fb4e1df..8c2f047 100644 --- a/tests/misc_default_line_ending.rs +++ b/tests/misc_default_line_ending.rs @@ -1,56 +1,57 @@ extern crate micromark; -// use micromark::{micromark, micromark_with_options, Options}; +use micromark::{micromark, micromark_with_options, LineEnding, Options}; #[test] fn default_line_ending() { - // To do: blockquote. - // assert_eq!( - // micromark("> a"), - // "<blockquote>\n<p>a</p>\n</blockquote>", - // "should use `\\n` default" - // ); + assert_eq!( + micromark("> a"), + "<blockquote>\n<p>a</p>\n</blockquote>", + "should use `\\n` default" + ); - // assert_eq!( - // micromark("> a\n"), - // "<blockquote>\n<p>a</p>\n</blockquote>\n", - // "should infer the first line ending (1)" - // ); + assert_eq!( + micromark("> a\n"), + "<blockquote>\n<p>a</p>\n</blockquote>\n", + "should infer the first line ending (1)" + ); - // assert_eq!( - // micromark("> a\r"), - // "<blockquote>\r<p>a</p>\r</blockquote>\r", - // "should infer the first line ending (2)" - // ); + assert_eq!( + micromark("> a\r"), + "<blockquote>\r<p>a</p>\r</blockquote>\r", + "should infer the first line ending (2)" + ); - // assert_eq!( - // micromark("> a\r\n"), - // "<blockquote>\r\n<p>a</p>\r\n</blockquote>\r\n", - // "should infer the first line ending (3)" - // ); + assert_eq!( + micromark("> a\r\n"), + "<blockquote>\r\n<p>a</p>\r\n</blockquote>\r\n", + "should infer the first line ending (3)" + ); - // assert_eq!( - // micromark_with_options( - // "> a", - // &Options { - // // default_line_ending: "\r", - // allow_dangerous_html: false, - // allow_dangerous_protocol: false - // } - // ), - // "<blockquote>\r<p>a</p>\r</blockquote>", - // "should support the given line ending" - // ); + assert_eq!( + micromark_with_options( + "> a", + &Options { + default_line_ending: Some(LineEnding::CarriageReturn), + allow_dangerous_html: false, + allow_dangerous_protocol: false + } + ), + "<blockquote>\r<p>a</p>\r</blockquote>", + "should support the given line ending" + ); - // assert_eq!( - // micromark_with_options( - // "> a\n", - // &Options { - // // default_line_ending: "\r", - // allow_dangerous_html: false, - // allow_dangerous_protocol: false - // } - // ), - // "<blockquote>\r<p>a</p>\r</blockquote>\n", - // "should support the given line ending, even if line endings exist" - // ); + assert_eq!( + micromark_with_options( + "> a\n", + &Options { + default_line_ending: Some(LineEnding::CarriageReturn), + allow_dangerous_html: false, + allow_dangerous_protocol: false + } + ), + // To do: is this a bug in `micromark.js` that it uses `\r` for earlier line endings? + // "<blockquote>\r<p>a</p>\r</blockquote>\n", + "<blockquote>\n<p>a</p>\n</blockquote>\n", + "should support the given line ending, even if line endings exist" + ); } diff --git a/tests/thematic_break.rs b/tests/thematic_break.rs index 03f1b7a..7a15c32 100644 --- a/tests/thematic_break.rs +++ b/tests/thematic_break.rs @@ -148,19 +148,18 @@ fn thematic_break() { "should not support thematic breaks w/ dashes interrupting paragraphs (setext heading)" ); - // To do: list. + // To do: lists. // assert_eq!( // micromark("- Foo\n- * * *"), // "<ul>\n<li>Foo</li>\n<li>\n<hr />\n</li>\n</ul>", // "should support thematic breaks in lists" // ); - // To do: blockquote. - // assert_eq!( - // micromark("> ---\na"), - // "<blockquote>\n<hr />\n</blockquote>\n<p>a</p>", - // "should not support lazyness (1)" - // ); + assert_eq!( + micromark("> ---\na"), + "<blockquote>\n<hr />\n</blockquote>\n<p>a</p>", + "should not support lazyness (1)" + ); // assert_eq!( // micromark("> a\n---"), |