diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-09 12:49:11 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-09 12:49:11 +0200 |
commit | 433680ae0914da8921c4ee762fdc93e7b70cf9f1 (patch) | |
tree | 760a8eb29afbc17059bdeb682457b8efa2ade7a1 /src | |
parent | 76aaa25e1d6a87c977a23437e67e6f38b1358b4d (diff) | |
download | markdown-rs-433680ae0914da8921c4ee762fdc93e7b70cf9f1.tar.gz markdown-rs-433680ae0914da8921c4ee762fdc93e7b70cf9f1.tar.bz2 markdown-rs-433680ae0914da8921c4ee762fdc93e7b70cf9f1.zip |
Add basic support for interrupting content
Diffstat (limited to '')
-rw-r--r-- | src/compiler.rs | 4 | ||||
-rw-r--r-- | src/content/flow.rs | 164 | ||||
-rw-r--r-- | src/tokenizer.rs | 58 |
3 files changed, 154 insertions, 72 deletions
````diff
diff --git a/src/compiler.rs b/src/compiler.rs
index 166950e..4f362b8 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -62,7 +62,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                         ignore_encode = true;
                     }
                 }
-                TokenType::ContentPhrasing
+                TokenType::ContentChunk
                 | TokenType::AtxHeading
                 | TokenType::AtxHeadingSequence
                 | TokenType::AtxHeadingWhitespace
@@ -280,7 +280,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
                 }
                 // To do: `ContentPhrasing` should be parsed as phrasing first.
                 // This branch below currently acts as the resulting `data` tokens.
-                TokenType::ContentPhrasing
+                TokenType::ContentChunk
                 // To do: `ChunkString` does not belong here. Remove it when subtokenization is supported.
                 | TokenType::ChunkString
                 | TokenType::Data
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 21c5721..6c47a10 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -96,16 +96,14 @@ fn blank_line_after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// ```markdown
 /// |qwe
 /// |    asd
+/// |~~~js
+/// |<div>
 /// ```
 fn initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
         Code::None => (State::Ok, None),
-        _ => tokenizer.attempt(code_indented, |ok| {
-            Box::new(if ok {
-                after
-            } else {
-                initial_before_not_code_indented
-            })
+        _ => tokenizer.attempt_3(code_indented, code_fenced, html_flow, |ok| {
+            Box::new(if ok { after } else { before })
         })(tokenizer, code),
     }
 }
@@ -132,38 +130,6 @@ fn after(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     }
 }
 
-/// Before flow (initial), but not at code (indented).
-///
-/// ```markdown
-/// |qwe
-/// ```
-fn initial_before_not_code_indented(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::None => (State::Ok, None),
-        _ => tokenizer.attempt(code_fenced, |ok| {
-            Box::new(if ok {
-                after
-            } else {
-                initial_before_not_code_fenced
-            })
-        })(tokenizer, code),
-    }
-}
-
-/// Before flow (initial), but not at code (fenced).
-///
-/// ```markdown
-/// |qwe
-/// ```
-fn initial_before_not_code_fenced(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    match code {
-        Code::None => (State::Ok, None),
-        _ => tokenizer.attempt(html_flow, |ok| Box::new(if ok { after } else { before }))(
-            tokenizer, code,
-        ),
-    }
-}
-
 /// Before flow, but not at code (indented) or code (fenced).
 ///
 /// Compared to flow (initial), normal flow can be arbitrarily prefixed.
@@ -181,32 +147,11 @@ pub fn before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 /// Before flow, after potential whitespace.
 ///
 /// ```markdown
-/// |qwe
+/// |# asd
+/// |***
 /// ```
 pub fn before_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(heading_atx, |ok| {
-        Box::new(if ok { after } else { before_not_heading_atx })
-    })(tokenizer, code)
-}
-
-/// Before flow, but not before a heading (atx)
-///
-/// ```markdown
-/// |qwe
-/// ```
-pub fn before_not_heading_atx(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(thematic_break, |ok| {
-        Box::new(if ok { after } else { before_not_thematic_break })
-    })(tokenizer, code)
-}
-
-/// Before flow, but not before a heading (atx) or thematic break.
-///
-/// ```markdown
-/// |qwe
-/// ```
-pub fn before_not_thematic_break(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
-    tokenizer.attempt(html_flow, |ok| {
+    tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
         Box::new(if ok { after } else { content_before })
     })(tokenizer, code)
 }
@@ -231,9 +176,8 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         }
         _ => {
             tokenizer.enter(TokenType::Content);
-            tokenizer.enter(TokenType::ContentPhrasing);
-            tokenizer.consume(code);
-            (State::Fn(Box::new(content)), None)
+            tokenizer.enter(TokenType::ContentChunk);
+            content(tokenizer, code)
         }
     }
 }
@@ -245,10 +189,15 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
 // To do: lift limitations as documented above.
 fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
     match code {
-        Code::None | Code::Char('\n' | '\r') => {
-            tokenizer.exit(TokenType::ContentPhrasing);
-            tokenizer.exit(TokenType::Content);
-            after(tokenizer, code)
+        Code::None => {
+            tokenizer.exit(TokenType::ContentChunk);
+            content_end(tokenizer, code)
+        }
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.exit(TokenType::ContentChunk);
+            tokenizer.check(continuation_construct, |ok| {
+                Box::new(if ok { content_continue } else { content_end })
+            })(tokenizer, code)
         }
         _ => {
             tokenizer.consume(code);
@@ -256,3 +205,80 @@ fn content(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
         }
     }
 }
+
+fn continuation_construct(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    match code {
+        Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
+            tokenizer.enter(TokenType::LineEnding);
+            tokenizer.consume(code);
+            tokenizer.exit(TokenType::LineEnding);
+            (
+                State::Fn(Box::new(continuation_construct_initial_before)),
+                None,
+            )
+        }
+        _ => unreachable!("expected eol"),
+    }
+}
+
+fn continuation_construct_initial_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.attempt_2(code_fenced, html_flow, |ok| {
+        if ok {
+            Box::new(|_tokenizer, _code| (State::Nok, None))
+        } else {
+            Box::new(|tokenizer, code| {
+                tokenizer.attempt(
+                    |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
+                    |_ok| Box::new(continuation_construct_after_prefix),
+                )(tokenizer, code)
+            })
+        }
+    })(tokenizer, code)
+}
+
+fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // let tail = tokenizer.events.last();
+    // let mut prefix = 0;
+
+    // if let Some(event) = tail {
+    //     if event.token_type == TokenType::Whitespace {
+    //         let span = get_span(&tokenizer.events, tokenizer.events.len() - 1);
+    //         prefix = span.end_index - span.start_index;
+    //     }
+    // }
+
+    match code {
+        // Blank lines are not allowed in content.
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
+        // If code is disabled, indented lines are part of the content.
+        // _ if prefix >= TAB_SIZE => {
+        //     (State::Ok, None)
+        // }
+        _ => {
+            println!("to do: check if flow interrupts, assuming it can’t");
+            tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
+                let result = if ok {
+                    (State::Nok, None)
+                } else {
+                    (State::Ok, None)
+                };
+                Box::new(|_t, _c| result)
+            })(tokenizer, code)
+        }
+    }
+}
+
+fn content_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    // To do: should this be part of the content chunk?
+    // That’s what `micromark-js` does.
+    tokenizer.enter(TokenType::LineEnding);
+    tokenizer.consume(code);
+    tokenizer.exit(TokenType::LineEnding);
+    tokenizer.enter(TokenType::ContentChunk);
+    (State::Fn(Box::new(content)), None)
+}
+
+fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+    tokenizer.exit(TokenType::Content);
+    after(tokenizer, code)
+}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index c8b1440..4239520 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -62,7 +62,8 @@ pub enum TokenType {
     BlankLineWhitespace,
 
     Content,
-    ContentPhrasing,
+    ContentChunk,
+
     ChunkString,
 }
 
@@ -377,6 +378,61 @@ impl Tokenizer {
         )
     }
 
+    // To do: lifetimes, boxes, lmao.
+    pub fn attempt_2(
+        &mut self,
+        a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+    ) -> Box<StateFn> {
+        self.call_multiple(false, Some(Box::new(a)), Some(Box::new(b)), None, done)
+    }
+
+    pub fn attempt_3(
+        &mut self,
+        a: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        b: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        c: impl FnOnce(&mut Tokenizer, Code) -> StateFnResult + 'static,
+        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+    ) -> Box<StateFn> {
+        self.call_multiple(
+            false,
+            Some(Box::new(a)),
+            Some(Box::new(b)),
+            Some(Box::new(c)),
+            done,
+        )
+    }
+
+    pub fn call_multiple(
+        &mut self,
+        check: bool,
+        a: Option<Box<StateFn>>,
+        b: Option<Box<StateFn>>,
+        c: Option<Box<StateFn>>,
+        done: impl FnOnce(bool) -> Box<StateFn> + 'static,
+    ) -> Box<StateFn> {
+        if let Some(head) = a {
+            let callback = move |ok| {
+                if ok {
+                    done(ok)
+                } else {
+                    Box::new(move |tokenizer: &mut Tokenizer, code| {
+                        tokenizer.call_multiple(check, b, c, None, done)(tokenizer, code)
+                    })
+                }
+            };
+
+            if check {
+                self.check(head, callback)
+            } else {
+                self.attempt(head, callback)
+            }
+        } else {
+            done(false)
+        }
+    }
+
     /// Feed a list of `codes` into `start`.
     ///
     /// This is set up to support repeatedly calling `feed`, and thus streaming
````