aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct/paragraph.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-20 17:06:00 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-20 17:06:00 +0200
commit65dd765cceee8bdccc74c08066eec59a579a16b1 (patch)
treecb60ab00039135b6f0a65efcb508f73a8b39aa11 /src/construct/paragraph.rs
parent61271d73128f8553f8c4c17927828cde52a25eba (diff)
downloadmarkdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.gz
markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.tar.bz2
markdown-rs-65dd765cceee8bdccc74c08066eec59a579a16b1.zip
Add improved whitespace handling
* add several helpers for parsing between x and y `space_or_tab`s * use those helpers in a bunch of places * move initial indent parsing to flow constructs themselves
Diffstat (limited to '')
-rw-r--r--src/construct/paragraph.rs68
1 files changed, 21 insertions, 47 deletions
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index 50ef627..fa18f28 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -30,12 +30,11 @@
use crate::constant::TAB_SIZE;
use crate::construct::{
- code_fenced::start as code_fenced, heading_atx::start as heading_atx,
- html_flow::start as html_flow, partial_whitespace::start as whitespace,
- thematic_break::start as thematic_break,
+ blank_line::start as blank_line, code_fenced::start as code_fenced,
+ heading_atx::start as heading_atx, html_flow::start as html_flow,
+ partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::span::from_exit_event;
/// Before a paragraph.
///
@@ -114,7 +113,7 @@ fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::LineEnding);
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
- (State::Fn(Box::new(interrupt_initial)), None)
+ (State::Fn(Box::new(interrupt_start)), None)
}
_ => unreachable!("expected eol"),
}
@@ -123,55 +122,30 @@ fn interrupt(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// After a line ending.
///
/// ```markdown
-/// alpha|
-/// ~~~js
+/// alpha
+/// |~~~js
/// ~~~
/// ```
-fn interrupt_initial(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.attempt_2(code_fenced, html_flow, |ok| {
+fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ // To do: If code is disabled, indented lines are allowed to interrupt.
+ tokenizer.attempt(space_or_tab_min_max(TAB_SIZE, TAB_SIZE), |ok| {
if ok {
- Box::new(|_tokenizer, _code| (State::Nok, None))
+ Box::new(|_t, code| (State::Ok, Some(vec![code])))
} else {
Box::new(|tokenizer, code| {
- tokenizer.attempt(
- |tokenizer, code| whitespace(tokenizer, code, TokenType::Whitespace),
- |_ok| Box::new(interrupt_start),
+ tokenizer.attempt_5(
+ blank_line,
+ code_fenced,
+ html_flow,
+ heading_atx,
+ thematic_break,
+ |ok| {
+ Box::new(move |_t, code| {
+ (if ok { State::Nok } else { State::Ok }, Some(vec![code]))
+ })
+ },
)(tokenizer, code)
})
}
})(tokenizer, code)
}
-
-/// After a line ending, after optional whitespace.
-///
-/// ```markdown
-/// alpha|
-/// # bravo
-/// ```
-fn interrupt_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let tail = tokenizer.events.last();
- let mut prefix = 0;
-
- if let Some(event) = tail {
- if event.token_type == TokenType::Whitespace {
- let span = from_exit_event(&tokenizer.events, tokenizer.events.len() - 1);
- prefix = span.end_index - span.start_index;
- }
- }
-
- match code {
- // Blank lines are not allowed in paragraph.
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
- // To do: If code is disabled, indented lines are allowed.
- _ if prefix >= TAB_SIZE => (State::Ok, None),
- // To do: definitions, setext headings, etc?
- _ => tokenizer.attempt_2(heading_atx, thematic_break, |ok| {
- let result = if ok {
- (State::Nok, None)
- } else {
- (State::Ok, None)
- };
- Box::new(|_t, _c| result)
- })(tokenizer, code),
- }
-}