about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Titus Wormer <tituswormer@gmail.com> 2022-06-20 18:14:15 +0200
committer Titus Wormer <tituswormer@gmail.com> 2022-06-20 18:14:15 +0200
commit c7a46d3cc22bd0b029ff97623cee31c6ec38fdfb (patch)
tree cc8991179de6efc57fbefdcd5fbebaeb872e878e
parent 65dd765cceee8bdccc74c08066eec59a579a16b1 (diff)
download markdown-rs-c7a46d3cc22bd0b029ff97623cee31c6ec38fdfb.tar.gz
markdown-rs-c7a46d3cc22bd0b029ff97623cee31c6ec38fdfb.tar.bz2
markdown-rs-c7a46d3cc22bd0b029ff97623cee31c6ec38fdfb.zip
Add support for line endings in string
Diffstat (limited to '')
-rw-r--r-- readme.md 4
-rw-r--r-- src/construct/heading_setext.rs 27
-rw-r--r-- src/construct/paragraph.rs 6
-rw-r--r-- src/construct/partial_destination.rs 6
-rw-r--r-- src/construct/partial_label.rs 84
-rw-r--r-- src/construct/partial_title.rs 33
-rw-r--r-- src/content/string.rs 32
-rw-r--r-- src/util/link.rs 8
-rw-r--r-- src/util/mod.rs 1
-rw-r--r-- tests/character_reference.rs 2
10 files changed, 136 insertions, 67 deletions
diff --git a/readme.md b/readme.md
index 224c5d0..e50a21b 100644
--- a/readme.md
+++ b/readme.md
@@ -66,8 +66,6 @@ cargo doc --document-private-items
### Small things
-- [ ] (1) Connect `ChunkString` in label, destination, title
-- [ ] (1) Add support for line endings in `string`
- [ ] (1) Add docs to subtokenize
- [ ] (1) Add module docs to parser
- [ ] (1) Add overview docs on how everything works
@@ -171,6 +169,8 @@ cargo doc --document-private-items
- [x] (1) Remove `content` content type, as it is no longer needed
- [x] (1) Paragraph
- [x] (1) Parse whitespace in each flow construct
+- [x] (1) Connect `ChunkString` in label, destination, title
+- [x] (1) Add support for line endings in `string`
### Extensions
diff --git a/src/construct/heading_setext.rs b/src/construct/heading_setext.rs
index 64647cb..579fa71 100644
--- a/src/construct/heading_setext.rs
+++ b/src/construct/heading_setext.rs
@@ -52,7 +52,7 @@
use crate::constant::TAB_SIZE;
use crate::construct::partial_space_or_tab::space_or_tab_opt;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
-use crate::util::span::from_exit_event;
+use crate::util::{link::link, span::from_exit_event};
/// Kind of underline.
#[derive(Debug, Clone, PartialEq)]
@@ -133,16 +133,12 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => {
- let next = tokenizer.events.len();
- let previous = next - 2;
-
tokenizer.enter(TokenType::LineEnding);
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
tokenizer.consume(code);
tokenizer.exit(TokenType::LineEnding);
- tokenizer.events[previous].next = Some(next);
- tokenizer.events[next].previous = Some(previous);
-
(
State::Fn(Box::new(tokenizer.go(space_or_tab_opt(), text_line_start))),
None,
@@ -160,27 +156,20 @@ fn text_continue(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ==
/// ```
fn text_line_start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- let next = tokenizer.events.len() - 2;
- let previous = next - 2;
+ let index = tokenizer.events.len() - 2;
// Link the whitespace, if it exists.
- if tokenizer.events[next].token_type == TokenType::Whitespace {
- tokenizer.events[previous].next = Some(next);
- tokenizer.events[next].previous = Some(previous);
+ if tokenizer.events[index].token_type == TokenType::Whitespace {
+ link(&mut tokenizer.events, index);
}
match code {
// Blank lines not allowed.
Code::None | Code::CarriageReturnLineFeed | Code::Char('\n' | '\r') => (State::Nok, None),
_ => {
- let next = tokenizer.events.len();
- let previous = next - 2;
-
tokenizer.enter(TokenType::ChunkText);
-
- tokenizer.events[previous].next = Some(next);
- tokenizer.events[next].previous = Some(previous);
-
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
text_inside(tokenizer, code)
}
}
diff --git a/src/construct/paragraph.rs b/src/construct/paragraph.rs
index fa18f28..b00188d 100644
--- a/src/construct/paragraph.rs
+++ b/src/construct/paragraph.rs
@@ -35,6 +35,7 @@ use crate::construct::{
partial_space_or_tab::space_or_tab_min_max, thematic_break::start as thematic_break,
};
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::link::link;
/// Before a paragraph.
///
@@ -83,9 +84,8 @@ fn at_line_ending(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.consume(code);
tokenizer.exit(TokenType::ChunkText);
tokenizer.enter(TokenType::ChunkText);
- let next_index = tokenizer.events.len() - 1;
- tokenizer.events[next_index - 2].next = Some(next_index);
- tokenizer.events[next_index].previous = Some(next_index - 2);
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
(State::Fn(Box::new(inside)), None)
}
diff --git a/src/construct/partial_destination.rs b/src/construct/partial_destination.rs
index bc95055..901a10d 100644
--- a/src/construct/partial_destination.rs
+++ b/src/construct/partial_destination.rs
@@ -18,8 +18,8 @@
//! They are counted with a counter that starts at `0`, and is incremented
//! every time `(` occurs and decremented every time `)` occurs.
//! If `)` is found when the counter is `0`, the destination closes immediately
-//! after it.
-//! Escaped parens do not count.
+//! before it.
+//! Escaped parens do not count in balancing.
//!
//! It is recommended to use the enclosed variant of destinations, as it allows
//! arbitrary parens, and also allows for whitespace and other characters in
@@ -68,7 +68,6 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.enter(TokenType::DefinitionDestination);
tokenizer.enter(TokenType::DefinitionDestinationRaw);
tokenizer.enter(TokenType::DefinitionDestinationString);
- // To do: link.
tokenizer.enter(TokenType::ChunkString);
raw(tokenizer, code, 0)
}
@@ -90,7 +89,6 @@ fn enclosed_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
(State::Ok, None)
} else {
tokenizer.enter(TokenType::DefinitionDestinationString);
- // To do: link.
tokenizer.enter(TokenType::ChunkString);
enclosed(tokenizer, code)
}
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index 4997390..55efd13 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -56,7 +56,9 @@
// To do: pass token types in.
use crate::constant::LINK_REFERENCE_SIZE_MAX;
+use crate::construct::partial_space_or_tab::space_or_tab_opt;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::link::link;
/// Before a label.
///
@@ -71,7 +73,10 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
tokenizer.consume(code);
tokenizer.exit(TokenType::DefinitionLabelMarker);
tokenizer.enter(TokenType::DefinitionLabelData);
- (State::Fn(Box::new(|t, c| at_break(t, c, false, 0))), None)
+ (
+ State::Fn(Box::new(|t, c| at_break(t, c, false, 0, false))),
+ None,
+ )
}
_ => (State::Nok, None),
}
@@ -83,7 +88,13 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// [|a]
/// [a|]
/// ```
-fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
+fn at_break(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ data: bool,
+ size: usize,
+ connect: bool,
+) -> StateFnResult {
match code {
Code::None | Code::Char('[') => (State::Nok, None),
Code::Char(']') if !data => (State::Nok, None),
@@ -96,24 +107,57 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S
tokenizer.exit(TokenType::DefinitionLabel);
(State::Ok, None)
}
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- // To do: limit blank lines.
- (
- State::Fn(Box::new(move |t, c| at_break(t, c, data, size))),
- None,
- )
- }
_ => {
tokenizer.enter(TokenType::ChunkString);
- // To do: link.
+
+ if connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ }
+
label(tokenizer, code, data, size)
}
}
}
+/// After a line ending.
+///
+/// ```markdown
+/// [a
+/// |b]
+/// ```
+fn line_start(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ data: bool,
+ size: usize,
+ connect: bool,
+) -> StateFnResult {
+ tokenizer.go(space_or_tab_opt(), move |t, c| {
+ line_begin(t, c, data, size, connect)
+ })(tokenizer, code)
+}
+
+/// After a line ending, after optional whitespace.
+///
+/// ```markdown
+/// [a
+/// |b]
+/// ```
+fn line_begin(
+ tokenizer: &mut Tokenizer,
+ code: Code,
+ data: bool,
+ size: usize,
+ connect: bool,
+) -> StateFnResult {
+ match code {
+ // Blank line not allowed.
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
+ _ => at_break(tokenizer, code, data, size, connect),
+ }
+}
+
/// In a label, in text.
///
/// ```markdown
@@ -121,13 +165,21 @@ fn at_break(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> S
/// ```
fn label(tokenizer: &mut Tokenizer, code: Code, data: bool, size: usize) -> StateFnResult {
match code {
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n' | '[' | ']') => {
+ Code::None | Code::Char('[' | ']') => {
tokenizer.exit(TokenType::ChunkString);
- at_break(tokenizer, code, data, size)
+ at_break(tokenizer, code, data, size, true)
}
_ if size > LINK_REFERENCE_SIZE_MAX => {
tokenizer.exit(TokenType::ChunkString);
- at_break(tokenizer, code, data, size)
+ at_break(tokenizer, code, data, size, true)
+ }
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::ChunkString);
+ (
+ State::Fn(Box::new(move |t, c| line_start(t, c, data, size + 1, true))),
+ None,
+ )
}
Code::VirtualSpace | Code::Char('\t' | ' ') => {
tokenizer.consume(code);
diff --git a/src/construct/partial_title.rs b/src/construct/partial_title.rs
index 0669c8e..322a3e6 100644
--- a/src/construct/partial_title.rs
+++ b/src/construct/partial_title.rs
@@ -35,6 +35,7 @@
use crate::construct::partial_space_or_tab::space_or_tab_opt;
use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::link::link;
/// Type of title.
#[derive(Debug, Clone, PartialEq)]
@@ -102,7 +103,7 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
}
_ => {
tokenizer.enter(TokenType::DefinitionTitleString);
- at_break(tokenizer, code, kind)
+ at_break(tokenizer, code, kind, false)
}
}
}
@@ -115,22 +116,19 @@ fn begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
/// (a|
/// b)
/// ```
-fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
+fn at_break(tokenizer: &mut Tokenizer, code: Code, kind: Kind, connect: bool) -> StateFnResult {
match code {
Code::Char(char) if char == kind_to_marker(&kind) => {
tokenizer.exit(TokenType::DefinitionTitleString);
begin(tokenizer, code, kind)
}
Code::None => (State::Nok, None),
- Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
- tokenizer.enter(TokenType::LineEnding);
- tokenizer.consume(code);
- tokenizer.exit(TokenType::LineEnding);
- (State::Fn(Box::new(|t, c| line_start(t, c, kind))), None)
- }
_ => {
- // To do: link.
tokenizer.enter(TokenType::ChunkString);
+ if connect {
+ let index = tokenizer.events.len() - 1;
+ link(&mut tokenizer.events, index);
+ }
title(tokenizer, code, kind)
}
}
@@ -156,7 +154,7 @@ fn line_begin(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResul
match code {
// Blank line not allowed.
Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => (State::Nok, None),
- _ => at_break(tokenizer, code, kind),
+ _ => at_break(tokenizer, code, kind, true),
}
}
@@ -169,11 +167,20 @@ fn title(tokenizer: &mut Tokenizer, code: Code, kind: Kind) -> StateFnResult {
match code {
Code::Char(char) if char == kind_to_marker(&kind) => {
tokenizer.exit(TokenType::ChunkString);
- at_break(tokenizer, code, kind)
+ at_break(tokenizer, code, kind, true)
}
- Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ Code::None => {
+ tokenizer.exit(TokenType::ChunkString);
+ at_break(tokenizer, code, kind, true)
+ }
+ // To do: limit blank lines.
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.consume(code);
tokenizer.exit(TokenType::ChunkString);
- at_break(tokenizer, code, kind)
+ (
+ State::Fn(Box::new(move |t, c| line_start(t, c, kind))),
+ None,
+ )
}
Code::Char('\\') => {
tokenizer.consume(code);
diff --git a/src/content/string.rs b/src/content/string.rs
index f591cd7..efb6e60 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -43,13 +43,28 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// |qwe
/// ```
fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- if let Code::None = code {
- (State::Ok, None)
- } else {
- tokenizer.enter(TokenType::Data);
- tokenizer.consume(code);
- (State::Fn(Box::new(in_data)), None)
+ match code {
+ Code::None => (State::Ok, None),
+ Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
+ tokenizer.enter(TokenType::LineEnding);
+ tokenizer.consume(code);
+ tokenizer.exit(TokenType::LineEnding);
+ (State::Fn(Box::new(start)), None)
+ }
+ _ => {
+ tokenizer.enter(TokenType::Data);
+ tokenizer.consume(code);
+ (State::Fn(Box::new(in_data)), None)
+ }
}
+
+ // if let Code::None = code {
+ // (State::Ok, None)
+ // } else {
+ // tokenizer.enter(TokenType::Data);
+ // tokenizer.consume(code);
+ // (State::Fn(Box::new(in_data)), None)
+ // }
}
/// In data.
@@ -59,10 +74,9 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
- // To do: line endings.
- Code::None => {
+ Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n') => {
tokenizer.exit(TokenType::Data);
- (State::Ok, None)
+ before_data(tokenizer, code)
}
// To do: somehow get these markers from constructs.
Code::Char('&' | '\\') => {
diff --git a/src/util/link.rs b/src/util/link.rs
new file mode 100644
index 0000000..917ce4d
--- /dev/null
+++ b/src/util/link.rs
@@ -0,0 +1,8 @@
+//! To do.
+
+use crate::tokenizer::Event;
+
+pub fn link(events: &mut [Event], index: usize) {
+ events[index - 2].next = Some(index);
+ events[index].previous = Some(index - 2);
+}
diff --git a/src/util/mod.rs b/src/util/mod.rs
index c3db267..5439c62 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -2,5 +2,6 @@
pub mod decode_character_reference;
pub mod encode;
+pub mod link;
pub mod sanitize_uri;
pub mod span;
diff --git a/tests/character_reference.rs b/tests/character_reference.rs
index bcd0aca..f2337ab 100644
--- a/tests/character_reference.rs
+++ b/tests/character_reference.rs
@@ -100,7 +100,7 @@ fn character_reference() {
// "should not support character references as construct markers (2)"
// );
- // To do: link.
+ // To do: link (resource).
// assert_eq!(
// micromark("[a](url &quot;tit&quot;)"),
// "<p>[a](url &quot;tit&quot;)</p>",