aboutsummaryrefslogtreecommitdiffstats
path: root/src/construct
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-21 17:24:56 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-21 17:24:56 +0200
commit56ff5c73c7ec19b349e7d60d04ce1057c006d6ec (patch)
treeb4107ae0e0219f871a2f2764215ad979b2b0d75f /src/construct
parent7effd171218fff68f051671f1373cee467a8f921 (diff)
downloadmarkdown-rs-56ff5c73c7ec19b349e7d60d04ce1057c006d6ec.tar.gz
markdown-rs-56ff5c73c7ec19b349e7d60d04ce1057c006d6ec.tar.bz2
markdown-rs-56ff5c73c7ec19b349e7d60d04ce1057c006d6ec.zip
Make data a construct
Diffstat (limited to '')
-rw-r--r--src/construct/mod.rs20
-rw-r--r--src/construct/partial_data.rs69
2 files changed, 84 insertions, 5 deletions
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 407dc6b..9e5da0e 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -7,11 +7,10 @@
//! For example, [code (fenced)][code_fenced] and
//! [code (indented)][code_indented] are considered different constructs
//!
-//! <!-- To do: can these rest things be made into constructs? -->
-//!
-//! Content types also have a *rest* thing: after all character escapes and
-//! character references are parsed, there’s something left.
-//! This remainder is, currently, not called a constructs.
+//! Content types also have a *rest* thing: after all other constructs are
+//! parsed, there’s something left.
+//! In flow, that is a [paragraph][].
+//! In string and text, that is [data][partial_data].
//!
//! The following constructs are found in markdown:
//!
@@ -38,6 +37,14 @@
//! * [paragraph][]
//! * [thematic break][thematic_break]
//!
+//! There are also several routines used in different places:
+//!
+//! * [data][partial_data]
+//! * [destination][partial_destination]
+//! * [label][partial_label]
+//! * [space or tab][partial_space_or_tab]
+//! * [title][partial_title]
+//!
//! Each construct maintained here is explained with a BNF diagram.
//! For example, the docs for [character escape][character_escape] contain:
//!
@@ -52,6 +59,8 @@
//! They also contain references to character as defined by [char][], so for
//! example `ascii_punctuation` refers to
//! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
+//!
+//!
pub mod autolink;
pub mod blank_line;
@@ -68,6 +77,7 @@ pub mod heading_setext;
pub mod html_flow;
pub mod html_text;
pub mod paragraph;
+pub mod partial_data;
pub mod partial_destination;
pub mod partial_label;
pub mod partial_space_or_tab;
diff --git a/src/construct/partial_data.rs b/src/construct/partial_data.rs
new file mode 100644
index 0000000..d83787a
--- /dev/null
+++ b/src/construct/partial_data.rs
@@ -0,0 +1,69 @@
+//! Data occurs in [text][] and [string][].
+//!
+//! It can include anything (including line endings), and stops at certain
+//! characters.
+//!
+//! [string]: crate::content::string
+//! [text]: crate::content::text
+
+// To do: pass token types in?
+
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Start of data: a leading stop code is consumed as data rather than ending it.
+///
+/// ```markdown
+/// |&qwe
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult {
+    if !stop.contains(&code) {
+        return at_break(tokenizer, code, stop);
+    }
+
+    tokenizer.enter(TokenType::Data);
+    tokenizer.consume(code);
+    (State::Fn(Box::new(move |t, c| data(t, c, stop))), None)
+}
+
+/// At a break: the end, a line ending, a stop code, or the start of more data.
+///
+/// ```markdown
+/// |qwe| |&
+/// ```
+fn at_break(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult {
+    if matches!(code, Code::None) {
+        (State::Ok, None)
+    } else if matches!(code, Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')) {
+        tokenizer.enter(TokenType::LineEnding);
+        tokenizer.consume(code);
+        tokenizer.exit(TokenType::LineEnding);
+        (State::Fn(Box::new(move |t, c| at_break(t, c, stop))), None)
+    } else if stop.contains(&code) {
+        // Not consumed: the stop code is returned alongside `State::Ok`.
+        (State::Ok, Some(vec![code]))
+    } else {
+        tokenizer.enter(TokenType::Data);
+        data(tokenizer, code, stop)
+    }
+}
+
+/// Inside data: keeps consuming until the end, a line ending, or a stop code.
+///
+/// ```markdown
+/// q|w|e
+/// ```
+fn data(tokenizer: &mut Tokenizer, code: Code, stop: Vec<Code>) -> StateFnResult {
+    // The end and line endings always close the data, whatever `stop` holds.
+    let at_eol_or_eof = matches!(
+        code,
+        Code::None | Code::CarriageReturnLineFeed | Code::Char('\r' | '\n')
+    );
+
+    if !at_eol_or_eof && !stop.contains(&code) {
+        tokenizer.consume(code);
+        return (State::Fn(Box::new(move |t, c| data(t, c, stop))), None);
+    }
+
+    tokenizer.exit(TokenType::Data);
+    at_break(tokenizer, code, stop)
+}