about summary refs log tree commit diff stats
path: root/src/content
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-10 16:47:43 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-06-10 16:47:43 +0200
commit17f4eec55ad0a5f74aedbcff6c2f0119ad52e584 (patch)
tree1839c796de977421456d1b9006f2f2c1e23cf809 /src/content
parent5133042973f31a3992f216e591d840bb491bfd45 (diff)
downloadmarkdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.tar.gz
markdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.tar.bz2
markdown-rs-17f4eec55ad0a5f74aedbcff6c2f0119ad52e584.zip
Add text content type
* Add character reference and character escapes in text
* Add recursive subtokenization
Diffstat (limited to '')
-rw-r--r--src/content/flow.rs14
-rw-r--r--src/content/mod.rs1
-rw-r--r--src/content/string.rs10
-rw-r--r--src/content/text.rs80
4 files changed, 92 insertions, 13 deletions
diff --git a/src/content/flow.rs b/src/content/flow.rs
index 0d1bd22..6fa8c25 100644
--- a/src/content/flow.rs
+++ b/src/content/flow.rs
@@ -34,7 +34,11 @@ use crate::util::get_span;
pub fn flow(codes: &[Code], point: Point, index: usize) -> Vec<Event> {
let mut tokenizer = Tokenizer::new(point, index);
tokenizer.feed(codes, Box::new(start), true);
- subtokenize(tokenizer.events, codes)
+ let mut result = (tokenizer.events, false);
+ while !result.1 {
+ result = subtokenize(result.0, codes);
+ }
+ result.0
}
/// Before flow.
@@ -165,7 +169,7 @@ fn content_before(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
}
_ => {
tokenizer.enter(TokenType::Content);
- tokenizer.enter(TokenType::ContentChunk);
+ tokenizer.enter(TokenType::ChunkContent);
content(tokenizer, code, tokenizer.events.len() - 1)
}
}
@@ -259,8 +263,8 @@ fn continuation_construct_after_prefix(tokenizer: &mut Tokenizer, code: Code) ->
fn content_continue(tokenizer: &mut Tokenizer, code: Code, previous_index: usize) -> StateFnResult {
tokenizer.consume(code);
- tokenizer.exit(TokenType::ContentChunk);
- tokenizer.enter(TokenType::ContentChunk);
+ tokenizer.exit(TokenType::ChunkContent);
+ tokenizer.enter(TokenType::ChunkContent);
let next_index = tokenizer.events.len() - 1;
tokenizer.events[previous_index].next = Some(next_index);
tokenizer.events[next_index].previous = Some(previous_index);
@@ -271,7 +275,7 @@ fn content_continue(tokenizer: &mut Tokenizer, code: Code, previous_index: usize
}
fn content_end(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
- tokenizer.exit(TokenType::ContentChunk);
+ tokenizer.exit(TokenType::ChunkContent);
tokenizer.exit(TokenType::Content);
after(tokenizer, code)
}
diff --git a/src/content/mod.rs b/src/content/mod.rs
index 4c0a7f4..d13df79 100644
--- a/src/content/mod.rs
+++ b/src/content/mod.rs
@@ -4,3 +4,4 @@
pub mod content;
pub mod flow;
pub mod string;
+pub mod text;
diff --git a/src/content/string.rs b/src/content/string.rs
index ff9e3fc..2723785 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -17,8 +17,6 @@ use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
/// Before string.
///
-/// First we assume character reference.
-///
/// ```markdown
/// |&amp;
/// |\&
@@ -28,11 +26,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
Code::None => (State::Ok, None),
_ => tokenizer.attempt_2(character_reference, character_escape, |ok| {
- Box::new(if ok {
- start
- } else {
- before_not_character_escape
- })
+ Box::new(if ok { start } else { before_data })
})(tokenizer, code),
}
}
@@ -44,7 +38,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```markdown
/// |qwe
/// ```
-fn before_not_character_escape(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
if let Code::None = code {
(State::Ok, None)
} else {
diff --git a/src/content/text.rs b/src/content/text.rs
new file mode 100644
index 0000000..2c93b18
--- /dev/null
+++ b/src/content/text.rs
@@ -0,0 +1,80 @@
+//! The text content type.
+//!
+//! **Text** contains phrasing content such as attention (emphasis, strong),
+//! media (links, images), and actual text.
+//!
+//! The constructs found in text are:
+//!
+//! * Autolink
+//! * Attention
+//! * HTML (text)
+//! * Hard break escape
+//! * Code (text)
+//! * Line ending
+//! * Label start (image)
+//! * Label start (link)
+//! * [Character escape][crate::construct::character_escape]
+//! * [Character reference][crate::construct::character_reference]
+
+use crate::construct::{
+ character_escape::start as character_escape, character_reference::start as character_reference,
+};
+use crate::tokenizer::{Code, State, StateFnResult, TokenType, Tokenizer};
+
+/// Before text.
+///
+/// First we assume character reference.
+///
+/// ```markdown
+/// |&amp;
+/// |\&
+/// |qwe
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::None => (State::Ok, None),
+ _ => tokenizer.attempt_2(character_reference, character_escape, |ok| {
+ Box::new(if ok { start } else { before_data })
+ })(tokenizer, code),
+ }
+}
+
+/// Before text.
+///
+/// We’re at data.
+///
+/// ```markdown
+/// |qwe
+/// ```
+fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ if let Code::None = code {
+ (State::Ok, None)
+ } else {
+ tokenizer.enter(TokenType::Data);
+ tokenizer.consume(code);
+ (State::Fn(Box::new(in_data)), None)
+ }
+}
+
+/// In data.
+///
+/// ```markdown
+/// q|w|e
+/// ```
+fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::None => {
+ tokenizer.exit(TokenType::Data);
+ (State::Ok, None)
+ }
+ // To do: somehow get these markers from constructs.
+ Code::Char('&' | '\\') => {
+ tokenizer.exit(TokenType::Data);
+ start(tokenizer, code)
+ }
+ _ => {
+ tokenizer.consume(code);
+ (State::Fn(Box::new(in_data)), None)
+ }
+ }
+}