author	Titus Wormer <tituswormer@gmail.com>	2022-07-04 12:16:51 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-04 12:16:58 +0200
commit	faca28020f4894bdfcf5a4b164ebbc75864d8776 (patch)
tree	93377413ae8c355e2d804f7e700241693b228e70 /src/construct
parent	e1cae8c705e66669d043f5269e9f58c09c7b0eaa (diff)
Add support for attention (emphasis, strong)
Diffstat (limited to '')
-rw-r--r--	src/construct/attention.rs	401
-rw-r--r--	src/construct/mod.rs	3
-rw-r--r--	src/construct/partial_label.rs	5
3 files changed, 405 insertions(+), 4 deletions(-)
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
new file mode 100644
index 0000000..f022e6e
--- /dev/null
+++ b/src/construct/attention.rs
@@ -0,0 +1,401 @@
+//! Attention (emphasis, strong).
+//!
+//! Sequences of `*` or `_` markers are tokenized here and then matched into
+//! emphasis and strong pairs by a resolver, after the rest of the text has
+//! been tokenized.
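+//!
+//! For example, in the following markdown, `*` produces emphasis and `**`
+//! produces strong:
+//!
+//! ```markdown
+//! *alpha* and **bravo**
+//! ```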
+
+use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::edit_map::EditMap;
+
+/// Character groups, used to check whether a sequence can open or close.
+#[derive(Debug, PartialEq)]
+enum GroupKind {
+ Whitespace,
+ Punctuation,
+ Other,
+}
+
+/// Type of attention marker.
+#[derive(Debug, PartialEq)]
+enum MarkerKind {
+ Asterisk,
+ Underscore,
+}
+
+impl MarkerKind {
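+ /// Turn a `char` into a marker kind.
+ ///
+ /// Panics on characters other than `*` or `_`.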
+ fn from_char(char: char) -> MarkerKind {
+ match char {
+ '*' => MarkerKind::Asterisk,
+ '_' => MarkerKind::Underscore,
+ _ => unreachable!("invalid char"),
+ }
+ }
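+ /// Turn a `Code` into a marker kind.
+ ///
+ /// Panics on codes that are not `*` or `_` characters.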
+ fn from_code(code: Code) -> MarkerKind {
+ match code {
+ Code::Char(char) => MarkerKind::from_char(char),
+ _ => unreachable!("invalid code"),
+ }
+ }
+}
+
+/// A run (sequence) of attention markers, with info on whether it can open
+/// and/or close.
+#[derive(Debug)]
+struct Run {
+ marker: MarkerKind,
+ event_index: usize,
+ start_point: Point,
+ start_index: usize,
+ end_point: Point,
+ end_index: usize,
+ size: usize,
+ open: bool,
+ close: bool,
+}
+
+/// Before a sequence.
+///
+/// ```markdown
+/// |**
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(char) if char == '*' || char == '_' => {
+ tokenizer.enter(TokenType::AttentionSequence);
+ inside(tokenizer, code, char)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// In a sequence.
+///
+/// ```markdown
+/// *|*
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code, marker: char) -> StateFnResult {
+ match code {
+ Code::Char(char) if char == marker => {
+ tokenizer.consume(code);
+ (State::Fn(Box::new(move |t, c| inside(t, c, marker))), None)
+ }
+ _ => {
+ tokenizer.exit(TokenType::AttentionSequence);
+ tokenizer.register_resolver("attention".to_string(), Box::new(resolve));
+ (State::Ok, Some(vec![code]))
+ }
+ }
+}
+
+/// Resolve attention: find runs that can open and close, match them, and
+/// turn matched runs into emphasis and strong events.
+#[allow(clippy::too_many_lines)]
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
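+ // Debug: dump all events before resolving.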
+ let mut index = 0;
+ println!("before: {:?}", tokenizer.events.len());
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+ println!(
+ "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+ index,
+ event.event_type,
+ event.token_type,
+ event.content_type,
+ event.previous,
+ event.next
+ );
+ index += 1;
+ }
+
+ let codes = &tokenizer.parse_state.codes;
+ let mut edit_map = EditMap::new();
+ let mut start = 0;
+ let mut runs: Vec<Run> = vec![];
+
+ // Find runs of sequences and information about them.
+ while start < tokenizer.events.len() {
+ let enter = &tokenizer.events[start];
+
+ if enter.event_type == EventType::Enter && enter.token_type == TokenType::AttentionSequence
+ {
+ let end = start + 1;
+ let exit = &tokenizer.events[end];
+ let marker = MarkerKind::from_code(codes[enter.index]);
+ let before = classify_character(if enter.index > 0 {
+ codes[enter.index - 1]
+ } else {
+ Code::None
+ });
+ let after = classify_character(if exit.index < codes.len() {
+ codes[exit.index]
+ } else {
+ Code::None
+ });
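+ // Whether the sequence can open or close, based on the characters before
+ // and after it (cf. the CommonMark “flanking” rules).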
+ let open = after == GroupKind::Other
+ || (after == GroupKind::Punctuation && before != GroupKind::Other);
+ // To do: GFM strikethrough?
+ // || attentionMarkers.includes(code)
+ let close = before == GroupKind::Other
+ || (before == GroupKind::Punctuation && after != GroupKind::Other);
+ // To do: GFM strikethrough?
+ // || attentionMarkers.includes(previous)
+
+ runs.push(Run {
+ event_index: start,
+ start_point: enter.point.clone(),
+ start_index: enter.index,
+ end_point: exit.point.clone(),
+ end_index: exit.index,
+ size: exit.index - enter.index,
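+ // `_` cannot form intraword attention: only let it open (close) when it
+ // is not preceded (followed) by a word character, or when the run cannot
+ // also close (open).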
+ open: if marker == MarkerKind::Asterisk {
+ open
+ } else {
+ open && (before != GroupKind::Other || !close)
+ },
+ close: if marker == MarkerKind::Asterisk {
+ close
+ } else {
+ close && (after != GroupKind::Other || !open)
+ },
+ marker,
+ });
+
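+ // Skip the exit event as well.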
+ start += 1;
+ }
+
+ start += 1;
+ }
+
+ // Walk through runs and match them.
+ let mut close = 0;
+
+ while close < runs.len() {
+ let run_close = &runs[close];
+
+ // Find a run that can close.
+ if run_close.close {
+ let mut open = close;
+
+ // Now walk back to find an opener.
+ while open > 0 {
+ open -= 1;
+
+ let run_open = &runs[open];
+
+ // Find a token that can open the closer.
+ if run_open.open && run_close.marker == run_open.marker {
+ // If the opening can close or the closing can open,
+ // and the close size *is not* a multiple of three,
+ // but the sum of the opening and closing size *is*
+ // multiple of three, then **don’t** match.
+ if (run_open.close || run_close.open)
+ && run_close.size % 3 != 0
+ && (run_open.size + run_close.size) % 3 == 0
+ {
+ continue;
+ }
+
+ // Number of markers to use from the sequence.
+ let take = if run_open.size > 1 && run_close.size > 1 {
+ 2
+ } else {
+ 1
+ };
+
+ let run_close = &mut runs[close];
+ let close_event_index = run_close.event_index;
+ let seq_close_enter = (run_close.start_point.clone(), run_close.start_index);
+ run_close.size -= take;
+ run_close.start_point.column += take;
+ run_close.start_point.offset += take;
+ let seq_close_exit = (run_close.start_point.clone(), run_close.start_index);
+
+ // Remove closing run if fully used.
+ if run_close.size == 0 {
+ runs.remove(close);
+ edit_map.add(close_event_index, 2, vec![]);
+ }
+
+ let run_open = &mut runs[open];
+ let open_event_index = run_open.event_index;
+ let seq_open_exit = (run_open.end_point.clone(), run_open.end_index);
+ run_open.size -= take;
+ run_open.end_point.column -= take;
+ run_open.end_point.offset -= take;
+ let seq_open_enter = (run_open.end_point.clone(), run_open.end_index);
+
+ // Remove opening run if fully used.
+ if run_open.size == 0 {
+ runs.remove(open);
+ edit_map.add(open_event_index, 2, vec![]);
+ }
+
+ // Opening: enter emphasis/strong, its opening sequence, and its text.
+ edit_map.add(
+ open_event_index,
+ 0,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::Emphasis
+ } else {
+ TokenType::Strong
+ },
+ point: seq_open_enter.0.clone(),
+ index: seq_open_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_open_enter.0.clone(),
+ index: seq_open_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_open_exit.0.clone(),
+ index: seq_open_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::EmphasisText
+ } else {
+ TokenType::StrongText
+ },
+ point: seq_open_exit.0.clone(),
+ index: seq_open_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ ],
+ );
+ // Closing: exit the text, add the closing sequence, and exit emphasis/strong.
+ edit_map.add(
+ close_event_index,
+ 0,
+ vec![
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::EmphasisText
+ } else {
+ TokenType::StrongText
+ },
+ point: seq_close_enter.0.clone(),
+ index: seq_close_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_close_enter.0.clone(),
+ index: seq_close_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_close_exit.0.clone(),
+ index: seq_close_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::Emphasis
+ } else {
+ TokenType::Strong
+ },
+ point: seq_close_exit.0.clone(),
+ index: seq_close_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ ],
+ );
+
+ break;
+ }
+ }
+ }
+
+ close += 1;
+ }
+
+ // Mark remaining sequences as data.
+ let mut index = 0;
+ while index < runs.len() {
+ let run = &runs[index];
+ // To do: resize!
+ tokenizer.events[run.event_index].token_type = TokenType::Data;
+ tokenizer.events[run.event_index + 1].token_type = TokenType::Data;
+
+ index += 1;
+ }
+
+ let events = edit_map.consume(&mut tokenizer.events);
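+ // Debug: dump all events after resolving.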
+ let mut index = 0;
+ println!("after: {:?}", events.len());
+ while index < events.len() {
+ let event = &events[index];
+ println!(
+ "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+ index,
+ event.event_type,
+ event.token_type,
+ event.content_type,
+ event.previous,
+ event.next
+ );
+ index += 1;
+ }
+
+ events
+}
+
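+/// Classify a character code as whitespace, punctuation, or other, to decide
+/// whether sequences around it can open or close.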
+fn classify_character(code: Code) -> GroupKind {
+ match code {
+ // Markdown whitespace.
+ Code::None
+ | Code::CarriageReturnLineFeed
+ | Code::VirtualSpace
+ | Code::Char('\t' | '\r' | '\n' | ' ') => GroupKind::Whitespace,
+ // Unicode whitespace.
+ Code::Char(char) if char.is_whitespace() => GroupKind::Whitespace,
+ // Unicode punctuation.
+ // To do: `is_punctuation` is not in rust? Why not?
+ // Perhaps we need to generate stuff just like:
+ // <https://github.com/micromark/micromark/blob/main/packages/micromark-util-character/dev/lib/unicode-punctuation-regex.js>.
+ Code::Char(char) if char.is_ascii_punctuation() => GroupKind::Punctuation,
+ Code::Char(_) => GroupKind::Other,
+ }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 9e3dfb0..66b2a3c 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -14,7 +14,7 @@
//!
//! The following constructs are found in markdown:
//!
-//! * attention (strong, emphasis)
+//! * [attention (strong, emphasis)][attention]
//! * [autolink][]
//! * [blank line][blank_line]
//! * block quote
@@ -61,6 +61,7 @@
//! example `ascii_punctuation` refers to
//! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
+pub mod attention;
pub mod autolink;
pub mod blank_line;
pub mod character_escape;
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index e505997..32182d6 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -41,7 +41,7 @@
//! > ([label start (image)][label_start_image] or
//! > [label start (link)][label_start_link]) and a closing
//! > ([label end][label_end]), so as to allow further phrasing such as
-//! > [code (text)][code_text] or attention.
+//! > [code (text)][code_text] or [attention][].
//!
//! ## References
//!
@@ -49,6 +49,7 @@
//!
//! [definition]: crate::construct::definition
//! [string]: crate::content::string
+//! [attention]: crate::construct::attention
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_start_image]: crate::construct::label_start_image
@@ -56,8 +57,6 @@
//! [label_end]: crate::construct::label_end
//! [code_text]: crate::construct::code_text
//! [link_reference_size_max]: crate::constant::LINK_REFERENCE_SIZE_MAX
-//!
-//! <!-- To do: link attention. -->
use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::constant::LINK_REFERENCE_SIZE_MAX;