aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/compiler.rs24
-rw-r--r--src/construct/attention.rs401
-rw-r--r--src/construct/mod.rs3
-rw-r--r--src/construct/partial_label.rs5
-rw-r--r--src/content/text.rs18
-rw-r--r--src/tokenizer.rs9
6 files changed, 449 insertions, 11 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 1f16648..061d3e3 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -421,6 +421,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
enter_map.insert(TokenType::CodeIndented, on_enter_code_indented);
enter_map.insert(TokenType::CodeFenced, on_enter_code_fenced);
enter_map.insert(TokenType::CodeText, on_enter_code_text);
+ enter_map.insert(TokenType::Emphasis, on_enter_emphasis);
enter_map.insert(TokenType::HtmlFlow, on_enter_html_flow);
enter_map.insert(TokenType::HtmlText, on_enter_html_text);
enter_map.insert(TokenType::Image, on_enter_image);
@@ -431,6 +432,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
on_enter_resource_destination_string,
);
enter_map.insert(TokenType::Paragraph, on_enter_paragraph);
+ enter_map.insert(TokenType::Strong, on_enter_strong);
enter_map.insert(TokenType::Definition, on_enter_definition);
enter_map.insert(
TokenType::DefinitionDestinationString,
@@ -441,6 +443,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
enter_map.insert(TokenType::DefinitionTitleString, on_enter_buffer);
let mut exit_map: Map = HashMap::new();
+ exit_map.insert(TokenType::Emphasis, on_exit_emphasis);
exit_map.insert(TokenType::Label, on_exit_label);
exit_map.insert(TokenType::LabelText, on_exit_label_text);
exit_map.insert(TokenType::ReferenceString, on_exit_reference_string);
@@ -452,6 +455,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
TokenType::ResourceTitleString,
on_exit_resource_title_string,
);
+ exit_map.insert(TokenType::Strong, on_exit_strong);
exit_map.insert(TokenType::Image, on_exit_media);
exit_map.insert(TokenType::Link, on_exit_media);
exit_map.insert(TokenType::CodeTextData, on_exit_data);
@@ -644,6 +648,11 @@ fn on_enter_definition_destination_string(context: &mut CompileContext) {
context.ignore_encode = true;
}
+/// Handle [`Enter`][EventType::Enter]:[`Emphasis`][TokenType::Emphasis].
+fn on_enter_emphasis(context: &mut CompileContext) {
+ context.tag("<em>".to_string());
+}
+
/// Handle [`Enter`][EventType::Enter]:[`HtmlFlow`][TokenType::HtmlFlow].
fn on_enter_html_flow(context: &mut CompileContext) {
context.line_ending_if_needed();
@@ -704,6 +713,11 @@ fn on_enter_resource_destination_string(context: &mut CompileContext) {
context.ignore_encode = true;
}
+/// Handle [`Enter`][EventType::Enter]:[`Strong`][TokenType::Strong].
+fn on_enter_strong(context: &mut CompileContext) {
+ context.tag("<strong>".to_string());
+}
+
/// Handle [`Exit`][EventType::Exit]:[`AutolinkEmail`][TokenType::AutolinkEmail].
fn on_exit_autolink_email(context: &mut CompileContext) {
let slice = serialize(
@@ -933,6 +947,11 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {
definition.title = Some(buf);
}
+/// Handle [`Exit`][EventType::Exit]:[`Emphasis`][TokenType::Emphasis].
+fn on_exit_emphasis(context: &mut CompileContext) {
+ context.tag("</em>".to_string());
+}
+
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtx`][TokenType::HeadingAtx].
fn on_exit_heading_atx(context: &mut CompileContext) {
let rank = context
@@ -1132,6 +1151,11 @@ fn on_exit_resource_title_string(context: &mut CompileContext) {
media.title = Some(buf);
}
+/// Handle [`Exit`][EventType::Exit]:[`Strong`][TokenType::Strong].
+fn on_exit_strong(context: &mut CompileContext) {
+ context.tag("</strong>".to_string());
+}
+
/// Handle [`Exit`][EventType::Exit]:[`ThematicBreak`][TokenType::ThematicBreak].
fn on_exit_thematic_break(context: &mut CompileContext) {
context.tag("<hr />".to_string());
diff --git a/src/construct/attention.rs b/src/construct/attention.rs
new file mode 100644
index 0000000..f022e6e
--- /dev/null
+++ b/src/construct/attention.rs
@@ -0,0 +1,401 @@
+//! To do.
+
+use crate::tokenizer::{Code, Event, EventType, Point, State, StateFnResult, TokenType, Tokenizer};
+use crate::util::edit_map::EditMap;
+
+/// To do
+#[derive(Debug, PartialEq)]
+enum GroupKind {
+ Whitespace,
+ Punctuation,
+ Other,
+}
+
+/// To do
+#[derive(Debug, PartialEq)]
+enum MarkerKind {
+ Asterisk,
+ Underscore,
+}
+
+impl MarkerKind {
+ fn from_char(char: char) -> MarkerKind {
+ match char {
+ '*' => MarkerKind::Asterisk,
+ '_' => MarkerKind::Underscore,
+ _ => unreachable!("invalid char"),
+ }
+ }
+ fn from_code(code: Code) -> MarkerKind {
+ match code {
+ Code::Char(char) => MarkerKind::from_char(char),
+ _ => unreachable!("invalid code"),
+ }
+ }
+}
+
+/// To do
+#[derive(Debug)]
+struct Run {
+ marker: MarkerKind,
+ event_index: usize,
+ start_point: Point,
+ start_index: usize,
+ end_point: Point,
+ end_index: usize,
+ size: usize,
+ open: bool,
+ close: bool,
+}
+
+/// Before a sequence of attention markers (`*` or `_`).
+///
+/// ```markdown
+/// |**
+/// ```
+pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
+ match code {
+ Code::Char(char) if char == '*' || char == '_' => {
+ tokenizer.enter(TokenType::AttentionSequence);
+ inside(tokenizer, code, char)
+ }
+ _ => (State::Nok, None),
+ }
+}
+
+/// In a sequence of attention markers.
+///
+/// ```markdown
+/// *|*
+/// ```
+fn inside(tokenizer: &mut Tokenizer, code: Code, marker: char) -> StateFnResult {
+ match code {
+ Code::Char(char) if char == marker => {
+ tokenizer.consume(code);
+ (State::Fn(Box::new(move |t, c| inside(t, c, marker))), None)
+ }
+ _ => {
+ tokenizer.exit(TokenType::AttentionSequence);
+ tokenizer.register_resolver("attention".to_string(), Box::new(resolve));
+ (State::Ok, Some(vec![code]))
+ }
+ }
+}
+
+/// To do.
+#[allow(clippy::too_many_lines)]
+pub fn resolve(tokenizer: &mut Tokenizer) -> Vec<Event> {
+ let mut index = 0;
+ println!("before: {:?}", tokenizer.events.len());
+ while index < tokenizer.events.len() {
+ let event = &tokenizer.events[index];
+ println!(
+ "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+ index,
+ event.event_type,
+ event.token_type,
+ event.content_type,
+ event.previous,
+ event.next
+ );
+ index += 1;
+ }
+
+ let codes = &tokenizer.parse_state.codes;
+ let mut edit_map = EditMap::new();
+ let mut start = 0;
+ let mut runs: Vec<Run> = vec![];
+
+ // Find runs of sequences and information about them.
+ while start < tokenizer.events.len() {
+ let enter = &tokenizer.events[start];
+
+ if enter.event_type == EventType::Enter && enter.token_type == TokenType::AttentionSequence
+ {
+ let end = start + 1;
+ let exit = &tokenizer.events[end];
+ let marker = MarkerKind::from_code(codes[enter.index]);
+ let before = classify_character(if enter.index > 0 {
+ codes[enter.index - 1]
+ } else {
+ Code::None
+ });
+ let after = classify_character(if exit.index < codes.len() {
+ codes[exit.index]
+ } else {
+ Code::None
+ });
+ let open = after == GroupKind::Other
+ || (after == GroupKind::Punctuation && before != GroupKind::Other);
+ // To do: GFM strikethrough?
+ // || attentionMarkers.includes(code)
+ let close = before == GroupKind::Other
+ || (before == GroupKind::Punctuation && after != GroupKind::Other);
+ // To do: GFM strikethrough?
+ // || attentionMarkers.includes(previous)
+
+ runs.push(Run {
+ event_index: start,
+ start_point: enter.point.clone(),
+ start_index: enter.index,
+ end_point: exit.point.clone(),
+ end_index: exit.index,
+ size: exit.index - enter.index,
+ open: if marker == MarkerKind::Asterisk {
+ open
+ } else {
+ open && (before != GroupKind::Other || !close)
+ },
+ close: if marker == MarkerKind::Asterisk {
+ close
+ } else {
+ close && (after != GroupKind::Other || !open)
+ },
+ marker,
+ });
+
+ start += 1;
+ }
+
+ start += 1;
+ }
+
+ // Walk through runs and match them.
+ let mut close = 0;
+
+ while close < runs.len() {
+ let run_close = &runs[close];
+
+ // Find a run that can close.
+ if run_close.close {
+ let mut open = close;
+
+ // Now walk back to find an opener.
+ while open > 0 {
+ open -= 1;
+
+ let run_open = &runs[open];
+
+ // Find a token that can open the closer.
+ if run_open.open && run_close.marker == run_open.marker {
+ // If the opening can close or the closing can open,
+ // and the close size *is not* a multiple of three,
+ // but the sum of the opening and closing size *is*
+ // multiple of three, then **don’t** match.
+ if (run_open.close || run_close.open)
+ && run_close.size % 3 != 0
+ && (run_open.size + run_close.size) % 3 == 0
+ {
+ continue;
+ }
+
+ // Number of markers to use from the sequence.
+ let take = if run_open.size > 1 && run_close.size > 1 {
+ 2
+ } else {
+ 1
+ };
+
+ let run_close = &mut runs[close];
+ let close_event_index = run_close.event_index;
+ let seq_close_enter = (run_close.start_point.clone(), run_close.start_index);
+ run_close.size -= take;
+ run_close.start_point.column += take;
+ run_close.start_point.offset += take;
+ let seq_close_exit = (run_close.start_point.clone(), run_close.start_index);
+
+ // Remove closing run if fully used.
+ if run_close.size == 0 {
+ runs.remove(close);
+ edit_map.add(close_event_index, 2, vec![]);
+ }
+
+ let run_open = &mut runs[open];
+ let open_event_index = run_open.event_index;
+ let seq_open_exit = (run_open.end_point.clone(), run_open.end_index);
+ run_open.size -= take;
+ run_open.end_point.column -= take;
+ run_open.end_point.offset -= take;
+ let seq_open_enter = (run_open.end_point.clone(), run_open.end_index);
+
+ // Remove opening run if fully used.
+ if run_open.size == 0 {
+ runs.remove(open);
+ edit_map.add(open_event_index, 2, vec![]);
+ }
+
+ // Opening.
+ edit_map.add(
+ open_event_index,
+ 0,
+ vec![
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::Emphasis
+ } else {
+ TokenType::Strong
+ },
+ point: seq_open_enter.0.clone(),
+ index: seq_open_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_open_enter.0.clone(),
+ index: seq_open_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_open_exit.0.clone(),
+ index: seq_open_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::EmphasisText
+ } else {
+ TokenType::StrongText
+ },
+ point: seq_open_exit.0.clone(),
+ index: seq_open_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ ],
+ );
+ // Closing.
+ edit_map.add(
+ close_event_index,
+ 0,
+ vec![
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::EmphasisText
+ } else {
+ TokenType::StrongText
+ },
+ point: seq_close_enter.0.clone(),
+ index: seq_close_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Enter,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_close_enter.0.clone(),
+ index: seq_close_enter.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::EmphasisSequence
+ } else {
+ TokenType::StrongSequence
+ },
+ point: seq_close_exit.0.clone(),
+ index: seq_close_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ Event {
+ event_type: EventType::Exit,
+ token_type: if take == 1 {
+ TokenType::Emphasis
+ } else {
+ TokenType::Strong
+ },
+ point: seq_close_exit.0.clone(),
+ index: seq_close_exit.1,
+ previous: None,
+ next: None,
+ content_type: None,
+ },
+ ],
+ );
+
+ break;
+ }
+ }
+ }
+
+ close += 1;
+ }
+
+ // Mark remaining sequences as data.
+ let mut index = 0;
+ while index < runs.len() {
+ let run = &runs[index];
+ // To do: resize!
+ tokenizer.events[run.event_index].token_type = TokenType::Data;
+ tokenizer.events[run.event_index + 1].token_type = TokenType::Data;
+
+ index += 1;
+ }
+
+ let events = edit_map.consume(&mut tokenizer.events);
+ let mut index = 0;
+ println!("after: {:?}", events.len());
+ while index < events.len() {
+ let event = &events[index];
+ println!(
+ "ev: {:?} {:?} {:?} {:?} {:?} {:?}",
+ index,
+ event.event_type,
+ event.token_type,
+ event.content_type,
+ event.previous,
+ event.next
+ );
+ index += 1;
+ }
+
+ events
+}
+
+fn classify_character(code: Code) -> GroupKind {
+ match code {
+ // Markdown whitespace.
+ Code::None
+ | Code::CarriageReturnLineFeed
+ | Code::VirtualSpace
+ | Code::Char('\t' | '\r' | '\n' | ' ') => GroupKind::Whitespace,
+ // Unicode whitespace.
+ Code::Char(char) if char.is_whitespace() => GroupKind::Whitespace,
+ // Unicode punctuation.
+ // To do: `is_punctuation` is not in rust? Why not?
+ // Perhaps we need to generate stuff just like:
+ // <https://github.com/micromark/micromark/blob/main/packages/micromark-util-character/dev/lib/unicode-punctuation-regex.js>.
+ Code::Char(char) if char.is_ascii_punctuation() => GroupKind::Punctuation,
+ Code::Char(_) => GroupKind::Other,
+ }
+}
diff --git a/src/construct/mod.rs b/src/construct/mod.rs
index 9e3dfb0..66b2a3c 100644
--- a/src/construct/mod.rs
+++ b/src/construct/mod.rs
@@ -14,7 +14,7 @@
//!
//! The following constructs are found in markdown:
//!
-//! * attention (strong, emphasis)
+//! * [attention (strong, emphasis)][attention]
//! * [autolink][]
//! * [blank line][blank_line]
//! * block quote
@@ -61,6 +61,7 @@
//! example `ascii_punctuation` refers to
//! [`char::is_ascii_punctuation`][char::is_ascii_punctuation].
+pub mod attention;
pub mod autolink;
pub mod blank_line;
pub mod character_escape;
diff --git a/src/construct/partial_label.rs b/src/construct/partial_label.rs
index e505997..32182d6 100644
--- a/src/construct/partial_label.rs
+++ b/src/construct/partial_label.rs
@@ -41,7 +41,7 @@
//! > ([label start (image)][label_start_image] or
//! > [label start (link)][label_start_link]) and a closing
//! > ([label end][label_end]), so as to allow further phrasing such as
-//! > [code (text)][code_text] or attention.
+//! > [code (text)][code_text] or [attention][].
//!
//! ## References
//!
@@ -49,6 +49,7 @@
//!
//! [definition]: crate::construct::definition
//! [string]: crate::content::string
+//! [attention]: crate::construct::attention
//! [character_escape]: crate::construct::character_escape
//! [character_reference]: crate::construct::character_reference
//! [label_start_image]: crate::construct::label_start_image
@@ -56,8 +57,6 @@
//! [label_end]: crate::construct::label_end
//! [code_text]: crate::construct::code_text
//! [link_reference_size_max]: crate::constant::LINK_REFERENCE_SIZE_MAX
-//!
-//! <!-- To do: link attention. -->
use super::partial_space_or_tab::{space_or_tab_eol_with_options, EolOptions};
use crate::constant::LINK_REFERENCE_SIZE_MAX;
diff --git a/src/content/text.rs b/src/content/text.rs
index c3f4e1b..ecb6ae1 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -1,12 +1,13 @@
//! The text content type.
//!
-//! **Text** contains phrasing content such as attention (emphasis, strong),
-//! media (links, images), and actual text.
+//! **Text** contains phrasing content such as
+//! [attention][crate::construct::attention] (emphasis, strong),
+//! [code (text)][crate::construct::code_text], and actual text.
//!
//! The constructs found in text are:
//!
+//! * [Attention][crate::construct::attention]
//! * [Autolink][crate::construct::autolink]
-//! * Attention
//! * [HTML (text)][crate::construct::html_text]
//! * [Hard break (escape)][crate::construct::hard_break_escape]
//! * [Hard break (trailing)][crate::construct::hard_break_trailing]
@@ -18,9 +19,9 @@
//! * [Character reference][crate::construct::character_reference]
use crate::construct::{
- autolink::start as autolink, character_escape::start as character_escape,
- character_reference::start as character_reference, code_text::start as code_text,
- hard_break_escape::start as hard_break_escape,
+ attention::start as attention, autolink::start as autolink,
+ character_escape::start as character_escape, character_reference::start as character_reference,
+ code_text::start as code_text, hard_break_escape::start as hard_break_escape,
hard_break_trailing::start as hard_break_trailing, html_text::start as html_text,
label_end::start as label_end, label_start_image::start as label_start_image,
label_start_link::start as label_start_link, partial_data::start as data,
@@ -28,16 +29,18 @@ use crate::construct::{
};
use crate::tokenizer::{Code, State, StateFnResult, Tokenizer};
-const MARKERS: [Code; 10] = [
+const MARKERS: [Code; 12] = [
Code::VirtualSpace, // `whitespace`
Code::Char('\t'), // `whitespace`
Code::Char(' '), // `hard_break_trailing`, `whitespace`
Code::Char('!'), // `label_start_image`
Code::Char('&'), // `character_reference`
+ Code::Char('*'), // `attention`
Code::Char('<'), // `autolink`, `html_text`
Code::Char('['), // `label_start_link`
Code::Char('\\'), // `character_escape`, `hard_break_escape`
Code::Char(']'), // `label_end`
+ Code::Char('_'), // `attention`
Code::Char('`'), // `code_text`
];
@@ -55,6 +58,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
Code::None => (State::Ok, None),
_ => tokenizer.attempt_n(
vec![
+ Box::new(attention),
Box::new(autolink),
Box::new(character_escape),
Box::new(character_reference),
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index b70e706..282c99f 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -1593,6 +1593,15 @@ pub enum TokenType {
/// ^ ^ ^
/// ```
ThematicBreakSequence,
+ Strong,
+ StrongSequence,
+ StrongText,
+ Emphasis,
+ EmphasisSequence,
+ EmphasisText,
+ // To do: this is removed.
+ // Should it reuse something e.g., emphasis? Data?
+ AttentionSequence,
}
/// Embedded content type.