Diffstat:
 src/compiler.rs       | 335
 src/content/string.rs |   1
 src/content/text.rs   |   1
 src/tokenizer.rs      |  28
 4 files changed, 176 insertions(+), 189 deletions(-)
diff --git a/src/compiler.rs b/src/compiler.rs
index 9f84a38..50c06e1 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -102,8 +102,51 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
match event.event_type {
EventType::Enter => match token_type {
- TokenType::Paragraph => {
- buf_tail_mut(buffers).push("<p>".to_string());
+ TokenType::AtxHeading
+ | TokenType::AtxHeadingSequence
+ | TokenType::AtxHeadingWhitespace
+ | TokenType::Autolink
+ | TokenType::AutolinkEmail
+ | TokenType::AutolinkMarker
+ | TokenType::AutolinkProtocol
+ | TokenType::BlankLineEnding
+ | TokenType::BlankLineWhitespace
+ | TokenType::CharacterEscape
+ | TokenType::CharacterEscapeMarker
+ | TokenType::CharacterEscapeValue
+ | TokenType::CharacterReference
+ | TokenType::CharacterReferenceMarker
+ | TokenType::CharacterReferenceMarkerHexadecimal
+ | TokenType::CharacterReferenceMarkerNumeric
+ | TokenType::CharacterReferenceMarkerSemi
+ | TokenType::CharacterReferenceValue
+ | TokenType::CodeIndentedPrefixWhitespace
+ | TokenType::CodeFencedFence
+ | TokenType::CodeFencedFenceSequence
+ | TokenType::CodeFencedFenceWhitespace
+ | TokenType::CodeFlowChunk
+ | TokenType::CodeTextData
+ | TokenType::CodeTextLineEnding
+ | TokenType::CodeTextSequence
+ | TokenType::Content
+ | TokenType::Data
+ | TokenType::HardBreakEscape
+ | TokenType::HardBreakEscapeMarker
+ | TokenType::HardBreakTrailing
+ | TokenType::HardBreakTrailingSpace
+ | TokenType::HtmlFlowData
+ | TokenType::HtmlTextData
+ | TokenType::LineEnding
+ | TokenType::ThematicBreak
+ | TokenType::ThematicBreakSequence
+ | TokenType::ThematicBreakWhitespace
+ | TokenType::Whitespace => {
+ // Ignore.
+ }
+ TokenType::AtxHeadingText
+ | TokenType::CodeFencedFenceInfo
+ | TokenType::CodeFencedFenceMeta => {
+ buffer(buffers);
}
TokenType::CodeIndented => {
code_flow_seen_data = Some(false);
@@ -117,9 +160,8 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
buf_tail_mut(buffers).push("<pre><code".to_string());
code_fenced_fences_count = Some(0);
}
- TokenType::AtxHeadingText
- | TokenType::CodeFencedFenceInfo
- | TokenType::CodeFencedFenceMeta => {
+ TokenType::CodeText => {
+ buf_tail_mut(buffers).push("<code>".to_string());
buffer(buffers);
}
TokenType::HtmlFlow => {
@@ -133,161 +175,58 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
ignore_encode = true;
}
}
- TokenType::CodeText => {
- buf_tail_mut(buffers).push("<code>".to_string());
- buffer(buffers);
+ TokenType::Paragraph => {
+ buf_tail_mut(buffers).push("<p>".to_string());
}
- TokenType::Content
- | TokenType::AtxHeading
- | TokenType::AtxHeadingSequence
- | TokenType::AtxHeadingWhitespace
- | TokenType::LineEnding
- | TokenType::ThematicBreak
- | TokenType::ThematicBreakSequence
- | TokenType::ThematicBreakWhitespace
- | TokenType::CodeIndentedPrefixWhitespace
- | TokenType::CodeFlowChunk
- | TokenType::BlankLineEnding
- | TokenType::BlankLineWhitespace
- | TokenType::Whitespace
- | TokenType::HardBreakEscape
- | TokenType::HardBreakEscapeMarker
- | TokenType::HardBreakTrailing
- | TokenType::HardBreakTrailingSpace
- | TokenType::HtmlFlowData
- | TokenType::HtmlTextData
- | TokenType::CodeFencedFence
- | TokenType::CodeFencedFenceSequence
- | TokenType::CodeFencedFenceWhitespace
- | TokenType::CodeTextSequence
- | TokenType::CodeTextData
- | TokenType::CodeTextLineEnding
- | TokenType::Data
- | TokenType::CharacterEscape
- | TokenType::CharacterEscapeMarker
- | TokenType::CharacterEscapeValue
- | TokenType::CharacterReference
- | TokenType::CharacterReferenceMarker
- | TokenType::CharacterReferenceMarkerNumeric
- | TokenType::CharacterReferenceMarkerHexadecimal
- | TokenType::CharacterReferenceMarkerSemi
- | TokenType::CharacterReferenceValue
- | TokenType::Autolink
- | TokenType::AutolinkMarker
- | TokenType::AutolinkProtocol
- | TokenType::AutolinkEmail => {}
#[allow(unreachable_patterns)]
_ => {
unreachable!("unhandled `enter` of TokenType {:?}", token_type)
}
},
EventType::Exit => match token_type {
- TokenType::Content
- | TokenType::ThematicBreakSequence
- | TokenType::ThematicBreakWhitespace
- | TokenType::CodeIndentedPrefixWhitespace
+ TokenType::Autolink
+ | TokenType::AutolinkMarker
| TokenType::BlankLineEnding
| TokenType::BlankLineWhitespace
- | TokenType::Whitespace
- | TokenType::CodeFencedFenceSequence
- | TokenType::CodeFencedFenceWhitespace
- | TokenType::CodeTextSequence
| TokenType::CharacterEscape
| TokenType::CharacterEscapeMarker
| TokenType::CharacterReference
| TokenType::CharacterReferenceMarkerSemi
+ | TokenType::CodeFencedFenceSequence
+ | TokenType::CodeFencedFenceWhitespace
+ | TokenType::CodeIndentedPrefixWhitespace
+ | TokenType::CodeTextSequence
+ | TokenType::Content
| TokenType::HardBreakEscapeMarker
| TokenType::HardBreakTrailingSpace
- | TokenType::Autolink
- | TokenType::AutolinkMarker => {}
- TokenType::HtmlFlow | TokenType::HtmlText => {
- ignore_encode = false;
+ | TokenType::ThematicBreakSequence
+ | TokenType::ThematicBreakWhitespace
+ | TokenType::Whitespace => {
+ // Ignore.
}
- TokenType::HtmlFlowData | TokenType::HtmlTextData => {
- let slice = serialize(codes, &from_exit_event(events, index), false);
-
- let res = if ignore_encode { slice } else { encode(&slice) };
-
+ // Just output it.
+ TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
// last_was_tag = false;
- buf_tail_mut(buffers).push(res);
- }
- TokenType::Paragraph => {
- buf_tail_mut(buffers).push("</p>".to_string());
- }
- TokenType::HardBreakEscape | TokenType::HardBreakTrailing => {
- buf_tail_mut(buffers).push("<br />".to_string());
- }
- TokenType::CodeIndented | TokenType::CodeFenced => {
- let seen_data =
- code_flow_seen_data.expect("`code_flow_seen_data` must be defined");
-
- // To do: containers.
- // One special case is if we are inside a container, and the fenced code was
- // not closed (meaning it runs to the end).
- // In that case, the following line ending, is considered *outside* the
- // fenced code and block quote by micromark, but CM wants to treat that
- // ending as part of the code.
- // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag {
- // line_ending();
- // }
-
- // But in most cases, it’s simpler: when we’ve seen some data, emit an extra
- // line ending when needed.
- if seen_data {
- line_ending_if_needed(buffers);
- }
-
- buf_tail_mut(buffers).push("</code></pre>".to_string());
-
- if let Some(count) = code_fenced_fences_count {
- if count < 2 {
- line_ending_if_needed(buffers);
- }
- }
-
- code_flow_seen_data = None;
- code_fenced_fences_count = None;
- slurp_one_line_ending = false;
- }
- TokenType::CodeFencedFence => {
- let count = if let Some(count) = code_fenced_fences_count {
- count
- } else {
- 0
- };
-
- if count == 0 {
- buf_tail_mut(buffers).push(">".to_string());
- // tag = true;
- slurp_one_line_ending = true;
- }
-
- code_fenced_fences_count = Some(count + 1);
- }
- TokenType::CodeFencedFenceInfo => {
- let value = resume(buffers);
- buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
- // tag = true;
- }
- TokenType::CodeFencedFenceMeta => {
- resume(buffers);
- }
- TokenType::CodeFlowChunk => {
- code_flow_seen_data = Some(true);
buf_tail_mut(buffers).push(encode(&serialize(
codes,
&from_exit_event(events, index),
false,
)));
}
-
+ TokenType::AtxHeading => {
+ let rank = atx_opening_sequence_size
+ .expect("`atx_opening_sequence_size` must be set in headings");
+ buf_tail_mut(buffers).push(format!("</h{}>", rank));
+ atx_opening_sequence_size = None;
+ atx_heading_buffer = None;
+ }
// `AtxHeadingWhitespace` is ignored after the opening sequence,
// before the closing sequence, and after the closing sequence.
// But it is used around intermediate sequences.
// `atx_heading_buffer` is set to `Some` by the first `AtxHeadingText`.
// `AtxHeadingSequence` is ignored as the opening and closing sequence,
// but not when intermediate.
- TokenType::AtxHeadingWhitespace | TokenType::AtxHeadingSequence => {
+ TokenType::AtxHeadingSequence | TokenType::AtxHeadingWhitespace => {
if let Some(buf) = atx_heading_buffer {
atx_heading_buffer = Some(
buf.to_string()
@@ -320,50 +259,26 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
buf_tail_mut(buffers).push(encode(&result));
}
- TokenType::AtxHeading => {
- let rank = atx_opening_sequence_size
- .expect("`atx_opening_sequence_size` must be set in headings");
- buf_tail_mut(buffers).push(format!("</h{}>", rank));
- atx_opening_sequence_size = None;
- atx_heading_buffer = None;
- }
- TokenType::AutolinkProtocol => {
+ TokenType::AutolinkEmail => {
let slice = serialize(codes, &from_exit_event(events, index), false);
let buf = buf_tail_mut(buffers);
buf.push(format!(
- "<a href=\"{}\">",
+ "<a href=\"mailto:{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
buf.push(encode(&slice));
buf.push("</a>".to_string());
}
- TokenType::AutolinkEmail => {
+ TokenType::AutolinkProtocol => {
let slice = serialize(codes, &from_exit_event(events, index), false);
let buf = buf_tail_mut(buffers);
buf.push(format!(
- "<a href=\"mailto:{}\">",
+ "<a href=\"{}\">",
sanitize_uri(slice.as_str(), &protocol_href)
));
buf.push(encode(&slice));
buf.push("</a>".to_string());
}
- TokenType::ThematicBreak => {
- buf_tail_mut(buffers).push("<hr />".to_string());
- }
- TokenType::LineEnding => {
- // if slurp_all_line_endings {
- // // Empty.
- // } else
- if slurp_one_line_ending {
- slurp_one_line_ending = false;
- } else {
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
- }
- }
TokenType::CharacterReferenceMarker => {
character_reference_kind = Some(CharacterReferenceKind::Named);
}
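
Note on the two helpers the autolink arms above rely on: `sanitize_uri` guards the `href` value and `encode` escapes the visible label, which is why the same `slice` is passed through both. The sketch below is a simplified, self-contained approximation, not the crate's implementation — the real `sanitize_uri` also percent-encodes and takes the `protocol_href` allow-list, so the allow-list and escaping here are illustrative assumptions.

    // Illustrative only: simplified stand-ins for `encode` and `sanitize_uri`.
    // The crate's real versions do more (percent-encoding, a configurable
    // protocol allow-list passed in as `protocol_href`, etc.).

    /// Escape characters that are unsafe in HTML text and attribute values.
    fn encode(value: &str) -> String {
        value
            .replace('&', "&amp;")
            .replace('"', "&quot;")
            .replace('<', "&lt;")
            .replace('>', "&gt;")
    }

    /// Keep a URI only if its protocol is on the allow-list; otherwise drop it.
    fn sanitize_uri(value: &str, allowed: &[&str]) -> String {
        let ok = match value.split_once(':') {
            Some((protocol, _)) => allowed.iter().any(|p| p.eq_ignore_ascii_case(protocol)),
            // No protocol: a mail-style or relative value, keep it.
            None => true,
        };
        if ok {
            encode(value)
        } else {
            String::new()
        }
    }

    fn main() {
        // Roughly what the `AutolinkProtocol` exit arm produces for `<https://a.b>`.
        let slice = "https://a.b";
        let html = format!(
            "<a href=\"{}\">{}</a>",
            sanitize_uri(slice, &["http", "https", "mailto"]),
            encode(slice)
        );
        assert_eq!(html, "<a href=\"https://a.b\">https://a.b</a>");
    }
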
@@ -389,9 +304,71 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
};
buf_tail_mut(buffers).push(encode(&value));
-
character_reference_kind = None;
}
+ TokenType::CodeFenced | TokenType::CodeIndented => {
+ let seen_data =
+ code_flow_seen_data.expect("`code_flow_seen_data` must be defined");
+
+ // To do: containers.
+ // One special case is if we are inside a container, and the fenced code was
+ // not closed (meaning it runs to the end).
+ // In that case, the following line ending is considered *outside* the
+ // fenced code and block quote by micromark, but CM wants to treat that
+ // ending as part of the code.
+ // if fenced_count != None && fenced_count < 2 && tightStack.length > 0 && !last_was_tag {
+ // line_ending();
+ // }
+
+ // But in most cases, it’s simpler: when we’ve seen some data, emit an extra
+ // line ending when needed.
+ if seen_data {
+ line_ending_if_needed(buffers);
+ }
+
+ buf_tail_mut(buffers).push("</code></pre>".to_string());
+
+ if let Some(count) = code_fenced_fences_count {
+ if count < 2 {
+ line_ending_if_needed(buffers);
+ }
+ }
+
+ code_flow_seen_data = None;
+ code_fenced_fences_count = None;
+ slurp_one_line_ending = false;
+ }
+ TokenType::CodeFencedFence => {
+ let count = if let Some(count) = code_fenced_fences_count {
+ count
+ } else {
+ 0
+ };
+
+ if count == 0 {
+ buf_tail_mut(buffers).push(">".to_string());
+ // tag = true;
+ slurp_one_line_ending = true;
+ }
+
+ code_fenced_fences_count = Some(count + 1);
+ }
+ TokenType::CodeFencedFenceInfo => {
+ let value = resume(buffers);
+ buf_tail_mut(buffers).push(format!(" class=\"language-{}\"", value));
+ // tag = true;
+ }
+ TokenType::CodeFencedFenceMeta => {
+ resume(buffers);
+ }
+ TokenType::CodeFlowChunk => {
+ code_flow_seen_data = Some(true);
+ buf_tail_mut(buffers).push(encode(&serialize(
+ codes,
+ &from_exit_event(events, index),
+ false,
+ )));
+ }
TokenType::CodeText => {
let result = resume(buffers);
let mut chars = result.chars();
@@ -417,14 +394,38 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
TokenType::CodeTextLineEnding => {
buf_tail_mut(buffers).push(" ".to_string());
}
- // This branch below currently acts as the resulting `data` tokens.
- TokenType::CodeTextData | TokenType::Data | TokenType::CharacterEscapeValue => {
+
+ TokenType::HardBreakEscape | TokenType::HardBreakTrailing => {
+ buf_tail_mut(buffers).push("<br />".to_string());
+ }
+
+ TokenType::HtmlFlow | TokenType::HtmlText => {
+ ignore_encode = false;
+ }
+ TokenType::HtmlFlowData | TokenType::HtmlTextData => {
+ let slice = serialize(codes, &from_exit_event(events, index), false);
// last_was_tag = false;
- buf_tail_mut(buffers).push(encode(&serialize(
- codes,
- &from_exit_event(events, index),
- false,
- )));
+ buf_tail_mut(buffers).push(if ignore_encode { slice } else { encode(&slice) });
+ }
+ TokenType::LineEnding => {
+ // if slurp_all_line_endings {
+ // // Empty.
+ // } else
+ if slurp_one_line_ending {
+ slurp_one_line_ending = false;
+ } else {
+ buf_tail_mut(buffers).push(encode(&serialize(
+ codes,
+ &from_exit_event(events, index),
+ false,
+ )));
+ }
+ }
+ TokenType::Paragraph => {
+ buf_tail_mut(buffers).push("</p>".to_string());
+ }
+ TokenType::ThematicBreak => {
+ buf_tail_mut(buffers).push("<hr />".to_string());
}
#[allow(unreachable_patterns)]
_ => {
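
For orientation, the `buffer`, `resume`, and `buf_tail_mut` calls used throughout the arms above operate on a stack of string buffers: entering a token whose content is needed later (such as `CodeFencedFenceInfo`) opens a fresh buffer, and exiting it pops that buffer so the captured text can be folded into an attribute. A minimal sketch of that idea, with simplified signatures assumed rather than the crate's exact API:

    // Minimal sketch of the nested-buffer idea; signatures are assumptions.

    /// Open a new buffer to capture upcoming output.
    fn buffer(buffers: &mut Vec<Vec<String>>) {
        buffers.push(vec![]);
    }

    /// Close the current buffer and return everything captured in it.
    fn resume(buffers: &mut Vec<Vec<String>>) -> String {
        buffers.pop().expect("expected an open buffer").concat()
    }

    /// The buffer that output is currently being pushed onto.
    fn buf_tail_mut(buffers: &mut Vec<Vec<String>>) -> &mut Vec<String> {
        buffers.last_mut().expect("expected at least one buffer")
    }

    fn main() {
        let mut buffers: Vec<Vec<String>> = vec![vec![]];

        // Enter `CodeFenced`: start the opening tag.
        buf_tail_mut(&mut buffers).push("<pre><code".to_string());
        // Enter `CodeFencedFenceInfo`: capture the info string separately.
        buffer(&mut buffers);
        buf_tail_mut(&mut buffers).push("rust".to_string());
        // Exit `CodeFencedFenceInfo`: fold the captured text into an attribute.
        let info = resume(&mut buffers);
        buf_tail_mut(&mut buffers).push(format!(" class=\"language-{}\"", info));
        // Exit `CodeFencedFence` (first fence): close the opening tag.
        buf_tail_mut(&mut buffers).push(">".to_string());

        assert_eq!(buffers[0].concat(), "<pre><code class=\"language-rust\">");
    }

This is also why the `CodeFencedFenceMeta` exit arm simply calls `resume` and discards the result: the meta string is captured but never emitted.
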
diff --git a/src/content/string.rs b/src/content/string.rs
index 2723785..25d8582 100644
--- a/src/content/string.rs
+++ b/src/content/string.rs
@@ -55,6 +55,7 @@ fn before_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn in_data(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
match code {
+ // To do: line endings.
Code::None => {
tokenizer.exit(TokenType::Data);
(State::Ok, None)
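
The `in_data` state above follows the tokenizer's general shape: each state is a function that is fed one `Code` at a time and either finishes or hands back the next state function to call. A stripped-down, self-contained sketch of that pattern — the `Code`, `State`, and driver types here are simplified assumptions, not the crate's `Tokenizer`/`StateFnResult` definitions:

    // Self-contained sketch of the character-fed state-machine pattern;
    // the types are simplified assumptions, not the crate's definitions.

    #[derive(Clone, Copy)]
    enum Code {
        None,       // end of input
        Char(char), // one character of input
    }

    enum State {
        Ok,                                 // this content is done
        Fn(fn(Code, &mut String) -> State), // feed the next code here
    }

    /// Accumulate plain data until the input ends.
    fn in_data(code: Code, data: &mut String) -> State {
        match code {
            Code::None => State::Ok,
            Code::Char(c) => {
                data.push(c);
                State::Fn(in_data)
            }
        }
    }

    fn main() {
        let mut data = String::new();
        let mut state = State::Fn(in_data);
        let codes = "abc".chars().map(Code::Char).chain(std::iter::once(Code::None));

        for code in codes {
            state = match state {
                State::Ok => break,
                State::Fn(state_fn) => state_fn(code, &mut data),
            };
        }

        assert_eq!(data, "abc");
    }
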
diff --git a/src/content/text.rs b/src/content/text.rs
index f61b390..6a30d4c 100644
--- a/src/content/text.rs
+++ b/src/content/text.rs
@@ -11,7 +11,6 @@
//! * [Hard break (escape)][crate::construct::hard_break_escape]
//! * [Hard break (trailing)][crate::construct::hard_break_trailing]
//! * [Code (text)][crate::construct::code_text]
-//! * Line ending
//! * Label start (image)
//! * Label start (link)
//! * [Character escape][crate::construct::character_escape]
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index da45ee5..0aae480 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -24,66 +24,52 @@ pub enum TokenType {
AutolinkMarker,
AutolinkProtocol,
AutolinkEmail,
-
AtxHeading,
AtxHeadingSequence,
AtxHeadingWhitespace,
AtxHeadingText,
-
+ BlankLineEnding,
+ BlankLineWhitespace,
CharacterEscape,
CharacterEscapeMarker,
CharacterEscapeValue,
-
CharacterReference,
CharacterReferenceMarker,
CharacterReferenceMarkerNumeric,
CharacterReferenceMarkerHexadecimal,
CharacterReferenceMarkerSemi,
CharacterReferenceValue,
-
CodeFenced,
CodeFencedFence,
CodeFencedFenceSequence,
CodeFencedFenceWhitespace,
CodeFencedFenceInfo,
CodeFencedFenceMeta,
-
+ CodeFlowChunk,
CodeIndented,
CodeIndentedPrefixWhitespace,
-
CodeText,
CodeTextSequence,
CodeTextLineEnding,
CodeTextData,
-
- CodeFlowChunk,
-
+ Content,
Data,
-
HardBreakEscape,
HardBreakEscapeMarker,
HardBreakTrailing,
HardBreakTrailingSpace,
-
HtmlFlow,
HtmlFlowData,
-
HtmlText,
HtmlTextData,
-
+ LineEnding,
+ Paragraph,
ThematicBreak,
ThematicBreakSequence,
ThematicBreakWhitespace,
-
Whitespace,
- LineEnding,
- BlankLineEnding,
- BlankLineWhitespace,
-
- Content,
-
- Paragraph,
+ // Chunks are tokenized, but unraveled by `subtokenize`.
ChunkContent,
ChunkString,
ChunkText,
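
The `TokenType` variants above reach the compiler wrapped in `Enter`/`Exit` events, which is why `compile` matches on the event type first and the token type second. Below is an assumed, abridged illustration of the kind of stream a small input produces; the real events also carry positions, which `from_exit_event` and `serialize` use to slice the original `codes`:

    // Assumed, abridged illustration of an event stream; real events also
    // carry positions used by `from_exit_event` and `serialize`.

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum TokenType {
        AtxHeading,
        AtxHeadingSequence,
        AtxHeadingWhitespace,
        AtxHeadingText,
    }

    #[derive(Debug, Clone, Copy, PartialEq)]
    enum EventType {
        Enter,
        Exit,
    }

    #[derive(Debug)]
    struct Event {
        event_type: EventType,
        token_type: TokenType,
    }

    fn event(event_type: EventType, token_type: TokenType) -> Event {
        Event { event_type, token_type }
    }

    fn main() {
        use EventType::{Enter, Exit};
        use TokenType::*;

        // Plausible (abridged) stream for the input `# hi`.
        let events = vec![
            event(Enter, AtxHeading),
            event(Enter, AtxHeadingSequence),
            event(Exit, AtxHeadingSequence),
            event(Enter, AtxHeadingWhitespace),
            event(Exit, AtxHeadingWhitespace),
            event(Enter, AtxHeadingText),
            event(Exit, AtxHeadingText),
            event(Exit, AtxHeading),
        ];

        // Every `Enter` has a matching `Exit`, so the compiler can keep
        // simple state (buffers, counters) between the two and flush it
        // on `Exit`.
        assert_eq!(events[0].token_type, AtxHeading);
        let enters = events.iter().filter(|e| e.event_type == Enter).count();
        let exits = events.iter().filter(|e| e.event_type == Exit).count();
        assert_eq!(enters, exits);
    }
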