about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Titus Wormer <tituswormer@gmail.com> 2022-06-21 12:06:51 +0200
committer Titus Wormer <tituswormer@gmail.com> 2022-06-21 12:06:51 +0200
commit f99d131ec3ab60956344d001bcd40244343c241b (patch)
tree ac798f9a6a1ab73021cdd5a5303e20424d37172e /src
parent 182467c1d393dee2081ff80f1c049cb145f23123 (diff)
download markdown-rs-f99d131ec3ab60956344d001bcd40244343c241b.tar.gz
markdown-rs-f99d131ec3ab60956344d001bcd40244343c241b.tar.bz2
markdown-rs-f99d131ec3ab60956344d001bcd40244343c241b.zip
Add support for inferring line ending, configurable
* Rename `CompileOptions` to `Options`
* Add support for an optional default line ending style
* Add support for inferring the used line ending style
Diffstat (limited to 'src')
-rw-r--r-- src/compiler.rs 89
-rw-r--r-- src/lib.rs 11
-rw-r--r-- src/tokenizer.rs 6
3 files changed, 83 insertions, 23 deletions
diff --git a/src/compiler.rs b/src/compiler.rs
index 366dcd9..5c7f6d8 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -8,9 +8,17 @@ use crate::util::{
span::{codes as codes_from_span, from_exit_event, serialize},
};
+/// Line ending style used when the compiler emits a line ending into the output (`\r\n`, `\r`, or `\n`).
+#[derive(Debug, Clone)]
+pub enum LineEnding {
+ CarriageReturnLineFeed, // Emitted as `\r\n`.
+ CarriageReturn, // Emitted as `\r`.
+ LineFeed, // Emitted as `\n`.
+}
+
/// Configuration (optional).
#[derive(Default, Debug)]
-pub struct CompileOptions {
+pub struct Options {
/// Whether to allow (dangerous) HTML.
/// The default is `false`, you can turn it on to `true` for trusted
/// content.
@@ -18,7 +26,7 @@ pub struct CompileOptions {
/// ## Examples
///
/// ```rust
- /// use micromark::{micromark, micromark_with_options, CompileOptions};
+ /// use micromark::{micromark, micromark_with_options, Options};
///
/// // micromark is safe by default:
/// assert_eq!(
@@ -30,9 +38,11 @@ pub struct CompileOptions {
/// assert_eq!(
/// micromark_with_options(
/// "Hi, <i>venus</i>!",
- /// &CompileOptions {
+ /// &Options {
/// allow_dangerous_html: true,
/// allow_dangerous_protocol: false,
+ /// default_line_ending: None,
+ ///
/// }
/// ),
/// "<p>Hi, <i>venus</i>!</p>"
@@ -47,7 +57,7 @@ pub struct CompileOptions {
/// ## Examples
///
/// ```rust
- /// use micromark::{micromark, micromark_with_options, CompileOptions};
+ /// use micromark::{micromark, micromark_with_options, Options};
///
/// // micromark is safe by default:
/// assert_eq!(
@@ -59,20 +69,24 @@ pub struct CompileOptions {
/// assert_eq!(
/// micromark_with_options(
/// "<javascript:alert(1)>",
- /// &CompileOptions {
+ /// &Options {
/// allow_dangerous_html: false,
/// allow_dangerous_protocol: true,
+ /// default_line_ending: None,
/// }
/// ),
/// "<p><a href=\"javascript:alert(1)\">javascript:alert(1)</a></p>"
/// );
/// ```
pub allow_dangerous_protocol: bool,
+
+ /// Line ending to use when none can be inferred from the input; `None` falls back to `\n`.
+ pub default_line_ending: Option<LineEnding>,
}
/// Turn events and codes into a string of HTML.
#[allow(clippy::too_many_lines)]
-pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> String {
+pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
let mut index = 0;
// let mut last_was_tag = false;
let buffers: &mut Vec<Vec<String>> = &mut vec![vec![]];
@@ -89,6 +103,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
} else {
Some(vec!["http", "https", "irc", "ircs", "mailto", "xmpp"])
};
+ let mut line_ending_inferred: Option<LineEnding> = None;
// let protocol_src = if options.allow_dangerous_protocol {
// None
// } else {
@@ -96,6 +111,40 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
// };
// let mut slurp_all_line_endings = false;
+ while index < events.len() {
+ let event = &events[index];
+
+ if event.event_type == EventType::Exit
+ && (event.token_type == TokenType::BlankLineEnding
+ || event.token_type == TokenType::CodeTextLineEnding
+ || event.token_type == TokenType::LineEnding)
+ {
+ let codes = codes_from_span(codes, &from_exit_event(events, index));
+ let code = *codes.first().unwrap();
+ line_ending_inferred = Some(if code == Code::CarriageReturnLineFeed {
+ LineEnding::CarriageReturnLineFeed
+ } else if code == Code::Char('\r') {
+ LineEnding::CarriageReturn
+ } else {
+ LineEnding::LineFeed
+ });
+ break;
+ }
+
+ index += 1;
+ }
+
+ let line_ending_default: LineEnding;
+
+ if let Some(value) = line_ending_inferred {
+ line_ending_default = value;
+ } else if let Some(value) = &options.default_line_ending {
+ line_ending_default = value.clone();
+ } else {
+ line_ending_default = LineEnding::LineFeed;
+ }
+
+ index = 0;
while index < events.len() {
let event = &events[index];
@@ -162,12 +211,12 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
}
TokenType::CodeIndented => {
code_flow_seen_data = Some(false);
- line_ending_if_needed(buffers);
+ line_ending_if_needed(buffers, &line_ending_default);
buf_tail_mut(buffers).push("<pre><code>".to_string());
}
TokenType::CodeFenced => {
code_flow_seen_data = Some(false);
- line_ending_if_needed(buffers);
+ line_ending_if_needed(buffers, &line_ending_default);
// Note that no `>` is used, which is added later.
buf_tail_mut(buffers).push("<pre><code".to_string());
code_fenced_fences_count = Some(0);
@@ -177,7 +226,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
buffer(buffers);
}
TokenType::HtmlFlow => {
- line_ending_if_needed(buffers);
+ line_ending_if_needed(buffers, &line_ending_default);
if options.allow_dangerous_html {
ignore_encode = true;
}
@@ -297,14 +346,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St
// But in most cases, it’s simpler: when we’ve seen some data, emit an extra
// line ending when needed.
if seen_data {
- line_ending_if_needed(buffers);
+ line_ending_if_needed(buffers, &line_ending_default);
}
buf_tail_mut(buffers).push("</code></pre>".to_string());
if let Some(count) = code_fenced_fences_count {
if count < 2 {
- line_ending_if_needed(buffers);
+ line_ending_if_needed(buffers, &line_ending_default);
}
}
@@ -506,15 +555,23 @@ fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> {
}
/// Add a line ending.
-fn line_ending(buffers: &mut [Vec<String>]) {
+fn line_ending(buffers: &mut [Vec<String>], default: &LineEnding) {
let tail = buf_tail_mut(buffers);
- // To do: use inferred line ending style.
+
+ println!("xxx: {:?}", default); // NOTE(review): leftover debug output, printed to stdout on every call — remove before release.
+
+ let line_ending = match default {
+ LineEnding::CarriageReturnLineFeed => "\r\n",
+ LineEnding::CarriageReturn => "\r",
+ LineEnding::LineFeed => "\n",
+ };
+
// lastWasTag = false
- tail.push("\n".to_string());
+ tail.push(line_ending.to_string());
}
/// Add a line ending if needed (as in, there’s no eol/eof already).
-fn line_ending_if_needed(buffers: &mut [Vec<String>]) {
+fn line_ending_if_needed(buffers: &mut [Vec<String>], default: &LineEnding) {
let slice = buf_tail_slice(buffers);
let last_char = if let Some(x) = slice {
x.chars().last()
@@ -532,6 +589,6 @@ fn line_ending_if_needed(buffers: &mut [Vec<String>]) {
}
if add {
- line_ending(buffers);
+ line_ending(buffers, default);
}
}
diff --git a/src/lib.rs b/src/lib.rs
index 906cd4b..ba129dc 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -14,7 +14,7 @@ mod tokenizer;
mod util;
use crate::compiler::compile;
-pub use crate::compiler::CompileOptions;
+pub use crate::compiler::{LineEnding, Options};
use crate::parser::parse;
/// Turn markdown into HTML.
@@ -30,7 +30,7 @@ use crate::parser::parse;
/// ```
#[must_use]
pub fn micromark(value: &str) -> String {
- micromark_with_options(value, &CompileOptions::default())
+ micromark_with_options(value, &Options::default())
}
/// Turn markdown into HTML, with configuration.
@@ -38,17 +38,18 @@ pub fn micromark(value: &str) -> String {
/// ## Examples
///
/// ```rust
-/// use micromark::{micromark_with_options, CompileOptions};
+/// use micromark::{micromark_with_options, Options};
///
-/// let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &CompileOptions {
+/// let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &Options {
/// allow_dangerous_html: true,
/// allow_dangerous_protocol: true,
+/// default_line_ending: None,
/// });
///
/// assert_eq!(result, "<div>\n<h1>Hello, world!</h1>\n</div>");
/// ```
#[must_use]
-pub fn micromark_with_options(value: &str, options: &CompileOptions) -> String {
+pub fn micromark_with_options(value: &str, options: &Options) -> String {
let (events, codes) = parse(value);
compile(&events, &codes, options)
}
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index c0a7105..ba9bcbb 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -677,7 +677,6 @@ fn attempt_impl(
}
/// Turn a string into codes.
-// To do: handle BOM at start?
pub fn as_codes(value: &str) -> Vec<Code> {
let mut codes: Vec<Code> = vec![];
let mut at_start = true;
@@ -748,7 +747,10 @@ pub fn as_codes(value: &str) -> Vec<Code> {
};
}
- // To do: handle a final CR?
+ // Send the last CR: we’re not at a next `\n`.
+ if at_carriage_return {
+ codes.push(Code::Char('\r'));
+ }
codes
}