diff options
author | Titus Wormer <tituswormer@gmail.com> | 2022-06-21 12:06:51 +0200 |
---|---|---|
committer | Titus Wormer <tituswormer@gmail.com> | 2022-06-21 12:06:51 +0200 |
commit | f99d131ec3ab60956344d001bcd40244343c241b (patch) | |
tree | ac798f9a6a1ab73021cdd5a5303e20424d37172e /src | |
parent | 182467c1d393dee2081ff80f1c049cb145f23123 (diff) | |
download | markdown-rs-f99d131ec3ab60956344d001bcd40244343c241b.tar.gz markdown-rs-f99d131ec3ab60956344d001bcd40244343c241b.tar.bz2 markdown-rs-f99d131ec3ab60956344d001bcd40244343c241b.zip |
Add support for inferring the line ending, with a configurable default
* Rename `CompileOptions` to `Options`
* Add support for an optional default line ending style
* Add support for inferring the used line ending style
Diffstat (limited to 'src')
-rw-r--r-- | src/compiler.rs | 89 | ||||
-rw-r--r-- | src/lib.rs | 11 | ||||
-rw-r--r-- | src/tokenizer.rs | 6 |
3 files changed, 83 insertions, 23 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index 366dcd9..5c7f6d8 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -8,9 +8,17 @@ use crate::util::{ span::{codes as codes_from_span, from_exit_event, serialize}, }; +/// To do. +#[derive(Debug, Clone)] +pub enum LineEnding { + CarriageReturnLineFeed, + CarriageReturn, + LineFeed, +} + /// Configuration (optional). #[derive(Default, Debug)] -pub struct CompileOptions { +pub struct Options { /// Whether to allow (dangerous) HTML. /// The default is `false`, you can turn it on to `true` for trusted /// content. @@ -18,7 +26,7 @@ pub struct CompileOptions { /// ## Examples /// /// ```rust - /// use micromark::{micromark, micromark_with_options, CompileOptions}; + /// use micromark::{micromark, micromark_with_options, Options}; /// /// // micromark is safe by default: /// assert_eq!( @@ -30,9 +38,11 @@ pub struct CompileOptions { /// assert_eq!( /// micromark_with_options( /// "Hi, <i>venus</i>!", - /// &CompileOptions { + /// &Options { /// allow_dangerous_html: true, /// allow_dangerous_protocol: false, + /// default_line_ending: None, + /// /// } /// ), /// "<p>Hi, <i>venus</i>!</p>" @@ -47,7 +57,7 @@ pub struct CompileOptions { /// ## Examples /// /// ```rust - /// use micromark::{micromark, micromark_with_options, CompileOptions}; + /// use micromark::{micromark, micromark_with_options, Options}; /// /// // micromark is safe by default: /// assert_eq!( @@ -59,20 +69,24 @@ pub struct CompileOptions { /// assert_eq!( /// micromark_with_options( /// "<javascript:alert(1)>", - /// &CompileOptions { + /// &Options { /// allow_dangerous_html: false, /// allow_dangerous_protocol: true, + /// default_line_ending: None, /// } /// ), /// "<p><a href=\"javascript:alert(1)\">javascript:alert(1)</a></p>" /// ); /// ``` pub allow_dangerous_protocol: bool, + + /// To do. + pub default_line_ending: Option<LineEnding>, } /// Turn events and codes into a string of HTML. 
#[allow(clippy::too_many_lines)] -pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> String { +pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { let mut index = 0; // let mut last_was_tag = false; let buffers: &mut Vec<Vec<String>> = &mut vec![vec![]]; @@ -89,6 +103,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St } else { Some(vec!["http", "https", "irc", "ircs", "mailto", "xmpp"]) }; + let mut line_ending_inferred: Option<LineEnding> = None; // let protocol_src = if options.allow_dangerous_protocol { // None // } else { @@ -96,6 +111,40 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St // }; // let mut slurp_all_line_endings = false; + while index < events.len() { + let event = &events[index]; + + if event.event_type == EventType::Exit + && (event.token_type == TokenType::BlankLineEnding + || event.token_type == TokenType::CodeTextLineEnding + || event.token_type == TokenType::LineEnding) + { + let codes = codes_from_span(codes, &from_exit_event(events, index)); + let code = *codes.first().unwrap(); + line_ending_inferred = Some(if code == Code::CarriageReturnLineFeed { + LineEnding::CarriageReturnLineFeed + } else if code == Code::Char('\r') { + LineEnding::CarriageReturn + } else { + LineEnding::LineFeed + }); + break; + } + + index += 1; + } + + let line_ending_default: LineEnding; + + if let Some(value) = line_ending_inferred { + line_ending_default = value; + } else if let Some(value) = &options.default_line_ending { + line_ending_default = value.clone(); + } else { + line_ending_default = LineEnding::LineFeed; + } + + index = 0; while index < events.len() { let event = &events[index]; @@ -162,12 +211,12 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St } TokenType::CodeIndented => { code_flow_seen_data = Some(false); - line_ending_if_needed(buffers); + line_ending_if_needed(buffers, 
&line_ending_default); buf_tail_mut(buffers).push("<pre><code>".to_string()); } TokenType::CodeFenced => { code_flow_seen_data = Some(false); - line_ending_if_needed(buffers); + line_ending_if_needed(buffers, &line_ending_default); // Note that no `>` is used, which is added later. buf_tail_mut(buffers).push("<pre><code".to_string()); code_fenced_fences_count = Some(0); @@ -177,7 +226,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St buffer(buffers); } TokenType::HtmlFlow => { - line_ending_if_needed(buffers); + line_ending_if_needed(buffers, &line_ending_default); if options.allow_dangerous_html { ignore_encode = true; } @@ -297,14 +346,14 @@ pub fn compile(events: &[Event], codes: &[Code], options: &CompileOptions) -> St // But in most cases, it’s simpler: when we’ve seen some data, emit an extra // line ending when needed. if seen_data { - line_ending_if_needed(buffers); + line_ending_if_needed(buffers, &line_ending_default); } buf_tail_mut(buffers).push("</code></pre>".to_string()); if let Some(count) = code_fenced_fences_count { if count < 2 { - line_ending_if_needed(buffers); + line_ending_if_needed(buffers, &line_ending_default); } } @@ -506,15 +555,23 @@ fn buf_tail(buffers: &mut [Vec<String>]) -> &Vec<String> { } /// Add a line ending. -fn line_ending(buffers: &mut [Vec<String>]) { +fn line_ending(buffers: &mut [Vec<String>], default: &LineEnding) { let tail = buf_tail_mut(buffers); - // To do: use inferred line ending style. + + println!("xxx: {:?}", default); + + let line_ending = match default { + LineEnding::CarriageReturnLineFeed => "\r\n", + LineEnding::CarriageReturn => "\r", + LineEnding::LineFeed => "\n", + }; + // lastWasTag = false - tail.push("\n".to_string()); + tail.push(line_ending.to_string()); } /// Add a line ending if needed (as in, there’s no eol/eof already). 
-fn line_ending_if_needed(buffers: &mut [Vec<String>]) { +fn line_ending_if_needed(buffers: &mut [Vec<String>], default: &LineEnding) { let slice = buf_tail_slice(buffers); let last_char = if let Some(x) = slice { x.chars().last() @@ -532,6 +589,6 @@ fn line_ending_if_needed(buffers: &mut [Vec<String>]) { } if add { - line_ending(buffers); + line_ending(buffers, default); } } @@ -14,7 +14,7 @@ mod tokenizer; mod util; use crate::compiler::compile; -pub use crate::compiler::CompileOptions; +pub use crate::compiler::{LineEnding, Options}; use crate::parser::parse; /// Turn markdown into HTML. @@ -30,7 +30,7 @@ use crate::parser::parse; /// ``` #[must_use] pub fn micromark(value: &str) -> String { - micromark_with_options(value, &CompileOptions::default()) + micromark_with_options(value, &Options::default()) } /// Turn markdown into HTML, with configuration. @@ -38,17 +38,18 @@ pub fn micromark(value: &str) -> String { /// ## Examples /// /// ```rust -/// use micromark::{micromark_with_options, CompileOptions}; +/// use micromark::{micromark_with_options, Options}; /// -/// let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &CompileOptions { +/// let result = micromark_with_options("<div>\n\n# Hello, world!\n\n</div>", &Options { /// allow_dangerous_html: true, /// allow_dangerous_protocol: true, +/// default_line_ending: None, /// }); /// /// assert_eq!(result, "<div>\n<h1>Hello, world!</h1>\n</div>"); /// ``` #[must_use] -pub fn micromark_with_options(value: &str, options: &CompileOptions) -> String { +pub fn micromark_with_options(value: &str, options: &Options) -> String { let (events, codes) = parse(value); compile(&events, &codes, options) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index c0a7105..ba9bcbb 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -677,7 +677,6 @@ fn attempt_impl( } /// Turn a string into codes. -// To do: handle BOM at start? 
pub fn as_codes(value: &str) -> Vec<Code> { let mut codes: Vec<Code> = vec![]; let mut at_start = true; @@ -748,7 +747,10 @@ pub fn as_codes(value: &str) -> Vec<Code> { }; } - // To do: handle a final CR? + // Send the last CR: we’re not at a next `\n`. + if at_carriage_return { + codes.push(Code::Char('\r')); + } codes } |