author	Titus Wormer <tituswormer@gmail.com>	2022-07-20 17:19:17 +0200
committer	Titus Wormer <tituswormer@gmail.com>	2022-07-20 17:19:17 +0200
commit	a820d849c3e20a1d72137072d70a7c8e00306f98 (patch)
tree	c2916ab31d6d481e0b53a06aa9b95dfcddd4163f /src
parent	7894ec75a7070591c3499fce1f409563c4edc7d7 (diff)
Refactor to improve allocation around strings
Diffstat (limited to 'src')
-rw-r--r--  src/compiler.rs                       | 174
-rw-r--r--  src/construct/character_reference.rs  |  10
-rw-r--r--  src/util/codes.rs                     |  35
-rw-r--r--  src/util/encode.rs                    |  50
-rw-r--r--  src/util/normalize_identifier.rs      |  13
-rw-r--r--  src/util/sanitize_uri.rs              |  49
6 files changed, 149 insertions, 182 deletions
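
The bulk of the change is in src/compiler.rs: the compiler's buffer stack goes from `Vec<Vec<String>>` (chunks joined with `concat()` on resume) to `Vec<String>` (one growing string per buffer, appended to with `push_str`), and the old `encode_opt` helper becomes a `push_raw` method. A minimal sketch of the new buffer shape, paraphrased from the hunks below rather than copied verbatim:

    struct Buffers {
        stack: Vec<String>,
    }

    impl Buffers {
        /// Open a new buffer to capture nested output.
        fn buffer(&mut self) {
            self.stack.push(String::new());
        }

        /// Close the current buffer; no per-chunk `concat()` is needed anymore.
        fn resume(&mut self) -> String {
            self.stack.pop().expect("Cannot resume w/o buffer")
        }

        /// Append in place instead of allocating one `String` per pushed chunk.
        fn push(&mut self, value: &str) {
            self.stack
                .last_mut()
                .expect("Cannot push w/o buffer")
                .push_str(value);
        }
    }
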
diff --git a/src/compiler.rs b/src/compiler.rs
index c79abed..1723190 100644
--- a/src/compiler.rs
+++ b/src/compiler.rs
@@ -84,7 +84,7 @@ struct CompileContext<'a> {
pub line_ending_default: LineEnding,
pub allow_dangerous_html: bool,
/// Intermediate results.
- pub buffers: Vec<Vec<String>>,
+ pub buffers: Vec<String>,
pub index: usize,
}
@@ -125,78 +125,62 @@ impl<'a> CompileContext<'a> {
},
line_ending_default: line_ending,
allow_dangerous_html: options.allow_dangerous_html,
- buffers: vec![vec![]],
+ buffers: vec![String::new()],
index: 0,
}
}
/// Push a buffer.
pub fn buffer(&mut self) {
- self.buffers.push(vec![]);
+ self.buffers.push(String::new());
}
/// Pop a buffer, returning its value.
pub fn resume(&mut self) -> String {
- self.buffers
- .pop()
- .expect("Cannot resume w/o buffer")
- .concat()
+ self.buffers.pop().expect("Cannot resume w/o buffer")
}
- pub fn push(&mut self, value: String) {
+ pub fn push<'x, S: Into<&'x str>>(&mut self, value: S) {
+ let value = value.into();
self.buffers
.last_mut()
.expect("Cannot push w/o buffer")
- .push(value);
+ .push_str(value);
self.last_was_tag = false;
}
- pub fn tag(&mut self, value: String) {
- if self.tags {
- self.buffers
- .last_mut()
- .expect("Cannot push w/o buffer")
- .push(value);
- self.last_was_tag = true;
+ pub fn push_raw<'x, S: Into<&'x str>>(&mut self, value: S) {
+ let value = value.into();
+ if self.ignore_encode {
+ self.push(value);
+ } else {
+ self.push(&*encode(value));
}
}
- /// Get the last chunk of current buffer.
- pub fn buf_tail_slice(&self) -> Option<&String> {
- self.buf_tail().last()
+ pub fn tag<'x, S: Into<&'x str>>(&mut self, value: S) {
+ if self.tags {
+ self.push(value.into());
+ self.last_was_tag = true;
+ }
}
/// Get the current buffer.
- pub fn buf_tail(&self) -> &Vec<String> {
+ pub fn buf_tail(&self) -> &String {
self.buffers
.last()
.expect("at least one buffer should exist")
}
- /// Optionally encode.
- pub fn encode_opt(&self, value: &str) -> String {
- if self.ignore_encode {
- value.to_string()
- } else {
- encode(value)
- }
- }
-
/// Add a line ending.
pub fn line_ending(&mut self) {
- let line_ending = self.line_ending_default.as_str().to_string();
- // lastWasTag = false
- self.push(line_ending);
+ let eol = self.line_ending_default.as_str().to_string();
+ self.push(&*eol);
}
/// Add a line ending if needed (as in, there’s no eol/eof already).
pub fn line_ending_if_needed(&mut self) {
- let slice = self.buf_tail_slice();
- let last_char = if let Some(x) = slice {
- x.chars().last()
- } else {
- None
- };
+ let last_char = self.buf_tail().chars().last();
let mut add = true;
if let Some(x) = last_char {
@@ -314,7 +298,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String {
.buffers
.get(0)
.expect("expected 1 final buffer")
- .concat()
+ .to_string()
}
/// Handle [`Enter`][EventType::Enter].
@@ -415,14 +399,14 @@ fn on_enter_buffer(context: &mut CompileContext) {
fn on_enter_block_quote(context: &mut CompileContext) {
context.tight_stack.push(false);
context.line_ending_if_needed();
- context.tag("<blockquote>".to_string());
+ context.tag("<blockquote>");
}
/// Handle [`Enter`][EventType::Enter]:[`CodeIndented`][Token::CodeIndented].
fn on_enter_code_indented(context: &mut CompileContext) {
context.code_flow_seen_data = Some(false);
context.line_ending_if_needed();
- context.tag("<pre><code>".to_string());
+ context.tag("<pre><code>");
}
/// Handle [`Enter`][EventType::Enter]:[`CodeFenced`][Token::CodeFenced].
@@ -430,14 +414,14 @@ fn on_enter_code_fenced(context: &mut CompileContext) {
context.code_flow_seen_data = Some(false);
context.line_ending_if_needed();
// Note that no `>` is used, which is added later.
- context.tag("<pre><code".to_string());
+ context.tag("<pre><code");
context.code_fenced_fences_count = Some(0);
}
/// Handle [`Enter`][EventType::Enter]:[`CodeText`][Token::CodeText].
fn on_enter_code_text(context: &mut CompileContext) {
context.code_text_inside = true;
- context.tag("<code>".to_string());
+ context.tag("<code>");
context.buffer();
}
@@ -462,7 +446,7 @@ fn on_enter_definition_destination_string(context: &mut CompileContext) {
/// Handle [`Enter`][EventType::Enter]:[`Emphasis`][Token::Emphasis].
fn on_enter_emphasis(context: &mut CompileContext) {
- context.tag("<em>".to_string());
+ context.tag("<em>");
}
/// Handle [`Enter`][EventType::Enter]:[`HtmlFlow`][Token::HtmlFlow].
@@ -563,7 +547,7 @@ fn on_enter_list(context: &mut CompileContext) {
context.tight_stack.push(!loose);
context.line_ending_if_needed();
// Note: no `>`.
- context.tag(format!(
+ context.tag(&*format!(
"<{}",
if *token_type == Token::ListOrdered {
"ol"
@@ -579,11 +563,11 @@ fn on_enter_list_item_marker(context: &mut CompileContext) {
let expect_first_item = context.expect_first_item.take().unwrap();
if expect_first_item {
- context.tag(">".to_string());
+ context.tag(">");
}
context.line_ending_if_needed();
- context.tag("<li>".to_string());
+ context.tag("<li>");
context.expect_first_item = Some(false);
// “Hack” to prevent a line ending from showing up if the item is empty.
context.last_was_tag = false;
@@ -595,7 +579,7 @@ fn on_enter_paragraph(context: &mut CompileContext) {
if !tight {
context.line_ending_if_needed();
- context.tag("<p>".to_string());
+ context.tag("<p>");
}
}
@@ -616,7 +600,7 @@ fn on_enter_resource_destination_string(context: &mut CompileContext) {
/// Handle [`Enter`][EventType::Enter]:[`Strong`][Token::Strong].
fn on_enter_strong(context: &mut CompileContext) {
- context.tag("<strong>".to_string());
+ context.tag("<strong>");
}
/// Handle [`Exit`][EventType::Exit]:[`AutolinkEmail`][Token::AutolinkEmail].
@@ -626,15 +610,15 @@ fn on_exit_autolink_email(context: &mut CompileContext) {
&from_exit_event(context.events, context.index),
false,
);
- context.tag(format!(
+ context.tag(&*format!(
"<a href=\"{}\">",
sanitize_uri(
format!("mailto:{}", slice.as_str()).as_str(),
&context.protocol_href
)
));
- context.push(context.encode_opt(&slice));
- context.tag("</a>".to_string());
+ context.push_raw(&*slice);
+ context.tag("</a>");
}
/// Handle [`Exit`][EventType::Exit]:[`AutolinkProtocol`][Token::AutolinkProtocol].
@@ -644,17 +628,17 @@ fn on_exit_autolink_protocol(context: &mut CompileContext) {
&from_exit_event(context.events, context.index),
false,
);
- context.tag(format!(
+ context.tag(&*format!(
"<a href=\"{}\">",
sanitize_uri(slice.as_str(), &context.protocol_href)
));
- context.push(context.encode_opt(&slice));
- context.tag("</a>".to_string());
+ context.push_raw(&*slice);
+ context.tag("</a>");
}
/// Handle [`Exit`][EventType::Exit]:{[`HardBreakEscape`][Token::HardBreakEscape],[`HardBreakTrailing`][Token::HardBreakTrailing]}.
fn on_exit_break(context: &mut CompileContext) {
- context.tag("<br />".to_string());
+ context.tag("<br />");
}
/// Handle [`Exit`][EventType::Exit]:[`BlankLineEnding`][Token::BlankLineEnding].
@@ -669,7 +653,7 @@ fn on_exit_block_quote(context: &mut CompileContext) {
context.tight_stack.pop();
context.line_ending_if_needed();
context.slurp_one_line_ending = false;
- context.tag("</blockquote>".to_string());
+ context.tag("</blockquote>");
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker].
@@ -705,17 +689,17 @@ fn on_exit_character_reference_value(context: &mut CompileContext) {
CharacterReferenceKind::Named => decode_named(ref_string),
};
- context.push(context.encode_opt(&value));
+ context.push_raw(&*value);
}
/// Handle [`Exit`][EventType::Exit]:[`CodeFlowChunk`][Token::CodeFlowChunk].
fn on_exit_code_flow_chunk(context: &mut CompileContext) {
context.code_flow_seen_data = Some(true);
- context.push(context.encode_opt(&serialize(
+ context.push_raw(&*serialize(
context.codes,
&from_exit_event(context.events, context.index),
false,
- )));
+ ));
}
/// Handle [`Exit`][EventType::Exit]:[`CodeFencedFence`][Token::CodeFencedFence].
@@ -727,7 +711,7 @@ fn on_exit_code_fenced_fence(context: &mut CompileContext) {
};
if count == 0 {
- context.tag(">".to_string());
+ context.tag(">");
context.slurp_one_line_ending = true;
}
@@ -737,7 +721,7 @@ fn on_exit_code_fenced_fence(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`CodeFencedFenceInfo`][Token::CodeFencedFenceInfo].
fn on_exit_code_fenced_fence_info(context: &mut CompileContext) {
let value = context.resume();
- context.tag(format!(" class=\"language-{}\"", value));
+ context.tag(&*format!(" class=\"language-{}\"", value));
}
/// Handle [`Exit`][EventType::Exit]:{[`CodeFenced`][Token::CodeFenced],[`CodeIndented`][Token::CodeIndented]}.
@@ -764,7 +748,7 @@ fn on_exit_code_flow(context: &mut CompileContext) {
context.line_ending_if_needed();
}
- context.tag("</code></pre>".to_string());
+ context.tag("</code></pre>");
if let Some(count) = context.code_fenced_fences_count.take() {
if count < 2 {
@@ -792,12 +776,12 @@ fn on_exit_code_text(context: &mut CompileContext) {
}
context.code_text_inside = false;
- context.push(if trim {
+ context.push(&*if trim {
result[1..(result.len() - 1)].to_string()
} else {
result
});
- context.tag("</code>".to_string());
+ context.tag("</code>");
}
/// Handle [`Exit`][EventType::Exit]:*.
@@ -810,11 +794,11 @@ fn on_exit_drop(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:{[`CodeTextData`][Token::CodeTextData],[`Data`][Token::Data],[`CharacterEscapeValue`][Token::CharacterEscapeValue]}.
fn on_exit_data(context: &mut CompileContext) {
// Just output it.
- context.push(context.encode_opt(&serialize(
+ context.push_raw(&*serialize(
context.codes,
&from_exit_event(context.events, context.index),
false,
- )));
+ ));
}
/// Handle [`Exit`][EventType::Exit]:[`Definition`][Token::Definition].
@@ -870,7 +854,7 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`Strong`][Token::Emphasis].
fn on_exit_emphasis(context: &mut CompileContext) {
- context.tag("</em>".to_string());
+ context.tag("</em>");
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtx`][Token::HeadingAtx].
@@ -880,7 +864,7 @@ fn on_exit_heading_atx(context: &mut CompileContext) {
.take()
.expect("`atx_opening_sequence_size` must be set in headings");
- context.tag(format!("</h{}>", rank));
+ context.tag(&*format!("</h{}>", rank));
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtxSequence`][Token::HeadingAtxSequence].
@@ -895,14 +879,14 @@ fn on_exit_heading_atx_sequence(context: &mut CompileContext) {
.len();
context.line_ending_if_needed();
context.atx_opening_sequence_size = Some(rank);
- context.tag(format!("<h{}>", rank));
+ context.tag(&*format!("<h{}>", rank));
}
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtxText`][Token::HeadingAtxText].
fn on_exit_heading_atx_text(context: &mut CompileContext) {
let value = context.resume();
- context.push(value);
+ context.push(&*value);
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingSetextText`][Token::HeadingSetextText].
@@ -925,9 +909,9 @@ fn on_exit_heading_setext_underline(context: &mut CompileContext) {
let level: usize = if head == Code::Char('-') { 2 } else { 1 };
context.line_ending_if_needed();
- context.tag(format!("<h{}>", level));
- context.push(text);
- context.tag(format!("</h{}>", level));
+ context.tag(&*format!("<h{}>", level));
+ context.push(&*text);
+ context.tag(&*format!("</h{}>", level));
}
/// Handle [`Exit`][EventType::Exit]:{[`HtmlFlow`][Token::HtmlFlow],[`HtmlText`][Token::HtmlText]}.
@@ -942,7 +926,7 @@ fn on_exit_html_data(context: &mut CompileContext) {
&from_exit_event(context.events, context.index),
false,
);
- context.push(context.encode_opt(&slice));
+ context.push_raw(&*slice);
}
/// Handle [`Exit`][EventType::Exit]:[`Label`][Token::Label].
@@ -965,15 +949,15 @@ fn on_exit_label_text(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding].
fn on_exit_line_ending(context: &mut CompileContext) {
if context.code_text_inside {
- context.push(" ".to_string());
+ context.push(" ");
} else if context.slurp_one_line_ending {
context.slurp_one_line_ending = false;
} else {
- context.push(context.encode_opt(&serialize(
+ context.push_raw(&*serialize(
context.codes,
&from_exit_event(context.events, context.index),
false,
- )));
+ ));
}
}
@@ -986,7 +970,7 @@ fn on_exit_list(context: &mut CompileContext) {
};
context.tight_stack.pop();
context.line_ending();
- context.tag(format!("</{}>", tag_name));
+ context.tag(&*format!("</{}>", tag_name));
}
/// Handle [`Exit`][EventType::Exit]:[`ListItem`][Token::ListItem].
@@ -1012,7 +996,7 @@ fn on_exit_list_item(context: &mut CompileContext) {
context.line_ending_if_needed();
}
- context.tag("</li>".to_string());
+ context.tag("</li>");
}
/// Handle [`Exit`][EventType::Exit]:[`ListItemValue`][Token::ListItemValue].
@@ -1028,7 +1012,9 @@ fn on_exit_list_item_value(context: &mut CompileContext) {
let value = slice.parse::<u32>().ok().unwrap();
if value != 1 {
- context.tag(format!(" start=\"{}\"", encode(&value.to_string())));
+ context.tag(" start=\"");
+ context.tag(&*value.to_string());
+ context.tag("\"");
}
}
}
@@ -1082,9 +1068,9 @@ fn on_exit_media(context: &mut CompileContext) {
};
let destination = if let Some(destination) = destination {
- destination.clone()
+ destination
} else {
- "".to_string()
+ ""
};
let title = if let Some(title) = title {
@@ -1094,20 +1080,20 @@ fn on_exit_media(context: &mut CompileContext) {
};
if media.image {
- context.tag(format!(
+ context.tag(&*format!(
"<img src=\"{}\" alt=\"",
- sanitize_uri(&destination, &context.protocol_src),
+ sanitize_uri(destination, &context.protocol_src),
));
- context.push(label);
- context.tag(format!("\"{} />", title));
+ context.push(&*label);
+ context.tag(&*format!("\"{} />", title));
} else {
- context.tag(format!(
+ context.tag(&*format!(
"<a href=\"{}\"{}>",
- sanitize_uri(&destination, &context.protocol_href),
+ sanitize_uri(destination, &context.protocol_href),
title,
));
- context.push(label);
- context.tag("</a>".to_string());
+ context.push(&*label);
+ context.tag("</a>");
};
}
@@ -1118,7 +1104,7 @@ fn on_exit_paragraph(context: &mut CompileContext) {
if *tight {
context.slurp_one_line_ending = true;
} else {
- context.tag("</p>".to_string());
+ context.tag("</p>");
}
}
@@ -1151,11 +1137,11 @@ fn on_exit_resource_title_string(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`Strong`][Token::Strong].
fn on_exit_strong(context: &mut CompileContext) {
- context.tag("</strong>".to_string());
+ context.tag("</strong>");
}
/// Handle [`Exit`][EventType::Exit]:[`ThematicBreak`][Token::ThematicBreak].
fn on_exit_thematic_break(context: &mut CompileContext) {
context.line_ending_if_needed();
- context.tag("<hr />".to_string());
+ context.tag("<hr />");
}
diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs
index ce7cd31..a4cbec1 100644
--- a/src/construct/character_reference.rs
+++ b/src/construct/character_reference.rs
@@ -121,7 +121,7 @@ impl Kind {
#[derive(Debug, Clone)]
struct Info {
/// All parsed characters.
- buffer: Vec<char>,
+ buffer: String,
/// Kind of character reference.
kind: Kind,
}
@@ -162,7 +162,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
/// ```
fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult {
let info = Info {
- buffer: vec![],
+ buffer: String::new(),
kind: Kind::Named,
};
if let Code::Char('#') = code {
@@ -216,10 +216,8 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResu
fn value(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult {
match code {
Code::Char(';') if !info.buffer.is_empty() => {
- let unknown_named = Kind::Named == info.kind && {
- let value = info.buffer.iter().collect::<String>();
- !CHARACTER_REFERENCES.iter().any(|d| d.0 == value)
- };
+ let unknown_named = Kind::Named == info.kind
+ && !CHARACTER_REFERENCES.iter().any(|d| d.0 == info.buffer);
if unknown_named {
(State::Nok, None)
diff --git a/src/util/codes.rs b/src/util/codes.rs
index 9b6ad39..d35d7d9 100644
--- a/src/util/codes.rs
+++ b/src/util/codes.rs
@@ -5,19 +5,21 @@ use crate::tokenizer::Code;
/// Turn a string into codes.
pub fn parse(value: &str) -> Vec<Code> {
- let mut codes: Vec<Code> = vec![];
+ // Note: It’ll grow a bit bigger with each `Code::VirtualSpace`, smaller
+ // with `Code::CarriageReturnLineFeed`.
+ let mut codes: Vec<Code> = Vec::with_capacity(value.len());
let mut at_start = true;
let mut at_carriage_return = false;
let mut column = 1;
for char in value.chars() {
if at_start {
+ at_start = false;
+
if char == '\u{feff}' {
// Ignore.
continue;
}
-
- at_start = false;
}
// Send a CRLF.
@@ -83,34 +85,33 @@ pub fn parse(value: &str) -> Vec<Code> {
/// Serialize codes, optionally expanding tabs.
pub fn serialize(codes: &[Code], expand_tabs: bool) -> String {
let mut at_tab = false;
- let mut index = 0;
- let mut value: Vec<char> = vec![];
+ // Note: It’ll grow a bit smaller with each
+ // `Code::Char('\t') | Code::VirtualSpace` if `expand_tabs` is false,
+ // and bigger with `Code::CarriageReturnLineFeed`,
+ let mut value = String::with_capacity(codes.len());
- while index < codes.len() {
- let code = codes[index];
+ for code in codes {
let mut at_tab_next = false;
match code {
Code::CarriageReturnLineFeed => {
- value.push('\r');
- value.push('\n');
+ value.push_str("\r\n");
}
- Code::Char(char) if char == '\n' || char == '\r' => {
- value.push(char);
+ Code::Char(char) if *char == '\n' || *char == '\r' => {
+ value.push(*char);
}
- Code::Char(char) if char == '\t' => {
+ Code::Char(char) if *char == '\t' => {
at_tab_next = true;
- value.push(if expand_tabs { ' ' } else { char });
+ value.push(if expand_tabs { ' ' } else { *char });
}
Code::VirtualSpace => {
if !expand_tabs && at_tab {
- index += 1;
continue;
}
value.push(' ');
}
Code::Char(char) => {
- value.push(char);
+ value.push(*char);
}
Code::None => {
unreachable!("unexpected EOF code in codes");
@@ -118,9 +119,7 @@ pub fn serialize(codes: &[Code], expand_tabs: bool) -> String {
}
at_tab = at_tab_next;
-
- index += 1;
}
- value.into_iter().collect()
+ value
}
diff --git a/src/util/encode.rs b/src/util/encode.rs
index a3bd589..965ea5c 100644
--- a/src/util/encode.rs
+++ b/src/util/encode.rs
@@ -20,37 +20,31 @@
/// ## References
///
/// * [`micromark-util-encode` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-encode)
-pub fn encode(value: &str) -> String {
- let mut result: Vec<&str> = vec![];
- let mut start = 0;
- let mut index = 0;
+pub fn encode<S: Into<String>>(value: S) -> String {
+ let mut value = value.into();
- for byte in value.bytes() {
- if let Some(replacement) = match byte {
- b'&' => Some("&amp;"),
- b'"' => Some("&quot;"),
- b'<' => Some("&lt;"),
- b'>' => Some("&gt;"),
- _ => None,
- } {
- if start != index {
- result.push(&value[start..index]);
- }
+ // It’ll grow a bit bigger for each dangerous character.
+ let mut result = String::with_capacity(value.len());
- result.push(replacement);
- start = index + 1;
- }
-
- index += 1;
+ while let Some(indice) = value.find(check) {
+ let after = value.split_off(indice + 1);
+ let dangerous = value.pop().unwrap();
+ result.push_str(&value);
+ result.push_str(match dangerous {
+ '&' => "&amp;",
+ '"' => "&quot;",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ _ => unreachable!("xxx"),
+ });
+ value = after;
}
- if start == 0 {
- value.to_string()
- } else {
- if start < index {
- result.push(&value[start..index]);
- }
+ result.push_str(&value);
- result.join("")
- }
+ result
+}
+
+fn check(char: char) -> bool {
+ matches!(char, '&' | '"' | '<' | '>')
}
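
For reference, the rewritten `encode` no longer collects per-byte slices: it repeatedly `find`s the next dangerous character, `split_off`s the tail, and appends the matching entity, so clean input is appended in bulk. A small usage sketch (hypothetical test, assuming the function as added above):

    #[test]
    fn encode_example() {
        // Accepts anything `Into<String>`, so `&str` and `String` both work.
        assert_eq!(encode("a < b & c"), "a &lt; b &amp; c");
        assert_eq!(encode(String::from("\"safe\"")), "&quot;safe&quot;");
    }
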
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index feb7239..42a2bb0 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -32,7 +32,8 @@
/// [definition]: crate::construct::definition
/// [label_end]: crate::construct::label_end
pub fn normalize_identifier(value: &str) -> String {
- let mut codes = vec![];
+ // Note: it’ll grow a bit smaller for consecutive whitespace.
+ let mut result = String::with_capacity(value.len());
let mut at_start = true;
let mut at_whitespace = true;
@@ -44,10 +45,10 @@ pub fn normalize_identifier(value: &str) -> String {
}
_ => {
if at_whitespace && !at_start {
- codes.push(' ');
+ result.push(' ');
}
- codes.push(char);
+ result.push(char);
at_start = false;
at_whitespace = false;
}
@@ -66,9 +67,5 @@ pub fn normalize_identifier(value: &str) -> String {
// to `SS` (U+0053 U+0053).
// If we’d inverse the steps, for `ẞ`, we’d first uppercase without a
// change, and then lowercase to `ß`, which would not match `ss`.
- codes
- .iter()
- .collect::<String>()
- .to_lowercase()
- .to_uppercase()
+ result.to_lowercase().to_uppercase()
}
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index 55b15e4..81450ae 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -32,32 +32,25 @@ use crate::util::encode::encode;
///
/// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
- let value = encode(&normalize_uri(value));
+ let value = encode(normalize_uri(value));
if let Some(protocols) = protocols {
- let chars: Vec<char> = value.chars().collect();
- let mut index = 0;
- let mut colon: Option<usize> = None;
-
- while index < chars.len() {
- let char = chars[index];
-
- match char {
- ':' => {
- colon = Some(index);
- break;
+ let end = value.find(|c| matches!(c, '?' | '#' | '/'));
+ let mut colon = value.find(|c| matches!(c, ':'));
+
+ // If the first colon is after `?`, `#`, or `/`, it’s not a protocol.
+ if let Some(end) = end {
+ if let Some(index) = colon {
+ if index > end {
+ colon = None;
}
- '?' | '#' | '/' => break,
- _ => {}
}
-
- index += 1;
}
- // If there is no protocol, or the first colon is after `?`, `#`, or `/`, it’s relative.
- // It is a protocol, it should be allowed.
+ // If there is no protocol, it’s relative, and fine.
if let Some(colon) = colon {
- let protocol = chars[0..colon].iter().collect::<String>().to_lowercase();
+ // If it is a protocol, it should be allowed.
+ let protocol = value[0..colon].to_lowercase();
if !protocols.contains(&protocol.as_str()) {
return "".to_string();
}
@@ -85,8 +78,9 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
///
/// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
fn normalize_uri(value: &str) -> String {
- let chars: Vec<char> = value.chars().collect();
- let mut result: Vec<String> = vec![];
+ let chars = value.chars().collect::<Vec<_>>();
+ // Note: it’ll grow bigger for each non-ascii or non-safe character.
+ let mut result = String::with_capacity(value.len());
let mut index = 0;
let mut start = 0;
let mut buff = [0; 4];
@@ -104,16 +98,15 @@ fn normalize_uri(value: &str) -> String {
continue;
}
- // Note: Rust already takes care of lone astral surrogates.
+ // Note: Rust already takes care of lone surrogates.
// Non-ascii or not allowed ascii.
if char >= '\u{0080}'
|| !matches!(char, '!' | '#' | '$' | '&'..=';' | '=' | '?'..='Z' | '_' | 'a'..='z' | '~')
{
- result.push(chars[start..index].iter().collect::<String>());
-
+ result.push_str(&chars[start..index].iter().collect::<String>());
char.encode_utf8(&mut buff);
- result.push(
- buff[0..char.len_utf8()]
+ result.push_str(
+ &buff[0..char.len_utf8()]
.iter()
.map(|&byte| format!("%{:>02X}", byte))
.collect::<String>(),
@@ -125,7 +118,7 @@ fn normalize_uri(value: &str) -> String {
index += 1;
}
- result.push(chars[start..].iter().collect::<String>());
+ result.push_str(&chars[start..].iter().collect::<String>());
- result.join("")
+ result
}
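
Taken together with `encode`, the new `sanitize_uri` scans the already-encoded value with `str::find` instead of collecting a `Vec<char>`: a colon only counts as a protocol delimiter when it comes before the first `?`, `#`, or `/`. A hypothetical usage sketch of the unchanged public behavior:

    #[test]
    fn sanitize_uri_example() {
        let allowed = Some(vec!["http", "https", "mailto"]);
        // Disallowed protocol: the whole URL is dropped.
        assert_eq!(sanitize_uri("javascript:alert(1)", &allowed), "");
        // Allowed protocol: unsafe characters are percent-encoded.
        assert_eq!(
            sanitize_uri("https://example.com/a b", &allowed),
            "https://example.com/a%20b"
        );
    }
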