diff options
Diffstat (limited to '')
-rw-r--r-- | src/compiler.rs | 174 | ||||
-rw-r--r-- | src/construct/character_reference.rs | 10 | ||||
-rw-r--r-- | src/util/codes.rs | 35 | ||||
-rw-r--r-- | src/util/encode.rs | 50 | ||||
-rw-r--r-- | src/util/normalize_identifier.rs | 13 | ||||
-rw-r--r-- | src/util/sanitize_uri.rs | 49 |
6 files changed, 149 insertions, 182 deletions
diff --git a/src/compiler.rs b/src/compiler.rs index c79abed..1723190 100644 --- a/src/compiler.rs +++ b/src/compiler.rs @@ -84,7 +84,7 @@ struct CompileContext<'a> { pub line_ending_default: LineEnding, pub allow_dangerous_html: bool, /// Intermediate results. - pub buffers: Vec<Vec<String>>, + pub buffers: Vec<String>, pub index: usize, } @@ -125,78 +125,62 @@ impl<'a> CompileContext<'a> { }, line_ending_default: line_ending, allow_dangerous_html: options.allow_dangerous_html, - buffers: vec![vec![]], + buffers: vec![String::new()], index: 0, } } /// Push a buffer. pub fn buffer(&mut self) { - self.buffers.push(vec![]); + self.buffers.push(String::new()); } /// Pop a buffer, returning its value. pub fn resume(&mut self) -> String { - self.buffers - .pop() - .expect("Cannot resume w/o buffer") - .concat() + self.buffers.pop().expect("Cannot resume w/o buffer") } - pub fn push(&mut self, value: String) { + pub fn push<'x, S: Into<&'x str>>(&mut self, value: S) { + let value = value.into(); self.buffers .last_mut() .expect("Cannot push w/o buffer") - .push(value); + .push_str(value); self.last_was_tag = false; } - pub fn tag(&mut self, value: String) { - if self.tags { - self.buffers - .last_mut() - .expect("Cannot push w/o buffer") - .push(value); - self.last_was_tag = true; + pub fn push_raw<'x, S: Into<&'x str>>(&mut self, value: S) { + let value = value.into(); + if self.ignore_encode { + self.push(value); + } else { + self.push(&*encode(value)); } } - /// Get the last chunk of current buffer. - pub fn buf_tail_slice(&self) -> Option<&String> { - self.buf_tail().last() + pub fn tag<'x, S: Into<&'x str>>(&mut self, value: S) { + if self.tags { + self.push(value.into()); + self.last_was_tag = true; + } } /// Get the current buffer. - pub fn buf_tail(&self) -> &Vec<String> { + pub fn buf_tail(&self) -> &String { self.buffers .last() .expect("at least one buffer should exist") } - /// Optionally encode. 
- pub fn encode_opt(&self, value: &str) -> String { - if self.ignore_encode { - value.to_string() - } else { - encode(value) - } - } - /// Add a line ending. pub fn line_ending(&mut self) { - let line_ending = self.line_ending_default.as_str().to_string(); - // lastWasTag = false - self.push(line_ending); + let eol = self.line_ending_default.as_str().to_string(); + self.push(&*eol); } /// Add a line ending if needed (as in, there’s no eol/eof already). pub fn line_ending_if_needed(&mut self) { - let slice = self.buf_tail_slice(); - let last_char = if let Some(x) = slice { - x.chars().last() - } else { - None - }; + let last_char = self.buf_tail().chars().last(); let mut add = true; if let Some(x) = last_char { @@ -314,7 +298,7 @@ pub fn compile(events: &[Event], codes: &[Code], options: &Options) -> String { .buffers .get(0) .expect("expected 1 final buffer") - .concat() + .to_string() } /// Handle [`Enter`][EventType::Enter]. @@ -415,14 +399,14 @@ fn on_enter_buffer(context: &mut CompileContext) { fn on_enter_block_quote(context: &mut CompileContext) { context.tight_stack.push(false); context.line_ending_if_needed(); - context.tag("<blockquote>".to_string()); + context.tag("<blockquote>"); } /// Handle [`Enter`][EventType::Enter]:[`CodeIndented`][Token::CodeIndented]. fn on_enter_code_indented(context: &mut CompileContext) { context.code_flow_seen_data = Some(false); context.line_ending_if_needed(); - context.tag("<pre><code>".to_string()); + context.tag("<pre><code>"); } /// Handle [`Enter`][EventType::Enter]:[`CodeFenced`][Token::CodeFenced]. @@ -430,14 +414,14 @@ fn on_enter_code_fenced(context: &mut CompileContext) { context.code_flow_seen_data = Some(false); context.line_ending_if_needed(); // Note that no `>` is used, which is added later. - context.tag("<pre><code".to_string()); + context.tag("<pre><code"); context.code_fenced_fences_count = Some(0); } /// Handle [`Enter`][EventType::Enter]:[`CodeText`][Token::CodeText]. 
fn on_enter_code_text(context: &mut CompileContext) { context.code_text_inside = true; - context.tag("<code>".to_string()); + context.tag("<code>"); context.buffer(); } @@ -462,7 +446,7 @@ fn on_enter_definition_destination_string(context: &mut CompileContext) { /// Handle [`Enter`][EventType::Enter]:[`Emphasis`][Token::Emphasis]. fn on_enter_emphasis(context: &mut CompileContext) { - context.tag("<em>".to_string()); + context.tag("<em>"); } /// Handle [`Enter`][EventType::Enter]:[`HtmlFlow`][Token::HtmlFlow]. @@ -563,7 +547,7 @@ fn on_enter_list(context: &mut CompileContext) { context.tight_stack.push(!loose); context.line_ending_if_needed(); // Note: no `>`. - context.tag(format!( + context.tag(&*format!( "<{}", if *token_type == Token::ListOrdered { "ol" @@ -579,11 +563,11 @@ fn on_enter_list_item_marker(context: &mut CompileContext) { let expect_first_item = context.expect_first_item.take().unwrap(); if expect_first_item { - context.tag(">".to_string()); + context.tag(">"); } context.line_ending_if_needed(); - context.tag("<li>".to_string()); + context.tag("<li>"); context.expect_first_item = Some(false); // “Hack” to prevent a line ending from showing up if the item is empty. context.last_was_tag = false; @@ -595,7 +579,7 @@ fn on_enter_paragraph(context: &mut CompileContext) { if !tight { context.line_ending_if_needed(); - context.tag("<p>".to_string()); + context.tag("<p>"); } } @@ -616,7 +600,7 @@ fn on_enter_resource_destination_string(context: &mut CompileContext) { /// Handle [`Enter`][EventType::Enter]:[`Strong`][Token::Strong]. fn on_enter_strong(context: &mut CompileContext) { - context.tag("<strong>".to_string()); + context.tag("<strong>"); } /// Handle [`Exit`][EventType::Exit]:[`AutolinkEmail`][Token::AutolinkEmail]. 
@@ -626,15 +610,15 @@ fn on_exit_autolink_email(context: &mut CompileContext) { &from_exit_event(context.events, context.index), false, ); - context.tag(format!( + context.tag(&*format!( "<a href=\"{}\">", sanitize_uri( format!("mailto:{}", slice.as_str()).as_str(), &context.protocol_href ) )); - context.push(context.encode_opt(&slice)); - context.tag("</a>".to_string()); + context.push_raw(&*slice); + context.tag("</a>"); } /// Handle [`Exit`][EventType::Exit]:[`AutolinkProtocol`][Token::AutolinkProtocol]. @@ -644,17 +628,17 @@ fn on_exit_autolink_protocol(context: &mut CompileContext) { &from_exit_event(context.events, context.index), false, ); - context.tag(format!( + context.tag(&*format!( "<a href=\"{}\">", sanitize_uri(slice.as_str(), &context.protocol_href) )); - context.push(context.encode_opt(&slice)); - context.tag("</a>".to_string()); + context.push_raw(&*slice); + context.tag("</a>"); } /// Handle [`Exit`][EventType::Exit]:{[`HardBreakEscape`][Token::HardBreakEscape],[`HardBreakTrailing`][Token::HardBreakTrailing]}. fn on_exit_break(context: &mut CompileContext) { - context.tag("<br />".to_string()); + context.tag("<br />"); } /// Handle [`Exit`][EventType::Exit]:[`BlankLineEnding`][Token::BlankLineEnding]. @@ -669,7 +653,7 @@ fn on_exit_block_quote(context: &mut CompileContext) { context.tight_stack.pop(); context.line_ending_if_needed(); context.slurp_one_line_ending = false; - context.tag("</blockquote>".to_string()); + context.tag("</blockquote>"); } /// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker]. @@ -705,17 +689,17 @@ fn on_exit_character_reference_value(context: &mut CompileContext) { CharacterReferenceKind::Named => decode_named(ref_string), }; - context.push(context.encode_opt(&value)); + context.push_raw(&*value); } /// Handle [`Exit`][EventType::Exit]:[`CodeFlowChunk`][Token::CodeFlowChunk]. 
fn on_exit_code_flow_chunk(context: &mut CompileContext) { context.code_flow_seen_data = Some(true); - context.push(context.encode_opt(&serialize( + context.push_raw(&*serialize( context.codes, &from_exit_event(context.events, context.index), false, - ))); + )); } /// Handle [`Exit`][EventType::Exit]:[`CodeFencedFence`][Token::CodeFencedFence]. @@ -727,7 +711,7 @@ fn on_exit_code_fenced_fence(context: &mut CompileContext) { }; if count == 0 { - context.tag(">".to_string()); + context.tag(">"); context.slurp_one_line_ending = true; } @@ -737,7 +721,7 @@ fn on_exit_code_fenced_fence(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`CodeFencedFenceInfo`][Token::CodeFencedFenceInfo]. fn on_exit_code_fenced_fence_info(context: &mut CompileContext) { let value = context.resume(); - context.tag(format!(" class=\"language-{}\"", value)); + context.tag(&*format!(" class=\"language-{}\"", value)); } /// Handle [`Exit`][EventType::Exit]:{[`CodeFenced`][Token::CodeFenced],[`CodeIndented`][Token::CodeIndented]}. @@ -764,7 +748,7 @@ fn on_exit_code_flow(context: &mut CompileContext) { context.line_ending_if_needed(); } - context.tag("</code></pre>".to_string()); + context.tag("</code></pre>"); if let Some(count) = context.code_fenced_fences_count.take() { if count < 2 { @@ -792,12 +776,12 @@ fn on_exit_code_text(context: &mut CompileContext) { } context.code_text_inside = false; - context.push(if trim { + context.push(&*if trim { result[1..(result.len() - 1)].to_string() } else { result }); - context.tag("</code>".to_string()); + context.tag("</code>"); } /// Handle [`Exit`][EventType::Exit]:*. @@ -810,11 +794,11 @@ fn on_exit_drop(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:{[`CodeTextData`][Token::CodeTextData],[`Data`][Token::Data],[`CharacterEscapeValue`][Token::CharacterEscapeValue]}. fn on_exit_data(context: &mut CompileContext) { // Just output it. 
- context.push(context.encode_opt(&serialize( + context.push_raw(&*serialize( context.codes, &from_exit_event(context.events, context.index), false, - ))); + )); } /// Handle [`Exit`][EventType::Exit]:[`Definition`][Token::Definition]. @@ -870,7 +854,7 @@ fn on_exit_definition_title_string(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`Strong`][Token::Emphasis]. fn on_exit_emphasis(context: &mut CompileContext) { - context.tag("</em>".to_string()); + context.tag("</em>"); } /// Handle [`Exit`][EventType::Exit]:[`HeadingAtx`][Token::HeadingAtx]. @@ -880,7 +864,7 @@ fn on_exit_heading_atx(context: &mut CompileContext) { .take() .expect("`atx_opening_sequence_size` must be set in headings"); - context.tag(format!("</h{}>", rank)); + context.tag(&*format!("</h{}>", rank)); } /// Handle [`Exit`][EventType::Exit]:[`HeadingAtxSequence`][Token::HeadingAtxSequence]. @@ -895,14 +879,14 @@ fn on_exit_heading_atx_sequence(context: &mut CompileContext) { .len(); context.line_ending_if_needed(); context.atx_opening_sequence_size = Some(rank); - context.tag(format!("<h{}>", rank)); + context.tag(&*format!("<h{}>", rank)); } } /// Handle [`Exit`][EventType::Exit]:[`HeadingAtxText`][Token::HeadingAtxText]. fn on_exit_heading_atx_text(context: &mut CompileContext) { let value = context.resume(); - context.push(value); + context.push(&*value); } /// Handle [`Exit`][EventType::Exit]:[`HeadingSetextText`][Token::HeadingSetextText]. @@ -925,9 +909,9 @@ fn on_exit_heading_setext_underline(context: &mut CompileContext) { let level: usize = if head == Code::Char('-') { 2 } else { 1 }; context.line_ending_if_needed(); - context.tag(format!("<h{}>", level)); - context.push(text); - context.tag(format!("</h{}>", level)); + context.tag(&*format!("<h{}>", level)); + context.push(&*text); + context.tag(&*format!("</h{}>", level)); } /// Handle [`Exit`][EventType::Exit]:{[`HtmlFlow`][Token::HtmlFlow],[`HtmlText`][Token::HtmlText]}. 
@@ -942,7 +926,7 @@ fn on_exit_html_data(context: &mut CompileContext) { &from_exit_event(context.events, context.index), false, ); - context.push(context.encode_opt(&slice)); + context.push_raw(&*slice); } /// Handle [`Exit`][EventType::Exit]:[`Label`][Token::Label]. @@ -965,15 +949,15 @@ fn on_exit_label_text(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`LineEnding`][Token::LineEnding]. fn on_exit_line_ending(context: &mut CompileContext) { if context.code_text_inside { - context.push(" ".to_string()); + context.push(" "); } else if context.slurp_one_line_ending { context.slurp_one_line_ending = false; } else { - context.push(context.encode_opt(&serialize( + context.push_raw(&*serialize( context.codes, &from_exit_event(context.events, context.index), false, - ))); + )); } } @@ -986,7 +970,7 @@ fn on_exit_list(context: &mut CompileContext) { }; context.tight_stack.pop(); context.line_ending(); - context.tag(format!("</{}>", tag_name)); + context.tag(&*format!("</{}>", tag_name)); } /// Handle [`Exit`][EventType::Exit]:[`ListItem`][Token::ListItem]. @@ -1012,7 +996,7 @@ fn on_exit_list_item(context: &mut CompileContext) { context.line_ending_if_needed(); } - context.tag("</li>".to_string()); + context.tag("</li>"); } /// Handle [`Exit`][EventType::Exit]:[`ListItemValue`][Token::ListItemValue]. 
@@ -1028,7 +1012,9 @@ fn on_exit_list_item_value(context: &mut CompileContext) { let value = slice.parse::<u32>().ok().unwrap(); if value != 1 { - context.tag(format!(" start=\"{}\"", encode(&value.to_string()))); + context.tag(" start=\""); + context.tag(&*value.to_string()); + context.tag("\""); } } } @@ -1082,9 +1068,9 @@ fn on_exit_media(context: &mut CompileContext) { }; let destination = if let Some(destination) = destination { - destination.clone() + destination } else { - "".to_string() + "" }; let title = if let Some(title) = title { @@ -1094,20 +1080,20 @@ fn on_exit_media(context: &mut CompileContext) { }; if media.image { - context.tag(format!( + context.tag(&*format!( "<img src=\"{}\" alt=\"", - sanitize_uri(&destination, &context.protocol_src), + sanitize_uri(destination, &context.protocol_src), )); - context.push(label); - context.tag(format!("\"{} />", title)); + context.push(&*label); + context.tag(&*format!("\"{} />", title)); } else { - context.tag(format!( + context.tag(&*format!( "<a href=\"{}\"{}>", - sanitize_uri(&destination, &context.protocol_href), + sanitize_uri(destination, &context.protocol_href), title, )); - context.push(label); - context.tag("</a>".to_string()); + context.push(&*label); + context.tag("</a>"); }; } @@ -1118,7 +1104,7 @@ fn on_exit_paragraph(context: &mut CompileContext) { if *tight { context.slurp_one_line_ending = true; } else { - context.tag("</p>".to_string()); + context.tag("</p>"); } } @@ -1151,11 +1137,11 @@ fn on_exit_resource_title_string(context: &mut CompileContext) { /// Handle [`Exit`][EventType::Exit]:[`Strong`][Token::Strong]. fn on_exit_strong(context: &mut CompileContext) { - context.tag("</strong>".to_string()); + context.tag("</strong>"); } /// Handle [`Exit`][EventType::Exit]:[`ThematicBreak`][Token::ThematicBreak]. 
fn on_exit_thematic_break(context: &mut CompileContext) { context.line_ending_if_needed(); - context.tag("<hr />".to_string()); + context.tag("<hr />"); } diff --git a/src/construct/character_reference.rs b/src/construct/character_reference.rs index ce7cd31..a4cbec1 100644 --- a/src/construct/character_reference.rs +++ b/src/construct/character_reference.rs @@ -121,7 +121,7 @@ impl Kind { #[derive(Debug, Clone)] struct Info { /// All parsed characters. - buffer: Vec<char>, + buffer: String, /// Kind of character reference. kind: Kind, } @@ -162,7 +162,7 @@ pub fn start(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { /// ``` fn open(tokenizer: &mut Tokenizer, code: Code) -> StateFnResult { let info = Info { - buffer: vec![], + buffer: String::new(), kind: Kind::Named, }; if let Code::Char('#') = code { @@ -216,10 +216,8 @@ fn numeric(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResu fn value(tokenizer: &mut Tokenizer, code: Code, mut info: Info) -> StateFnResult { match code { Code::Char(';') if !info.buffer.is_empty() => { - let unknown_named = Kind::Named == info.kind && { - let value = info.buffer.iter().collect::<String>(); - !CHARACTER_REFERENCES.iter().any(|d| d.0 == value) - }; + let unknown_named = Kind::Named == info.kind + && !CHARACTER_REFERENCES.iter().any(|d| d.0 == info.buffer); if unknown_named { (State::Nok, None) diff --git a/src/util/codes.rs b/src/util/codes.rs index 9b6ad39..d35d7d9 100644 --- a/src/util/codes.rs +++ b/src/util/codes.rs @@ -5,19 +5,21 @@ use crate::tokenizer::Code; /// Turn a string into codes. pub fn parse(value: &str) -> Vec<Code> { - let mut codes: Vec<Code> = vec![]; + // Note: It’ll grow a bit bigger with each `Code::VirtualSpace`, smaller + // with `Code::CarriageReturnLineFeed`. 
+ let mut codes: Vec<Code> = Vec::with_capacity(value.len()); let mut at_start = true; let mut at_carriage_return = false; let mut column = 1; for char in value.chars() { if at_start { + at_start = false; + if char == '\u{feff}' { // Ignore. continue; } - - at_start = false; } // Send a CRLF. @@ -83,34 +85,33 @@ pub fn parse(value: &str) -> Vec<Code> { /// Serialize codes, optionally expanding tabs. pub fn serialize(codes: &[Code], expand_tabs: bool) -> String { let mut at_tab = false; - let mut index = 0; - let mut value: Vec<char> = vec![]; + // Note: It’ll grow a bit smaller with each + // `Code::Char('\t') | Code::VirtualSpace` if `expand_tabs` is false, + // and bigger with `Code::CarriageReturnLineFeed`, + let mut value = String::with_capacity(codes.len()); - while index < codes.len() { - let code = codes[index]; + for code in codes { let mut at_tab_next = false; match code { Code::CarriageReturnLineFeed => { - value.push('\r'); - value.push('\n'); + value.push_str("\r\n"); } - Code::Char(char) if char == '\n' || char == '\r' => { - value.push(char); + Code::Char(char) if *char == '\n' || *char == '\r' => { + value.push(*char); } - Code::Char(char) if char == '\t' => { + Code::Char(char) if *char == '\t' => { at_tab_next = true; - value.push(if expand_tabs { ' ' } else { char }); + value.push(if expand_tabs { ' ' } else { *char }); } Code::VirtualSpace => { if !expand_tabs && at_tab { - index += 1; continue; } value.push(' '); } Code::Char(char) => { - value.push(char); + value.push(*char); } Code::None => { unreachable!("unexpected EOF code in codes"); @@ -118,9 +119,7 @@ pub fn serialize(codes: &[Code], expand_tabs: bool) -> String { } at_tab = at_tab_next; - - index += 1; } - value.into_iter().collect() + value } diff --git a/src/util/encode.rs b/src/util/encode.rs index a3bd589..965ea5c 100644 --- a/src/util/encode.rs +++ b/src/util/encode.rs @@ -20,37 +20,31 @@ /// ## References /// /// * [`micromark-util-encode` in 
`micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-encode) -pub fn encode(value: &str) -> String { - let mut result: Vec<&str> = vec![]; - let mut start = 0; - let mut index = 0; +pub fn encode<S: Into<String>>(value: S) -> String { + let mut value = value.into(); - for byte in value.bytes() { - if let Some(replacement) = match byte { - b'&' => Some("&amp;"), - b'"' => Some("&quot;"), - b'<' => Some("&lt;"), - b'>' => Some("&gt;"), - _ => None, - } { - if start != index { - result.push(&value[start..index]); - } + // It’ll grow a bit bigger for each dangerous character. + let mut result = String::with_capacity(value.len()); - result.push(replacement); - start = index + 1; - } - - index += 1; + while let Some(indice) = value.find(check) { + let after = value.split_off(indice + 1); + let dangerous = value.pop().unwrap(); + result.push_str(&value); + result.push_str(match dangerous { + '&' => "&amp;", + '"' => "&quot;", + '<' => "&lt;", + '>' => "&gt;", + _ => unreachable!("xxx"), + }); + value = after; } - if start == 0 { - value.to_string() - } else { - if start < index { - result.push(&value[start..index]); - } + result.push_str(&value); - result.join("") - } + result +} + +fn check(char: char) -> bool { + matches!(char, '&' | '"' | '<' | '>') } diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs index feb7239..42a2bb0 100644 --- a/src/util/normalize_identifier.rs +++ b/src/util/normalize_identifier.rs @@ -32,7 +32,8 @@ /// [definition]: crate::construct::definition /// [label_end]: crate::construct::label_end pub fn normalize_identifier(value: &str) -> String { - let mut codes = vec![]; + // Note: it’ll grow a bit smaller for consecutive whitespace. 
+ let mut result = String::with_capacity(value.len()); let mut at_start = true; let mut at_whitespace = true; @@ -44,10 +45,10 @@ pub fn normalize_identifier(value: &str) -> String { } _ => { if at_whitespace && !at_start { - codes.push(' '); + result.push(' '); } - codes.push(char); + result.push(char); at_start = false; at_whitespace = false; } @@ -66,9 +67,5 @@ pub fn normalize_identifier(value: &str) -> String { // to `SS` (U+0053 U+0053). // If we’d inverse the steps, for `ẞ`, we’d first uppercase without a // change, and then lowercase to `ß`, which would not match `ss`. - codes - .iter() - .collect::<String>() - .to_lowercase() - .to_uppercase() + result.to_lowercase().to_uppercase() } diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs index 55b15e4..81450ae 100644 --- a/src/util/sanitize_uri.rs +++ b/src/util/sanitize_uri.rs @@ -32,32 +32,25 @@ use crate::util::encode::encode; /// /// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri) pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String { - let value = encode(&normalize_uri(value)); + let value = encode(normalize_uri(value)); if let Some(protocols) = protocols { - let chars: Vec<char> = value.chars().collect(); - let mut index = 0; - let mut colon: Option<usize> = None; - - while index < chars.len() { - let char = chars[index]; - - match char { - ':' => { - colon = Some(index); - break; + let end = value.find(|c| matches!(c, '?' | '#' | '/')); + let mut colon = value.find(|c| matches!(c, ':')); + + // If the first colon is after `?`, `#`, or `/`, it’s not a protocol. + if let Some(end) = end { + if let Some(index) = colon { + if index > end { + colon = None; } - '?' | '#' | '/' => break, - _ => {} } - - index += 1; } - // If there is no protocol, or the first colon is after `?`, `#`, or `/`, it’s relative. - // It is a protocol, it should be allowed. 
+ // If there is no protocol, it’s relative, and fine. if let Some(colon) = colon { - let protocol = chars[0..colon].iter().collect::<String>().to_lowercase(); + // If it is a protocol, it should be allowed. + let protocol = value[0..colon].to_lowercase(); if !protocols.contains(&protocol.as_str()) { return "".to_string(); } @@ -85,8 +78,9 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String { /// /// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri) fn normalize_uri(value: &str) -> String { - let chars: Vec<char> = value.chars().collect(); - let mut result: Vec<String> = vec![]; + let chars = value.chars().collect::<Vec<_>>(); + // Note: it’ll grow bigger for each non-ascii or non-safe character. + let mut result = String::with_capacity(value.len()); let mut index = 0; let mut start = 0; let mut buff = [0; 4]; @@ -104,16 +98,15 @@ fn normalize_uri(value: &str) -> String { continue; } - // Note: Rust already takes care of lone astral surrogates. + // Note: Rust already takes care of lone surrogates. // Non-ascii or not allowed ascii. if char >= '\u{0080}' || !matches!(char, '!' | '#' | '$' | '&'..=';' | '=' | '?'..='Z' | '_' | 'a'..='z' | '~') { - result.push(chars[start..index].iter().collect::<String>()); - + result.push_str(&chars[start..index].iter().collect::<String>()); char.encode_utf8(&mut buff); - result.push( - buff[0..char.len_utf8()] + result.push_str( + &buff[0..char.len_utf8()] .iter() .map(|&byte| format!("%{:>02X}", byte)) .collect::<String>(), @@ -125,7 +118,7 @@ fn normalize_uri(value: &str) -> String { index += 1; } - result.push(chars[start..].iter().collect::<String>()); + result.push_str(&chars[start..].iter().collect::<String>()); - result.join("") + result } |