From a820d849c3e20a1d72137072d70a7c8e00306f98 Mon Sep 17 00:00:00 2001
From: Titus Wormer <tituswormer@gmail.com>

- context.tag(" />".to_string());
+ context.tag(" />");
}
}
@@ -616,7 +600,7 @@ fn on_enter_resource_destination_string(context: &mut CompileContext) {
/// Handle [`Enter`][EventType::Enter]:[`Strong`][Token::Strong].
fn on_enter_strong(context: &mut CompileContext) {
- context.tag("<strong>".to_string());
+ context.tag("<strong>");
}
/// Handle [`Exit`][EventType::Exit]:[`AutolinkEmail`][Token::AutolinkEmail].
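The pattern repeated throughout these `compiler.rs` hunks is that `CompileContext::tag` (and the new `push_raw`) now appear to borrow `&str` instead of taking an owned `String`, so string literals and `&*format!(…)` temporaries no longer force an extra allocation per call. A minimal sketch of that API shape, using a made-up `Buf` type rather than the crate's actual `CompileContext`:

```rust
// Illustrative only: a stand-in `Buf`, not the crate's `CompileContext`.
struct Buf {
    out: String,
}

impl Buf {
    // Old shape: every caller has to allocate, e.g. `buf.tag_owned("<em>".to_string())`.
    fn tag_owned(&mut self, value: String) {
        self.out.push_str(&value);
    }

    // New shape: literals and borrowed `format!` results work directly.
    fn tag(&mut self, value: &str) {
        self.out.push_str(value);
    }
}

fn main() {
    let mut buf = Buf { out: String::new() };
    buf.tag("<em>");
    buf.tag(&*format!("<h{}>", 1));
    buf.tag_owned("</em>".to_string());
    assert_eq!(buf.out, "<em><h1></em>");
}
```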
@@ -626,15 +610,15 @@ fn on_exit_autolink_email(context: &mut CompileContext) {
&from_exit_event(context.events, context.index),
false,
);
- context.tag(format!(
+ context.tag(&*format!(
"",
sanitize_uri(
format!("mailto:{}", slice.as_str()).as_str(),
&context.protocol_href
)
));
- context.push(context.encode_opt(&slice));
- context.tag("".to_string());
+ context.push_raw(&*slice);
+ context.tag("");
}
/// Handle [`Exit`][EventType::Exit]:[`AutolinkProtocol`][Token::AutolinkProtocol].
@@ -644,17 +628,17 @@ fn on_exit_autolink_protocol(context: &mut CompileContext) {
&from_exit_event(context.events, context.index),
false,
);
- context.tag(format!(
+ context.tag(&*format!(
"",
sanitize_uri(slice.as_str(), &context.protocol_href)
));
- context.push(context.encode_opt(&slice));
- context.tag("".to_string());
+ context.push_raw(&*slice);
+ context.tag("");
}
/// Handle [`Exit`][EventType::Exit]:{[`HardBreakEscape`][Token::HardBreakEscape],[`HardBreakTrailing`][Token::HardBreakTrailing]}.
fn on_exit_break(context: &mut CompileContext) {
- context.tag("".to_string());
+ context.tag("
");
}
/// Handle [`Exit`][EventType::Exit]:[`CharacterReferenceMarker`][Token::CharacterReferenceMarker].
@@ -705,17 +689,17 @@ fn on_exit_character_reference_value(context: &mut CompileContext) {
CharacterReferenceKind::Named => decode_named(ref_string),
};
- context.push(context.encode_opt(&value));
+ context.push_raw(&*value);
}
/// Handle [`Exit`][EventType::Exit]:[`CodeFlowChunk`][Token::CodeFlowChunk].
fn on_exit_code_flow_chunk(context: &mut CompileContext) {
context.code_flow_seen_data = Some(true);
- context.push(context.encode_opt(&serialize(
+ context.push_raw(&*serialize(
context.codes,
&from_exit_event(context.events, context.index),
false,
- )));
+ ));
}
/// Handle [`Exit`][EventType::Exit]:[`CodeFencedFence`][Token::CodeFencedFence].
@@ -727,7 +711,7 @@ fn on_exit_code_fenced_fence(context: &mut CompileContext) {
};
if count == 0 {
- context.tag(">".to_string());
+ context.tag(">");
context.slurp_one_line_ending = true;
}
@@ -737,7 +721,7 @@ fn on_exit_code_fenced_fence(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`CodeFencedFenceInfo`][Token::CodeFencedFenceInfo].
fn on_exit_code_fenced_fence_info(context: &mut CompileContext) {
let value = context.resume();
- context.tag(format!(" class=\"language-{}\"", value));
+ context.tag(&*format!(" class=\"language-{}\"", value));
}
/// Handle [`Exit`][EventType::Exit]:{[`CodeFenced`][Token::CodeFenced],[`CodeIndented`][Token::CodeIndented]}.
@@ -764,7 +748,7 @@ fn on_exit_code_flow(context: &mut CompileContext) {
context.line_ending_if_needed();
}
- context.tag("".to_string());
+ context.tag("");
if let Some(count) = context.code_fenced_fences_count.take() {
if count < 2 {
@@ -792,12 +776,12 @@ fn on_exit_code_text(context: &mut CompileContext) {
}
context.code_text_inside = false;
- context.push(if trim {
+ context.push(&*if trim {
result[1..(result.len() - 1)].to_string()
} else {
result
});
- context.tag("".to_string());
+ context.tag("");
}
/// Handle [`Exit`][EventType::Exit]:*.
@@ -810,11 +794,11 @@ fn on_exit_drop(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:{[`CodeTextData`][Token::CodeTextData],[`Data`][Token::Data],[`CharacterEscapeValue`][Token::CharacterEscapeValue]}.
fn on_exit_data(context: &mut CompileContext) {
// Just output it.
- context.push(context.encode_opt(&serialize(
+ context.push_raw(&*serialize(
context.codes,
&from_exit_event(context.events, context.index),
false,
- )));
+ ));
}
/// Handle [`Exit`][EventType::Exit]:[`Definition`][Token::Definition].
@@ -870,7 +854,7 @@ fn on_exit_definition_title_string(context: &mut CompileContext) {
/// Handle [`Exit`][EventType::Exit]:[`Strong`][Token::Emphasis].
fn on_exit_emphasis(context: &mut CompileContext) {
- context.tag("".to_string());
+ context.tag("");
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtx`][Token::HeadingAtx].
@@ -880,7 +864,7 @@ fn on_exit_heading_atx(context: &mut CompileContext) {
.take()
.expect("`atx_opening_sequence_size` must be set in headings");
- context.tag(format!("", rank));
+ context.tag(&*format!("", rank));
}
/// Handle [`Exit`][EventType::Exit]:[`HeadingAtxSequence`][Token::HeadingAtxSequence].
@@ -895,14 +879,14 @@ fn on_exit_heading_atx_sequence(context: &mut CompileContext) {
.len();
context.line_ending_if_needed();
context.atx_opening_sequence_size = Some(rank);
- context.tag(format!("");
}
/// Handle [`Enter`][EventType::Enter]:[`CodeIndented`][Token::CodeIndented].
fn on_enter_code_indented(context: &mut CompileContext) {
context.code_flow_seen_data = Some(false);
context.line_ending_if_needed();
- context.tag("
".to_string());
+ context.tag("".to_string());
+ context.tag("
");
}
/// Handle [`Enter`][EventType::Enter]:[`CodeFenced`][Token::CodeFenced].
@@ -430,14 +414,14 @@ fn on_enter_code_fenced(context: &mut CompileContext) {
context.code_flow_seen_data = Some(false);
context.line_ending_if_needed();
// Note that no `>` is used, which is added later.
- context.tag("
".to_string());
+ context.tag("
");
context.buffer();
}
@@ -462,7 +446,7 @@ fn on_enter_definition_destination_string(context: &mut CompileContext) {
/// Handle [`Enter`][EventType::Enter]:[`Emphasis`][Token::Emphasis].
fn on_enter_emphasis(context: &mut CompileContext) {
- context.tag("".to_string());
+ context.tag("");
}
/// Handle [`Enter`][EventType::Enter]:[`HtmlFlow`][Token::HtmlFlow].
@@ -563,7 +547,7 @@ fn on_enter_list(context: &mut CompileContext) {
context.tight_stack.push(!loose);
context.line_ending_if_needed();
// Note: no `>`.
- context.tag(format!(
+ context.tag(&*format!(
"<{}",
if *token_type == Token::ListOrdered {
"ol"
@@ -579,11 +563,11 @@ fn on_enter_list_item_marker(context: &mut CompileContext) {
let expect_first_item = context.expect_first_item.take().unwrap();
if expect_first_item {
- context.tag(">".to_string());
+ context.tag(">");
}
context.line_ending_if_needed();
- context.tag("
".to_string());
+ context.tag("
");
}
/// Handle [`Exit`][EventType::Exit]:[`BlankLineEnding`][Token::BlankLineEnding].
@@ -669,7 +653,7 @@ fn on_exit_block_quote(context: &mut CompileContext) {
context.tight_stack.pop();
context.line_ending_if_needed();
context.slurp_one_line_ending = false;
- context.tag("
{
- let mut codes: Vec<Code> = vec![];
+ // Note: It’ll grow a bit bigger with each `Code::VirtualSpace`, smaller
+ // with `Code::CarriageReturnLineFeed`.
+ let mut codes: Vec<Code> = Vec::with_capacity(value.len());
let mut at_start = true;
let mut at_carriage_return = false;
let mut column = 1;
for char in value.chars() {
if at_start {
+ at_start = false;
+
if char == '\u{feff}' {
// Ignore.
continue;
}
-
- at_start = false;
}
// Send a CRLF.
@@ -83,34 +85,33 @@ pub fn parse(value: &str) -> Vec<Code> {
/// Serialize codes, optionally expanding tabs.
pub fn serialize(codes: &[Code], expand_tabs: bool) -> String {
let mut at_tab = false;
- let mut index = 0;
- let mut value: Vec<char> = vec![];
+ // Note: It’ll grow a bit smaller with each
+ // `Code::Char('\t') | Code::VirtualSpace` if `expand_tabs` is false,
+ // and bigger with `Code::CarriageReturnLineFeed`,
+ let mut value = String::with_capacity(codes.len());
- while index < codes.len() {
- let code = codes[index];
+ for code in codes {
let mut at_tab_next = false;
match code {
Code::CarriageReturnLineFeed => {
- value.push('\r');
- value.push('\n');
+ value.push_str("\r\n");
}
- Code::Char(char) if char == '\n' || char == '\r' => {
- value.push(char);
+ Code::Char(char) if *char == '\n' || *char == '\r' => {
+ value.push(*char);
}
- Code::Char(char) if char == '\t' => {
+ Code::Char(char) if *char == '\t' => {
at_tab_next = true;
- value.push(if expand_tabs { ' ' } else { char });
+ value.push(if expand_tabs { ' ' } else { *char });
}
Code::VirtualSpace => {
if !expand_tabs && at_tab {
- index += 1;
continue;
}
value.push(' ');
}
Code::Char(char) => {
- value.push(char);
+ value.push(*char);
}
Code::None => {
unreachable!("unexpected EOF code in codes");
@@ -118,9 +119,7 @@ pub fn serialize(codes: &[Code], expand_tabs: bool) -> String {
}
at_tab = at_tab_next;
-
- index += 1;
}
- value.into_iter().collect()
+ value
}
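The rewritten `serialize` above swaps index-based iteration plus a `Vec<char>` for a borrowed `for` loop writing into a pre-sized `String`. A self-contained sketch of the same shape, with a simplified `Code` enum standing in for the crate's (the real one has more variants, such as `Code::None`):

```rust
// Simplified stand-in for the crate's `Code`; just enough to show the shape.
#[derive(Clone, Copy)]
enum Code {
    CarriageReturnLineFeed,
    VirtualSpace,
    Char(char),
}

fn serialize(codes: &[Code], expand_tabs: bool) -> String {
    let mut at_tab = false;
    // Rough capacity guess: about one byte per code.
    let mut value = String::with_capacity(codes.len());

    for code in codes {
        let mut at_tab_next = false;

        match code {
            Code::CarriageReturnLineFeed => value.push_str("\r\n"),
            Code::Char('\t') => {
                at_tab_next = true;
                value.push(if expand_tabs { ' ' } else { '\t' });
            }
            // A virtual space right after an unexpanded tab is dropped.
            Code::VirtualSpace if !expand_tabs && at_tab => {}
            Code::VirtualSpace => value.push(' '),
            Code::Char(char) => value.push(*char),
        }

        at_tab = at_tab_next;
    }

    value
}

fn main() {
    let codes = [Code::Char('a'), Code::Char('\t'), Code::VirtualSpace, Code::Char('b')];
    assert_eq!(serialize(&codes, false), "a\tb");
    assert_eq!(serialize(&codes, true), "a  b");
}
```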
diff --git a/src/util/encode.rs b/src/util/encode.rs
index a3bd589..965ea5c 100644
--- a/src/util/encode.rs
+++ b/src/util/encode.rs
@@ -20,37 +20,31 @@
/// ## References
///
/// * [`micromark-util-encode` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-encode)
-pub fn encode(value: &str) -> String {
- let mut result: Vec<&str> = vec![];
- let mut start = 0;
- let mut index = 0;
+pub fn encode<S: Into<String>>(value: S) -> String {
+ let mut value = value.into();
- for byte in value.bytes() {
- if let Some(replacement) = match byte {
- b'&' => Some("&amp;"),
- b'"' => Some("&quot;"),
- b'<' => Some("&lt;"),
- b'>' => Some("&gt;"),
- _ => None,
- } {
- if start != index {
- result.push(&value[start..index]);
- }
+ // It’ll grow a bit bigger for each dangerous character.
+ let mut result = String::with_capacity(value.len());
- result.push(replacement);
- start = index + 1;
- }
-
- index += 1;
+ while let Some(indice) = value.find(check) {
+ let after = value.split_off(indice + 1);
+ let dangerous = value.pop().unwrap();
+ result.push_str(&value);
+ result.push_str(match dangerous {
+ '&' => "&amp;",
+ '"' => "&quot;",
+ '<' => "&lt;",
+ '>' => "&gt;",
+ _ => unreachable!("xxx"),
+ });
+ value = after;
}
- if start == 0 {
- value.to_string()
- } else {
- if start < index {
- result.push(&value[start..index]);
- }
+ result.push_str(&value);
- result.join("")
- }
+ result
+}
+
+fn check(char: char) -> bool {
+ matches!(char, '&' | '"' | '<' | '>')
}
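Written out as a standalone function, the new `find` + `split_off` loop looks roughly like this (here with `impl Into<String>` in place of the explicit type parameter, and the `check` predicate inlined):

```rust
fn encode(value: impl Into<String>) -> String {
    let mut value = value.into();
    // It’ll grow a bit bigger for each dangerous character.
    let mut result = String::with_capacity(value.len());

    // Find the next dangerous character, split right after it, pop it off,
    // and copy the safe prefix plus its entity into the result.
    while let Some(index) = value.find(|char| matches!(char, '&' | '"' | '<' | '>')) {
        let after = value.split_off(index + 1);
        let dangerous = value.pop().unwrap();
        result.push_str(&value);
        result.push_str(match dangerous {
            '&' => "&amp;",
            '"' => "&quot;",
            '<' => "&lt;",
            '>' => "&gt;",
            _ => unreachable!(),
        });
        value = after;
    }

    result.push_str(&value);
    result
}

fn main() {
    assert_eq!(encode("a < b & \"c\""), "a &lt; b &amp; &quot;c&quot;");
    assert_eq!(encode(String::from("safe")), "safe");
}
```

The escaped set is the same four characters as before; only the way the output is assembled changes.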
diff --git a/src/util/normalize_identifier.rs b/src/util/normalize_identifier.rs
index feb7239..42a2bb0 100644
--- a/src/util/normalize_identifier.rs
+++ b/src/util/normalize_identifier.rs
@@ -32,7 +32,8 @@
/// [definition]: crate::construct::definition
/// [label_end]: crate::construct::label_end
pub fn normalize_identifier(value: &str) -> String {
- let mut codes = vec![];
+ // Note: it’ll grow a bit smaller for consecutive whitespace.
+ let mut result = String::with_capacity(value.len());
let mut at_start = true;
let mut at_whitespace = true;
@@ -44,10 +45,10 @@ pub fn normalize_identifier(value: &str) -> String {
}
_ => {
if at_whitespace && !at_start {
- codes.push(' ');
+ result.push(' ');
}
- codes.push(char);
+ result.push(char);
at_start = false;
at_whitespace = false;
}
@@ -66,9 +67,5 @@ pub fn normalize_identifier(value: &str) -> String {
// to `SS` (U+0053 U+0053).
// If we’d inverse the steps, for `ẞ`, we’d first uppercase without a
// change, and then lowercase to `ß`, which would not match `ss`.
- codes
- .iter()
- .collect::<String>()
- .to_lowercase()
- .to_uppercase()
+ result.to_lowercase().to_uppercase()
}
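The lowercase-then-uppercase order the comment above justifies can be checked directly:

```rust
fn main() {
    // ß and ẞ both case-fold to “SS”, the same as `ss`, so `[ß]`, `[ẞ]`,
    // and `[SS]` all normalize to one identifier.
    assert_eq!("ß".to_lowercase().to_uppercase(), "SS");
    assert_eq!("ẞ".to_lowercase().to_uppercase(), "SS");
    assert_eq!("ss".to_lowercase().to_uppercase(), "SS");
    // The inverse order would leave ẞ as ß and fail to match “ss”.
    assert_eq!("ẞ".to_uppercase().to_lowercase(), "ß");
}
```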
diff --git a/src/util/sanitize_uri.rs b/src/util/sanitize_uri.rs
index 55b15e4..81450ae 100644
--- a/src/util/sanitize_uri.rs
+++ b/src/util/sanitize_uri.rs
@@ -32,32 +32,25 @@ use crate::util::encode::encode;
///
/// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
- let value = encode(&normalize_uri(value));
+ let value = encode(normalize_uri(value));
if let Some(protocols) = protocols {
- let chars: Vec<char> = value.chars().collect();
- let mut index = 0;
- let mut colon: Option<usize> = None;
-
- while index < chars.len() {
- let char = chars[index];
-
- match char {
- ':' => {
- colon = Some(index);
- break;
+ let end = value.find(|c| matches!(c, '?' | '#' | '/'));
+ let mut colon = value.find(|c| matches!(c, ':'));
+
+ // If the first colon is after `?`, `#`, or `/`, it’s not a protocol.
+ if let Some(end) = end {
+ if let Some(index) = colon {
+ if index > end {
+ colon = None;
}
- '?' | '#' | '/' => break,
- _ => {}
}
-
- index += 1;
}
- // If there is no protocol, or the first colon is after `?`, `#`, or `/`, it’s relative.
- // It is a protocol, it should be allowed.
+ // If there is no protocol, it’s relative, and fine.
if let Some(colon) = colon {
- let protocol = chars[0..colon].iter().collect::<String>().to_lowercase();
+ // If it is a protocol, it should be allowed.
+ let protocol = value[0..colon].to_lowercase();
if !protocols.contains(&protocol.as_str()) {
return "".to_string();
}
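The new colon/terminator comparison can be isolated into a small helper to show which values count as having a protocol (this `protocol_of` is illustrative, not part of the crate):

```rust
// Illustrative helper: a colon only marks a protocol if it appears before
// the first `?`, `#`, or `/`.
fn protocol_of(value: &str) -> Option<String> {
    let end = value.find(|c| matches!(c, '?' | '#' | '/'));
    let mut colon = value.find(|c| matches!(c, ':'));

    if let (Some(end), Some(index)) = (end, colon) {
        if index > end {
            colon = None;
        }
    }

    colon.map(|index| value[0..index].to_lowercase())
}

fn main() {
    assert_eq!(protocol_of("HTTPS://example.com"), Some("https".to_string()));
    assert_eq!(protocol_of("javascript:alert(1)"), Some("javascript".to_string()));
    // Relative references: the colon comes after `/` or `#`, so no protocol.
    assert_eq!(protocol_of("./a:b"), None);
    assert_eq!(protocol_of("page#section:1"), None);
}
```

`sanitize_uri` then only keeps the value when that lowercased protocol is in the allow list.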
@@ -85,8 +78,9 @@ pub fn sanitize_uri(value: &str, protocols: &Option<Vec<&str>>) -> String {
///
/// * [`micromark-util-sanitize-uri` in `micromark`](https://github.com/micromark/micromark/tree/main/packages/micromark-util-sanitize-uri)
fn normalize_uri(value: &str) -> String {
- let chars: Vec<char> = value.chars().collect();
- let mut result: Vec<String> = vec![];
+ let chars = value.chars().collect::<Vec<char>>();
+ // Note: it’ll grow bigger for each non-ascii or non-safe character.
+ let mut result = String::with_capacity(value.len());
let mut index = 0;
let mut start = 0;
let mut buff = [0; 4];
@@ -104,16 +98,15 @@ fn normalize_uri(value: &str) -> String {
continue;
}
- // Note: Rust already takes care of lone astral surrogates.
+ // Note: Rust already takes care of lone surrogates.
// Non-ascii or not allowed ascii.
if char >= '\u{0080}'
|| !matches!(char, '!' | '#' | '$' | '&'..=';' | '=' | '?'..='Z' | '_' | 'a'..='z' | '~')
{
- result.push(chars[start..index].iter().collect::<String>());
-
+ result.push_str(&chars[start..index].iter().collect::<String>());
char.encode_utf8(&mut buff);
- result.push(
- buff[0..char.len_utf8()]
+ result.push_str(
+ &buff[0..char.len_utf8()]
.iter()
.map(|&byte| format!("%{:>02X}", byte))
.collect::<String>(),
@@ -125,7 +118,7 @@ fn normalize_uri(value: &str) -> String {
index += 1;
}
- result.push(chars[start..].iter().collect::<String>());
+ result.push_str(&chars[start..].iter().collect::<String>());
- result.join("")
+ result
}
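The per-character escape step in `normalize_uri` (UTF-8 encode, then `%XX` per byte) can be exercised on its own; this helper just lifts those few lines out of the loop:

```rust
// Percent-encode one character the way `normalize_uri` does: take its UTF-8
// bytes and format each as `%XX`.
fn percent_encode(char: char) -> String {
    let mut buff = [0; 4];
    char.encode_utf8(&mut buff);
    buff[0..char.len_utf8()]
        .iter()
        .map(|&byte| format!("%{:>02X}", byte))
        .collect()
}

fn main() {
    assert_eq!(percent_encode(' '), "%20");
    assert_eq!(percent_encode('é'), "%C3%A9");
    assert_eq!(percent_encode('€'), "%E2%82%AC");
}
```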
--
cgit