aboutsummaryrefslogtreecommitdiffstats
path: root/src/util/codes.rs
diff options
context:
space:
mode:
authorLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-05 13:03:09 +0200
committerLibravatar Titus Wormer <tituswormer@gmail.com>2022-07-05 13:03:09 +0200
commitfd860a975b84da9a79abfa247787e6adbd5ea34c (patch)
treebd9db168c57478f4f37c234eac4087c2d69a6445 /src/util/codes.rs
parent0bc099f8f8b6541a962e604b7ac25445a2a9252a (diff)
downloadmarkdown-rs-fd860a975b84da9a79abfa247787e6adbd5ea34c.tar.gz
markdown-rs-fd860a975b84da9a79abfa247787e6adbd5ea34c.tar.bz2
markdown-rs-fd860a975b84da9a79abfa247787e6adbd5ea34c.zip
Refactor code style
Diffstat (limited to 'src/util/codes.rs')
-rw-r--r--src/util/codes.rs126
1 files changed, 126 insertions, 0 deletions
diff --git a/src/util/codes.rs b/src/util/codes.rs
new file mode 100644
index 0000000..8a46d02
--- /dev/null
+++ b/src/util/codes.rs
@@ -0,0 +1,126 @@
+//! Utilities to deal with character codes.
+
+use crate::constant::TAB_SIZE;
+use crate::tokenizer::Code;
+
+/// Convert a string into a list of character codes.
+///
+/// The conversion normalizes a few things along the way:
+///
+/// *   every byte order mark (U+FEFF) at the very start of `value` is dropped
+/// *   a `\r` directly followed by `\n` becomes a single
+///     `Code::CarriageReturnLineFeed`; a lone `\r` or `\n` is kept as a
+///     `Code::Char`
+/// *   NUL (`\0`) is replaced by the replacement character (U+FFFD)
+/// *   a tab is emitted as `Code::Char('\t')` followed by as many
+///     `Code::VirtualSpace`s as are needed to reach the next tab stop
+///     (tab stops are `TAB_SIZE` columns apart)
+pub fn parse(value: &str) -> Vec<Code> {
+    let mut codes: Vec<Code> = vec![];
+    // Set after seeing a `\r`, until we know whether a `\n` follows it.
+    let mut pending_carriage_return = false;
+    // 1-indexed column of the next character; only used to size tabs.
+    let mut column = 1;
+
+    // Leading byte order marks are ignored.
+    for ch in value.chars().skip_while(|ch| *ch == '\u{feff}') {
+        if pending_carriage_return {
+            pending_carriage_return = false;
+
+            // CR directly followed by LF: send them as one CRLF.
+            if ch == '\n' {
+                codes.push(Code::CarriageReturnLineFeed);
+                continue;
+            }
+
+            // Not followed by LF: send the CR that was held back.
+            codes.push(Code::Char('\r'));
+        }
+
+        match ch {
+            // Hold the CR back: the next character decides what is sent.
+            '\r' => {
+                column = 1;
+                pending_carriage_return = true;
+            }
+            // A lone LF.
+            '\n' => {
+                column = 1;
+                codes.push(Code::Char(ch));
+            }
+            // A tab, padded with virtual spaces up to the next tab stop.
+            '\t' => {
+                // Zero when the tab itself sits on the last column before a
+                // tab stop.
+                let virtual_spaces = (TAB_SIZE - column % TAB_SIZE) % TAB_SIZE;
+                codes.push(Code::Char(ch));
+                for _ in 0..virtual_spaces {
+                    codes.push(Code::VirtualSpace);
+                }
+                column += 1 + virtual_spaces;
+            }
+            // NUL is replaced by a replacement character.
+            '\0' => {
+                column += 1;
+                codes.push(Code::Char('�'));
+            }
+            // Anything else is sent as-is.
+            _ => {
+                column += 1;
+                codes.push(Code::Char(ch));
+            }
+        }
+    }
+
+    // The input ended on a CR that was still being held back.
+    if pending_carriage_return {
+        codes.push(Code::Char('\r'));
+    }
+
+    codes
+}
+
+/// Serialize codes back into a string, optionally expanding tabs.
+///
+/// *   `codes`: the codes to serialize
+/// *   `expand_tabs`: when `true`, a tab and each of its virtual spaces are
+///     written as one space each; when `false`, a tab is written as `\t` and
+///     the virtual spaces directly following it are dropped
+///
+/// Virtual spaces that do not directly follow a tab (or a run of virtual
+/// spaces following a tab) are always written as spaces.
+///
+/// # Panics
+///
+/// Panics if `codes` contains `Code::None`.
+pub fn serialize(codes: &[Code], expand_tabs: bool) -> String {
+    // Whether the previously handled code was a tab, or a virtual space that
+    // was skipped because it belongs to a tab.
+    let mut at_tab = false;
+    // Every code yields at least one byte unless it is skipped, so this is a
+    // reasonable starting capacity.
+    let mut value = String::with_capacity(codes.len());
+
+    for &code in codes {
+        at_tab = match code {
+            Code::CarriageReturnLineFeed => {
+                value.push_str("\r\n");
+                false
+            }
+            Code::Char('\t') => {
+                value.push(if expand_tabs { ' ' } else { '\t' });
+                true
+            }
+            Code::VirtualSpace => {
+                // The literal tab already stands for these columns: skip,
+                // and leave `at_tab` set so the rest of the run is skipped
+                // too.
+                if !expand_tabs && at_tab {
+                    continue;
+                }
+                value.push(' ');
+                false
+            }
+            Code::Char(char) => {
+                value.push(char);
+                false
+            }
+            Code::None => {
+                unreachable!("unexpected EOF code in codes");
+            }
+        };
+    }
+
+    value
+}