about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Titus Wormer <tituswormer@gmail.com> 2022-06-20 18:53:38 +0200
committer Titus Wormer <tituswormer@gmail.com> 2022-06-20 18:53:38 +0200
commit 182467c1d393dee2081ff80f1c049cb145f23123 (patch)
tree 9ce529c815faab2db1f96b0820a78049b7633a8a
parent ef5f9a97493fe4a616b49a744d5a571a99ead8e9 (diff)
download markdown-rs-182467c1d393dee2081ff80f1c049cb145f23123.tar.gz
markdown-rs-182467c1d393dee2081ff80f1c049cb145f23123.tar.bz2
markdown-rs-182467c1d393dee2081ff80f1c049cb145f23123.zip
Add support for BOM
-rw-r--r-- readme.md 2
-rw-r--r-- src/subtokenize.rs 4
-rw-r--r-- src/tokenizer.rs 10
-rw-r--r-- tests/misc_bom.rs 16
-rw-r--r-- tests/misc_zero.rs 2
5 files changed, 24 insertions, 10 deletions
diff --git a/readme.md b/readme.md
index 6c2e57d..86b6b6e 100644
--- a/readme.md
+++ b/readme.md
@@ -66,7 +66,6 @@ cargo doc --document-private-items
### Small things
-- [ ] (1) Handle BOM at start
- [ ] (1) Parse initial and final whitespace of paragraphs (in text)
- [ ] (1) Add docs to subtokenize
- [ ] (1) Add module docs to parser
@@ -171,6 +170,7 @@ cargo doc --document-private-items
- [x] (1) Parse whitespace in each flow construct
- [x] (1) Connect `ChunkString` in label, destination, title
- [x] (1) Add support for line endings in `string`
+- [x] (1) Handle BOM at start
### Extensions
diff --git a/src/subtokenize.rs b/src/subtokenize.rs
index 4a29a01..0623a37 100644
--- a/src/subtokenize.rs
+++ b/src/subtokenize.rs
@@ -15,6 +15,10 @@ pub fn subtokenize(events: Vec<Event>, codes: &[Code]) -> (Vec<Event>, bool) {
let mut link_to_info: HashMap<usize, (usize, usize, usize)> = HashMap::new();
let mut done = true;
+ if events.is_empty() {
+ return (events, true);
+ }
+
while index < events.len() {
let event = &events[index];
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index d31c8c5..c0a7105 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -680,10 +680,20 @@ fn attempt_impl(
// To do: handle BOM at start?
pub fn as_codes(value: &str) -> Vec<Code> {
let mut codes: Vec<Code> = vec![];
+ let mut at_start = true;
let mut at_carriage_return = false;
let mut column = 1;
for char in value.chars() {
+ if at_start {
+ if char == '\u{feff}' {
+ // Ignore.
+ continue;
+ }
+
+ at_start = false;
+ }
+
// Send a CRLF.
if at_carriage_return && '\n' == char {
at_carriage_return = false;
diff --git a/tests/misc_bom.rs b/tests/misc_bom.rs
index 9805616..44f661e 100644
--- a/tests/misc_bom.rs
+++ b/tests/misc_bom.rs
@@ -1,15 +1,13 @@
extern crate micromark;
-// use micromark::micromark;
+use micromark::micromark;
#[test]
fn bom() {
- // // To do: BOM.
- // assert_eq!(micromark("\u{FEFF}"), "", "should ignore just a bom");
+ assert_eq!(micromark("\u{FEFF}"), "", "should ignore just a bom");
- // // To do: BOM.
- // assert_eq!(
- // micromark("\u{FEFF}# hea\u{FEFF}ding"),
- // "<h1>hea\u{FEFF}ding</h1>",
- // "should ignore a bom"
- // );
+ assert_eq!(
+ micromark("\u{FEFF}# hea\u{FEFF}ding"),
+ "<h1>hea\u{FEFF}ding</h1>",
+ "should ignore a bom"
+ );
}
diff --git a/tests/misc_zero.rs b/tests/misc_zero.rs
index 946a3e2..47aa8ed 100644
--- a/tests/misc_zero.rs
+++ b/tests/misc_zero.rs
@@ -3,6 +3,8 @@ use micromark::micromark;
#[test]
fn zero() {
+ assert_eq!(micromark(""), "", "should support no markdown");
+
assert_eq!(
micromark("asd\0asd"),
"<p>asd�asd</p>",